Spaces:
Sleeping
Sleeping
Upload 14 files
Browse files- .env.example +15 -0
- .gitignore +52 -0
- Dockerfile +32 -0
- LICENSE +15 -0
- config.py +80 -0
- docker-compose.yml +24 -0
- image_utils.py +118 -0
- main.py +225 -0
- models/.gitkeep +2 -0
- quickstart.py +77 -0
- requirements.txt +14 -0
- setup.bat +76 -0
- setup.sh +64 -0
- test_api.py +104 -0
.env.example
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OmniParser Configuration

# Server Settings
HOST=0.0.0.0
PORT=8000
DEBUG=False

# Model Settings
MODEL_NAME=microsoft/OmniParser-v2.0
DEVICE=cpu
# DEVICE=cuda  # Uncomment for GPU support

# API Settings
# Maximum upload size in bytes (50 MB). The comment is kept on its own line
# because not every .env consumer strips inline comments from values.
MAX_FILE_SIZE=52428800
# Must be a JSON array: pydantic-settings decodes tuple/list fields as JSON,
# so a plain comma-separated value makes Settings() fail at start-up.
ALLOWED_EXTENSIONS=["jpg","jpeg","png","bmp","gif"]
|
.gitignore
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Environment
.env
.env.local

# Model cache (large files)
# Ignore the directory's contents rather than the directory itself: git cannot
# re-include a file whose parent directory is excluded, and models/.gitkeep
# must stay tracked.
models/*
!models/.gitkeep
*.pt
*.pth
*.onnx

# Logs
*.log
logs/

# Test outputs
test_output/
outputs/
*.json
*.txt
# requirements.txt is a project file, not a test output
!requirements.txt
|
Dockerfile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    libopencv-dev \
    python3-opencv \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create models directory
RUN mkdir -p models

# Expose port
EXPOSE 8000

# Health check. Uses only the standard library because "requests" is not
# listed in requirements.txt, and urlopen() raises on 4xx/5xx responses so an
# unhealthy (503) /health reply makes the probe exit non-zero.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"

# Run application
CMD ["python", "main.py"]
|
LICENSE
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 4 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 5 |
+
in the Software without restriction, including without limitation the rights
|
| 6 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 7 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 8 |
+
furnished to do so, subject to the following conditions:
|
| 9 |
+
|
| 10 |
+
The above copyright notice and this permission notice shall be included in all
|
| 11 |
+
copies or substantial portions of the Software.
|
| 12 |
+
|
| 13 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 14 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 15 |
+
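FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.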
|
config.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration management for OmniParser API
|
| 3 |
+
"""
|
| 4 |
+
from pydantic_settings import BaseSettings
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Settings(BaseSettings):
    """Application settings for the OmniParser API.

    Values come from environment variables and from a UTF-8 ``.env`` file,
    matched case-insensitively against the field names below. The defaults
    apply when a variable is not set.
    """

    # Pydantic v2 style configuration; replaces the deprecated nested
    # ``class Config`` with the same three options.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        # ``model_name`` below starts with pydantic's protected ``model_``
        # prefix; dropping that prefix from the protected list avoids the
        # warning emitted at class creation.
        "protected_namespaces": ("settings_",),
    }

    # Server
    host: str = "0.0.0.0"
    port: int = 8000
    debug: bool = False

    # Model
    model_name: str = "microsoft/OmniParser-v2.0"
    device: str = "cpu"  # "cpu" or "cuda"

    # File handling
    max_file_size: int = 52428800  # 50MB
    # Extensions are stored without the leading dot. When supplied through the
    # environment the value has to be a JSON array.
    allowed_extensions: tuple = ("jpg", "jpeg", "png", "bmp", "gif")

    # HuggingFace
    huggingface_token: Optional[str] = None
    cache_dir: Path = Path("./models")

    # Processing
    enable_caching: bool = False
    max_workers: int = 4

    # CORS
    cors_origins: list = ["*"]
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Shared settings object, built once when this module is imported
settings = Settings()


def get_settings() -> Settings:
    """Return the module-level :class:`Settings` instance."""
    return settings
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def validate_image_file(filename: str) -> bool:
    """Tell whether *filename* carries one of the allowed image extensions.

    Only the final suffix of the name is examined, case-insensitively; the
    file contents are never inspected.
    """
    suffix = Path(filename).suffix
    extension = suffix.lower().lstrip(".")
    is_allowed = extension in settings.allowed_extensions
    return is_allowed
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def get_device():
    """Resolve the computation device from ``settings.device``.

    Returns:
        The requested CUDA device string (``"cuda"`` or ``"cuda:N"``) when a
        CUDA device is configured and ``torch`` reports CUDA as available;
        ``"cpu"`` in every other case, including when ``torch`` is missing.
    """
    requested = settings.device.strip().lower()

    # Accept an explicit device index such as "cuda:0" as well as plain
    # "cuda"; previously the indexed form silently fell back to the CPU.
    if requested != "cuda" and not requested.startswith("cuda:"):
        return "cpu"

    try:
        import torch
    except ImportError:
        print("⚠️ torch not installed, using CPU")
        return "cpu"

    if torch.cuda.is_available():
        return requested

    print("⚠️ CUDA requested but not available, falling back to CPU")
    return "cpu"
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
    # Print the effective configuration, leaving the HuggingFace token out.
    print("Current Configuration:")
    print("=" * 60)
    # model_dump() is the pydantic v2 replacement for the deprecated .dict()
    for key, value in settings.model_dump().items():
        if key != "huggingface_token":
            print(f"{key}: {value}")
    print("=" * 60)
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'

services:
  omniparser:
    build: .
    container_name: omniparser-api
    ports:
      - "8000:8000"
    environment:
      - HOST=0.0.0.0
      - PORT=8000
      - DEBUG=False
      - DEVICE=cpu
      - MODEL_NAME=microsoft/OmniParser-v2.0
    volumes:
      - ./models:/app/models
      - ./logs:/app/logs
    restart: unless-stopped
    healthcheck:
      # The Dockerfile never installs curl, so probe with the Python
      # interpreter that is guaranteed to be in the image. urlopen() raises on
      # 4xx/5xx, which makes the probe fail for an unhealthy service.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 5s
|
image_utils.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Image processing helper functions for OmniParser
|
| 3 |
+
"""
|
| 4 |
+
import cv2
|
| 5 |
+
import numpy as np
|
| 6 |
+
from PIL import Image
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Tuple, List
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def load_image(image_path: str) -> Image.Image:
    """Open the image stored at *image_path* and hand back the PIL image."""
    opened = Image.open(image_path)
    return opened
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def resize_image(image: Image.Image, max_size: Tuple[int, int] = (1920, 1080)) -> Image.Image:
    """Return a copy of *image* shrunk to fit within *max_size*.

    Args:
        image: Source image; it is left unmodified.
        max_size: Maximum ``(width, height)`` of the result.

    Returns:
        A new image that fits inside ``max_size`` with its aspect ratio
        preserved.
    """
    # Image.thumbnail() resizes in place, so operate on a copy; otherwise the
    # caller's original image would be shrunk as a hidden side effect.
    resized = image.copy()
    resized.thumbnail(max_size, Image.Resampling.LANCZOS)
    return resized
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def capture_screenshot() -> "Image.Image | None":
    """Capture a monitor screenshot as an RGB image.

    Requires the optional ``mss`` package.

    Returns:
        The screenshot, or ``None`` (after printing an install hint) when
        ``mss`` is not installed.
    """
    # Only the import is guarded, so an ImportError raised later during the
    # capture itself is not mistaken for a missing package.
    try:
        import mss
    except ImportError:
        print("⚠️ mss not installed. Install with: pip install mss")
        return None

    with mss.mss() as sct:
        monitor = sct.monitors[1]  # Primary monitor
        screenshot = sct.grab(monitor)
        return Image.frombytes('RGB', screenshot.size, screenshot.rgb)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def annotate_image(image: Image.Image, elements: List[dict]) -> Image.Image:
    """Draw labelled bounding boxes for *elements* on a copy of *image*.

    Args:
        image: Source image; it is not modified.
        elements: Dicts with the optional keys ``bbox`` (at least four values,
            read as ``x1, y1, x2, y2``), ``element_type``, ``label`` and
            ``confidence``. Entries with a missing or shorter ``bbox`` are
            skipped.

    Returns:
        The annotated copy.
    """
    from PIL import ImageDraw

    # Outline colour per element type; unknown types are drawn in white.
    colors = {
        "button": "red",
        "textfield": "blue",
        "icon": "green",
        "text": "yellow",
        "image": "purple",
    }

    img_copy = image.copy()
    draw = ImageDraw.Draw(img_copy)

    for elem in elements:
        # "or []" also covers an explicit ``"bbox": None`` entry.
        bbox = elem.get("bbox") or []
        if len(bbox) < 4:
            continue

        x1, y1, x2, y2 = bbox[:4]
        color = colors.get(elem.get("element_type", "unknown"), "white")

        # Draw bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)

        # Draw label just above the box, clamped so it stays on the canvas
        # when the box touches the top edge of the image.
        label = f"{elem.get('label', 'elem')} ({elem.get('confidence', 0):.2f})"
        draw.text((x1, max(0, y1 - 10)), label, fill=color)

    return img_copy
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def save_image(image: Image.Image, output_path: str):
    """Save *image* to *output_path* and print a confirmation line."""
    image.save(output_path)
    print(f"✅ Image saved: {output_path}")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def create_sample_screenshot() -> Image.Image:
    """Build an 800x600 white canvas with a few mock UI widgets for testing."""
    from PIL import ImageDraw

    canvas = Image.new('RGB', (800, 600), color='white')
    pen = ImageDraw.Draw(canvas)

    # Button
    pen.rectangle([50, 50, 200, 100], fill='lightblue', outline='blue', width=2)
    pen.text((80, 65), "Click Me", fill='black')

    # Search box
    pen.rectangle([250, 50, 700, 100], fill='white', outline='gray', width=2)
    pen.text((260, 65), "Search...", fill='gray')

    # Menu items: three stacked entries, 50 px apart, starting at y=150
    top = 150
    for caption in ('Home', 'About', 'Contact'):
        pen.rectangle([50, top, 200, top + 40], fill='lightgray', outline='black', width=1)
        pen.text((70, top + 10), caption, fill='black')
        top += 50

    # Status area
    pen.rectangle([250, 150, 700, 500], fill='lightyellow', outline='orange', width=2)
    pen.text((260, 160), "Status Area", fill='black')

    return canvas
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
if __name__ == "__main__":
    # Small demo: build the sample image, save it, then resize it.
    print("Image Processing Examples")
    print("=" * 60)

    # Create sample image
    print("📷 Creating sample screenshot...")
    sample_img = create_sample_screenshot()
    sample_img.save("sample_screenshot.png")
    print("✅ Sample saved as: sample_screenshot.png")

    # Resize example
    print("\n📏 Resizing image...")
    resized = resize_image(sample_img, (640, 480))
    print(f"✅ Resized to: {resized.size}")

    print("\n✅ All examples completed!")
|
main.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import io
|
| 3 |
+
import base64
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Optional, List
|
| 6 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 7 |
+
from fastapi.responses import JSONResponse
|
| 8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
+
from pydantic import BaseModel
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
from PIL import Image
|
| 13 |
+
import logging
|
| 14 |
+
|
| 15 |
+
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(
    title="OmniParser-v2.0 API",
    description="Extract UI elements and cursor coordinates from screenshots",
    version="1.0.0"
)

# Add CORS middleware.
# Credentials stay disabled because a wildcard origin is used: combining
# allow_origins=["*"] with allow_credentials=True is not a valid CORS setup,
# and this API does not rely on cookies or HTTP authentication.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global OmniParser model (lazy loaded)
omni_parser = None
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class ParseRequest(BaseModel):
    """Body accepted by the base64 parsing endpoint."""

    # Base64-encoded image data
    image_base64: str
    # Feature toggles; both default to enabled
    extract_text: bool = True
    extract_icons: bool = True
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class UIElement(BaseModel):
    """A single detected UI element."""

    element_id: int
    label: str
    # Bounding box as [x1, y1, x2, y2]
    bbox: List[int]
    element_type: str
    confidence: float
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class ParseResponse(BaseModel):
    """Response model for parsing results."""

    # "model_used" starts with pydantic's protected "model_" prefix; clearing
    # the protected namespaces avoids the warning raised at class creation
    # while keeping the field name (and therefore the JSON schema) unchanged.
    model_config = {"protected_namespaces": ()}

    elements: List[UIElement]
    image_width: int
    image_height: int
    # Seconds spent handling the request
    processing_time: float
    model_used: str = "OmniParser-v2.0"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def load_omniparser():
    """Return the shared OmniParser handle, creating it on first use.

    The handle is cached in the module-level ``omni_parser`` variable. Any
    error raised while creating it is logged and re-raised.
    """
    global omni_parser

    # Already initialised: nothing to do.
    if omni_parser is not None:
        return omni_parser

    try:
        logger.info("Loading OmniParser-v2.0 from HuggingFace...")
        # Placeholder that only demonstrates the structure; swap in the real
        # OmniParser initialisation here.
        omni_parser = dict(loaded=True, model_name="microsoft/OmniParser-v2.0")
        logger.info("OmniParser loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load OmniParser: {e}")
        raise

    return omni_parser
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def extract_image_from_base64(image_base64: str) -> Image.Image:
    """Decode a base64-encoded image into a PIL image.

    Accepts raw base64 as well as a data URI
    (``data:image/png;base64,...``); whitespace and line breaks inside the
    payload are ignored.

    Args:
        image_base64: The encoded image.

    Returns:
        The opened image.

    Raises:
        ValueError: If the payload is not valid base64 or cannot be opened as
            an image. The original error is attached as ``__cause__``.
    """
    try:
        payload = image_base64
        # Data URIs carry a "data:<mime>;base64," header; only the part after
        # the first comma is base64.
        if payload.lstrip().startswith("data:") and "," in payload:
            payload = payload.split(",", 1)[1]
        # Remove line wraps so strict validation accepts MIME-style base64.
        payload = "".join(payload.split())
        # validate=True rejects stray characters instead of silently dropping
        # them and decoding garbage.
        image_data = base64.b64decode(payload, validate=True)
        image = Image.open(io.BytesIO(image_data))
        return image
    except Exception as e:
        raise ValueError(f"Failed to decode image: {e}") from e
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def parse_ui_elements(image: Image.Image) -> List[UIElement]:
    """Detect UI elements in *image*.

    Currently a placeholder: after making sure the parser is loaded it returns
    two fixed mock elements regardless of the image content. Errors are logged
    and re-raised.
    """
    try:
        # Make sure the model is available before doing any work
        load_omniparser()

        logger.info(f"Processing image of size: {image.size}")

        # Mock detections as (id, label, bbox, type, confidence); replace with
        # the actual OmniParser parsing logic.
        mock_specs = (
            (1, "Button", [10, 10, 100, 50], "button", 0.95),
            (2, "Search", [150, 10, 400, 50], "textfield", 0.92),
        )
        return [
            UIElement(
                element_id=elem_id,
                label=label,
                bbox=bbox,
                element_type=elem_type,
                confidence=score,
            )
            for elem_id, label, bbox, elem_type, score in mock_specs
        ]
    except Exception as e:
        logger.error(f"Error parsing UI elements: {e}")
        raise
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
@app.get("/")
async def root():
    """Root endpoint"""
    # Human-readable index of the main routes
    endpoints = [
        "/docs - API documentation",
        "/health - Health check",
        "/parse - Parse UI elements from screenshot",
    ]
    return dict(
        message="OmniParser-v2.0 API",
        status="running",
        endpoints=endpoints,
    )
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # The service counts as healthy when the parser can be loaded.
    try:
        load_omniparser()
    except Exception as e:
        failure = {"status": "unhealthy", "error": str(e)}
        return JSONResponse(status_code=503, content=failure)

    return {"status": "healthy", "model": "OmniParser-v2.0"}
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
@app.post("/parse", response_model=ParseResponse)
async def parse_screenshot(file: UploadFile = File(...)):
    """
    Parse UI elements from a screenshot.

    - **file**: Image file (PNG, JPG, etc.)

    Returns UI elements with bounding boxes and cursor coordinates.
    """
    import time

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()

    # Read and open the upload. A failure here means the client sent
    # something that is not a readable image, hence 400.
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents))
    except Exception as e:
        logger.error(f"Error in parse endpoint: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e

    # Parse UI elements. A failure here is a server-side fault, so it is
    # reported as 500 rather than blamed on the request.
    try:
        elements = parse_ui_elements(image)
    except Exception as e:
        logger.exception("Error in parse endpoint: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    processing_time = time.perf_counter() - start_time

    return ParseResponse(
        elements=elements,
        image_width=image.width,
        image_height=image.height,
        processing_time=processing_time
    )
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@app.post("/parse-base64", response_model=ParseResponse)
async def parse_base64(request: ParseRequest):
    """
    Parse UI elements from base64-encoded image.

    Request body:
    - **image_base64**: Base64-encoded image string
    - **extract_text**: Extract text from elements (default: True)
    - **extract_icons**: Extract icons (default: True)
    """
    import time

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()

    # Decode image. The decoder raises ValueError for bad input, which is a
    # client error, hence 400.
    try:
        image = extract_image_from_base64(request.image_base64)
    except ValueError as e:
        logger.error(f"Error in parse-base64 endpoint: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e

    # Parse UI elements. A failure here is a server-side fault, so it is
    # reported as 500 rather than blamed on the request.
    # NOTE: request.extract_text / request.extract_icons are accepted but not
    # yet consulted by this handler.
    try:
        elements = parse_ui_elements(image)
    except Exception as e:
        logger.exception("Error in parse-base64 endpoint: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    processing_time = time.perf_counter() - start_time

    return ParseResponse(
        elements=elements,
        image_width=image.width,
        image_height=image.height,
        processing_time=processing_time
    )
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
if __name__ == "__main__":
    import uvicorn

    # Honour HOST/PORT from the environment (docker-compose.yml sets both),
    # falling back to the previously hard-coded values.
    uvicorn.run(
        app,
        host=os.getenv("HOST", "0.0.0.0"),
        port=int(os.getenv("PORT", "8000")),
    )
|
models/.gitkeep
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This directory will contain downloaded models from HuggingFace
|
| 2 |
+
# Models are large files (2GB+) and should not be committed to git
|
quickstart.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Quick start example for OmniParser API
|
| 3 |
+
"""
|
| 4 |
+
import subprocess
|
| 5 |
+
import sys
|
| 6 |
+
import time
|
| 7 |
+
import requests
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def main():
    """Print the step-by-step quick start instructions to stdout."""
    rule = "=" * 60
    divider = "-" * 60

    # Every entry is printed on its own line, in order.
    script = [
        "\n" + rule,
        "OmniParser-v2.0 QUICK START GUIDE",
        rule + "\n",
        "This guide will help you get started with OmniParser API.\n",
        # Step 1: Virtual Environment
        "STEP 1: Setup Virtual Environment",
        divider,
        "Windows:",
        " python -m venv venv",
        " venv\\Scripts\\activate.bat",
        "\nLinux/macOS:",
        " python3 -m venv venv",
        " source venv/bin/activate\n",
        # Step 2: Install Dependencies
        "STEP 2: Install Dependencies",
        divider,
        "Run: pip install -r requirements.txt",
        "(This will take a few minutes)\n",
        # Step 3: Configuration
        "STEP 3: Configuration",
        divider,
        "Copy .env.example to .env and edit if needed",
        "Run: copy .env.example .env (Windows)",
        " or: cp .env.example .env (Linux/macOS)\n",
        # Step 4: Run Server
        "STEP 4: Run the Server",
        divider,
        "Run: python main.py",
        "Expected output: 'INFO: Uvicorn running on http://0.0.0.0:8000'\n",
        # Step 5: Test API
        "STEP 5: Test the API",
        divider,
        "Option A - Interactive Docs:",
        " Open: http://localhost:8000/docs",
        " Click 'Try it out' on any endpoint\n",
        "Option B - Python Script:",
        " python test_api.py\n",
        "Option C - cURL:",
        " curl -X GET http://localhost:8000/health\n",
        # Next Steps
        "NEXT STEPS:",
        divider,
        "1. Upload a screenshot: POST /parse",
        "2. Extract UI elements with coordinates",
        "3. Integrate with your application\n",
        "For more information:",
        "- See README.md for detailed documentation",
        "- Visit: https://huggingface.co/microsoft/OmniParser-v2.0",
        "- API Docs: http://localhost:8000/docs\n",
        rule + "\n",
    ]

    for entry in script:
        print(entry)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.104.1
uvicorn==0.24.0
python-multipart==0.0.6
pydantic==2.5.0
pydantic-settings==2.1.0
python-dotenv==1.0.0
pillow==10.1.0
numpy==1.24.3
opencv-python==4.8.1.78
torch==2.1.0
torchvision==0.16.0
transformers==4.35.2
timm==0.9.12
einops==0.7.0
# HTTP client imported by quickstart.py and test_api.py
requests==2.31.0
|
setup.bat
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
REM OmniParser Setup Script for Windows
REM Creates a virtual environment, installs requirements.txt into it and
REM creates .env from .env.example when no .env exists yet.

echo.
echo ====================================================
echo OmniParser-v2.0 Setup Script
echo ====================================================
echo.

REM Check if Python is installed
python --version >nul 2>&1
if errorlevel 1 (
    echo ERROR: Python is not installed or not in PATH
    echo Please install Python 3.8+ from https://www.python.org
    exit /b 1
)

echo Python found:
python --version
echo.

REM Create virtual environment
echo Creating virtual environment...
python -m venv venv
if errorlevel 1 (
    echo ERROR: Failed to create virtual environment
    exit /b 1
)
echo ✓ Virtual environment created
echo.

REM Activate virtual environment
echo Activating virtual environment...
call venv\Scripts\activate.bat
if errorlevel 1 (
    echo ERROR: Failed to activate virtual environment
    exit /b 1
)
echo ✓ Virtual environment activated
echo.

REM Upgrade pip
echo Upgrading pip...
python -m pip install --upgrade pip >nul 2>&1
echo ✓ pip upgraded
echo.

REM Install dependencies
echo Installing dependencies...
echo This may take a few minutes...
pip install -r requirements.txt
if errorlevel 1 (
    echo ERROR: Failed to install dependencies
    exit /b 1
)
echo ✓ Dependencies installed
echo.

REM Create .env file from template
if not exist .env (
    echo Creating .env file...
    copy .env.example .env >nul
    echo ✓ .env file created
    echo.
)

echo ====================================================
echo Setup completed successfully!
echo ====================================================
echo.
echo Next steps:
echo 1. Activate environment: venv\Scripts\activate.bat
echo 2. Run server: python main.py
echo 3. Visit: http://localhost:8000/docs
echo.
pause
|
setup.sh
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# OmniParser Setup Script for Linux/macOS
# Creates a virtual environment, installs requirements.txt into it and
# creates .env from .env.example when no .env exists yet.

# Abort on the first failing command
set -e

echo ""
echo "===================================================="
echo "OmniParser-v2.0 Setup Script"
echo "===================================================="
echo ""

# Check if Python is installed
if ! command -v python3 &> /dev/null; then
    echo "ERROR: Python 3 is not installed"
    echo "Please install Python 3.8+ from https://www.python.org"
    exit 1
fi

echo "Python found:"
python3 --version
echo ""

# Create virtual environment
echo "Creating virtual environment..."
python3 -m venv venv
echo "✓ Virtual environment created"
echo ""

# Activate virtual environment
echo "Activating virtual environment..."
source venv/bin/activate
echo "✓ Virtual environment activated"
echo ""

# Upgrade pip
echo "Upgrading pip..."
pip install --upgrade pip > /dev/null 2>&1
echo "✓ pip upgraded"
echo ""

# Install dependencies
echo "Installing dependencies..."
echo "This may take a few minutes..."
pip install -r requirements.txt
echo "✓ Dependencies installed"
echo ""

# Create .env file from template
if [ ! -f .env ]; then
    echo "Creating .env file..."
    cp .env.example .env
    echo "✓ .env file created"
    echo ""
fi

echo "===================================================="
echo "Setup completed successfully!"
echo "===================================================="
echo ""
echo "Next steps:"
echo "1. Activate environment: source venv/bin/activate"
echo "2. Run server: python main.py"
echo "3. Visit: http://localhost:8000/docs"
echo ""
|
test_api.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test script for OmniParser API
|
| 3 |
+
"""
|
| 4 |
+
import requests
|
| 5 |
+
import json
|
| 6 |
+
import base64
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# Root URL of the API server; every request in this script is built from it.
BASE_URL = "http://localhost:8000"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def health_check(timeout: float = 10.0):
    """Query the ``/health`` endpoint and print the outcome.

    Prints the HTTP status code, then the response body pretty-printed as
    JSON. If the body is not valid JSON, the raw text is printed instead.

    Args:
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` waits indefinitely on an unresponsive server.
    """
    print("🏥 Health Check...")
    response = requests.get(f"{BASE_URL}/health", timeout=timeout)
    print(f"Status: {response.status_code}")
    try:
        body = response.json()
    except ValueError:
        # JSON decode errors raised by requests subclass ValueError; fall
        # back to the raw body (e.g. an HTML error page from a proxy).
        print(response.text)
    else:
        print(json.dumps(body, indent=2))
    print()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def parse_file(image_path: str, timeout: float = 300.0):
    """Upload an image to ``/parse`` as a multipart file and print the result.

    On HTTP 200 the element count, image size, processing time and each
    element's label, bounding box and confidence are printed. On any other
    status the status code and raw response body are printed. If the file
    does not exist, a message is printed and nothing is sent.

    Args:
        image_path: Path of the image file to upload.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` waits indefinitely on an unresponsive server.
    """
    print(f"📸 Parsing file: {image_path}")

    if not Path(image_path).exists():
        print(f"❌ File not found: {image_path}")
        return

    # The file must stay open while requests streams it into the upload.
    with open(image_path, "rb") as f:
        files = {"file": f}
        response = requests.post(
            f"{BASE_URL}/parse", files=files, timeout=timeout
        )

    if response.status_code == 200:
        result = response.json()
        print(f"✅ Found {len(result['elements'])} UI elements")
        print(f" Image size: {result['image_width']}x{result['image_height']}")
        print(f" Processing time: {result['processing_time']:.2f}s")
        print("\n Elements:")
        for elem in result['elements']:
            print(f" - {elem['label']}: bbox={elem['bbox']}, confidence={elem['confidence']}")
    else:
        print(f"❌ Error: {response.status_code}")
        print(response.text)
    print()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def parse_base64(image_path: str, timeout: float = 300.0):
    """Send an image to ``/parse-base64`` as base64 JSON and print the result.

    The file is read, base64-encoded and posted with ``extract_text`` and
    ``extract_icons`` both set to ``True``. On HTTP 200 the element count
    and processing time are printed. On any other status the status code
    and raw response body are printed. If the file does not exist, a
    message is printed and nothing is sent.

    Args:
        image_path: Path of the image file to encode and send.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` waits indefinitely on an unresponsive server.
    """
    print(f"📷 Parsing base64 image: {image_path}")

    if not Path(image_path).exists():
        print(f"❌ File not found: {image_path}")
        return

    # Read and encode image (base64 bytes -> str so it is JSON-serializable).
    with open(image_path, "rb") as f:
        image_data = base64.b64encode(f.read()).decode('utf-8')

    payload = {
        "image_base64": image_data,
        "extract_text": True,
        "extract_icons": True,
    }

    response = requests.post(
        f"{BASE_URL}/parse-base64", json=payload, timeout=timeout
    )

    if response.status_code == 200:
        result = response.json()
        print(f"✅ Found {len(result['elements'])} UI elements")
        print(f" Processing time: {result['processing_time']:.2f}s")
    else:
        print(f"❌ Error: {response.status_code}")
        print(response.text)
    print()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
if __name__ == "__main__":
    # Script entry point: run the health check, then exercise both parse
    # endpoints against the first sample image that exists on disk.
    print("=" * 60)
    print("OmniParser API Test Suite")
    print("=" * 60)
    print()

    # Health check
    health_check()

    # Test with a sample image (if available)
    sample_images = [
        "screenshot.png",
        "test_image.png",
        "../screenshots/example.png",
    ]

    for img in sample_images:
        if Path(img).exists():
            parse_file(img)
            parse_base64(img)
            break
    else:
        # for/else: reached only when the loop never hit `break`, i.e.
        # none of the candidate images exist.
        print("⚠️ No test images found. Upload an image and try again.")
        print(" Expected: screenshot.png or test_image.png")

    print("=" * 60)
    print("✅ Test suite completed")
    print("=" * 60)
|