Spaces:

Samfredoly
/

omni

Sleeping

App Files Files Community

Samfredoly committed 7 days ago

Commit

2a729e6

verified ·

1 Parent(s): 36cf618

Upload 14 files

Browse files

Files changed (14) hide show

.env.example +15 -0
.gitignore +52 -0
Dockerfile +32 -0
LICENSE +15 -0
config.py +80 -0
docker-compose.yml +24 -0
image_utils.py +118 -0
main.py +225 -0
models/.gitkeep +2 -0
quickstart.py +77 -0
requirements.txt +14 -0
setup.bat +76 -0
setup.sh +64 -0
test_api.py +104 -0

.env.example ADDED Viewed

	@@ -0,0 +1,15 @@

+# OmniParser Configuration
+# Server Settings
+HOST=0.0.0.0
+PORT=8000
+DEBUG=False
+# Model Settings
+MODEL_NAME=microsoft/OmniParser-v2.0
+DEVICE=cpu
+# Uncomment for GPU support:
+# DEVICE=cuda
+# API Settings
+# Maximum upload size in bytes (50 MB)
+MAX_FILE_SIZE=52428800
+# Collection-typed settings are decoded as JSON by pydantic-settings, so this
+# must be a JSON array rather than a bare comma-separated list.
+ALLOWED_EXTENSIONS='["jpg","jpeg","png","bmp","gif"]'

.gitignore ADDED Viewed

	@@ -0,0 +1,52 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+# Environment
+.env
+.env.local
+# Model cache (large files)
+# Ignore the directory's contents rather than the directory itself: git cannot
+# re-include a file whose parent directory is excluded.
+models/*
+!models/.gitkeep
+*.pt
+*.pth
+*.onnx
+# Logs
+*.log
+logs/
+# Test outputs
+test_output/
+outputs/
+*.json
+*.txt
+# requirements.txt is source, not a test output; keep it tracked.
+!requirements.txt

Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    libopencv-dev \
+    python3-opencv \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create models directory
+RUN mkdir -p models
+# Expose port
+EXPOSE 8000
+# Health check
+# Uses only the standard library (requests is not in requirements.txt).
+# urlopen raises on connection failure and on 4xx/5xx responses, so the probe
+# exits non-zero whenever /health is unreachable or reports 503.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"
+# Run application
+CMD ["python", "main.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,15 @@

+MIT License
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

config.py ADDED Viewed

	@@ -0,0 +1,80 @@

+"""
+Configuration management for OmniParser API
+"""
+from pydantic_settings import BaseSettings
+from typing import Optional
+from pathlib import Path
+class Settings(BaseSettings):
+    """Application settings"""
+    # Server
+    host: str = "0.0.0.0"
+    port: int = 8000
+    debug: bool = False
+    # Model
+    model_name: str = "microsoft/OmniParser-v2.0"
+    device: str = "cpu"  # "cpu" or "cuda"
+    # File handling
+    max_file_size: int = 52428800  # 50MB
+    allowed_extensions: tuple = ("jpg", "jpeg", "png", "bmp", "gif")
+    # HuggingFace
+    huggingface_token: Optional[str] = None
+    cache_dir: Path = Path("./models")
+    # Processing
+    enable_caching: bool = False
+    max_workers: int = 4
+    # CORS
+    cors_origins: list = ["*"]
+    class Config:
+        env_file = ".env"
+        env_file_encoding = "utf-8"
+        case_sensitive = False
+# Global settings instance, built once when this module is imported
+settings = Settings()
+def get_settings() -> Settings:
+    """Return the module-level Settings instance (the same object on every call)."""
+    return settings
+def validate_image_file(filename: str) -> bool:
+    """Validate if file is allowed image format"""
+    ext = Path(filename).suffix.lower().lstrip(".")
+    return ext in settings.allowed_extensions
+def get_device():
+    """Get computation device"""
+    device = settings.device.lower()
+    if device == "cuda":
+        try:
+            import torch
+            if torch.cuda.is_available():
+                return "cuda"
+            else:
+                print("⚠️  CUDA requested but not available, falling back to CPU")
+                return "cpu"
+        except ImportError:
+            print("⚠️  torch not installed, using CPU")
+            return "cpu"
+    return "cpu"
+if __name__ == "__main__":
+    print("Current Configuration:")
+    print("=" * 60)
+    for key, value in settings.dict().items():
+        if key not in ["huggingface_token"]:
+            print(f"{key}: {value}")
+    print("=" * 60)

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,24 @@

+version: '3.8'
+services:
+  omniparser:
+    build: .
+    container_name: omniparser-api
+    ports:
+      - "8000:8000"
+    environment:
+      - HOST=0.0.0.0
+      - PORT=8000
+      - DEBUG=False
+      - DEVICE=cpu
+      - MODEL_NAME=microsoft/OmniParser-v2.0
+    volumes:
+      - ./models:/app/models
+      - ./logs:/app/logs
+    restart: unless-stopped
+    healthcheck:
+      # The image is built from python:3.11-slim and the Dockerfile does not
+      # install curl, so probe with the Python standard library instead.
+      # urlopen raises (non-zero exit) on connection errors and 4xx/5xx.
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 5s

image_utils.py ADDED Viewed

	@@ -0,0 +1,118 @@

+"""
+Image processing helper functions for OmniParser
+"""
+import cv2
+import numpy as np
+from PIL import Image
+from pathlib import Path
+from typing import Tuple, List
+def load_image(image_path: str) -> Image.Image:
+    """Open the image at image_path with PIL and return the Image object.
+    Errors raised by Image.open (for example a missing file) propagate to
+    the caller.
+    """
+    return Image.open(image_path)
+def resize_image(image: Image.Image, max_size: Tuple[int, int] = (1920, 1080)) -> Image.Image:
+    """Resize image to max dimensions while maintaining aspect ratio"""
+    image.thumbnail(max_size, Image.Resampling.LANCZOS)
+    return image
+def capture_screenshot() -> Image.Image:
+    """Capture screenshot (requires mss or similar)"""
+    try:
+        import mss
+        with mss.mss() as sct:
+            monitor = sct.monitors[1]  # Primary monitor
+            screenshot = sct.grab(monitor)
+            return Image.frombytes('RGB', screenshot.size, screenshot.rgb)
+    except ImportError:
+        print("⚠️  mss not installed. Install with: pip install mss")
+        return None
+def annotate_image(image: Image.Image, elements: List[dict]) -> Image.Image:
+    """Draw bounding boxes on image for visualization"""
+    img_copy = image.copy()
+    from PIL import ImageDraw, ImageFont
+    draw = ImageDraw.Draw(img_copy)
+    colors = {
+        "button": "red",
+        "textfield": "blue",
+        "icon": "green",
+        "text": "yellow",
+        "image": "purple"
+    }
+    for i, elem in enumerate(elements):
+        bbox = elem.get("bbox", [])
+        if len(bbox) >= 4:
+            x1, y1, x2, y2 = bbox[:4]
+            elem_type = elem.get("element_type", "unknown")
+            color = colors.get(elem_type, "white")
+            # Draw bounding box
+            draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
+            # Draw label
+            label = f"{elem.get('label', 'elem')} ({elem.get('confidence', 0):.2f})"
+            draw.text((x1, y1 - 10), label, fill=color)
+    return img_copy
+def save_image(image: Image.Image, output_path: str):
+    """Write the image to output_path and print a confirmation line.
+    The call is delegated to Image.save with no explicit format argument.
+    """
+    image.save(output_path)
+    print(f"✅ Image saved: {output_path}")
+def create_sample_screenshot() -> Image.Image:
+    """Create a simple sample image for testing"""
+    # Create a blank image with some shapes
+    img = Image.new('RGB', (800, 600), color='white')
+    from PIL import ImageDraw
+    draw = ImageDraw.Draw(img)
+    # Draw some sample UI elements
+    # Button
+    draw.rectangle([50, 50, 200, 100], fill='lightblue', outline='blue', width=2)
+    draw.text((80, 65), "Click Me", fill='black')
+    # Search box
+    draw.rectangle([250, 50, 700, 100], fill='white', outline='gray', width=2)
+    draw.text((260, 65), "Search...", fill='gray')
+    # Menu items
+    for i, text in enumerate(['Home', 'About', 'Contact']):
+        y = 150 + i * 50
+        draw.rectangle([50, y, 200, y + 40], fill='lightgray', outline='black', width=1)
+        draw.text((70, y + 10), text, fill='black')
+    # Status area
+    draw.rectangle([250, 150, 700, 500], fill='lightyellow', outline='orange', width=2)
+    draw.text((260, 160), "Status Area", fill='black')
+    return img
+if __name__ == "__main__":
+    print("Image Processing Examples")
+    print("=" * 60)
+    # Create sample image
+    print("📷 Creating sample screenshot...")
+    sample_img = create_sample_screenshot()
+    sample_img.save("sample_screenshot.png")
+    print("✅ Sample saved as: sample_screenshot.png")
+    # Resize example
+    print("\n📐 Resizing image...")
+    resized = resize_image(sample_img, (640, 480))
+    print(f"✅ Resized to: {resized.size}")
+    print("\n✅ All examples completed!")

main.py ADDED Viewed

	@@ -0,0 +1,225 @@

+import os
+import io
+import base64
+from pathlib import Path
+from typing import Optional, List
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import cv2
+import numpy as np
+from PIL import Image
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize FastAPI app
+app = FastAPI(
+    title="OmniParser-v2.0 API",
+    description="Extract UI elements and cursor coordinates from screenshots",
+    version="1.0.0"
+)
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Global OmniParser model (lazy loaded)
+omni_parser = None
+class ParseRequest(BaseModel):
+    """Request body for the base64 parsing endpoint"""
+    # Base64-encoded bytes of the image to analyse
+    image_base64: str
+    # NOTE(review): the two flags below are accepted, but the /parse-base64
+    # handler in this file only reads image_base64 - confirm they are
+    # meant to be wired into the parser.
+    extract_text: bool = True
+    extract_icons: bool = True
+class UIElement(BaseModel):
+    """One detected UI element as returned in ParseResponse.elements"""
+    # Identifier of the element (the mock parser in this file numbers from 1)
+    element_id: int
+    # Display label, e.g. "Button" or "Search" in the mock data
+    label: str
+    bbox: List[int]  # [x1, y1, x2, y2]
+    # Category string such as "button" or "textfield"
+    element_type: str
+    # Detection score (the mock data uses values like 0.95)
+    confidence: float
+class ParseResponse(BaseModel):
+    """Response model for parsing results"""
+    elements: List[UIElement]
+    image_width: int
+    image_height: int
+    processing_time: float
+    model_used: str = "OmniParser-v2.0"
+def load_omniparser():
+    """Load OmniParser model (lazy loading)"""
+    global omni_parser
+    if omni_parser is None:
+        try:
+            logger.info("Loading OmniParser-v2.0 from HuggingFace...")
+            # Import and initialize OmniParser
+            # For now, we'll use a placeholder that demonstrates the structure
+            # You can replace this with actual OmniParser initialization
+            omni_parser = {
+                "loaded": True,
+                "model_name": "microsoft/OmniParser-v2.0"
+            }
+            logger.info("OmniParser loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load OmniParser: {e}")
+            raise
+    return omni_parser
+def extract_image_from_base64(image_base64: str) -> Image.Image:
+    """Decode base64 image"""
+    try:
+        image_data = base64.b64decode(image_base64)
+        image = Image.open(io.BytesIO(image_data))
+        return image
+    except Exception as e:
+        raise ValueError(f"Failed to decode image: {e}")
+def parse_ui_elements(image: Image.Image) -> List[UIElement]:
+    """Parse UI elements from image using OmniParser"""
+    try:
+        # Load model
+        load_omniparser()
+        # Placeholder implementation - replace with actual OmniParser logic
+        logger.info(f"Processing image of size: {image.size}")
+        # For demonstration, create mock UI elements
+        # Replace this with actual OmniParser parsing logic
+        elements = [
+            UIElement(
+                element_id=1,
+                label="Button",
+                bbox=[10, 10, 100, 50],
+                element_type="button",
+                confidence=0.95
+            ),
+            UIElement(
+                element_id=2,
+                label="Search",
+                bbox=[150, 10, 400, 50],
+                element_type="textfield",
+                confidence=0.92
+            ),
+        ]
+        return elements
+    except Exception as e:
+        logger.error(f"Error parsing UI elements: {e}")
+        raise
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {
+        "message": "OmniParser-v2.0 API",
+        "status": "running",
+        "endpoints": [
+            "/docs - API documentation",
+            "/health - Health check",
+            "/parse - Parse UI elements from screenshot"
+        ]
+    }
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    try:
+        load_omniparser()
+        return {"status": "healthy", "model": "OmniParser-v2.0"}
+    except Exception as e:
+        return JSONResponse(
+            status_code=503,
+            content={"status": "unhealthy", "error": str(e)}
+        )
+@app.post("/parse", response_model=ParseResponse)
+async def parse_screenshot(file: UploadFile = File(...)):
+    """
+    Parse UI elements from a screenshot.
+    - **file**: Image file (PNG, JPG, etc.)
+    Returns UI elements with bounding boxes and cursor coordinates.
+    """
+    try:
+        import time
+        start_time = time.time()
+        # Read uploaded file
+        contents = await file.read()
+        image = Image.open(io.BytesIO(contents))
+        # Parse UI elements
+        elements = parse_ui_elements(image)
+        # Calculate processing time
+        processing_time = time.time() - start_time
+        return ParseResponse(
+            elements=elements,
+            image_width=image.width,
+            image_height=image.height,
+            processing_time=processing_time
+        )
+    except Exception as e:
+        logger.error(f"Error in parse endpoint: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+@app.post("/parse-base64", response_model=ParseResponse)
+async def parse_base64(request: ParseRequest):
+    """
+    Parse UI elements from base64-encoded image.
+    Request body:
+    - **image_base64**: Base64-encoded image string
+    - **extract_text**: Extract text from elements (default: True)
+    - **extract_icons**: Extract icons (default: True)
+    """
+    try:
+        import time
+        start_time = time.time()
+        # Decode image
+        image = extract_image_from_base64(request.image_base64)
+        # Parse UI elements
+        elements = parse_ui_elements(image)
+        # Calculate processing time
+        processing_time = time.time() - start_time
+        return ParseResponse(
+            elements=elements,
+            image_width=image.width,
+            image_height=image.height,
+            processing_time=processing_time
+        )
+    except Exception as e:
+        logger.error(f"Error in parse-base64 endpoint: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

models/.gitkeep ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # This directory will contain downloaded models from HuggingFace
2	+ # Models are large files (2GB+) and should not be committed to git

quickstart.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""
+Quick start example for OmniParser API
+"""
+import subprocess
+import sys
+import time
+import requests
+from pathlib import Path
+def main():
+    """Quick start guide"""
+    print("\n" + "=" * 60)
+    print("OmniParser-v2.0 QUICK START GUIDE")
+    print("=" * 60 + "\n")
+    print("This guide will help you get started with OmniParser API.\n")
+    # Step 1: Virtual Environment
+    print("STEP 1: Setup Virtual Environment")
+    print("-" * 60)
+    print("Windows:")
+    print("  python -m venv venv")
+    print("  venv\\Scripts\\activate.bat")
+    print("\nLinux/macOS:")
+    print("  python3 -m venv venv")
+    print("  source venv/bin/activate\n")
+    # Step 2: Install Dependencies
+    print("STEP 2: Install Dependencies")
+    print("-" * 60)
+    print("Run: pip install -r requirements.txt")
+    print("(This will take a few minutes)\n")
+    # Step 3: Configuration
+    print("STEP 3: Configuration")
+    print("-" * 60)
+    print("Copy .env.example to .env and edit if needed")
+    print("Run: copy .env.example .env  (Windows)")
+    print("  or: cp .env.example .env   (Linux/macOS)\n")
+    # Step 4: Run Server
+    print("STEP 4: Run the Server")
+    print("-" * 60)
+    print("Run: python main.py")
+    print("Expected output: 'INFO:     Uvicorn running on http://0.0.0.0:8000'\n")
+    # Step 5: Test API
+    print("STEP 5: Test the API")
+    print("-" * 60)
+    print("Option A - Interactive Docs:")
+    print("  Open: http://localhost:8000/docs")
+    print("  Click 'Try it out' on any endpoint\n")
+    print("Option B - Python Script:")
+    print("  python test_api.py\n")
+    print("Option C - cURL:")
+    print("  curl -X GET http://localhost:8000/health\n")
+    # Next Steps
+    print("NEXT STEPS:")
+    print("-" * 60)
+    print("1. Upload a screenshot: POST /parse")
+    print("2. Extract UI elements with coordinates")
+    print("3. Integrate with your application\n")
+    print("For more information:")
+    print("- See README.md for detailed documentation")
+    print("- Visit: https://huggingface.co/microsoft/OmniParser-v2.0")
+    print("- API Docs: http://localhost:8000/docs\n")
+    print("=" * 60 + "\n")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+fastapi==0.104.1
+uvicorn==0.24.0
+python-multipart==0.0.6
+pydantic==2.5.0
+pydantic-settings==2.1.0
+python-dotenv==1.0.0
+pillow==10.1.0
+numpy==1.24.3
+opencv-python==4.8.1.78
+torch==2.1.0
+torchvision==0.16.0
+transformers==4.35.2
+timm==0.9.12
+einops==0.7.0
+# Imported by test_api.py and quickstart.py
+requests==2.31.0

setup.bat ADDED Viewed

	@@ -0,0 +1,76 @@

+@echo off
+REM OmniParser Setup Script for Windows
+REM Creates a virtual environment in .\venv, installs requirements.txt into
+REM it and creates .env from .env.example when no .env exists yet.
+REM Every checked step exits with code 1 on failure.
+echo.
+echo ====================================================
+echo OmniParser-v2.0 Setup Script
+echo ====================================================
+echo.
+REM Check if Python is installed
+python --version >nul 2>&1
+if errorlevel 1 (
+    echo ERROR: Python is not installed or not in PATH
+    echo Please install Python 3.8+ from https://www.python.org
+    exit /b 1
+)
+echo Python found:
+python --version
+echo.
+REM Create virtual environment
+echo Creating virtual environment...
+python -m venv venv
+if errorlevel 1 (
+    echo ERROR: Failed to create virtual environment
+    exit /b 1
+)
+echo ✓ Virtual environment created
+echo.
+REM Activate virtual environment
+echo Activating virtual environment...
+call venv\Scripts\activate.bat
+if errorlevel 1 (
+    echo ERROR: Failed to activate virtual environment
+    exit /b 1
+)
+echo ✓ Virtual environment activated
+echo.
+REM Upgrade pip
+REM Output is discarded and the result is not checked, so the success
+REM message below is printed even if the upgrade fails.
+echo Upgrading pip...
+python -m pip install --upgrade pip >nul 2>&1
+echo ✓ pip upgraded
+echo.
+REM Install dependencies
+echo Installing dependencies...
+echo This may take a few minutes...
+pip install -r requirements.txt
+if errorlevel 1 (
+    echo ERROR: Failed to install dependencies
+    exit /b 1
+)
+echo ✓ Dependencies installed
+echo.
+REM Create .env file from template
+REM An existing .env is left untouched.
+if not exist .env (
+    echo Creating .env file...
+    copy .env.example .env >nul
+    echo ✓ .env file created
+    echo.
+)
+echo ====================================================
+echo Setup completed successfully!
+echo ====================================================
+echo.
+echo Next steps:
+echo 1. Activate environment: venv\Scripts\activate.bat
+echo 2. Run server: python main.py
+echo 3. Visit: http://localhost:8000/docs
+echo.
+REM Keep the console window open until a key is pressed
+pause

setup.sh ADDED Viewed

	@@ -0,0 +1,64 @@

+#!/bin/bash
+# OmniParser Setup Script for Linux/macOS
+# Creates a virtual environment in ./venv, installs requirements.txt into it
+# and creates .env from .env.example when no .env exists yet.
+# Abort on the first command that fails
+set -e
+echo ""
+echo "===================================================="
+echo "OmniParser-v2.0 Setup Script"
+echo "===================================================="
+echo ""
+# Check if Python is installed
+if ! command -v python3 &> /dev/null; then
+    echo "ERROR: Python 3 is not installed"
+    echo "Please install Python 3.8+ from https://www.python.org"
+    exit 1
+fi
+echo "Python found:"
+python3 --version
+echo ""
+# Create virtual environment
+echo "Creating virtual environment..."
+python3 -m venv venv
+echo "✓ Virtual environment created"
+echo ""
+# Activate virtual environment
+# The activation applies to this script's shell only; the "Next steps"
+# printed at the end tell the user to activate it again themselves.
+echo "Activating virtual environment..."
+source venv/bin/activate
+echo "✓ Virtual environment activated"
+echo ""
+# Upgrade pip
+# Output is discarded; with "set -e" a failed upgrade stops the script
+# here without printing an error message.
+echo "Upgrading pip..."
+pip install --upgrade pip > /dev/null 2>&1
+echo "✓ pip upgraded"
+echo ""
+# Install dependencies
+echo "Installing dependencies..."
+echo "This may take a few minutes..."
+pip install -r requirements.txt
+echo "✓ Dependencies installed"
+echo ""
+# Create .env file from template
+# An existing .env is left untouched.
+if [ ! -f .env ]; then
+    echo "Creating .env file..."
+    cp .env.example .env
+    echo "✓ .env file created"
+    echo ""
+fi
+echo "===================================================="
+echo "Setup completed successfully!"
+echo "===================================================="
+echo ""
+echo "Next steps:"
+echo "1. Activate environment: source venv/bin/activate"
+echo "2. Run server: python main.py"
+echo "3. Visit: http://localhost:8000/docs"
+echo ""

test_api.py ADDED Viewed

	@@ -0,0 +1,104 @@

+"""
+Test script for OmniParser API
+"""
+import requests
+import json
+import base64
+from pathlib import Path
+BASE_URL = "http://localhost:8000"
+def health_check():
+    """Check API health"""
+    print("🏥 Health Check...")
+    response = requests.get(f"{BASE_URL}/health")
+    print(f"Status: {response.status_code}")
+    print(json.dumps(response.json(), indent=2))
+    print()
+def parse_file(image_path: str):
+    """Parse image file"""
+    print(f"📸 Parsing file: {image_path}")
+    if not Path(image_path).exists():
+        print(f"❌ File not found: {image_path}")
+        return
+    with open(image_path, "rb") as f:
+        files = {"file": f}
+        response = requests.post(f"{BASE_URL}/parse", files=files)
+    if response.status_code == 200:
+        result = response.json()
+        print(f"✅ Found {len(result['elements'])} UI elements")
+        print(f"   Image size: {result['image_width']}x{result['image_height']}")
+        print(f"   Processing time: {result['processing_time']:.2f}s")
+        print("\n   Elements:")
+        for elem in result['elements']:
+            print(f"   - {elem['label']}: bbox={elem['bbox']}, confidence={elem['confidence']}")
+    else:
+        print(f"❌ Error: {response.status_code}")
+        print(response.text)
+    print()
+def parse_base64(image_path: str):
+    """Parse base64-encoded image"""
+    print(f"📷 Parsing base64 image: {image_path}")
+    if not Path(image_path).exists():
+        print(f"❌ File not found: {image_path}")
+        return
+    # Read and encode image
+    with open(image_path, "rb") as f:
+        image_data = base64.b64encode(f.read()).decode('utf-8')
+    payload = {
+        "image_base64": image_data,
+        "extract_text": True,
+        "extract_icons": True
+    }
+    response = requests.post(f"{BASE_URL}/parse-base64", json=payload)
+    if response.status_code == 200:
+        result = response.json()
+        print(f"✅ Found {len(result['elements'])} UI elements")
+        print(f"   Processing time: {result['processing_time']:.2f}s")
+    else:
+        print(f"❌ Error: {response.status_code}")
+        print(response.text)
+    print()
+if __name__ == "__main__":
+    print("=" * 60)
+    print("OmniParser API Test Suite")
+    print("=" * 60)
+    print()
+    # Health check
+    health_check()
+    # Test with a sample image (if available)
+    sample_images = [
+        "screenshot.png",
+        "test_image.png",
+        "../screenshots/example.png"
+    ]
+    for img in sample_images:
+        if Path(img).exists():
+            parse_file(img)
+            parse_base64(img)
+            break
+    else:
+        print("⚠️  No test images found. Upload an image and try again.")
+        print("   Expected: screenshot.png or test_image.png")
+    print("=" * 60)
+    print("✅ Test suite completed")
+    print("=" * 60)