"""
SpotSeg — Text-Guided Object Segmentation
Gradio app for HuggingFace Spaces deployment.
Highlights objects in images using natural language prompts.
Optimized for free-tier CPU inference.
"""

import os
import time
import gradio as gr
import numpy as np
from PIL import Image

from seg_models import ObjectSegmentor
from seg_utils import (
    create_highlight_overlay,
    create_blur_background,
    create_detection_visualization,
    create_contour_outline,
)
from download_examples import download_examples


# ──────────────────────────────────────────────
# Fetch the example images before the UI is built
# ──────────────────────────────────────────────
# The examples folder sits next to this file.
_APP_DIR = os.path.dirname(__file__)
EXAMPLES_DIR = os.path.join(_APP_DIR, "examples")
download_examples(EXAMPLES_DIR)

# ──────────────────────────────────────────────
# Build the segmentor a single time, at import,
# so every request reuses the same instance
# ──────────────────────────────────────────────
print("Loading SpotSeg models...")
segmentor = ObjectSegmentor()
print("Models loaded successfully.")


def predict(
    image: Image.Image,
    text_query: str,
    mode: str,
    highlight_color: str,
    threshold: float,
) -> tuple:
    """
    Run one detection/segmentation request and render the result.

    Args:
        image: Input PIL image; converted to RGB when it is in another mode.
        text_query: Object(s) to find. Comma-separated prompts are segmented
            one by one and their masks are merged into a single mask.
            Ignored in "Auto-Detect All" mode.
        mode: "Highlight Object" | "Blur Background" | "Contour Outline" |
            "Auto-Detect All". Any other value is rendered like
            "Highlight Object".
        highlight_color: Hex color forwarded to the highlight, contour and
            detection renderers (not used by "Blur Background").
        threshold: Confidence threshold (0.0 - 1.0) forwarded to the models.

    Returns:
        (result_image, stats_string, detected_objects_text)

    Raises:
        gr.Error: If no image was supplied, if the query is blank in a
            text-guided mode, or if none of the prompts produced a mask.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # Ensure RGB
    if image.mode != "RGB":
        image = image.convert("RGB")

    width, height = image.size

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start = time.perf_counter()

    if mode == "Auto-Detect All":
        # Use YOLOv8 to detect all objects
        detections = segmentor.detect_all_objects(image, conf=threshold)
        # Only model time is reported; rendering below is excluded.
        elapsed = time.perf_counter() - start

        result_img = create_detection_visualization(image, detections, highlight_color)

        if not detections:
            objects_text = "No objects detected — try lowering the threshold."
            stats = f"{width}×{height} · {elapsed:.2f}s · 0 objects"
            return result_img, stats, objects_text

        objects_text = ", ".join(
            f"{d['label']} ({d['confidence']:.0%})" for d in detections
        )
        unique_classes = len({d["label"] for d in detections})
        stats = (
            f"{width}×{height} · "
            f"{elapsed:.2f}s · "
            f"{len(detections)} objects · "
            f"{unique_classes} classes"
        )
        return result_img, stats, objects_text

    # Text-guided segmentation with CLIPSeg
    if not text_query or not text_query.strip():
        raise gr.Error("Please enter an object to find (e.g., 'dog', 'car', 'person').")

    # Comma-separated prompts are each segmented and then merged.
    # dict.fromkeys drops repeated prompts while keeping their order, so
    # "dog, dog" costs a single model pass and is listed only once.
    queries = list(
        dict.fromkeys(q.strip() for q in text_query.split(",") if q.strip())
    )

    masks = []
    for q in queries:
        mask, score = segmentor.segment_object(image, q, threshold=threshold)
        # A None mask means the segmentor found nothing for this prompt.
        if mask is not None:
            masks.append((mask, q, score))

    # Only model time is reported; mask merging and rendering are excluded.
    elapsed = time.perf_counter() - start

    if not masks:
        raise gr.Error(
            f"Could not find '{text_query}' in the image. "
            "Try a different query or lower the threshold."
        )

    # Union of all prompt masks: element-wise maximum over the per-prompt masks.
    combined_mask = np.zeros_like(masks[0][0], dtype=np.float32)
    for mask, _, _ in masks:
        combined_mask = np.maximum(combined_mask, mask)

    found_labels = [f"{q} ({s:.0%})" for _, q, s in masks]

    if mode == "Blur Background":
        result_img = create_blur_background(image, combined_mask)
    elif mode == "Contour Outline":
        result_img = create_contour_outline(image, combined_mask, highlight_color)
    else:
        # "Highlight Object", and the fallback for any unrecognized mode.
        result_img = create_highlight_overlay(image, combined_mask, highlight_color)

    stats = f"{width}×{height} · {elapsed:.2f}s · CLIPSeg"
    objects_text = "Found: " + ", ".join(found_labels)

    return result_img, stats, objects_text


# ──────────────────────────────────────────────
# Build Gradio Interface
# ──────────────────────────────────────────────

# Collect the example image paths in filename order. Guard against a missing
# directory (e.g. if the startup download could not create it) so the app
# starts without examples instead of failing at import time.
example_images = []
if os.path.isdir(EXAMPLES_DIR):
    example_images = [
        os.path.join(EXAMPLES_DIR, fname)
        for fname in sorted(os.listdir(EXAMPLES_DIR))
        if fname.lower().endswith((".jpg", ".jpeg", ".png", ".webp"))
    ]

# Two-column layout: inputs and controls on the left, results on the right.
with gr.Blocks(
    title="SpotSeg — Text-Guided Object Segmentation",
    theme=gr.themes.Base(
        primary_hue="teal",
        neutral_hue="slate",
    ),
    css="""
    .gradio-container { max-width: 960px !important; }
    .gr-button-primary { background: #4fd1c5 !important; color: #0b0f14 !important; }
    """,
) as demo:
    gr.Markdown(
        """
        # 🎯 SpotSeg — Text-Guided Object Segmentation
        Type what you're looking for, and SpotSeg will highlight it in the image.
        Use **Auto-Detect All** to find every object automatically.
        """
    )

    with gr.Row():
        # Left column: image upload and request parameters.
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Image", height=320)
            text_query = gr.Textbox(
                label="What to find",
                placeholder="e.g. dog, car, person (comma-separate for multiple)",
                info="Leave empty for Auto-Detect mode",
            )
            mode = gr.Dropdown(
                choices=[
                    "Highlight Object",
                    "Blur Background",
                    "Contour Outline",
                    "Auto-Detect All",
                ],
                value="Highlight Object",
                label="Mode",
            )
            with gr.Row():
                highlight_color = gr.ColorPicker(
                    value="#4fd1c5",
                    label="Highlight Color",
                )
                threshold = gr.Slider(
                    minimum=0.05,
                    maximum=0.95,
                    value=0.35,
                    step=0.05,
                    label="Threshold",
                )
            run_btn = gr.Button("Find Objects", variant="primary", size="lg")

        # Right column: rendered image plus the two text summaries from predict().
        with gr.Column(scale=1):
            output_image = gr.Image(type="pil", label="Result", height=320)
            stats_text = gr.Textbox(label="Stats", interactive=False)
            objects_text = gr.Textbox(label="Detected Objects", interactive=False)

    run_btn.click(
        fn=predict,
        inputs=[input_image, text_query, mode, highlight_color, threshold],
        outputs=[output_image, stats_text, objects_text],
    )

    # Each example names its image by position in the sorted example listing.
    # Rows whose image is not available are skipped, so a partially populated
    # examples directory (fewer than three images) cannot raise IndexError
    # while the UI is being built.
    example_specs = [
        (2, "dog", "Highlight Object", "#4fd1c5", 0.30),
        (1, "person", "Blur Background", "#f6ad55", 0.35),
        (2, "dog", "Contour Outline", "#b794f4", 0.30),
        (0, "", "Auto-Detect All", "#4fd1c5", 0.30),
    ]
    example_rows = [
        [example_images[image_idx], query, mode_name, color, conf_value]
        for image_idx, query, mode_name, color, conf_value in example_specs
        if image_idx < len(example_images)
    ]

    if example_rows:
        gr.Examples(
            examples=example_rows,
            inputs=[input_image, text_query, mode, highlight_color, threshold],
            outputs=[output_image, stats_text, objects_text],
            fn=predict,
            cache_examples=False,
        )

if __name__ == "__main__":
    # Serve on every network interface, port 7860, when run as a script.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)