"""
Depth estimation model wrapper using MiDaS.

Supports two model sizes:
  - small: MiDaS v2.1 Small (EfficientNet-Lite backbone, fast CPU inference)
  - large: DPT-Large (Vision Transformer backbone, highest quality)
"""

import torch
import numpy as np
from PIL import Image


# Model configurations
MODEL_CONFIGS = {
    "small": {
        "repo": "intel-isl/MiDaS",
        "model_name": "MiDaS_small",
        "transform_name": "small_transform",
        "description": "MiDaS v2.1 Small — Fast CPU inference (~0.5s)",
    },
    "large": {
        "repo": "intel-isl/MiDaS",
        "model_name": "DPT_Large",
        "transform_name": "dpt_transform",
        "description": "DPT-Large — Highest quality depth estimation (~3s)",
    },
}


class DepthEstimator:
    """Monocular depth estimation using MiDaS models."""

    def __init__(self, model_size: str = "small", device: str = None):
        """
        Initialize the depth estimator.

        Args:
            model_size: 'small' or 'large'
            device: 'cpu' or 'cuda' (auto-detected if None)
        """
        if model_size not in MODEL_CONFIGS:
            raise ValueError(f"Unknown model size '{model_size}'. Choose from: {list(MODEL_CONFIGS.keys())}")

        self.model_size = model_size
        self.config = MODEL_CONFIGS[model_size]
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        self._load_model()

    def _load_model(self):
        """Load the MiDaS model and transforms from PyTorch Hub."""
        print(f"Loading {self.config['description']}...")

        # Pre-trust the rwightman repo that MiDaS_small internally loads via
        # blocks.py without passing trust_repo=True, which causes an EOFError
        # in non-interactive environments like HuggingFace Spaces.
        torch.hub.list("rwightman/gen-efficientnet-pytorch", trust_repo=True)

        # Load model
        self.model = torch.hub.load(
            self.config["repo"],
            self.config["model_name"],
            trust_repo=True,
        )
        self.model.to(self.device)
        self.model.eval()

        # Load transforms
        midas_transforms = torch.hub.load(
            self.config["repo"],
            "transforms",
            trust_repo=True,
        )

        if self.model_size == "small":
            self.transform = midas_transforms.small_transform
        else:
            self.transform = midas_transforms.dpt_transform

        print(f"Model loaded on {self.device}")

    @torch.no_grad()
    def predict(self, image: Image.Image) -> np.ndarray:
        """
        Predict depth from a PIL Image.

        Args:
            image: Input PIL Image (RGB)

        Returns:
            depth_map: Normalized depth array (H, W) with values in [0, 1].
                       Higher values = closer to camera.
        """
        # Convert PIL to numpy RGB
        img_np = np.array(image.convert("RGB"))

        # Apply MiDaS transform
        input_tensor = self.transform(img_np).to(self.device)

        # Run inference
        prediction = self.model(input_tensor)

        # Resize to original dimensions
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img_np.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

        depth = prediction.cpu().numpy()

        # Normalize to [0, 1]
        depth_min = depth.min()
        depth_max = depth.max()
        if depth_max - depth_min > 1e-6:
            depth = (depth - depth_min) / (depth_max - depth_min)
        else:
            depth = np.zeros_like(depth)

        return depth

    def __repr__(self):
        return f"DepthEstimator(model_size='{self.model_size}', device='{self.device}')"