#!/usr/bin/env python3
"""
Annex 9.3 t-SNE Embedding Visualisations
==========================================

Produces the t-SNE scatter plots shown in **Annex 9.3** of the paper.

The script loads the local validation dataset, encodes each image with the
main GAP-CLIP model (and, optionally, the CLIP baseline), then reduces the
512-D embeddings to 2-D via t-SNE and renders:

* **Colour overlay** – points coloured by garment colour, convex hulls drawn
  around each colour cluster.
* **Hierarchy overlay** – points coloured by clothing category (top, bottom,
  shoes, …), convex hulls drawn around each category cluster.
* **Per-hierarchy colour scatter** – one subplot per category, showing how
  colours are distributed within each category.

These plots complement the quantitative separation scores in §5.3.6 and
provide an intuitive sanity check that the dedicated embedding dimensions
(0–15 for colour, 16–79 for hierarchy) encode the intended structure.

See also:
- §5.3.6 (``sec536_embedding_structure.py``) – quantitative Tests A/B/C
- Annex 9.2 (``annex92_color_heatmaps.py``) – pairwise colour heatmaps
"""

import math
from pathlib import Path

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib.patches import Polygon
from PIL import Image
from sklearn.manifold import TSNE
from sklearn.metrics import (
    silhouette_score,
    davies_bouldin_score,
    calinski_harabasz_score,
)
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm
from transformers import CLIPModel as CLIPModel_transformers, CLIPProcessor

try:
    from scipy.spatial import ConvexHull
except ImportError:
    ConvexHull = None

from config import (
    color_column,
    color_emb_dim,
    column_local_image_path,
    device,
    hierarchy_column,
    hierarchy_emb_dim,
    images_dir,
    local_dataset_path,
    main_model_path,
)


class ImageDataset(Dataset):
    """Lightweight dataset to load local images along with colors and hierarchies.

    Each item is a ``(image_tensor, color, hierarchy)`` triple where the image
    is resized to 224x224 and normalised with ImageNet statistics.
    """

    def __init__(self, dataframe: pd.DataFrame, root_dir: str):
        self.df = dataframe.reset_index(drop=True)
        # NOTE(review): root_dir is stored but paths are read directly from
        # ``column_local_image_path`` — presumably those are absolute/relative
        # paths already; confirm against the dataset CSV.
        self.root_dir = root_dir
        self.transform = transforms.Compose(
            [
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ]
        )

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx: int):
        row = self.df.iloc[idx]
        img_path = row[column_local_image_path]
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)
        color = row[color_column]
        hierarchy = row[hierarchy_column]
        return image, color, hierarchy


def load_main_model():
    """Load the main model with the trained weights.

    Returns:
        Tuple of ``(model, processor)`` — the fine-tuned GAP-CLIP model in
        eval mode on ``device`` and the matching CLIP processor.
    """
    checkpoint = torch.load(main_model_path, map_location=device)
    # Checkpoints may store the weights under "model_state_dict" or be a raw
    # state dict; handle both layouts.
    state_dict = checkpoint.get("model_state_dict", checkpoint)
    model = CLIPModel_transformers.from_pretrained(
        "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
    )
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    # Load processor for text tokenization
    processor = CLIPProcessor.from_pretrained("laion/CLIP-ViT-B-32-laion2B-s34B-b79K")
    return model, processor


def load_clip_baseline():
    """Load the CLIP baseline model from transformers.

    Returns:
        Tuple of ``(clip_model, clip_processor)`` — OpenAI's ViT-B/32 CLIP in
        eval mode on ``device`` plus its processor.
    """
    print("🤗 Loading CLIP baseline model from transformers...")
    clip_model = CLIPModel_transformers.from_pretrained("openai/clip-vit-base-patch32").to(device)
    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    clip_model.eval()
    print("✅ CLIP baseline model loaded successfully")
    return clip_model, clip_processor


def enforce_min_hierarchy_samples(df: pd.DataFrame, min_per_hierarchy) -> pd.DataFrame:
    """Filter out hierarchy groups with fewer than ``min_per_hierarchy`` rows.

    A ``None``/zero/negative threshold disables filtering and returns ``df``
    unchanged.
    """
    if not min_per_hierarchy or min_per_hierarchy <= 0:
        return df
    counts = df[hierarchy_column].value_counts()
    keep_values = counts[counts >= min_per_hierarchy].index
    filtered = df[df[hierarchy_column].isin(keep_values)].reset_index(drop=True)
    return filtered


def prepare_dataframe(df, sample_size, per_color_limit, min_per_hierarchy=None):
    """Subsample the dataframe to speed up the t-SNE.

    Args:
        df: Input dataframe.
        sample_size: Optional global cap on the number of rows (random sample).
        per_color_limit: Optional cap on rows per colour group.
        min_per_hierarchy: Optional minimum group size for hierarchies; smaller
            groups are dropped after sampling.
    """
    if per_color_limit and per_color_limit > 0:
        # Cap each colour group so dominant colours don't swamp the plot.
        df_limited = (
            df.groupby(color_column)
            .apply(lambda g: g.sample(min(len(g), per_color_limit), random_state=42))
            .reset_index(drop=True)
        )
    else:
        df_limited = df
    if sample_size and 0 < sample_size < len(df_limited):
        df_limited = df_limited.sample(sample_size, random_state=42).reset_index(
            drop=True
        )
    df_limited = enforce_min_hierarchy_samples(df_limited, min_per_hierarchy)
    return df_limited


def compute_embeddings(model, dataloader):
    """Extract color, hierarchy, and combined embeddings.

    Slices each image embedding into its dedicated sub-spaces: dimensions
    ``[0, color_emb_dim)`` for colour and
    ``[color_emb_dim, color_emb_dim + hierarchy_emb_dim)`` for hierarchy.

    Returns:
        Tuple ``(color_embeddings, hierarchy_embeddings, color_labels,
        hierarchy_labels)`` with embeddings as numpy arrays.
    """
    color_embeddings = []
    hierarchy_embeddings = []
    color_labels = []
    hierarchy_labels = []
    with torch.no_grad():
        for images, colors, hierarchies in tqdm(
            dataloader, desc="Extracting embeddings"
        ):
            images = images.to(device)
            if images.shape[1] == 1:  # safety in case a grayscale image slips through
                images = images.expand(-1, 3, -1, -1)
            image_embeds = model.get_image_features(pixel_values=images)
            color_part = image_embeds[:, :color_emb_dim]
            hierarchy_part = image_embeds[
                :, color_emb_dim : color_emb_dim + hierarchy_emb_dim
            ]
            color_embeddings.append(color_part.cpu().numpy())
            hierarchy_embeddings.append(hierarchy_part.cpu().numpy())
            color_labels.extend(colors)
            hierarchy_labels.extend(hierarchies)
    return (
        np.concatenate(color_embeddings, axis=0),
        np.concatenate(hierarchy_embeddings, axis=0),
        color_labels,
        hierarchy_labels,
    )


def compute_clip_embeddings(clip_model, clip_processor, dataloader):
    """Extract CLIP baseline embeddings (full image embeddings, not separated).

    Images coming from the dataloader are already ImageNet-normalised, so they
    are denormalised back to [0, 1], converted to PIL, and re-processed with
    CLIP's own preprocessing. Embeddings are L2-normalised.

    Returns:
        Tuple ``(full, full, full, color_labels, hierarchy_labels)`` — the same
        full embedding array is returned three times so the caller can use it
        wherever the main model would supply colour / hierarchy / combined
        embeddings.
    """
    all_embeddings = []
    color_labels = []
    hierarchy_labels = []

    # ImageNet statistics used by ImageDataset.transform; needed to undo the
    # normalisation before handing PIL images to the CLIP processor.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    to_pil = transforms.ToPILImage()

    with torch.no_grad():
        for images, colors, hierarchies in tqdm(
            dataloader, desc="Extracting CLIP embeddings"
        ):
            # Denormalize on CPU (safer for PIL conversion), then convert each
            # tensor to a PIL image for CLIP's processor.
            denorm = torch.clamp(images * std + mean, 0, 1)
            pil_images = [to_pil(img.cpu()) for img in denorm]
            # Batched image-only path: get_image_features runs the same
            # vision tower + projection as the full forward, without the
            # text branch.
            inputs = clip_processor(images=pil_images, return_tensors="pt").to(device)
            image_embeds = clip_model.get_image_features(**inputs)
            # L2-normalise so cosine comparisons are meaningful.
            image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
            all_embeddings.append(image_embeds.cpu().numpy())
            color_labels.extend(colors)
            hierarchy_labels.extend(hierarchies)

    # For CLIP, we use the full embeddings for all visualizations
    # (no separation into color/hierarchy dimensions)
    full_embeddings = np.concatenate(all_embeddings, axis=0)
    return (
        full_embeddings,  # color_embeddings (using full CLIP embeddings)
        full_embeddings,  # hierarchy_embeddings (using full CLIP embeddings)
        full_embeddings,  # color_hier_embeddings (using full CLIP embeddings)
        color_labels,
        hierarchy_labels,
    )


def compute_dunn_index(embeddings, labels):
    """
    Compute the Dunn Index for clustering evaluation.

    The Dunn Index is the ratio of the minimum inter-cluster distance to the
    maximum intra-cluster distance. Higher values indicate better clustering.

    Args:
        embeddings: Array of embeddings [N, embed_dim]
        labels: Array of cluster labels [N]

    Returns:
        Dunn Index value (float) or None if calculation fails
    """
    try:
        # Hoisted out of the loops: one import, one boolean-mask slice per label.
        from scipy.spatial.distance import cdist, pdist

        unique_labels = np.unique(labels)
        if len(unique_labels) < 2:
            return None

        clusters = {label: embeddings[labels == label] for label in unique_labels}

        # Maximum intra-cluster distance (largest cluster "diameter").
        max_intra_cluster_dist = 0.0
        for cluster_points in clusters.values():
            if len(cluster_points) > 1:
                intra_dists = pdist(cluster_points, metric='euclidean')
                if len(intra_dists) > 0:
                    max_intra_cluster_dist = max(
                        max_intra_cluster_dist, float(np.max(intra_dists))
                    )
        if max_intra_cluster_dist == 0:
            # All clusters are singletons or degenerate — ratio undefined.
            return None

        # Minimum inter-cluster distance over all cluster pairs.
        min_inter_cluster_dist = float('inf')
        for i, label1 in enumerate(unique_labels):
            for label2 in unique_labels[i + 1:]:
                inter_dists = cdist(
                    clusters[label1], clusters[label2], metric='euclidean'
                )
                min_inter_cluster_dist = min(
                    min_inter_cluster_dist, float(np.min(inter_dists))
                )
        if min_inter_cluster_dist == float('inf'):
            return None

        # Dunn Index = minimum inter-cluster distance / maximum intra-cluster distance
        return float(min_inter_cluster_dist / max_intra_cluster_dist)
    except Exception as e:
        print(f"⚠️ Error computing Dunn Index: {e}")
        return None


def build_color_map(labels, prefer_true_colors=False):
    """Build a color mapping for labels.

    Assigns each distinct label a colour from the ``husl`` palette.

    Note: ``prefer_true_colors`` is currently unused here; the "use the label
    itself as the colour" behaviour is handled in :func:`run_tsne`.
    """
    unique_labels = sorted(set(labels))
    palette = sns.color_palette("husl", len(unique_labels))
    return {label: palette[idx] for idx, label in enumerate(unique_labels)}


def _fmt_metric(value):
    """Format a clustering metric for display, tolerating missing values."""
    return f"{value:.3f}" if value is not None else "n/a"


def run_tsne(
    embeddings,
    legend_labels,
    output_path,
    perplexity,
    title,
    scatter_color_labels=None,
    prefer_true_colors=False,
):
    """Calculate and plot a t-SNE projection.

    Args:
        embeddings: [N, D] array to project.
        legend_labels: Label per point, used for the legend and the metrics.
        output_path: PNG destination (parent directories are created).
        perplexity: t-SNE perplexity.
        title: Plot title; if it contains ``'color'``, labels that are valid
            matplotlib colour names are drawn in their own colour.
        scatter_color_labels: Optional alternative labels used for colouring.
        prefer_true_colors: Forwarded to :func:`build_color_map`.
    """
    tsne = TSNE(
        n_components=2,
        perplexity=perplexity,
        init="pca",
        learning_rate="auto",
        random_state=42,
    )
    reduced = tsne.fit_transform(embeddings)
    label_array = np.array(legend_labels)
    color_labels = (
        np.array(scatter_color_labels)
        if scatter_color_labels is not None
        else label_array
    )

    # Clustering indices are only defined with at least two classes.
    unique_labels_list = sorted(set(label_array))
    if len(unique_labels_list) > 1 and len(label_array) > 1:
        # Convert labels to numeric indices for the sklearn metrics.
        label_to_idx = {label: idx for idx, label in enumerate(unique_labels_list)}
        numeric_labels = np.array([label_to_idx[label] for label in label_array])
        # Calculate in original embedding space (ground truth - measures real separation)
        silhouette = silhouette_score(embeddings, numeric_labels, metric='euclidean')
        davies_bouldin = davies_bouldin_score(embeddings, numeric_labels)
        calinski_harabasz = calinski_harabasz_score(embeddings, numeric_labels)
        dunn = compute_dunn_index(embeddings, numeric_labels)
    else:
        silhouette = None
        davies_bouldin = None
        calinski_harabasz = None
        dunn = None

    # Helpful reference for the reported clustering indices:
    # • Silhouette Score ∈ [-1, 1] — closer to 1 means points fit their cluster well, 0 means overlap, < 0 suggests misassignment.
    # • Davies–Bouldin Index ∈ [0, +∞) — lower is better; quantifies average similarity between clusters relative to their size.
    # • Calinski–Harabasz Index ∈ [0, +∞) — higher is better; ratio of between-cluster dispersion to within-cluster dispersion.
    # • Dunn Index ∈ [0, +∞) — higher is better; compares the tightest cluster diameter to the closest distance between clusters.

    # Build color map for visualization
    color_map = build_color_map(color_labels, prefer_true_colors=prefer_true_colors)
    color_series = np.array([color_map[label] for label in color_labels])

    plt.figure(figsize=(10, 8))
    for label in unique_labels_list:
        mask = label_array == label
        # For colour plots, draw each cluster in its literal colour when the
        # label is a valid matplotlib colour name; otherwise fall back to the
        # palette so labels like "multicolor" don't raise ValueError.
        if 'color' in title and mcolors.is_color_like(label):
            c = label
        else:
            c = color_series[mask]
        plt.scatter(
            reduced[mask, 0],
            reduced[mask, 1],
            c=c,
            s=15,
            alpha=0.8,
            label=label,
        )

    # Add the clustering scores to the title (n/a for any missing metric).
    if silhouette is not None:
        title_with_score = (
            f"{title}\n(t-SNE Silhouette: {_fmt_metric(silhouette)}"
            f" | Davies-Bouldin: {_fmt_metric(davies_bouldin)}"
            f" | Calinski-Harabasz: {_fmt_metric(calinski_harabasz)}"
            f" | Dunn: {_fmt_metric(dunn)})"
        )
    else:
        title_with_score = title
    plt.title(title_with_score)
    plt.xlabel("t-SNE 1")
    plt.ylabel("t-SNE 2")
    plt.legend(
        bbox_to_anchor=(1.05, 1), loc="upper left", fontsize="small", frameon=False
    )
    plt.tight_layout()
    # Make sure the destination directory exists before saving.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=300)
    plt.close()
    print(f"✅ Figure saved in {output_path}")
    # Guarded: metrics may be None when there are fewer than two classes.
    if silhouette is not None:
        print(
            f"   📊 t-SNE space: {_fmt_metric(silhouette)} (matches visualization)"
            f" | Davies-Bouldin: {_fmt_metric(davies_bouldin)}"
            f" | Calinski-Harabasz: {_fmt_metric(calinski_harabasz)}"
            f" | Dunn: {_fmt_metric(dunn)}"
        )
    else:
        print("   📊 Fewer than two classes — clustering indices not computed.")


def filter_valid_rows(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Keep only rows with valid local image paths and colors."""
    # Use the configured colour column (was hard-coded to 'color').
    dataframe = dataframe[dataframe[color_column] != 'unknown'].copy()
    df = dataframe.dropna(
        subset=[column_local_image_path, color_column, hierarchy_column]
    ).copy()
    mask = df[column_local_image_path].apply(
        lambda x: isinstance(x, str) and len(x.strip()) > 0
    )
    return df[mask].reset_index(drop=True)


if __name__ == "__main__":
    sample_size = None
    per_color_limit = 500
    min_per_hierarchy = 200
    batch_size = 32
    perplexity = 30
    output_color = "evaluation/evaluation_results/tsne/tsne_color_space.png"
    output_hierarchy = "evaluation/evaluation_results/tsne/tsne_hierarchy_space.png"

    # 1) Loading and subsampling the dataset
    print("📥 Loading the dataset...")
    # NOTE(review): path is hard-coded although ``local_dataset_path`` is
    # imported from config — confirm they point to the same CSV.
    df = pd.read_csv("data/data.csv")
    df = filter_valid_rows(df)
    print(f"Total length of the dataset: {len(df)}")
    df = prepare_dataframe(df, sample_size, per_color_limit, min_per_hierarchy)
    print(f"✅ {len(df)} samples will be used for the t-SNE")
    print(f"Number of colors in the dataset: {len(df['color'].unique())}")
    print(f"Colors in the dataset: {df['color'].unique()}")
    dataset = ImageDataset(df, images_dir)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    # 2) Loading the models
    print("⚙️ Loading the main model...")
    model, processor = load_main_model()
    print("⚙️ Loading CLIP baseline model...")
    clip_model, clip_processor = load_clip_baseline()

    # 3) Extracting the embeddings
    print("🎯 Extracting the embeddings...")
    (
        color_embeddings,
        hierarchy_embeddings,
        colors,
        hierarchies,
    ) = compute_embeddings(model, dataloader)

    # 4) Calculating the t-SNE
    print("🌀 Calculating the color t-SNE...")
    run_tsne(
        color_embeddings,
        colors,
        output_color,
        perplexity,
        "t-SNE of the color embeddings of the main model",
        scatter_color_labels=colors,
        prefer_true_colors=True,
    )
    print("🌀 Calculating the hierarchy t-SNE...")
    run_tsne(
        hierarchy_embeddings,
        hierarchies,
        output_hierarchy,
        perplexity,
        "t-SNE of the hierarchy embeddings of the main model",
        scatter_color_labels=hierarchies,
    )

    # ========== CLIP BASELINE EVALUATION ==========
    print("\n" + "=" * 60)
    print("🔄 Starting CLIP Baseline Evaluation")
    print("=" * 60)

    print("🎯 Extracting CLIP embeddings...")
    (
        clip_color_embeddings,
        clip_hierarchy_embeddings,
        clip_color_hier_embeddings,
        clip_colors,
        clip_hierarchies,
    ) = compute_clip_embeddings(clip_model, clip_processor, dataloader)

    # Output paths for CLIP baseline
    clip_output_color = "evaluation/evaluation_results/tsne/clip_baseline_tsne_color_space.png"
    clip_output_hierarchy = "evaluation/evaluation_results/tsne/clip_baseline_tsne_hierarchy_space.png"

    print("🌀 Calculating CLIP baseline color t-SNE...")
    run_tsne(
        clip_color_embeddings,
        clip_colors,
        clip_output_color,
        perplexity,
        "t-SNE of the color embeddings (CLIP Baseline)",
        scatter_color_labels=clip_colors,
        prefer_true_colors=True,
    )
    print("🌀 Calculating CLIP baseline hierarchy t-SNE...")
    run_tsne(
        clip_hierarchy_embeddings,
        clip_hierarchies,
        clip_output_hierarchy,
        perplexity,
        "t-SNE of the hierarchy embeddings (CLIP Baseline)",
        scatter_color_labels=clip_hierarchies,
    )

    print("\n✅ All t-SNE visualizations completed!")
    print("   - Main model: evaluation/evaluation_results/tsne/tsne_*.png")
    print("   - CLIP baseline: evaluation/evaluation_results/tsne/clip_baseline_tsne_*.png")