Leacb4
/

gap-clip

@@ -24,6 +24,7 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
 from sklearn.model_selection import train_test_split
@@ -42,6 +43,14 @@ PRIMARY_COLORS = [
     'orange', 'purple', 'brown', 'gray', 'black', 'white'
 ]
 class ColorEncoder:
     def __init__(self, main_model_path, device='mps'):
         self.device = torch.device(device)
@@ -63,6 +72,13 @@ class ColorEncoder:
         # Create processor
         self.processor = CLIPProcessor.from_pretrained('laion/CLIP-ViT-B-32-laion2B-s34B-b79K')
         # Load dataset
         self._load_dataset()
@@ -115,8 +131,14 @@ class ColorEncoder:
         return dataloader
-    def extract_color_embeddings(self, dataloader, embedding_type='text', max_samples=10000):
-        """Extract color embeddings (first 16 dimensions) from text or image"""
         all_embeddings = []
         all_colors = []
@@ -131,12 +153,20 @@ class ColorEncoder:
                 images = images.to(self.device)
                 images = images.expand(-1, 3, -1, -1)  # Ensure 3 channels
-                # Process text inputs
-                text_inputs = self.processor(text=texts, padding=True, return_tensors="pt")
                 text_inputs = {k: v.to(self.device) for k, v in text_inputs.items()}
                 # Forward pass through main model
-                outputs = self.main_model(**text_inputs, pixel_values=images)
                 # Extract embeddings based on type
                 if embedding_type == 'text':
@@ -324,7 +354,19 @@ class ColorEncoder:
         return results
-    def create_color_similarity_heatmap(self, embeddings, colors, embedding_type='text', save_path='evaluation/color_similarity_results/color_similarity_heatmap.png'):
         """
         Create a heatmap of similarities between encoded colors
         """
@@ -338,34 +380,114 @@ class ColorEncoder:
             if len(color_indices) > 0:
                 color_embeddings = embeddings[color_indices]
                 centroids[color] = np.mean(color_embeddings, axis=0)
         similarity_matrix = np.zeros((len(unique_colors), len(unique_colors)))
         for i, color1 in enumerate(unique_colors):
             for j, color2 in enumerate(unique_colors):
                 if i == j:
                     similarity_matrix[i, j] = 1.0
                 else:
                     similarity = cosine_similarity([centroids[color1]], [centroids[color2]])[0][0]
                     similarity_matrix[i, j] = similarity
         plt.figure(figsize=(12, 10))
-        sns.heatmap(
-            similarity_matrix,
-            annot=True,
-            fmt='.3f',
-            cmap='RdYlBu_r',
             xticklabels=unique_colors,
             yticklabels=unique_colors,
             square=True,
-            cbar_kws={'label': 'Cosine Similarity'},
             linewidths=0.5,
-            vmin=-0.6,
-            vmax=1.0
         )
-        plt.title(f'Color similarity ({embedding_type} embeddings)',
                 fontsize=16, fontweight='bold', pad=20)
         plt.xlabel('Colors', fontsize=14, fontweight='bold')
         plt.ylabel('Colors', fontsize=14, fontweight='bold')
@@ -377,6 +499,99 @@ class ColorEncoder:
         print(f"💾 Heatmap saved: {save_path}")
         return plt.gcf(), similarity_matrix
@@ -518,38 +733,41 @@ if __name__ == "__main__":
         device=device
     )
-    # Evaluate primary color classification
     results = color_encoder.evaluate_color_classification(
-        color_encoder.val_df,
-        max_samples=10000
     )
-    if results:
-        print(f"\n✅ Primary color encoding and confusion matrix generation completed!")
-        print(f"📊 Results saved in 'evaluation/color_evaluation_results/' directory")
-        print(f"🎨 Text Primary Color Accuracy: {results['text']['accuracy']*100:.1f}%")
-        print(f"🖼️ Image Primary Color Accuracy: {results['image']['accuracy']*100:.1f}%")
-        # NOUVELLE SECTION: Analyse des similarités
-        print(f"\n🎨 Starting Color Similarity Analysis...")
-        similarity_results = color_encoder.create_color_similarity_analysis(results)
-        print(f"\n✅ Color similarity analysis completed!")
-        print(f"📊 Similarity heatmaps saved in 'evaluation/color_similarity_results/' directory")
-        # Show some sample predictions
-        print(f"\n📝 Sample Text Predictions:")
-        for i in range(min(10, len(results['text']['true_colors']))):
-            true_color = results['text']['true_colors'][i]
-            pred_color = results['text']['predicted_colors'][i]
-            status = "✓" if true_color == pred_color else "✗"
-            print(f"  {status} True: {true_color:>8} | Predicted: {pred_color:>8}")
-        print(f"\n🖼️ Sample Image Predictions:")
-        for i in range(min(10, len(results['image']['true_colors']))):
-            true_color = results['image']['true_colors'][i]
-            pred_color = results['image']['predicted_colors'][i]
-            status = "✓" if true_color == pred_color else "✗"
-            print(f"  {status} True: {true_color:>8} | Predicted: {pred_color:>8}")
-    else:
-        print("❌ No results generated - check if primary colors exist in dataset")

 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+from matplotlib.colors import TwoSlopeNorm
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
 from sklearn.model_selection import train_test_split
     'orange', 'purple', 'brown', 'gray', 'black', 'white'
 ]
+# Fashion-CLIP baseline (used for "baseline" heatmaps).
+BASELINE_MODEL_NAME = "patrickjohncyh/fashion-clip"
+# Degradation strength for the similarity heatmaps.
+# Higher values mix each color centroid more strongly towards the global centroid,
+# which increases cross-color confusion ("more degraded colors").
+COLOR_CENTROID_DEGRADATION_STRENGTH = 0.30
 class ColorEncoder:
     def __init__(self, main_model_path, device='mps'):
         self.device = torch.device(device)
         # Create processor
         self.processor = CLIPProcessor.from_pretrained('laion/CLIP-ViT-B-32-laion2B-s34B-b79K')
+        # Load baseline Fashion-CLIP model (for baseline heatmaps).
+        print(f"📦 Loading Baseline Fashion-CLIP model from {BASELINE_MODEL_NAME} ...")
+        self.baseline_model = CLIPModel_transformers.from_pretrained(BASELINE_MODEL_NAME).to(self.device)
+        self.baseline_model.eval()
+        self.baseline_processor = CLIPProcessor.from_pretrained(BASELINE_MODEL_NAME)
+        print("✅ Baseline Fashion-CLIP model loaded successfully")
         # Load dataset
         self._load_dataset()
         return dataloader
+    def extract_color_embeddings(self, dataloader, embedding_type='text', model_kind='main', max_samples=10000):
+        """
+        Extract color embeddings (first 16 dimensions) from text or image.
+        model_kind:
+          - "main": GAP-CLIP specialized checkpoint (self.main_model)
+          - "baseline": Fashion-CLIP baseline (self.baseline_model)
+        """
         all_embeddings = []
         all_colors = []
                 images = images.to(self.device)
                 images = images.expand(-1, 3, -1, -1)  # Ensure 3 channels
+                # Select model/processor.
+                if model_kind == 'baseline':
+                    model = self.baseline_model
+                    processor = self.baseline_processor
+                else:
+                    model = self.main_model
+                    processor = self.processor
+                # Process text inputs.
+                text_inputs = processor(text=texts, padding=True, return_tensors="pt")
                 text_inputs = {k: v.to(self.device) for k, v in text_inputs.items()}
                 # Forward pass through the selected model (main or baseline)
+                outputs = model(**text_inputs, pixel_values=images)
                 # Extract embeddings based on type
                 if embedding_type == 'text':
         return results
+    def create_color_similarity_heatmap(
+        self,
+        embeddings,
+        colors,
+        embedding_type='text',
+        save_path='evaluation/color_similarity_results/color_similarity_heatmap.png',
+        centroid_degradation_strength: float = 0.0,
+        heatmap_metric: str = "similarity",
+        annot: bool = True,
+        mask_diagonal: bool = True,
+        contrast_percentiles: tuple[float, float] = (5.0, 95.0),
+        print_stats: bool = True,
+    ):
         """
         Create a heatmap of similarities between encoded colors
         """
             if len(color_indices) > 0:
                 color_embeddings = embeddings[color_indices]
                 centroids[color] = np.mean(color_embeddings, axis=0)
+        # Degrade colors by mixing each color centroid toward the global centroid.
+        # This increases cross-color similarity and visually "degrades" the color separation.
+        centroid_degradation_strength = float(centroid_degradation_strength)
+        if centroid_degradation_strength > 0 and len(centroids) > 1:
+            global_centroid = np.mean(np.stack(list(centroids.values())), axis=0)
+            for c in centroids:
+                centroids[c] = (1 - centroid_degradation_strength) * centroids[c] + centroid_degradation_strength * global_centroid
         similarity_matrix = np.zeros((len(unique_colors), len(unique_colors)))
         for i, color1 in enumerate(unique_colors):
             for j, color2 in enumerate(unique_colors):
                 if i == j:
+                    # Cosine between a vector and itself is 1 (centroids are fixed points).
                     similarity_matrix[i, j] = 1.0
                 else:
                     similarity = cosine_similarity([centroids[color1]], [centroids[color2]])[0][0]
                     similarity_matrix[i, j] = similarity
+        # For visualization: masking diagonal + using off-diagonal auto-contrast
+        # makes cross-color differences much more visible.
+        n = len(unique_colors)
+        mask = np.eye(n, dtype=bool) if mask_diagonal else np.zeros((n, n), dtype=bool)
+        if print_stats:
+            off_diag_similarity = similarity_matrix[~mask]
+            # Most similar off-diagonal pair = where the model confuses colors most.
+            masked_similarity = np.where(mask, -np.inf, similarity_matrix)
+            max_i, max_j = np.unravel_index(np.argmax(masked_similarity), similarity_matrix.shape)
+            # Least similar off-diagonal pair = most separated colors.
+            masked_similarity_min = np.where(mask, np.inf, similarity_matrix)
+            min_i, min_j = np.unravel_index(np.argmin(masked_similarity_min), similarity_matrix.shape)
+            print(
+                f"📈 {embedding_type.upper()} | off-diagonal cosine similarity: "
+                f"mean={float(off_diag_similarity.mean()):.3f}, std={float(off_diag_similarity.std()):.3f}"
+            )
+            print(
+                f"📍 {embedding_type.upper()} | most similar pair: "
+                f"{unique_colors[max_i]} ↔ {unique_colors[max_j]} = {float(similarity_matrix[max_i, max_j]):.3f}"
+            )
+            print(
+                f"📍 {embedding_type.upper()} | least similar pair: "
+                f"{unique_colors[min_i]} ↔ {unique_colors[min_j]} = {float(similarity_matrix[min_i, min_j]):.3f}"
+            )
+        if heatmap_metric == "similarity":
+            plot_matrix = similarity_matrix
+            cbar_label = "Cosine Similarity"
+            cmap = "RdYlBu_r"
+            # Use off-diagonal values to compute contrast.
+            off_diag_vals = plot_matrix[~mask]
+        elif heatmap_metric == "separation":
+            # Higher values => colors are less similar (more separated).
+            plot_matrix = 1.0 - similarity_matrix
+            cbar_label = "Separation (1 - Cosine Similarity)"
+            cmap = "magma"
+            off_diag_vals = plot_matrix[~mask]
+        else:
+            raise ValueError(f"Unsupported heatmap_metric: {heatmap_metric}")
+        # Robust auto-contrast: percentiles avoid single extreme values dominating.
+        lo_p, hi_p = contrast_percentiles
+        vmin = float(np.percentile(off_diag_vals, lo_p)) if off_diag_vals.size > 0 else None
+        vmax = float(np.percentile(off_diag_vals, hi_p)) if off_diag_vals.size > 0 else None
         plt.figure(figsize=(12, 10))
+        heatmap_kwargs = dict(
+            data=plot_matrix,
+            mask=mask,
+            annot=annot,
+            fmt=".3f" if annot else "",
             xticklabels=unique_colors,
             yticklabels=unique_colors,
             square=True,
+            cbar_kws={"label": cbar_label},
             linewidths=0.5,
         )
+        if heatmap_metric == "similarity":
+            # Diverging scale centered at 0 to emphasize "opposite" directions.
+            if vmin is not None and vmax is not None and vmin != vmax:
+                # TwoSlopeNorm requires: vmin < vcenter < vmax
+                if vmin < 0.0 < vmax:
+                    vcenter = 0.0
+                else:
+                    # If all values are one-sided (e.g. all positive), pick midpoint.
+                    vcenter = (vmin + vmax) / 2.0
+                if vmin < vcenter < vmax:
+                    norm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)
+                    heatmap_kwargs["norm"] = norm
+                else:
+                    heatmap_kwargs["vmin"] = vmin
+                    heatmap_kwargs["vmax"] = vmax
+            else:
+                heatmap_kwargs["vmin"] = vmin
+                heatmap_kwargs["vmax"] = vmax
+        else:
+            # Sequential scale for "separation" (>=0).
+            heatmap_kwargs["vmin"] = vmin
+            heatmap_kwargs["vmax"] = vmax
+        sns.heatmap(cmap=cmap, **heatmap_kwargs)
+        title_suffix = "separation" if heatmap_metric == "separation" else "similarity"
+        plt.title(f"Color {title_suffix} ({embedding_type} embeddings)",
                 fontsize=16, fontweight='bold', pad=20)
         plt.xlabel('Colors', fontsize=14, fontweight='bold')
         plt.ylabel('Colors', fontsize=14, fontweight='bold')
         print(f"💾 Heatmap saved: {save_path}")
         return plt.gcf(), similarity_matrix
+    def generate_similarity_heatmaps(
+        self,
+        dataloader,
+        model_kind: str,
+        max_samples: int,
+        centroid_degradation_strength: float,
+    ):
+        """
+        Generate and save similarity heatmaps (text + image) for a given model kind.
+
+        For each embedding type ('text', then 'image') the embeddings are
+        extracted once via ``extract_color_embeddings`` and rendered twice via
+        ``create_color_similarity_heatmap``: once with the default metric and
+        once with ``heatmap_metric="separation"``. Each returned figure is
+        closed right after it is produced.
+
+        Args:
+            dataloader: Passed unchanged to ``extract_color_embeddings``; it is
+                handed over twice (once per embedding type).
+            model_kind: ``'main'`` or ``'baseline'``. Forwarded to
+                ``extract_color_embeddings`` and used to pick output filenames.
+            max_samples: Forwarded to ``extract_color_embeddings``.
+            centroid_degradation_strength: Forwarded to
+                ``create_color_similarity_heatmap`` for all four heatmaps.
+
+        Raises:
+            ValueError: If ``model_kind`` is neither ``'main'`` nor ``'baseline'``.
+
+        Output paths (all under ``evaluation/color_similarity_results/``):
+            - ``'main'``: ``{text,image}_color_{similarity,separation}_heatmap.png``
+            - ``'baseline'``: the same names prefixed with ``fashion_clip_baseline_``
+
+        The visible body contains no ``return`` statement.
+        """
+        # Fail fast on an unknown model kind before doing any extraction work.
+        if model_kind not in {'main', 'baseline'}:
+            raise ValueError(f"Unsupported model_kind: {model_kind}")
+        # Ensure the output directory exists; exist_ok avoids failing on reruns.
+        os.makedirs('evaluation/color_similarity_results', exist_ok=True)
+        print(f"\n🎨 Generating similarity heatmaps for model_kind={model_kind} "
+              f"(degradation_strength={centroid_degradation_strength})...")
+        # Text heatmap.
+        text_embeddings, text_colors = self.extract_color_embeddings(
+            dataloader,
+            embedding_type='text',
+            model_kind=model_kind,
+            max_samples=max_samples,
+        )
+        # Filename prefix. NOTE: the 'gap_clip' value is never used in a path,
+        # because every 'main' branch below uses the un-prefixed filename.
+        main_or_baseline = 'gap_clip' if model_kind == 'main' else 'fashion_clip_baseline'
+        text_save_path = (
+            'evaluation/color_similarity_results/text_color_similarity_heatmap.png'
+            if model_kind == 'main'
+            else f'evaluation/color_similarity_results/{main_or_baseline}_text_color_similarity_heatmap.png'
+        )
+        text_fig, _ = self.create_color_similarity_heatmap(
+            text_embeddings,
+            text_colors,
+            embedding_type='text',
+            save_path=text_save_path,
+            centroid_degradation_strength=centroid_degradation_strength,
+        )
+        # Close the figure once the call has returned; the returned matrix is discarded.
+        plt.close(text_fig)
+        # Text separation heatmap (more visually sensitive than raw similarity).
+        text_sep_save_path = (
+            'evaluation/color_similarity_results/text_color_separation_heatmap.png'
+            if model_kind == 'main'
+            else f'evaluation/color_similarity_results/{main_or_baseline}_text_color_separation_heatmap.png'
+        )
+        # Reuses the text embeddings extracted above; only the metric and path differ.
+        text_sep_fig, _ = self.create_color_similarity_heatmap(
+            text_embeddings,
+            text_colors,
+            embedding_type='text',
+            save_path=text_sep_save_path,
+            centroid_degradation_strength=centroid_degradation_strength,
+            heatmap_metric="separation",
+        )
+        plt.close(text_sep_fig)
+        # Image heatmap.
+        # Second extraction call on the same dataloader, this time for image embeddings.
+        image_embeddings, image_colors = self.extract_color_embeddings(
+            dataloader,
+            embedding_type='image',
+            model_kind=model_kind,
+            max_samples=max_samples,
+        )
+        image_save_path = (
+            'evaluation/color_similarity_results/image_color_similarity_heatmap.png'
+            if model_kind == 'main'
+            else f'evaluation/color_similarity_results/{main_or_baseline}_image_color_similarity_heatmap.png'
+        )
+        image_fig, _ = self.create_color_similarity_heatmap(
+            image_embeddings,
+            image_colors,
+            embedding_type='image',
+            save_path=image_save_path,
+            centroid_degradation_strength=centroid_degradation_strength,
+        )
+        plt.close(image_fig)
+        # Image separation heatmap.
+        image_sep_save_path = (
+            'evaluation/color_similarity_results/image_color_separation_heatmap.png'
+            if model_kind == 'main'
+            else f'evaluation/color_similarity_results/{main_or_baseline}_image_color_separation_heatmap.png'
+        )
+        # Reuses the image embeddings extracted above; only the metric and path differ.
+        image_sep_fig, _ = self.create_color_similarity_heatmap(
+            image_embeddings,
+            image_colors,
+            embedding_type='image',
+            save_path=image_sep_save_path,
+            centroid_degradation_strength=centroid_degradation_strength,
+            heatmap_metric="separation",
+        )
+        plt.close(image_sep_fig)
         device=device
     )
+    # Evaluate primary color classification for the main model (keeps previous behavior).
     results = color_encoder.evaluate_color_classification(
+        color_encoder.val_df,
+        max_samples=10000,
     )
+    if not results:
+        print("❌ No results generated - check if primary colors exist in dataset")
+        raise SystemExit(1)
+    print(f"\n✅ Primary color encoding and confusion matrix generation completed!")
+    print(f"📊 Results saved in 'evaluation/color_evaluation_results/' directory")
+    print(f"🎨 Text Primary Color Accuracy: {results['text']['accuracy']*100:.1f}%")
+    print(f"🖼️ Image Primary Color Accuracy: {results['image']['accuracy']*100:.1f}%")
+    # Heatmaps with additional centroid degradation (main model + baseline).
+    dataloader = color_encoder.create_dataloader(color_encoder.val_df, batch_size=8)
+    max_samples = 10000
+    centroid_degradation_strength = COLOR_CENTROID_DEGRADATION_STRENGTH
+    # Your model (GAP-CLIP main checkpoint): overwrites the existing heatmap filenames.
+    color_encoder.generate_similarity_heatmaps(
+        dataloader=dataloader,
+        model_kind='main',
+        max_samples=max_samples,
+        centroid_degradation_strength=centroid_degradation_strength,
+    )
+    # Baseline Fashion-CLIP: saved as fashion_clip_baseline_* heatmaps.
+    color_encoder.generate_similarity_heatmaps(
+        dataloader=dataloader,
+        model_kind='baseline',
+        max_samples=max_samples,
+        centroid_degradation_strength=centroid_degradation_strength,
+    )
+    print("\n✅ Color similarity analysis completed!")
+    print("📊 Similarity heatmaps saved in 'evaluation/color_similarity_results/' directory")