"""
Ranker Service — loads the trained CrossEncoder model and scores
(query, product_title) pairs for relevance ranking.
Loaded once at startup, reused for all requests.
"""

import os
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from config import RANKER_MODEL_PATH


class RankerService:
    """Score query/product-title relevance with a trained CrossEncoder.

    Intended to be used as one shared instance: call :meth:`load` once at
    application startup, then :meth:`rank` for each request. If the model
    directory is missing, the service stays unloaded and :meth:`rank`
    degrades to returning all-zero (neutral) scores.
    """

    def __init__(self):
        # Everything stays unset until load() succeeds; construction does no I/O.
        self.model = None
        self.tokenizer = None
        self.device = None
        self._loaded = False

    def load(self):
        """Load the CrossEncoder model and tokenizer. Call once at app startup.

        Returns immediately if a previous load already succeeded. If
        ``RANKER_MODEL_PATH`` does not exist, a warning is printed and the
        service is left unloaded. Exceptions raised while loading the model
        or tokenizer propagate to the caller.
        """
        if self._loaded:
            return

        model_path = RANKER_MODEL_PATH
        if not os.path.exists(model_path):
            print(f"[RankerService] WARNING: Model not found at {model_path}")
            print("[RankerService] Search will use classification-only ranking (no CrossEncoder re-ranking)")
            return

        print(f"[RankerService] Loading CrossEncoder from {model_path}...")
        # Build everything in locals first so that a failure part-way through
        # (e.g. the tokenizer fails to load) never leaves the instance holding
        # a model without a matching tokenizer.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model.eval()  # switch the module to evaluation mode for scoring

        self.device = device
        self.model = model
        self.tokenizer = tokenizer
        self._loaded = True
        print(f"[RankerService] CrossEncoder loaded on {self.device}")

    def rank(self, query: str, product_titles: list[str], batch_size: int = 64) -> np.ndarray:
        """Score a query against multiple product titles using the CrossEncoder.

        Args:
            query: Search query; paired with every title.
            product_titles: Titles to score. May be empty.
            batch_size: Maximum number of (query, title) pairs per forward
                pass. Must be at least 1.

        Returns:
            Array of shape ``(len(product_titles),)`` holding the model's raw
            logits (higher = more relevant), or all zeros when the model has
            not been loaded.

        Raises:
            ValueError: If ``batch_size`` is less than 1. A negative value
                would otherwise skip the scoring loop entirely and silently
                return all-zero scores.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        n = len(product_titles)
        scores = np.zeros(n)
        if not self._loaded:
            # Neutral scores: no CrossEncoder available to re-rank with.
            return scores

        with torch.no_grad():
            for start in range(0, n, batch_size):
                batch_titles = product_titles[start:start + batch_size]
                stop = start + len(batch_titles)
                # The same query is paired with each title in the batch.
                batch_queries = [query] * len(batch_titles)

                features = self.tokenizer(
                    batch_queries,
                    batch_titles,
                    padding=True,
                    truncation=True,
                    return_tensors="pt",
                ).to(self.device)

                logits = self.model(**features).logits
                # NOTE(review): squeeze(-1) assumes the model emits a single
                # relevance logit per pair (num_labels == 1) — confirm.
                # .float() makes reduced-precision logits (e.g. bfloat16,
                # which NumPy cannot represent) safe to convert.
                scores[start:stop] = logits.squeeze(-1).float().cpu().numpy()

        return scores

    def normalize_scores(self, scores: np.ndarray) -> np.ndarray:
        """Normalize scores to the [0, 1] range using min-max normalization.

        Args:
            scores: Array (or any array-like, such as a list) of raw scores.

        Returns:
            Floating-point array of the same shape. Empty input is returned
            as an empty array; if all scores are (nearly) identical, every
            entry is 1.0.
        """
        values = np.asarray(scores)
        if not np.issubdtype(values.dtype, np.floating):
            # Integer input would otherwise give integer ones in the
            # degenerate branch but floats in the normal branch.
            values = values.astype(np.float64)
        if values.size == 0:
            return values

        lowest = values.min()
        spread = values.max() - lowest
        if spread < 1e-8:
            return np.ones_like(values)  # All same score → all 1.0
        return (values - lowest) / spread


# Global singleton instance shared by importers of this module. Constructing
# it does no I/O; the model is only loaded when ranker_service.load() is called.
ranker_service = RankerService()