RetailTalk / backend /models /ranker.py
Dashm
Initial commit — RetailTalk backend for HuggingFace Spaces
26d82f3
"""
Ranker Service — loads the trained CrossEncoder model and scores
(query, product_title) pairs for relevance ranking.
Loaded once at startup, reused for all requests.
"""
import os
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from config import RANKER_MODEL_PATH
class RankerService:
    """Score query/product-title relevance with a trained CrossEncoder.

    Intended to be used as a single shared instance: call :meth:`load` once
    at application startup, then :meth:`rank` for each request. Until a model
    has been loaded successfully, :meth:`rank` returns neutral (all-zero)
    scores instead of failing.
    """

    def __init__(self):
        # Everything stays unset until load() succeeds.
        self.model = None
        self.tokenizer = None
        self.device = None
        self._loaded = False

    def load(self):
        """Load the trained CrossEncoder model and tokenizer.

        Call once at app startup. Subsequent calls are no-ops once loading
        has succeeded. If ``RANKER_MODEL_PATH`` does not exist, a warning is
        printed and the service stays in its unloaded state.
        """
        if self._loaded:
            return

        model_path = RANKER_MODEL_PATH
        if not os.path.exists(model_path):
            print(f"[RankerService] WARNING: Model not found at {model_path}")
            print("[RankerService] Search will use classification-only ranking (no CrossEncoder re-ranking)")
            return

        print(f"[RankerService] Loading CrossEncoder from {model_path}...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Build everything in locals first: if loading the model or the
        # tokenizer raises, the instance is left fully unloaded rather than
        # holding a model without a tokenizer.
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model.eval()

        self.device = device
        self.model = model
        self.tokenizer = tokenizer
        self._loaded = True
        print(f"[RankerService] CrossEncoder loaded on {self.device}")

    def rank(self, query: str, product_titles: list[str], batch_size: int = 64) -> np.ndarray:
        """Score a query against multiple product titles using the CrossEncoder.

        Args:
            query: The search query paired with every title.
            product_titles: Titles to score; may be empty.
            batch_size: Number of (query, title) pairs per forward pass.
                Must be at least 1 when the model is loaded.

        Returns:
            A numpy array of relevance scores, shape (N,), in the same order
            as ``product_titles``. Higher score = more relevant. If the model
            has not been loaded, all scores are zero.

        Raises:
            ValueError: If the model is loaded and ``batch_size`` is less
                than 1.
        """
        n = len(product_titles)
        if not self._loaded:
            # Return neutral scores if model not loaded.
            return np.zeros(n)

        # A negative step would make the batching loop empty and silently
        # return all zeros, which is indistinguishable from the unloaded
        # fallback; reject it explicitly instead.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        scores = np.zeros(n)
        with torch.no_grad():
            for start in range(0, n, batch_size):
                batch_titles = product_titles[start:start + batch_size]
                stop = start + len(batch_titles)
                features = self.tokenizer(
                    [query] * len(batch_titles),
                    batch_titles,
                    padding=True,
                    truncation=True,
                    return_tensors="pt",
                ).to(self.device)
                logits = self.model(**features).logits
                # NOTE(review): squeeze(-1) assumes the model emits a single
                # logit per pair - confirm against the trained model's head.
                scores[start:stop] = logits.squeeze(-1).cpu().numpy()
        return scores

    def normalize_scores(self, scores: np.ndarray) -> np.ndarray:
        """Normalize scores to the [0, 1] range using min-max normalization.

        Args:
            scores: Scores to rescale. Any array-like is accepted;
                non-floating input is converted to float so both return
                paths yield a floating-point array.

        Returns:
            An array of the same shape. Empty input is returned empty; when
            all scores are (nearly) identical every entry is 1.0.
        """
        values = np.asarray(scores)
        if not np.issubdtype(values.dtype, np.floating):
            values = values.astype(float)
        if values.size == 0:
            return values

        lowest = values.min()
        spread = values.max() - lowest
        if spread < 1e-8:
            return np.ones_like(values)  # All same score -> all 1.0
        return (values - lowest) / spread
# Global singleton instance shared by the whole process. Constructing it does
# not load the model; call ranker_service.load() first, otherwise rank()
# returns all-zero scores.
ranker_service = RankerService()