Spaces:

dashm
/

RetailTalk

Running

Dashm

Initial commit — RetailTalk backend for HuggingFace Spaces

26d82f3 11 days ago

3.08 kB

	"""
	Ranker Service — loads the trained CrossEncoder model and scores
	(query, product_title) pairs for relevance ranking.
	Loaded once at startup, reused for all requests.
	"""

	import os
	import torch
	import numpy as np
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	from config import RANKER_MODEL_PATH


	class RankerService:
	    """Score query-product relevance with a trained CrossEncoder.

	    The model and tokenizer are attached by :meth:`load`. Until a load has
	    succeeded, :meth:`rank` falls back to neutral (all-zero) scores so that
	    callers can keep working without CrossEncoder re-ranking.
	    """

	    def __init__(self):
	        # Everything stays unset until load() completes successfully.
	        self.model = None
	        self.tokenizer = None
	        self.device = None
	        self._loaded = False

	    def load(self):
	        """Load the trained CrossEncoder model and tokenizer.

	        Intended to be called once at app startup; calls made after a
	        successful load return immediately. If ``RANKER_MODEL_PATH`` does not
	        exist, a warning is printed and the service stays unloaded. The model
	        is placed on CUDA when ``torch.cuda.is_available()`` is true, otherwise
	        on the CPU, and is switched to eval mode.
	        """
	        if self._loaded:
	            return

	        model_path = RANKER_MODEL_PATH
	        if not os.path.exists(model_path):
	            print(f"[RankerService] WARNING: Model not found at {model_path}")
	            print("[RankerService] Search will use classification-only ranking (no CrossEncoder re-ranking)")
	            return

	        print(f"[RankerService] Loading CrossEncoder from {model_path}...")
	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
	        tokenizer = AutoTokenizer.from_pretrained(model_path)
	        model.eval()

	        # Publish state only after every step has succeeded, so an exception
	        # while loading never leaves a model without its tokenizer attached.
	        self.device = device
	        self.model = model
	        self.tokenizer = tokenizer
	        self._loaded = True
	        print(f"[RankerService] CrossEncoder loaded on {self.device}")

	    def rank(self, query: str, product_titles: list[str], batch_size: int = 64) -> np.ndarray:
	        """Score a query against multiple product titles using the CrossEncoder.

	        Args:
	            query: Search query paired with every title.
	            product_titles: Titles to score; may be empty.
	            batch_size: Number of (query, title) pairs per forward pass.
	                Must be at least 1 when the model is loaded.

	        Returns:
	            A numpy array of relevance scores, shape (N,), in the same order as
	            ``product_titles``. Higher score = more relevant. All zeros when
	            the model has not been loaded.

	        Raises:
	            ValueError: If the model is loaded and ``batch_size`` is below 1.
	        """
	        if not self._loaded:
	            # Neutral scores keep callers working without the CrossEncoder.
	            return np.zeros(len(product_titles))

	        if batch_size < 1:
	            # A negative step would make the batching loop empty and silently
	            # return all zeros, which looks exactly like the unloaded fallback.
	            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

	        n = len(product_titles)
	        scores = np.zeros(n)

	        with torch.no_grad():
	            for start in range(0, n, batch_size):
	                stop = min(start + batch_size, n)
	                batch_titles = product_titles[start:stop]

	                features = self.tokenizer(
	                    [query] * len(batch_titles),
	                    batch_titles,
	                    padding=True,
	                    truncation=True,
	                    return_tensors="pt",
	                ).to(self.device)

	                logits = self.model(**features).logits
	                # NOTE(review): assumes the model emits a single logit per
	                # pair, so squeeze(-1) yields shape (batch,) - confirm.
	                # .float() is a no-op for float32 and lets reduced-precision
	                # logits (e.g. bfloat16) be converted to numpy.
	                scores[start:stop] = logits.squeeze(-1).float().cpu().numpy()

	        return scores

	    def normalize_scores(self, scores: np.ndarray) -> np.ndarray:
	        """Normalize scores to the [0, 1] range using min-max normalization.

	        Args:
	            scores: Array (or array-like) of raw scores.

	        Returns:
	            The rescaled scores. Empty input is returned unchanged, and when
	            all scores are (nearly) identical every entry becomes 1.
	        """
	        scores = np.asarray(scores)
	        if scores.size == 0:
	            return scores

	        min_s = scores.min()
	        span = scores.max() - min_s
	        if span < 1e-8:
	            # Degenerate range: avoid dividing by ~0 and treat all as top score.
	            return np.ones_like(scores)
	        return (scores - min_s) / span


	# Shared module-level instance. It is constructed unloaded: call
	# ranker_service.load() before relying on rank() for real scores, since
	# rank() returns all zeros until a load has succeeded.
	ranker_service = RankerService()