Spaces:
Running
Running
| """ | |
| Ranker Service — loads the trained CrossEncoder model and scores | |
| (query, product_title) pairs for relevance ranking. | |
| Loaded once at startup, reused for all requests. | |
| """ | |
| import os | |
| import torch | |
| import numpy as np | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
| from config import RANKER_MODEL_PATH | |
class RankerService:
    """Score query/product-title relevance with a trained CrossEncoder.

    Meant to be used as one shared instance: call :meth:`load` once at
    application startup, then call :meth:`rank` for each request. When the
    model directory is missing the service stays unloaded and :meth:`rank`
    returns neutral (all-zero) scores instead of failing.
    """

    def __init__(self):
        self.model = None  # sequence-classification model, set by load()
        self.tokenizer = None  # tokenizer loaded from the same path, set by load()
        self.device = None  # torch.device the model was moved to, set by load()
        self._loaded = False  # True only after load() has fully succeeded

    def load(self):
        """Load the CrossEncoder model and tokenizer from ``RANKER_MODEL_PATH``.

        Safe to call repeatedly: once loading has succeeded, later calls
        return immediately. If the path does not exist, a warning is printed
        and the service is left unloaded.
        """
        if self._loaded:
            return

        model_path = RANKER_MODEL_PATH
        if not os.path.exists(model_path):
            print(f"[RankerService] WARNING: Model not found at {model_path}")
            print("[RankerService] Search will use classification-only ranking (no CrossEncoder re-ranking)")
            return

        print(f"[RankerService] Loading CrossEncoder from {model_path}...")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model.eval()

        # Publish the attributes only after every step succeeded, so a failure
        # part-way through (e.g. tokenizer files missing) never leaves the
        # instance holding a model without its tokenizer.
        self.device = device
        self.model = model
        self.tokenizer = tokenizer
        self._loaded = True
        print(f"[RankerService] CrossEncoder loaded on {self.device}")

    def rank(self, query: str, product_titles: list[str], batch_size: int = 64) -> np.ndarray:
        """Score one query against many product titles.

        Args:
            query: The search query, paired with every title.
            product_titles: Titles to score.
            batch_size: Number of (query, title) pairs per forward pass.
                Must be at least 1.

        Returns:
            A float array of shape ``(len(product_titles),)``; a higher score
            means more relevant. All zeros when the model is not loaded.

        Raises:
            ValueError: If ``batch_size`` is less than 1.
        """
        # A negative step would make range() empty and silently yield all-zero
        # scores; a zero step fails with an unrelated range() message.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        n = len(product_titles)
        scores = np.zeros(n)
        if not self._loaded:
            # Neutral scores: callers fall back to their own ordering.
            return scores

        with torch.no_grad():
            for start in range(0, n, batch_size):
                stop = min(start + batch_size, n)
                batch_titles = product_titles[start:stop]
                batch_queries = [query] * len(batch_titles)
                features = self.tokenizer(
                    batch_queries,
                    batch_titles,
                    padding=True,
                    truncation=True,
                    return_tensors="pt",
                ).to(self.device)
                logits = self.model(**features).logits
                # .float() is a no-op for float32 and keeps .numpy() working
                # for reduced-precision checkpoints (NumPy has no bfloat16).
                scores[start:stop] = logits.squeeze(-1).float().cpu().numpy()
        return scores

    def normalize_scores(self, scores: np.ndarray) -> np.ndarray:
        """Min-max normalize scores into the [0, 1] range.

        Args:
            scores: Array (or any array-like) of raw scores.

        Returns:
            A floating-point array of the same shape. Empty input is returned
            empty; when all scores are (nearly) identical every entry is 1.0.
        """
        values = np.asarray(scores)
        if not np.issubdtype(values.dtype, np.floating):
            # Integer/bool input would otherwise give integer ones in the
            # constant case but floats everywhere else.
            values = values.astype(float)
        if values.size == 0:
            return values

        low = values.min()
        spread = values.max() - low
        if spread < 1e-8:
            return np.ones_like(values)  # All same score -> all 1.0
        return (values - low) / spread
| # Shared module-level instance, created (but not loaded) at import time; rank() returns all-zero scores until load() has succeeded. | |
| ranker_service = RankerService() | |