| """ |
| Two-Tower Model |
| |
| Combined model with User Tower (Isengard) and Wine Tower (Mordor). |
| Computes match score via dot product of normalized embeddings. |
| |
| Integrates with HuggingFace Hub for model upload/download via PyTorchModelHubMixin. |
| """ |
|
|
| import torch |
| import torch.nn as nn |
| from typing import Optional |
| import io |
|
|
# huggingface_hub is an optional dependency: when it is missing the model must
# still be importable, just without push_to_hub()/from_pretrained().
try:
    from huggingface_hub import PyTorchModelHubMixin

    HAS_HF_HUB = True
except ImportError:

    class PyTorchModelHubMixin:
        """No-op stand-in used when ``huggingface_hub`` is not installed.

        Classes deriving from the real mixin pass Hub metadata as class
        keywords (e.g. ``library_name=...``, ``tags=...``). A bare ``object``
        fallback rejects those keywords with ``TypeError`` at class-creation
        time, so this stand-in accepts and discards them instead.
        """

        def __init_subclass__(cls, **kwargs):
            # Hub-only class keywords are meaningless without the Hub client;
            # drop them rather than forwarding to object.__init_subclass__,
            # which takes no keyword arguments.
            super().__init_subclass__()

    HAS_HF_HUB = False
|
|
| from .user_tower import UserTower |
| from .wine_tower import WineTower |
| from .config import ( |
| EMBEDDING_DIM, |
| USER_VECTOR_DIM, |
| WINE_VECTOR_DIM, |
| HIDDEN_DIM, |
| CATEGORICAL_ENCODING_DIM, |
| ) |
|
|
|
|
class TwoTowerModel(
    nn.Module,
    PyTorchModelHubMixin,
    library_name="swirl-wine-recommendations",
    tags=["recommendation", "two-tower", "wine"],
):
    """
    Two-Tower Recommendation Model

    Isengard (User Tower): Encodes user preferences from reviewed wines
    Mordor (Wine Tower): Encodes wine characteristics

    Score = (dot_product(user_vector, wine_vector) + 1) * 50

    The towers are expected to emit L2-normalized vectors, so the dot product
    is in [-1, 1]; adding 1 and multiplying by 50 maps it to [0, 100] for a
    match percentage.

    HuggingFace Integration:
        # Upload to Hub
        model.push_to_hub("swirl/two-tower-recommender")

        # Load from Hub
        model = TwoTowerModel.from_pretrained("swirl/two-tower-recommender")
    """

    def __init__(
        self,
        embedding_dim: int = EMBEDDING_DIM,
        hidden_dim: int = HIDDEN_DIM,
        output_dim: int = USER_VECTOR_DIM,
        categorical_dim: int = CATEGORICAL_ENCODING_DIM,
    ):
        """
        Build both towers.

        Args:
            embedding_dim: Forwarded to both towers as ``embedding_dim``.
            hidden_dim: Forwarded to both towers as ``hidden_dim``.
            output_dim: Forwarded to both towers as ``output_dim``, so the two
                output vectors always have the same size.
            categorical_dim: Forwarded to the wine tower as ``categorical_dim``.
        """
        super().__init__()

        # Guards the package-level constants; the towers themselves share
        # ``output_dim`` and therefore cannot disagree with each other.
        assert USER_VECTOR_DIM == WINE_VECTOR_DIM, "Tower output dims must match"

        # Constructor arguments, kept so a checkpoint can rebuild the model.
        self.config = {
            "embedding_dim": embedding_dim,
            "hidden_dim": hidden_dim,
            "output_dim": output_dim,
            "categorical_dim": categorical_dim,
        }

        self.user_tower = UserTower(
            embedding_dim=embedding_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
        )

        self.wine_tower = WineTower(
            embedding_dim=embedding_dim,
            categorical_dim=categorical_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
        )

    def forward(
        self,
        user_wine_embeddings: torch.Tensor,
        user_ratings: torch.Tensor,
        candidate_wine_embedding: torch.Tensor,
        candidate_categorical: torch.Tensor,
        user_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Forward pass computing match scores.

        Args:
            user_wine_embeddings: (batch, num_wines, 768)
            user_ratings: (batch, num_wines)
            candidate_wine_embedding: (batch, 768)
            candidate_categorical: (batch, categorical_dim)
            user_mask: (batch, num_wines) optional padding mask

        Returns:
            scores: (batch,) match scores in [0, 100]
        """
        # Delegate to the public helpers so the end-to-end path and the
        # cached-embedding path can never drift apart.
        user_vector = self.get_user_embedding(
            user_wine_embeddings, user_ratings, user_mask
        )
        wine_vector = self.get_wine_embedding(
            candidate_wine_embedding, candidate_categorical
        )
        return self.score_from_embeddings(user_vector, wine_vector)

    def get_user_embedding(
        self,
        wine_embeddings: torch.Tensor,
        ratings: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Get user embedding for caching/batch scoring."""
        return self.user_tower(wine_embeddings, ratings, mask)

    def get_wine_embedding(
        self,
        wine_embedding: torch.Tensor,
        categorical_features: torch.Tensor,
    ) -> torch.Tensor:
        """Get wine embedding for caching/batch scoring."""
        return self.wine_tower(wine_embedding, categorical_features)

    def score_from_embeddings(
        self,
        user_vector: torch.Tensor,
        wine_vector: torch.Tensor,
    ) -> torch.Tensor:
        """
        Score from pre-computed tower embeddings.

        Takes the dot product over the last dimension and maps it with
        ``(dot + 1) * 50``, i.e. [-1, 1] becomes [0, 100].
        """
        dot_product = (user_vector * wine_vector).sum(dim=-1)
        return (dot_product + 1) * 50

    def _checkpoint_payload(self) -> dict:
        """Return the dict layout shared by ``save`` and ``to_bytes``."""
        return {
            "state_dict": self.state_dict(),
            "config": self.config,
        }

    @classmethod
    def _restore_from_checkpoint(cls, checkpoint: dict) -> "TwoTowerModel":
        """Rebuild a model from a payload made by ``_checkpoint_payload``."""
        model = cls(**checkpoint["config"])
        model.load_state_dict(checkpoint["state_dict"])
        # Restored models are returned in eval mode.
        model.eval()
        return model

    def save(self, path: str) -> None:
        """Save model state dict and constructor config to file."""
        torch.save(self._checkpoint_payload(), path)

    @classmethod
    def load(cls, path: str) -> "TwoTowerModel":
        """Load model from a file written by ``save``; returned in eval mode."""
        # NOTE(security): torch.load unpickles its input. Only load files from
        # trusted sources, or pass weights_only=True where the installed
        # PyTorch supports it.
        checkpoint = torch.load(path, map_location="cpu")
        return cls._restore_from_checkpoint(checkpoint)

    def to_bytes(self) -> bytes:
        """Serialize model to bytes for storage."""
        buffer = io.BytesIO()
        torch.save(self._checkpoint_payload(), buffer)
        return buffer.getvalue()

    @classmethod
    def from_bytes(cls, data: bytes) -> "TwoTowerModel":
        """Load model from bytes made by ``to_bytes``; returned in eval mode."""
        # NOTE(security): torch.load unpickles its input. Only pass bytes from
        # trusted sources, or use weights_only=True where the installed
        # PyTorch supports it.
        checkpoint = torch.load(io.BytesIO(data), map_location="cpu")
        return cls._restore_from_checkpoint(checkpoint)
|
|
|
|
| |
| |
| |
|
|
|
|
def create_training_script() -> str:
    """
    Generate a training script for HuggingFace Spaces AutoTrain.

    This script can be uploaded to a HF Space for remote GPU training.
    Nothing is executed here: the function only returns the script's source
    text, which the caller is expected to write to a file (e.g. script.py).

    Usage:
        autotrain spacerunner --project-name two-tower-training \\
            --script-path script.py \\
            --username swirl \\
            --token $HF_TOKEN \\
            --backend spaces-a10g-large

    Returns:
        The full source code of the training script as a single string.
    """
    # NOTE: in the template below the data loading and the train_model() call
    # are commented out, so as written the script pushes a freshly
    # initialised model to the Hub.
    script = '''
"""
Two-Tower Model Training Script for HuggingFace Spaces

Run with: autotrain spacerunner --script-path script.py
"""

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import login
import os

# Login to HF
login(token=os.environ.get("HF_TOKEN"))

from two_tower.model import TwoTowerModel
from two_tower.config import TRIPLET_MARGIN, LEARNING_RATE, BATCH_SIZE

class WineRecommendationDataset(Dataset):
    """Dataset of (user_wines, positive_wine, negative_wine) triplets."""

    def __init__(self, triplets):
        self.triplets = triplets

    def __len__(self):
        return len(self.triplets)

    def __getitem__(self, idx):
        return self.triplets[idx]


def train_model(
    model: TwoTowerModel,
    train_loader: DataLoader,
    epochs: int = 10,
    lr: float = LEARNING_RATE,
):
    """Train the two-tower model using triplet loss."""
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    triplet_loss = nn.TripletMarginLoss(margin=TRIPLET_MARGIN)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()

            # Get embeddings
            anchor = model.get_user_embedding(batch["user_wines"], batch["ratings"])
            positive = model.get_wine_embedding(batch["positive_wine"], batch["positive_cat"])
            negative = model.get_wine_embedding(batch["negative_wine"], batch["negative_cat"])

            # Compute triplet loss
            loss = triplet_loss(anchor, positive, negative)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

    return model


if __name__ == "__main__":
    # Load training data (would be fetched from your database)
    # triplets = load_training_triplets()

    # Create model
    model = TwoTowerModel()

    # Train
    # train_loader = DataLoader(WineRecommendationDataset(triplets), batch_size=BATCH_SIZE)
    # model = train_model(model, train_loader, epochs=10)

    # Push to Hub
    model.push_to_hub("swirl/two-tower-recommender")
    print("Model uploaded to HuggingFace Hub!")
'''
    return script
|
|