swirl
/

two-tower-recommender

+"""
+Isengard - User Tower
+Neural network that encodes a user's wine preferences from their reviewed wines.
+Uses attention-weighted aggregation of wine embeddings based on user ratings.
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from typing import Optional
+from .config import (
+    EMBEDDING_DIM,
+    USER_VECTOR_DIM,
+    HIDDEN_DIM,
+)
class UserTower(nn.Module):
    """
    Isengard: Encodes user preferences from their reviewed wines.

    Architecture:
        1. Rating-weighted attention over wine embeddings
        2. MLP: embedding_dim → hidden_dim → output_dim
        3. L2 normalization to unit sphere

    Input:
        wine_embeddings: (batch, num_wines, embedding_dim) - embeddings of reviewed wines
        ratings: (batch, num_wines) - user ratings for each wine
        mask: (batch, num_wines) - optional mask for padding

    Output:
        user_vector: (batch, output_dim) - normalized user embedding
    """

    # Ratings become attention logits via (rating - RATING_CENTER) / RATING_SCALE.
    RATING_CENTER = 2.5
    RATING_SCALE = 2.5

    def __init__(
        self,
        embedding_dim: int = EMBEDDING_DIM,
        hidden_dim: int = HIDDEN_DIM,
        output_dim: int = USER_VECTOR_DIM,
        dropout: float = 0.1,
    ):
        """
        Build the projection MLP.

        Args:
            embedding_dim: size of each incoming wine embedding
            hidden_dim: width of the hidden MLP layer
            output_dim: size of the produced user vector
            dropout: dropout probability applied after the hidden layer
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.output_dim = output_dim
        # MLP layers
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        # Dropout for regularization
        self.dropout = nn.Dropout(dropout)

    def forward(
        self,
        wine_embeddings: torch.Tensor,
        ratings: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Forward pass through the user tower.

        Args:
            wine_embeddings: (batch, num_wines, embedding_dim)
            ratings: (batch, num_wines) - raw ratings (1-5 scale)
            mask: (batch, num_wines) - 1 for valid wines, 0 for padding.
                Positions where the mask is 0 receive no attention; other
                values scale the attention weight multiplicatively.

        Returns:
            user_vector: (batch, output_dim) - L2 normalized. A row whose
            wines are all masked out aggregates to a zero vector before
            the MLP.
        """
        # Ratings → attention logits: higher rating = larger logit.
        # On a 1-5 scale this maps 1 → -0.6 and 5 → 1.0.
        scores = (ratings - self.RATING_CENTER) / self.RATING_SCALE

        if mask is not None:
            # Remove padding *before* the softmax so that whatever value sits
            # in a padded rating slot (0, NaN, a sentinel, ...) can neither
            # dominate nor poison the row. A large finite fill is used rather
            # than -inf so a fully padded row stays NaN-free.
            scores = scores.masked_fill(mask == 0, torch.finfo(scores.dtype).min)

        attention_weights = F.softmax(scores, dim=-1)

        if mask is not None:
            # Zeroes fully padded rows (uniform after the softmax above) and
            # keeps multiplicative semantics for non-binary masks.
            attention_weights = attention_weights * mask
            # Clamp instead of adding an epsilon: rows with valid wines are
            # normalized exactly, and all-padding rows divide 0 by a tiny
            # positive number, giving 0 rather than NaN.
            normalizer = attention_weights.sum(dim=-1, keepdim=True).clamp_min(
                torch.finfo(attention_weights.dtype).tiny
            )
            attention_weights = attention_weights / normalizer

        # bmm requires both operands to share a dtype; the weights inherit
        # theirs from ratings/mask, which may differ from the embeddings.
        attention_weights = attention_weights.to(wine_embeddings.dtype)

        # Weighted aggregation: (batch, 1, num_wines) @ (batch, num_wines, embed_dim)
        # Result: (batch, embed_dim)
        aggregated = torch.bmm(
            attention_weights.unsqueeze(1),  # (batch, 1, num_wines)
            wine_embeddings,  # (batch, num_wines, embed_dim)
        ).squeeze(1)  # (batch, embed_dim)

        # MLP projection
        x = F.relu(self.fc1(aggregated))
        x = self.dropout(x)
        user_vector = self.fc2(x)

        # L2 normalize to unit sphere
        user_vector = F.normalize(user_vector, p=2, dim=-1)
        return user_vector