swirl
/

two-tower-recommender

swirl-wine-recommendations

model_hub_mixin

pytorch_model_hub_mixin

Model card Files Files and versions

two-tower-recommender / wine_tower.py

swirl's picture

Upload wine_tower.py with huggingface_hub

b9b6890 verified 3 months ago

history blame contribute delete

4.03 kB

	"""
	Mordor - Wine Tower

	Neural network that encodes wine characteristics from embedding + categorical features.
	"""

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from typing import Dict

	from .config import (
	EMBEDDING_DIM,
	WINE_VECTOR_DIM,
	HIDDEN_DIM,
	CATEGORICAL_ENCODING_DIM,
	)


	class WineTower(nn.Module):
	    """
	    Mordor: encode a wine from its text embedding and categorical metadata.

	    Architecture:
	        1. Concatenate the wine embedding with the categorical encoding
	           along the last dimension.
	        2. Two-layer MLP with ReLU and dropout in between:
	           (embedding_dim + categorical_dim) -> hidden_dim -> output_dim
	        3. L2-normalize the result along the last dimension.

	    The default sizes come from ``.config``. Earlier documentation of this
	    class cites (768 + 31) -> 256 -> 128 and a google-text-embedding-004
	    input vector; confirm those figures against the config module.

	    Input:
	        wine_embedding: (batch, embedding_dim)
	        categorical_features: (batch, categorical_dim), one-hot encoded

	    Output:
	        wine_vector: (batch, output_dim), L2-normalized
	    """

	    def __init__(
	        self,
	        embedding_dim: int = EMBEDDING_DIM,
	        categorical_dim: int = CATEGORICAL_ENCODING_DIM,
	        hidden_dim: int = HIDDEN_DIM,
	        output_dim: int = WINE_VECTOR_DIM,
	        dropout_rate: float = 0.1,
	    ):
	        """
	        Build the projection layers.

	        Args:
	            embedding_dim: Size of the incoming wine embedding.
	            categorical_dim: Size of the concatenated one-hot encoding.
	            hidden_dim: Width of the hidden layer.
	            output_dim: Size of the produced wine vector.
	            dropout_rate: Dropout probability applied after the hidden
	                layer. Defaults to 0.1, the value that used to be
	                hard-coded.
	        """
	        super().__init__()

	        self.embedding_dim = embedding_dim
	        self.categorical_dim = categorical_dim
	        self.hidden_dim = hidden_dim
	        self.output_dim = output_dim

	        # Input dimension: embedding + categorical
	        input_dim = embedding_dim + categorical_dim

	        # The attribute names fc1 / fc2 determine the state_dict keys, so
	        # they must stay as they are for saved checkpoints to keep loading.
	        self.fc1 = nn.Linear(input_dim, hidden_dim)
	        self.fc2 = nn.Linear(hidden_dim, output_dim)

	        # Dropout for regularization (a no-op in eval mode)
	        self.dropout = nn.Dropout(dropout_rate)

	    def forward(
	        self,
	        wine_embedding: torch.Tensor,
	        categorical_features: torch.Tensor,
	    ) -> torch.Tensor:
	        """
	        Forward pass through the wine tower.

	        Args:
	            wine_embedding: (batch, embedding_dim)
	            categorical_features: (batch, categorical_dim) - one-hot encoded

	        Returns:
	            wine_vector: (batch, output_dim) - L2 normalized
	        """
	        # Concatenate embedding and categorical features
	        x = torch.cat([wine_embedding, categorical_features], dim=-1)

	        # MLP projection
	        x = F.relu(self.fc1(x))
	        x = self.dropout(x)
	        wine_vector = self.fc2(x)

	        # L2 normalize along the feature dimension
	        wine_vector = F.normalize(wine_vector, p=2, dim=-1)

	        return wine_vector


	# Normalised (stripped, lower-cased, NFC) label -> one-hot slot for each
	# categorical feature. Built once at import time instead of on every call.
	# "rosé" and "rose" deliberately share slot 2.
	_CATEGORICAL_VOCAB_MAPS = {
	    "color": {
	        "red": 0,
	        "white": 1,
	        "rosé": 2,
	        "rose": 2,
	        "orange": 3,
	        "sparkling": 4,
	    },
	    "type": {"still": 0, "sparkling": 1, "fortified": 2, "dessert": 3},
	    "style": {
	        "natural": 0,
	        "organic": 1,
	        "biodynamic": 2,
	        "conventional": 3,
	        "sustainable": 4,
	        "vegan": 5,
	        "other": 6,
	    },
	    "climate_type": {"cool": 0, "moderate": 1, "warm": 2, "hot": 3},
	    "climate_band": {"cool": 0, "moderate": 1, "warm": 2, "hot": 3},
	    "vintage_band": {"young": 0, "developing": 1, "mature": 2, "non_vintage": 3},
	}


	def encode_categorical_features(wine_data: Dict) -> torch.Tensor:
	    """
	    Convert a wine metadata dict to a concatenated one-hot tensor.

	    For every feature in ``CATEGORICAL_FEATURES`` a zero vector of length
	    ``CATEGORICAL_VOCAB_SIZES[feature]`` is created and the slot matching
	    the feature's value is set to 1.0. A value that is missing, falsy, or
	    not in the vocabulary leaves that feature's segment all zeros, as does
	    a feature that has no vocabulary table here.

	    Values are converted with ``str()``, normalized to Unicode NFC,
	    stripped of surrounding whitespace and lower-cased before lookup, so
	    ``" Red "`` and a decomposed spelling of ``"rosé"`` are recognized.

	    Args:
	        wine_data: Dict with keys: color, type, style, climate_type,
	            climate_band, vintage_band

	    Returns:
	        1-D tensor whose length is the sum of the configured vocabulary
	        sizes of all features in ``CATEGORICAL_FEATURES``.

	    Raises:
	        IndexError: If a vocabulary slot does not fit into the size
	            configured for its feature (vocabulary and config disagree).

	    NOTE(review): the vocabularies in this module hold 28 slots in total;
	    confirm they agree with the sizes in ``.config``.
	    """
	    import unicodedata

	    from .config import CATEGORICAL_VOCAB_SIZES, CATEGORICAL_FEATURES

	    encoded = []

	    for feature in CATEGORICAL_FEATURES:
	        vocab_size = CATEGORICAL_VOCAB_SIZES[feature]
	        one_hot = torch.zeros(vocab_size)

	        value = wine_data.get(feature)
	        vocab = _CATEGORICAL_VOCAB_MAPS.get(feature)
	        if value and vocab is not None:
	            # NFC first so "e" + combining acute compares equal to "é".
	            key = unicodedata.normalize("NFC", str(value)).strip().lower()
	            idx = vocab.get(key)
	            if idx is not None:
	                if idx >= vocab_size:
	                    # Same exception type the tensor indexing would raise,
	                    # but naming the feature that is misconfigured.
	                    raise IndexError(
	                        f"vocabulary index {idx} for feature {feature!r} "
	                        f"is out of range for configured size {vocab_size}"
	                    )
	                one_hot[idx] = 1.0

	        encoded.append(one_hot)

	    return torch.cat(encoded, dim=0)