andykr1k committed on
Commit 07f113d · 1 Parent(s): 93ab020

Added more post interactions

Interaction tracking now extends beyond likes and comments to comment likes, replies, and reply likes, each with a tunable weight (Recommender.__init__) and an exponential time-decay factor (decay_weight). User profiles are normalized after each update, freshness switches from a hyperbolic to an exponential decay, scoring moves into a compute_score helper, and a standalone test.py exercises the pipeline end to end.

Files changed (2)
  1. app.py +161 -35
  2. test.py +297 -0
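The core of the change is the decay_weight helper added to app.py below: every interaction now contributes to post popularity and to the user profile with weight exp(-decay_rate * age_in_seconds). A standalone sketch (duplicating the committed formula; the ages are made up) shows how sharply the default decay_rate=0.0001 discounts older interactions:

import math
from datetime import datetime, timedelta, timezone

def decay_weight(interaction_time: datetime, current_time: datetime,
                 decay_rate: float = 0.0001) -> float:
    # Same formula as the committed helper: exp(-rate * age_in_seconds).
    return math.exp(-decay_rate * (current_time - interaction_time).total_seconds())

now = datetime.now(timezone.utc)
for age in (timedelta(hours=1), timedelta(hours=2), timedelta(days=1), timedelta(days=7)):
    print(age, decay_weight(now - age, now))
# 1 hour -> ~0.6977, 2 hours -> ~0.4868, 1 day -> ~0.00018, 7 days -> ~0.0

With this default the weight halves roughly every 1.9 hours (ln 2 / 0.0001 ≈ 6,931 seconds), so interactions older than a day contribute almost nothing; a smaller decay_rate would give the recommender a multi-day memory.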
app.py CHANGED
@@ -13,6 +13,7 @@ import logging
 from zoneinfo import ZoneInfo
 from sentence_transformers import SentenceTransformer
 from apscheduler.schedulers.background import BackgroundScheduler
+import math
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -55,12 +56,14 @@ sentence_model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/tmp/cache')
 SUPABASE_URL = os.getenv('supabaseUrl')
 SUPABASE_KEY = os.getenv('supabaseAnonKey')
 
+
 def get_supabase_client():
     global supabase_client
     if supabase_client is None:
         supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
     return supabase_client
 
+
 def parse_datetime(dt_str: str) -> datetime:
     """Parse ISO datetime string and ensure correct microsecond precision."""
     try:
@@ -69,7 +72,7 @@ def parse_datetime(dt_str: str) -> datetime:
             time_part, tz_part = time_part.split('+')
             if '.' in time_part:
                 time_without_micro, micro = time_part.split('.')
-                micro = micro.ljust(6, '0')  # Ensure microseconds are 6 digits
+                micro = micro.ljust(6, '0')
                 time_part = f"{time_without_micro}.{micro}"
             dt_str = f"{date_part}T{time_part}+00:00"
         return datetime.fromisoformat(dt_str).astimezone(TIMEZONE)
@@ -77,13 +80,46 @@ def parse_datetime(dt_str: str) -> datetime:
         logger.error(f"Error parsing datetime: {dt_str} - {str(e)}")
         raise
 
+
+def decay_weight(interaction_time: datetime, current_time: datetime, decay_rate: float = 0.0001) -> float:
+    """
+    Compute an exponential decay weight for an interaction based on its age.
+    The decay_rate can be tuned to control how fast interactions lose weight.
+    """
+    time_diff = (current_time - interaction_time).total_seconds()
+    return math.exp(-decay_rate * time_diff)
+
+
+def normalize_profile(profile: np.ndarray) -> np.ndarray:
+    norm = np.linalg.norm(profile)
+    return profile / norm if norm > 0 else profile
+
+
+def compute_score(sim_score: float, popularity: float, freshness: float) -> float:
+    """
+    Compute a non-linear recommendation score.
+    - sim_score is squared to emphasize strong similarities.
+    - Popularity is log-transformed.
+    - Freshness is combined linearly.
+    """
+    return 0.6 * (sim_score ** 2) + 0.3 * np.log1p(popularity) + 0.1 * freshness
+
+
 class Recommender:
-    def __init__(self):
+    def __init__(self, like_weight=1.0, comment_weight=0.5, comment_like_weight=0.3,
+                 reply_weight=0.5, reply_like_weight=0.3):
         self.user_profiles = defaultdict(lambda: np.zeros(384))
         self.post_popularity = defaultdict(float)
         self.last_update = datetime.now(TIMEZONE) - HISTORY_WINDOW
 
-    async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime):
+        # Parameterized weights
+        self.like_weight = like_weight
+        self.comment_weight = comment_weight
+        self.comment_like_weight = comment_like_weight
+        self.reply_weight = reply_weight
+        self.reply_like_weight = reply_like_weight
+
+    async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime, post_id_not_null: bool):
         """Fetch all rows from a table using pagination."""
         supabase = get_supabase_client()
         page_size = 1000
@@ -91,42 +127,126 @@ class Recommender:
         all_data = []
 
         while True:
-            response = await asyncio.to_thread(
-                supabase.table(table_name)
-                .select(columns)
-                .gt('created_at', last_update.isoformat())
-                .range(page * page_size, (page + 1) * page_size - 1)
-                .execute
-            )
-
+            if post_id_not_null:
+                response = await asyncio.to_thread(
+                    supabase.table(table_name)
+                    .select(columns)
+                    .gt('created_at', last_update.isoformat())
+                    .not_.is_("post_id", None)
+                    .range(page * page_size, (page + 1) * page_size - 1)
+                    .execute
+                )
+            else:
+                response = await asyncio.to_thread(
+                    supabase.table(table_name)
+                    .select(columns)
+                    .gt('created_at', last_update.isoformat())
+                    .range(page * page_size, (page + 1) * page_size - 1)
+                    .execute
+                )
+
             if not response.data:
                 break
-
+
             all_data.extend(response.data)
             page += 1
 
         return all_data
 
     async def update_data(self):
-        # Fetch all likes and comments with pagination
-        likes = await recommender.fetch_all_rows('likes', 'user_id, post_id, created_at', self.last_update)
-        comments = await recommender.fetch_all_rows('comments', 'user_id, post_id, created_at', self.last_update)
-
-        for interaction in likes + comments:
-            user_id = interaction['user_id']
-            post_id = interaction['post_id']
+        # Current time for decay calculation
+        current_time = datetime.now(TIMEZONE)
+
+        # Fetch all interaction data since last update
+        likes = await self.fetch_all_rows('likes', 'user_id, post_id, created_at', self.last_update, True)
+        comments = await self.fetch_all_rows('comments', 'id, user_id, post_id, created_at, comment', self.last_update, True)
+        commentlikes = await self.fetch_all_rows('commentlikes', 'user_id, author_id, post_id, created_at, comment_id', self.last_update, True)
+        replies = await self.fetch_all_rows('replies', 'user_id, to, post_id, created_at, comment, comment_id, reply_id', self.last_update, True)
+        replylikes = await self.fetch_all_rows('replylikes', 'user_id, author_id, post_id, created_at, reply_id', self.last_update, True)
+
+        # Process likes with time decay
+        for like in likes:
+            user_id = like['user_id']
+            post_id = like['post_id']
+            interaction_time = parse_datetime(like['created_at'])
+            weight = decay_weight(interaction_time, current_time)
             user_interactions[user_id].add(post_id)
-            self.post_popularity[post_id] += 1.0 if 'likes' else 0.5
+            self.post_popularity[post_id] += self.like_weight * weight
             if post_id in post_features:
-                self.user_profiles[user_id] += post_features[post_id]
-
-        # Fetch posts
-        posts = await recommender.fetch_all_rows('posts', '*', self.last_update)
-
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.like_weight * weight
+
+        # Process comments with time decay
+        for comment in comments:
+            user_id = comment['user_id']
+            post_id = comment['post_id']
+            interaction_time = parse_datetime(comment['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.comment_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.comment_weight * weight
+
+        # Process comment likes with time decay
+        for clike in commentlikes:
+            user_id = clike['user_id']  # User who liked the comment
+            post_id = clike['post_id']
+            interaction_time = parse_datetime(clike['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.comment_like_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.comment_like_weight * weight
+
+        # Process replies with time decay
+        for reply in replies:
+            user_id = reply['user_id']
+            post_id = reply['post_id']
+            interaction_time = parse_datetime(reply['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.reply_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.reply_weight * weight
+
+        # Process reply likes with time decay
+        for rlike in replylikes:
+            user_id = rlike['user_id']
+            post_id = rlike['post_id']
+            interaction_time = parse_datetime(rlike['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.reply_like_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.reply_like_weight * weight
+
+        # OPTIONAL: Process negative feedback if available
+        # for negative in negative_interactions:
+        #     user_id = negative['user_id']
+        #     post_id = negative['post_id']
+        #     interaction_time = parse_datetime(negative['created_at'])
+        #     weight = decay_weight(interaction_time, current_time)
+        #     user_interactions[user_id].add(post_id)
+        #     self.post_popularity[post_id] -= some_negative_weight * weight
+        #     if post_id in post_features:
+        #         self.user_profiles[user_id] -= post_features[post_id] * some_negative_weight * weight
+
+        # Normalize user profiles after processing interactions
+        for user_id in self.user_profiles:
+            self.user_profiles[user_id] = normalize_profile(
+                self.user_profiles[user_id])
+
+        # Fetch and update post features
+        posts = await self.fetch_all_rows('posts', '*', self.last_update, False)
         post_texts, post_ids = [], []
         for post in posts:
             post_id = post['id']
-            text = f"{post.get('movie_name', '')} {post.get('content', '')}".strip()
+            text = f"{post.get('movie_name', '')} {post.get('content', '')}".strip(
+            )
             post_texts.append(text)
             post_ids.append(post_id)
             post['created_at'] = parse_datetime(post['created_at'])
@@ -134,17 +254,20 @@ class Recommender:
             post_metadata[post_id] = post
 
         if post_texts:
-            embeddings = sentence_model.encode(post_texts, show_progress_bar=False, convert_to_numpy=True)
+            embeddings = sentence_model.encode(
+                post_texts, show_progress_bar=False, convert_to_numpy=True)
             for post_id, embedding in zip(post_ids, embeddings):
                 post_features[post_id] = embedding / np.linalg.norm(embedding)
 
         self.last_update = datetime.now(TIMEZONE)
-        logger.info("Data updated: {} posts, {} interactions".format(len(posts), len(likes) + len(comments)))
+        total_interactions = len(likes) + len(comments) + \
+            len(commentlikes) + len(replies) + len(replylikes)
+        logger.info(
+            f"Data updated: {len(posts)} posts, {total_interactions} interactions")
 
     def get_recommendations(self, user_id: str) -> List[Dict]:
         user_profile = self.user_profiles[user_id]
         seen_posts = user_interactions[user_id]
-
         scores = {}
         now = datetime.now(TIMEZONE)
 
@@ -152,17 +275,20 @@ class Recommender:
             if post_id in seen_posts:
                 continue
 
-            sim_score = np.dot(user_profile, feature) if np.any(user_profile) else 0
+            sim_score = np.dot(user_profile, feature) if np.any(
+                user_profile) else 0
             time_diff = now - post_metadata[post_id]['created_at']
-            freshness = 1.0 / (1.0 + (time_diff.days / 7))
-
-            score = 0.6 * sim_score + 0.3 * np.log1p(self.post_popularity[post_id]) + 0.1 * freshness
+            # Freshness is defined as an exponential decay based on weeks old
+            freshness = math.exp(-time_diff.days / 7.0)
+            score = compute_score(
+                sim_score, self.post_popularity[post_id], freshness)
+            # Adding a small random noise for exploration
            scores[post_id] = score + random.uniform(-0.1, 0.1)
 
-        top_posts = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:TOP_K]
+        top_posts = sorted(
+            scores.items(), key=lambda x: x[1], reverse=True)[:TOP_K]
         results = [post_metadata[post_id] for post_id, _ in top_posts]
         random.shuffle(results)
-
        return results
 
 recommender = Recommender()
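
The committed scoring helper is easy to probe on its own. A standalone sketch below duplicates compute_score and feeds it made-up inputs (the numbers are illustrative, not from real data):

import numpy as np

def compute_score(sim_score: float, popularity: float, freshness: float) -> float:
    # Same weighting as the committed helper.
    return 0.6 * (sim_score ** 2) + 0.3 * np.log1p(popularity) + 0.1 * freshness

# Brand-new post with a strong profile match but little decayed popularity:
print(compute_score(sim_score=0.8, popularity=2.0, freshness=1.0))    # 0.384 + 0.330 + 0.100 ≈ 0.81
# Week-old post with a weak match but heavy decayed popularity:
print(compute_score(sim_score=0.3, popularity=50.0, freshness=0.37))  # 0.054 + 1.179 + 0.037 ≈ 1.27

Two properties worth noting: because profiles and post features are unit vectors, the similarity term is capped at 0.6 while the log-popularity term is unbounded, so heavily interacted posts can outrank close matches; and squaring sim_score discards its sign, so a strongly negative similarity scores the same as a strongly positive one. Both are tuning questions for the weights rather than bugs.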
test.py ADDED
@@ -0,0 +1,297 @@
+import os
+import math
+import random
+import asyncio
+import logging
+from collections import defaultdict
+from datetime import datetime, timedelta
+from zoneinfo import ZoneInfo
+from typing import List, Dict
+
+import numpy as np
+from dotenv import load_dotenv
+from supabase import create_client
+from sentence_transformers import SentenceTransformer
+import pdb
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
+load_dotenv()
+
+# Configuration
+SEED = 42
+random.seed(SEED)
+np.random.seed(SEED)
+
+TOP_K = 75
+HISTORY_WINDOW = timedelta(days=1000)
+TIMEZONE = ZoneInfo("UTC")
+
+# Global variables
+supabase_client = None
+user_interactions = defaultdict(set)
+post_features = {}
+post_metadata = {}
+
+if not os.path.exists('/tmp/cache'):
+    os.makedirs('/tmp/cache')
+sentence_model = SentenceTransformer(
+    'all-MiniLM-L6-v2', cache_folder='/tmp/cache')
+
+SUPABASE_URL = os.getenv('supabaseUrl')
+SUPABASE_KEY = os.getenv('supabaseAnonKey')
+
+
+def get_supabase_client():
+    global supabase_client
+    if supabase_client is None:
+        supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
+    return supabase_client
+
+
+def parse_datetime(dt_str: str) -> datetime:
+    """Parse ISO datetime string and ensure correct microsecond precision."""
+    try:
+        if '.' in dt_str:
+            date_part, time_part = dt_str.split('T')
+            time_part, tz_part = time_part.split('+')
+            if '.' in time_part:
+                time_without_micro, micro = time_part.split('.')
+                micro = micro.ljust(6, '0')
+                time_part = f"{time_without_micro}.{micro}"
+            dt_str = f"{date_part}T{time_part}+00:00"
+        return datetime.fromisoformat(dt_str).astimezone(TIMEZONE)
+    except Exception as e:
+        logger.error(f"Error parsing datetime: {dt_str} - {str(e)}")
+        raise
+
+
+def decay_weight(interaction_time: datetime, current_time: datetime, decay_rate: float = 0.0001) -> float:
+    """
+    Compute an exponential decay weight for an interaction based on its age.
+    The decay_rate can be tuned to control how fast interactions lose weight.
+    """
+    time_diff = (current_time - interaction_time).total_seconds()
+    return math.exp(-decay_rate * time_diff)
+
+
+def normalize_profile(profile: np.ndarray) -> np.ndarray:
+    norm = np.linalg.norm(profile)
+    return profile / norm if norm > 0 else profile
+
+
+def compute_score(sim_score: float, popularity: float, freshness: float) -> float:
+    """
+    Compute a non-linear recommendation score.
+    - sim_score is squared to emphasize strong similarities.
+    - Popularity is log-transformed.
+    - Freshness is combined linearly.
+    """
+    return 0.6 * (sim_score ** 2) + 0.3 * np.log1p(popularity) + 0.1 * freshness
+
+
+class Recommender:
+    def __init__(self, like_weight=1.0, comment_weight=0.5, comment_like_weight=0.3,
+                 reply_weight=0.5, reply_like_weight=0.3):
+        self.user_profiles = defaultdict(lambda: np.zeros(384))
+        self.post_popularity = defaultdict(float)
+        self.last_update = datetime.now(TIMEZONE) - HISTORY_WINDOW
+
+        # Parameterized weights
+        self.like_weight = like_weight
+        self.comment_weight = comment_weight
+        self.comment_like_weight = comment_like_weight
+        self.reply_weight = reply_weight
+        self.reply_like_weight = reply_like_weight
+
+    async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime, post_id_not_null: bool):
+        """Fetch all rows from a table using pagination."""
+        supabase = get_supabase_client()
+        page_size = 1000
+        page = 0
+        all_data = []
+
+        while True:
+            if post_id_not_null:
+                response = await asyncio.to_thread(
+                    supabase.table(table_name)
+                    .select(columns)
+                    .gt('created_at', last_update.isoformat())
+                    .not_.is_("post_id", None)
+                    .range(page * page_size, (page + 1) * page_size - 1)
+                    .execute
+                )
+            else:
+                response = await asyncio.to_thread(
+                    supabase.table(table_name)
+                    .select(columns)
+                    .gt('created_at', last_update.isoformat())
+                    .range(page * page_size, (page + 1) * page_size - 1)
+                    .execute
+                )
+
+            if not response.data:
+                break
+
+            all_data.extend(response.data)
+            page += 1
+
+        return all_data
+
+    async def update_data(self):
+        # Current time for decay calculation
+        current_time = datetime.now(TIMEZONE)
+
+        # Fetch all interaction data since last update
+        likes = await self.fetch_all_rows('likes', 'user_id, post_id, created_at', self.last_update, True)
+        comments = await self.fetch_all_rows('comments', 'id, user_id, post_id, created_at, comment', self.last_update, True)
+        commentlikes = await self.fetch_all_rows('commentlikes', 'user_id, author_id, post_id, created_at, comment_id', self.last_update, True)
+        replies = await self.fetch_all_rows('replies', 'user_id, to, post_id, created_at, comment, comment_id, reply_id', self.last_update, True)
+        replylikes = await self.fetch_all_rows('replylikes', 'user_id, author_id, post_id, created_at, reply_id', self.last_update, True)
+
+        # Process likes with time decay
+        for like in likes:
+            user_id = like['user_id']
+            post_id = like['post_id']
+            interaction_time = parse_datetime(like['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.like_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.like_weight * weight
+
+        # Process comments with time decay
+        for comment in comments:
+            user_id = comment['user_id']
+            post_id = comment['post_id']
+            interaction_time = parse_datetime(comment['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.comment_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.comment_weight * weight
+
+        # Process comment likes with time decay
+        for clike in commentlikes:
+            user_id = clike['user_id']  # User who liked the comment
+            post_id = clike['post_id']
+            interaction_time = parse_datetime(clike['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.comment_like_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.comment_like_weight * weight
+
+        # Process replies with time decay
+        for reply in replies:
+            user_id = reply['user_id']
+            post_id = reply['post_id']
+            interaction_time = parse_datetime(reply['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.reply_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.reply_weight * weight
+
+        # Process reply likes with time decay
+        for rlike in replylikes:
+            user_id = rlike['user_id']
+            post_id = rlike['post_id']
+            interaction_time = parse_datetime(rlike['created_at'])
+            weight = decay_weight(interaction_time, current_time)
+            user_interactions[user_id].add(post_id)
+            self.post_popularity[post_id] += self.reply_like_weight * weight
+            if post_id in post_features:
+                self.user_profiles[user_id] += post_features[post_id] * \
+                    self.reply_like_weight * weight
+
+        # OPTIONAL: Process negative feedback if available
+        # for negative in negative_interactions:
+        #     user_id = negative['user_id']
+        #     post_id = negative['post_id']
+        #     interaction_time = parse_datetime(negative['created_at'])
+        #     weight = decay_weight(interaction_time, current_time)
+        #     user_interactions[user_id].add(post_id)
+        #     self.post_popularity[post_id] -= some_negative_weight * weight
+        #     if post_id in post_features:
+        #         self.user_profiles[user_id] -= post_features[post_id] * some_negative_weight * weight
+
+        # Normalize user profiles after processing interactions
+        for user_id in self.user_profiles:
+            self.user_profiles[user_id] = normalize_profile(
+                self.user_profiles[user_id])
+
+        # Fetch and update post features
+        posts = await self.fetch_all_rows('posts', '*', self.last_update, False)
+        post_texts, post_ids = [], []
+        for post in posts:
+            post_id = post['id']
+            text = f"{post.get('movie_name', '')} {post.get('content', '')}".strip(
+            )
+            post_texts.append(text)
+            post_ids.append(post_id)
+            post['created_at'] = parse_datetime(post['created_at'])
+            post['type'] = 'post'
+            post_metadata[post_id] = post
+
+        if post_texts:
+            embeddings = sentence_model.encode(
+                post_texts, show_progress_bar=False, convert_to_numpy=True)
+            for post_id, embedding in zip(post_ids, embeddings):
+                post_features[post_id] = embedding / np.linalg.norm(embedding)
+
+        self.last_update = datetime.now(TIMEZONE)
+        total_interactions = len(likes) + len(comments) + \
+            len(commentlikes) + len(replies) + len(replylikes)
+        logger.info(
+            f"Data updated: {len(posts)} posts, {total_interactions} interactions")
+
+    def get_recommendations(self, user_id: str) -> List[Dict]:
+        user_profile = self.user_profiles[user_id]
+        seen_posts = user_interactions[user_id]
+        scores = {}
+        now = datetime.now(TIMEZONE)
+
+        for post_id, feature in post_features.items():
+            if post_id in seen_posts:
+                continue
+
+            sim_score = np.dot(user_profile, feature) if np.any(
+                user_profile) else 0
+            time_diff = now - post_metadata[post_id]['created_at']
+            # Freshness is defined as an exponential decay based on weeks old
+            freshness = math.exp(-time_diff.days / 7.0)
+            score = compute_score(
+                sim_score, self.post_popularity[post_id], freshness)
+            # Adding a small random noise for exploration
+            scores[post_id] = score + random.uniform(-0.1, 0.1)
+
+        top_posts = sorted(
+            scores.items(), key=lambda x: x[1], reverse=True)[:TOP_K]
+        results = [post_metadata[post_id] for post_id, _ in top_posts]
+        random.shuffle(results)
+        return results
+
+
+recommender = Recommender()
+
+
+async def main():
+    # pdb.set_trace()
+    user_id = "d7411324-c8ea-42cb-ae59-7b8cbc61594c"
+    await recommender.update_data()
+    recommendations = recommender.get_recommendations(user_id)
+    print(f"Recommendations for user {user_id}:")
+    for post in recommendations:
+        print(
+            f"- Post {post['id']}: Movie: {post.get('movie_name', '')} Caption: {post.get('content', '')}")
+
+if __name__ == "__main__":
+    asyncio.run(main())
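
test.py is a standalone copy of the recommender pipeline, so the flow can be exercised without the web app: given a .env file supplying supabaseUrl and supabaseAnonKey (the names os.getenv reads), python test.py runs a single update_data() pass and prints the shuffled top-TOP_K recommendations for the hard-coded user id. A hypothetical variation for smoke-testing several users in one pass (smoke_test is not part of the commit):

# Hypothetical helper, reusing the module-level recommender from test.py.
async def smoke_test(user_ids):
    await recommender.update_data()          # one fetch/rebuild pass
    for uid in user_ids:
        recs = recommender.get_recommendations(uid)
        print(uid, len(recs))                # at most TOP_K (75) posts each

# asyncio.run(smoke_test(["d7411324-c8ea-42cb-ae59-7b8cbc61594c"]))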