andykr1k committed on
Commit
3eb6086
·
1 Parent(s): 6cbcc1b

Added deleted posts update

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py CHANGED
@@ -42,6 +42,7 @@ TOP_K = 75
42
  HISTORY_WINDOW = timedelta(days=1000)
43
  TIMEZONE = ZoneInfo("UTC")
44
  UPDATE_INTERVAL = 300 # In seconds (5 minutes)
 
45
 
46
  # Global variables
47
  supabase_client = None
@@ -119,6 +120,43 @@ class Recommender:
119
  self.reply_weight = reply_weight
120
  self.reply_like_weight = reply_like_weight
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime, post_id_not_null: bool):
123
  """Fetch all rows from a table using pagination."""
124
  supabase = get_supabase_client()
@@ -259,6 +297,9 @@ class Recommender:
259
  for post_id, embedding in zip(post_ids, embeddings):
260
  post_features[post_id] = embedding / np.linalg.norm(embedding)
261
 
 
 
 
262
  self.last_update = datetime.now(TIMEZONE)
263
  total_interactions = len(likes) + len(comments) + \
264
  len(commentlikes) + len(replies) + len(replylikes)
 
42
  HISTORY_WINDOW = timedelta(days=1000)
43
  TIMEZONE = ZoneInfo("UTC")
44
  UPDATE_INTERVAL = 300 # In seconds (5 minutes)
45
+ START_TIME = datetime.now(TIMEZONE)
46
 
47
  # Global variables
48
  supabase_client = None
 
120
  self.reply_weight = reply_weight
121
  self.reply_like_weight = reply_like_weight
122
 
123
@staticmethod
async def fetch_existing_post_ids() -> set:
    """Return the ids of every row currently present in the ``posts`` table.

    Declared as a static method because the body uses no instance state and
    the caller invokes it as ``self.fetch_existing_post_ids()``; without the
    decorator that call would pass ``self`` to a zero-argument function and
    raise ``TypeError``.

    Rows are read in pages of ``page_size``. Each blocking ``execute`` call
    is handed to ``asyncio.to_thread``.

    Returns:
        set: The ``id`` value of each row returned by the paginated query.
    """
    supabase = get_supabase_client()
    page_size = 1000
    post_ids = set()
    page = 0

    while True:
        start = page * page_size
        # Order explicitly: offset-based paging without a stable sort may
        # repeat or skip rows between pages, and a skipped id would later be
        # treated as a deleted post.
        query = (
            supabase.table('posts')
            .select('id')
            .order('id')
            .range(start, start + page_size - 1)
        )
        response = await asyncio.to_thread(query.execute)

        # An empty page means the previous page was the last one.
        if not response.data:
            break

        post_ids.update(row['id'] for row in response.data)
        page += 1

    return post_ids
146
+
147
def clean_deleted_posts(self, existing_post_ids: set):
    """Evict cached state for posts that are not in ``existing_post_ids``.

    Any key of the module-level ``post_metadata`` cache that is absent from
    ``existing_post_ids`` is treated as deleted. Each such id is removed from
    ``post_metadata``, ``post_features`` and ``self.post_popularity``, and is
    subtracted from every entry of ``user_interactions``.

    Args:
        existing_post_ids: Ids that still exist; must be a set, since it is
            subtracted from a set of cached ids.
    """
    # Start from everything cached, then discard ids that are still live.
    stale_ids = set(post_metadata.keys())
    stale_ids -= existing_post_ids

    for stale_id in stale_ids:
        # A default of None makes each pop a no-op when the id is missing.
        post_metadata.pop(stale_id, None)
        post_features.pop(stale_id, None)
        self.post_popularity.pop(stale_id, None)

    # NOTE(review): presumably each value is a set of post ids - confirm.
    for uid in user_interactions:
        user_interactions[uid] -= stale_ids
159
+
160
  async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime, post_id_not_null: bool):
161
  """Fetch all rows from a table using pagination."""
162
  supabase = get_supabase_client()
 
297
  for post_id, embedding in zip(post_ids, embeddings):
298
  post_features[post_id] = embedding / np.linalg.norm(embedding)
299
 
300
+ existing_post_ids = await self.fetch_existing_post_ids()
301
+ self.clean_deleted_posts(existing_post_ids)
302
+
303
  self.last_update = datetime.now(TIMEZONE)
304
  total_interactions = len(likes) + len(comments) + \
305
  len(commentlikes) + len(replies) + len(replylikes)