Spaces:
Sleeping
Sleeping
andykr1k committed on
Commit ·
3eb6086
1
Parent(s): 6cbcc1b
Added deleted posts update
Browse files
app.py
CHANGED
|
@@ -42,6 +42,7 @@ TOP_K = 75
|
|
| 42 |
HISTORY_WINDOW = timedelta(days=1000)
|
| 43 |
TIMEZONE = ZoneInfo("UTC")
|
| 44 |
UPDATE_INTERVAL = 300 # In seconds (5 minutes)
|
|
|
|
| 45 |
|
| 46 |
# Global variables
|
| 47 |
supabase_client = None
|
|
@@ -119,6 +120,43 @@ class Recommender:
|
|
| 119 |
self.reply_weight = reply_weight
|
| 120 |
self.reply_like_weight = reply_like_weight
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime, post_id_not_null: bool):
|
| 123 |
"""Fetch all rows from a table using pagination."""
|
| 124 |
supabase = get_supabase_client()
|
|
@@ -259,6 +297,9 @@ class Recommender:
|
|
| 259 |
for post_id, embedding in zip(post_ids, embeddings):
|
| 260 |
post_features[post_id] = embedding / np.linalg.norm(embedding)
|
| 261 |
|
|
|
|
|
|
|
|
|
|
| 262 |
self.last_update = datetime.now(TIMEZONE)
|
| 263 |
total_interactions = len(likes) + len(comments) + \
|
| 264 |
len(commentlikes) + len(replies) + len(replylikes)
|
|
|
|
| 42 |
HISTORY_WINDOW = timedelta(days=1000)
|
| 43 |
TIMEZONE = ZoneInfo("UTC")
|
| 44 |
UPDATE_INTERVAL = 300 # In seconds (5 minutes)
|
| 45 |
+
START_TIME = datetime.now(TIMEZONE)
|
| 46 |
|
| 47 |
# Global variables
|
| 48 |
supabase_client = None
|
|
|
|
| 120 |
self.reply_weight = reply_weight
|
| 121 |
self.reply_like_weight = reply_like_weight
|
| 122 |
|
| 123 |
+
@staticmethod
async def fetch_existing_post_ids() -> set:
    """Return the ids of every row currently in the ``posts`` table.

    Declared as a static method because it takes no ``self`` yet is invoked
    as ``self.fetch_existing_post_ids()``; without the decorator that call
    raises ``TypeError`` (the instance would be passed as an unexpected
    positional argument).

    Rows are read in pages of ``page_size`` using ``range`` until a page
    comes back empty. Each ``execute`` call is dispatched to a worker thread
    via ``asyncio.to_thread``.

    Returns:
        set: the ``id`` value of each row that was fetched.
    """
    supabase = get_supabase_client()
    page_size = 1000
    post_ids = set()
    offset = 0

    while True:
        query = (
            supabase.table('posts')
            .select('id')
            # A fixed sort order keeps offset-based pages from overlapping or
            # skipping rows; a skipped id would later look like a deleted post.
            .order('id')
            .range(offset, offset + page_size - 1)
        )
        response = await asyncio.to_thread(query.execute)

        rows = response.data
        if not rows:
            break

        post_ids.update(row['id'] for row in rows)
        offset += page_size

    return post_ids
|
| 146 |
+
|
| 147 |
+
def clean_deleted_posts(self, existing_post_ids: set):
    """Drop cached state for posts that are not in ``existing_post_ids``.

    Every key of the module-level ``post_metadata`` cache that is absent
    from ``existing_post_ids`` is treated as deleted: it is removed from
    ``post_metadata``, ``post_features`` and ``self.post_popularity``, and
    subtracted from each entry of ``user_interactions``.

    Args:
        existing_post_ids: ids of the posts that still exist.
    """
    deleted_ids = set(post_metadata) - existing_post_ids
    if not deleted_ids:
        # Nothing was deleted, so skip the pass over every user.
        return

    for post_id in deleted_ids:
        post_metadata.pop(post_id, None)
        post_features.pop(post_id, None)
        self.post_popularity.pop(post_id, None)

    # NOTE(review): assumes each user_interactions value is a set of
    # post ids -- confirm against where the cache is populated.
    for user_id in user_interactions:
        user_interactions[user_id] -= deleted_ids
|
| 159 |
+
|
| 160 |
async def fetch_all_rows(self, table_name: str, columns: str, last_update: datetime, post_id_not_null: bool):
|
| 161 |
"""Fetch all rows from a table using pagination."""
|
| 162 |
supabase = get_supabase_client()
|
|
|
|
| 297 |
for post_id, embedding in zip(post_ids, embeddings):
|
| 298 |
post_features[post_id] = embedding / np.linalg.norm(embedding)
|
| 299 |
|
| 300 |
+
existing_post_ids = await self.fetch_existing_post_ids()
|
| 301 |
+
self.clean_deleted_posts(existing_post_ids)
|
| 302 |
+
|
| 303 |
self.last_update = datetime.now(TIMEZONE)
|
| 304 |
total_interactions = len(likes) + len(comments) + \
|
| 305 |
len(commentlikes) + len(replies) + len(replylikes)
|