andykr1k commited on
Commit
fd29a2f
·
1 Parent(s): c7d0291

Added application file and requirements.txt

Browse files
Files changed (4) hide show
  1. .gitignore +436 -0
  2. Dockerfile +5 -0
  3. app.py +183 -0
  4. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Python ###
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ ### Python Patch ###
163
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
164
+ poetry.toml
165
+
166
+ # ruff
167
+ .ruff_cache/
168
+
169
+ # LSP config files
170
+ pyrightconfig.json
171
+
172
+ ### react ###
173
+ .DS_*
174
+ logs
175
+ **/*.backup.*
176
+ **/*.back.*
177
+
178
+ node_modules
179
+ bower_components
180
+
181
+ *.sublime*
182
+
183
+ psd
184
+ thumb
185
+ sketch
186
+
187
+ ### ReactNative ###
188
+ # React Native Stack Base
189
+
190
+ .expo
191
+ __generated__
192
+
193
+ ### ReactNative.Xcode Stack ###
194
+ ## User settings
195
+ xcuserdata/
196
+
197
+ ## Xcode 8 and earlier
198
+ *.xcscmblueprint
199
+ *.xccheckout
200
+
201
+ ### ReactNative.Gradle Stack ###
202
+ .gradle
203
+ **/build/
204
+ !src/**/build/
205
+
206
+ # Ignore Gradle GUI config
207
+ gradle-app.setting
208
+
209
+ # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
210
+ !gradle-wrapper.jar
211
+
212
+ # Avoid ignoring Gradle wrapper properties
213
+ !gradle-wrapper.properties
214
+
215
+ # Cache of project
216
+ .gradletasknamecache
217
+
218
+ # Eclipse Gradle plugin generated files
219
+ # Eclipse Core
220
+ .project
221
+ # JDT-specific (Eclipse Java Development Tools)
222
+ .classpath
223
+
224
+ ### ReactNative.macOS Stack ###
225
+ # General
226
+ .DS_Store
227
+ .AppleDouble
228
+ .LSOverride
229
+
230
+ # Icon must end with two \r
231
+ Icon
232
+
233
+
234
+ # Thumbnails
235
+ ._*
236
+
237
+ # Files that might appear in the root of a volume
238
+ .DocumentRevisions-V100
239
+ .fseventsd
240
+ .Spotlight-V100
241
+ .TemporaryItems
242
+ .Trashes
243
+ .VolumeIcon.icns
244
+ .com.apple.timemachine.donotpresent
245
+
246
+ # Directories potentially created on remote AFP share
247
+ .AppleDB
248
+ .AppleDesktop
249
+ Network Trash Folder
250
+ Temporary Items
251
+ .apdisk
252
+
253
+ ### ReactNative.Linux Stack ###
254
+ *~
255
+
256
+ # temporary files which can be created if a process still has a handle open of a deleted file
257
+ .fuse_hidden*
258
+
259
+ # KDE directory preferences
260
+ .directory
261
+
262
+ # Linux trash folder which might appear on any partition or disk
263
+ .Trash-*
264
+
265
+ # .nfs files are created when an open file is removed but is still being accessed
266
+ .nfs*
267
+
268
+ ### ReactNative.Node Stack ###
269
+ # Logs
270
+ npm-debug.log*
271
+ yarn-debug.log*
272
+ yarn-error.log*
273
+ lerna-debug.log*
274
+ .pnpm-debug.log*
275
+
276
+ # Diagnostic reports (https://nodejs.org/api/report.html)
277
+ report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
278
+
279
+ # Runtime data
280
+ pids
281
+ *.pid
282
+ *.seed
283
+ *.pid.lock
284
+
285
+ # Directory for instrumented libs generated by jscoverage/JSCover
286
+ lib-cov
287
+
288
+ # Coverage directory used by tools like istanbul
289
+ coverage
290
+ *.lcov
291
+
292
+ # nyc test coverage
293
+ .nyc_output
294
+
295
+ # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
296
+ .grunt
297
+
298
+ # Bower dependency directory (https://bower.io/)
299
+
300
+ # node-waf configuration
301
+ .lock-wscript
302
+
303
+ # Compiled binary addons (https://nodejs.org/api/addons.html)
304
+ build/Release
305
+
306
+ # Dependency directories
307
+ node_modules/
308
+ jspm_packages/
309
+
310
+ # Snowpack dependency directory (https://snowpack.dev/)
311
+ web_modules/
312
+
313
+ # TypeScript cache
314
+ *.tsbuildinfo
315
+
316
+ # Optional npm cache directory
317
+ .npm
318
+
319
+ # Optional eslint cache
320
+ .eslintcache
321
+
322
+ # Optional stylelint cache
323
+ .stylelintcache
324
+
325
+ # Microbundle cache
326
+ .rpt2_cache/
327
+ .rts2_cache_cjs/
328
+ .rts2_cache_es/
329
+ .rts2_cache_umd/
330
+
331
+ # Optional REPL history
332
+ .node_repl_history
333
+
334
+ # Output of 'npm pack'
335
+ *.tgz
336
+
337
+ # Yarn Integrity file
338
+ .yarn-integrity
339
+
340
+ # dotenv environment variable files
341
+ .env.development.local
342
+ .env.test.local
343
+ .env.production.local
344
+ .env.local
345
+
346
+ # parcel-bundler cache (https://parceljs.org/)
347
+ .parcel-cache
348
+
349
+ # Next.js build output
350
+ .next
351
+ out
352
+
353
+ # Nuxt.js build / generate output
354
+ .nuxt
355
+ dist
356
+
357
+ # Gatsby files
358
+ .cache/
359
+ # Uncomment the public line below if your project uses Gatsby and not Next.js
360
+ # https://nextjs.org/blog/next-9-1#public-directory-support
361
+ # public
362
+
363
+ # vuepress build output
364
+ .vuepress/dist
365
+
366
+ # vuepress v2.x temp and cache directory
367
+ .temp
368
+
369
+ # Docusaurus cache and generated files
370
+ .docusaurus
371
+
372
+ # Serverless directories
373
+ .serverless/
374
+
375
+ # FuseBox cache
376
+ .fusebox/
377
+
378
+ # DynamoDB Local files
379
+ .dynamodb/
380
+
381
+ # TernJS port file
382
+ .tern-port
383
+
384
+ # Stores VSCode versions used for testing VSCode extensions
385
+ .vscode-test
386
+
387
+ # yarn v2
388
+ .yarn/cache
389
+ .yarn/unplugged
390
+ .yarn/build-state.yml
391
+ .yarn/install-state.gz
392
+ .pnp.*
393
+
394
+ ### ReactNative.Buck Stack ###
395
+ buck-out/
396
+ .buckconfig.local
397
+ .buckd/
398
+ .buckversion
399
+ .fakebuckversion
400
+
401
+ ### ReactNative.Android Stack ###
402
+ # Gradle files
403
+ .gradle/
404
+
405
+ # Local configuration file (sdk path, etc)
406
+ local.properties
407
+
408
+ # Log/OS Files
409
+
410
+ # Android Studio generated files and folders
411
+ captures/
412
+ .externalNativeBuild/
413
+ .cxx/
414
+ *.apk
415
+ output.json
416
+
417
+ # IntelliJ
418
+ *.iml
419
+ .idea/
420
+ misc.xml
421
+ deploymentTargetDropDown.xml
422
+ render.experimental.xml
423
+
424
+ # Keystore files
425
+ *.jks
426
+ *.keystore
427
+
428
+ # Google Services (e.g. APIs or Firebase)
429
+ google-services.json
430
+
431
+ # Android Profiling
432
+ *.hprof
433
+
434
+ # Personal
435
+ data
436
+ package-lock.json
Dockerfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
FROM python:3.10.9

# Keep the application in its own directory instead of copying the whole
# repository into the filesystem root.
WORKDIR /app

# Install dependencies first so this layer is reused when only source changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY . .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import itertools
4
+ import numpy as np
5
+ import pandas as pd
6
+ import networkx as nx
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import torch.optim as optim
11
+ from torch_geometric.utils import from_networkx
12
+ from torch_geometric.nn import SAGEConv
13
+ from supabase import create_client
14
+ from fastapi import FastAPI, HTTPException, Query
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from dotenv import load_dotenv
17
+
18
# Load variables from a local .env file (if one exists) into the process
# environment before the os.getenv() lookups below.
load_dotenv()

app = FastAPI()

# Enable CORS for every origin, method and header.
# NOTE(review): a wildcard origin combined with allow_credentials=True lets any
# site issue credentialed requests; list explicit origins if cookies or auth
# headers are ever introduced.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Seed every random number generator used in this module so negative sampling
# and weight initialisation are repeatable across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Module-level state, populated by rebuild_model().  (A `global` statement is
# only meaningful inside a function, so none is needed here.)
G = None
features = None
usernames = None
pyg_data = None
trained_model = None

# Supabase connection settings, read once at import time.
SUPABASE_ID = os.getenv('supabaseID')
SUPABASE_URL = os.getenv('supabaseUrl')
SUPABASE_KEY = os.getenv('supabaseAnonKey')
49
+
50
def get_supabase_client():
    """Create a Supabase client from the module-level URL and key.

    Returns:
        The client object returned by ``create_client``.

    Raises:
        RuntimeError: if ``supabaseUrl`` or ``supabaseAnonKey`` was not set in
            the environment when this module was loaded.
    """
    missing = [
        env_name
        for env_name, value in (
            ("supabaseUrl", SUPABASE_URL),
            ("supabaseAnonKey", SUPABASE_KEY),
        )
        if not value
    ]
    if missing:
        # Fail with the name of the variable to set rather than passing None on.
        raise RuntimeError(
            "Missing Supabase configuration; set environment variable(s): "
            + ", ".join(missing)
        )
    return create_client(SUPABASE_URL, SUPABASE_KEY)
52
+
53
def load_and_preprocess_data():
    """Fetch the follow records and resolve both user ids to usernames.

    Reads every row of the ``followers`` table and the ``id``/``username``
    columns of the ``profiles`` table, then joins them twice so each follow
    record carries two usernames.

    Returns:
        A DataFrame with the columns ``follower_username`` and
        ``followed_username``.  Rows where either username is missing or an
        empty string are dropped.  The frame is empty when either table
        returns no rows.
    """
    edge_columns = ['follower_username', 'followed_username']

    supabase = get_supabase_client()
    followers_response = supabase.table('followers').select('*').execute()
    users_response = supabase.table('profiles').select('id, username').execute()

    followers = pd.DataFrame(followers_response.data)
    users = pd.DataFrame(users_response.data)

    # An empty response produces a DataFrame without columns, which would make
    # the merges below raise KeyError; report "no edges" instead.
    if followers.empty or users.empty:
        return pd.DataFrame(columns=edge_columns)

    profiles = users[['id', 'username']]

    # NOTE(review): 'follower_username' is resolved from followers.following
    # and 'followed_username' from followers.id -- confirm against the table
    # schema that these labels are not swapped.
    merged = followers.merge(profiles, left_on='following', right_on='id', how='left')
    merged = merged.rename(columns={'username': 'follower_username'}).drop(columns=['id_y'])

    merged = merged.merge(profiles, left_on='id_x', right_on='id', how='left')
    merged = merged.rename(columns={'username': 'followed_username'})

    merged = merged[edge_columns].dropna()
    has_both_names = (merged['follower_username'] != '') & (merged['followed_username'] != '')
    return merged[has_both_names]
71
+
72
def create_graph_dataframe(merged_df):
    """Build the directed follow graph and its identity node features.

    Args:
        merged_df: DataFrame with ``follower_username`` and
            ``followed_username`` columns; each row becomes one directed edge.

    Returns:
        A tuple ``(G, features, usernames)`` where ``G`` is an
        ``nx.DiGraph``, ``features`` is an identity matrix with one row per
        node, and ``usernames`` is the sorted list of node names.
    """
    edge_graph = nx.from_pandas_edgelist(
        merged_df,
        source='follower_username',
        target='followed_username',
        create_using=nx.DiGraph(),
    )
    usernames = sorted(edge_graph.nodes())

    # Re-insert the nodes in sorted order so that the graph's own node order
    # (which from_networkx uses to number nodes) matches the position of each
    # name in `usernames` and the rows of the feature matrix.
    G = nx.DiGraph()
    G.add_nodes_from(usernames)
    G.add_edges_from(edge_graph.edges())

    return G, torch.eye(len(usernames)), usernames
76
+
77
def prepare_training_data(G, usernames):
    """Build positive and negative edge index tensors for link prediction.

    Args:
        G: graph object whose ``edges()`` yields ``(source, target)`` pairs of
            names that all appear in ``usernames``.
        usernames: list of node names; a node's index is its position here.

    Returns:
        A tuple ``(pos_edge_index, neg_edge_index)`` of ``torch.long`` tensors
        of shape ``(2, E)`` and ``(2, K)``.  ``K`` equals the number of
        positive edges unless fewer distinct non-edges exist, in which case
        every available non-edge is returned.  Negative edges never include
        self-loops or existing edges.
    """
    # One dict lookup per endpoint instead of a linear list.index() scan.
    index_of = {name: idx for idx, name in enumerate(usernames)}
    pos_edges = [(index_of[u], index_of[v]) for u, v in G.edges()]

    num_nodes = len(usernames)
    existing_edges = set(pos_edges)
    # Self-loops are never negative candidates, so they do not use up a slot.
    num_existing = sum(1 for u, v in existing_edges if u != v)
    available = num_nodes * (num_nodes - 1) - num_existing
    num_negatives = min(len(pos_edges), available)

    if available <= 2 * num_negatives:
        # Dense graph: the number of ordered pairs is at most about three
        # times the edge count here, so enumerating the non-edges is cheap.
        candidates = [
            pair
            for pair in itertools.permutations(range(num_nodes), 2)
            if pair not in existing_edges
        ]
        negative_edges = random.sample(candidates, num_negatives)
    else:
        # Sparse graph: draw random pairs and reject invalid ones rather than
        # materialising all N * (N - 1) ordered pairs in memory.
        chosen = set()
        negative_edges = []
        while len(negative_edges) < num_negatives:
            pair = (random.randrange(num_nodes), random.randrange(num_nodes))
            if pair[0] == pair[1] or pair in existing_edges or pair in chosen:
                continue
            chosen.add(pair)
            negative_edges.append(pair)

    # reshape(-1, 2) keeps the (2, 0) shape when a list is empty.
    pos_edge_index = torch.tensor(pos_edges, dtype=torch.long).reshape(-1, 2).T
    neg_edge_index = torch.tensor(negative_edges, dtype=torch.long).reshape(-1, 2).T
    return pos_edge_index, neg_edge_index
87
+
88
class GraphRecommender(nn.Module):
    """Two-layer SAGEConv encoder that maps node features to embeddings.

    Args:
        input_dim: size of each input node feature vector.
        hidden_dim: output size of the first convolution.
        output_dim: size of the returned node embeddings.
        dropout: probability passed to the dropout layer applied between the
            two convolutions.
    """

    def __init__(self, input_dim, hidden_dim=128, output_dim=64, dropout=0.3):
        super().__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, edge_index):
        """Return one embedding per node for features ``x`` and ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = self.dropout(hidden)
        return self.conv2(hidden, edge_index)
100
+
101
def train_model(model, data, pos_edges, neg_edges, epochs=200, patience=20):
    """Train ``model`` to score existing edges above sampled non-edges.

    An edge is scored by the dot product of its two endpoint embeddings.  The
    loss is binary cross-entropy on positive and negative scores plus a small
    L2 penalty on the embedding matrix.

    Args:
        model: module called as ``model(data.x, data.edge_index)`` that
            returns one embedding row per node.
        data: object exposing ``x`` and ``edge_index``.
        pos_edges: ``(2, E)`` index tensor of existing edges.
        neg_edges: ``(2, K)`` index tensor of sampled non-edges.
        epochs: maximum number of optimisation steps.
        patience: stop once the loss has failed to improve for this many
            consecutive epochs.

    Returns:
        The same ``model`` instance, switched to evaluation mode so dropout is
        disabled when it is used for inference.
    """
    optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-4)
    best_loss = float('inf')
    stale_epochs = 0

    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()

        embeddings = model(data.x, data.edge_index)

        pos_scores = (embeddings[pos_edges[0]] * embeddings[pos_edges[1]]).sum(1)
        neg_scores = (embeddings[neg_edges[0]] * embeddings[neg_edges[1]]).sum(1)

        pos_loss = F.binary_cross_entropy_with_logits(pos_scores, torch.ones_like(pos_scores))
        neg_loss = F.binary_cross_entropy_with_logits(neg_scores, torch.zeros_like(neg_scores))
        reg_loss = torch.norm(embeddings, p=2)

        total_loss = pos_loss + neg_loss + 0.001 * reg_loss

        total_loss.backward()
        optimizer.step()

        # Track a plain float so the best loss does not keep an autograd
        # graph alive between epochs.
        current_loss = total_loss.item()
        if current_loss < best_loss:
            best_loss = current_loss
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs >= patience:
                break

    model.eval()
    return model
133
+
134
def get_recommendations(username, model, data, G, usernames, top_k=10):
    """Return up to ``top_k`` usernames ranked by embedding similarity.

    Candidates are every name in ``usernames`` other than ``username`` itself
    and the successors of ``username`` in ``G``.  Each candidate is scored by
    the dot product of its embedding with the user's embedding.

    Args:
        username: name of the user to recommend for.
        model: ``nn.Module`` called as ``model(data.x, data.edge_index)``.
        data: object exposing ``x`` and ``edge_index``.
        G: graph whose ``successors(username)`` yields names to exclude.
        usernames: list of node names; a node's index is its position here.
        top_k: maximum number of recommendations to return.

    Returns:
        A list of usernames, best score first.  Empty when ``username`` is
        unknown or there are no candidates.
    """
    # One dict lookup per name instead of repeated linear list.index() scans.
    index_of = {name: idx for idx, name in enumerate(usernames)}
    if username not in index_of:
        return []

    current_follows = set(G.successors(username))
    candidates = [u for u in usernames if u != username and u not in current_follows]
    if not candidates:
        return []

    # Run inference with dropout disabled, then restore the previous mode so
    # the caller's model is left as it was found.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            embeddings = model(data.x, data.edge_index)
    finally:
        model.train(was_training)

    user_embed = embeddings[index_of[username]]
    candidate_embeds = embeddings[[index_of[u] for u in candidates]]

    # Matrix-vector product always yields a 1-D tensor, including when there
    # is exactly one candidate.
    scores = candidate_embeds @ user_embed

    top_indices = scores.argsort(descending=True)[:top_k]
    return [candidates[i] for i in top_indices.tolist()]
154
+
155
def rebuild_model():
    """Reload the follow data, retrain the recommender and publish the result.

    Everything is built in local variables first and the module-level state
    (``G``, ``features``, ``usernames``, ``pyg_data``, ``trained_model``) is
    replaced in a single assignment at the end.  If loading or training
    raises, the previously published state is left untouched.
    """
    global G, features, usernames, pyg_data, trained_model

    merged_df = load_and_preprocess_data()
    new_graph, new_features, new_usernames = create_graph_dataframe(merged_df)
    pos_edge_index, neg_edge_index = prepare_training_data(new_graph, new_usernames)

    new_data = from_networkx(new_graph)
    new_data.x = new_features
    # from_networkx numbers nodes in the graph's own node order, while the
    # features, training edges and lookups all index nodes by their position
    # in the sorted username list.  Use the edge index built from that list so
    # message passing and training agree on node numbering.
    new_data.edge_index = pos_edge_index

    model = GraphRecommender(input_dim=len(new_usernames))
    new_model = train_model(model, new_data, pos_edge_index, neg_edge_index)

    G, features, usernames, pyg_data, trained_model = (
        new_graph,
        new_features,
        new_usernames,
        new_data,
        new_model,
    )
165
+
166
@app.post("/rebuild")
async def rebuild_handler():
    """Rebuild the data and model, then report success.

    Calls ``rebuild_model()`` synchronously; any exception it raises
    propagates to the framework.
    """
    rebuild_model()
    payload = {
        "status": "success",
        "message": "Model and data rebuilt successfully",
    }
    return payload
170
+
171
@app.get("/recommend/network")
async def get_recommendations_handler(username: str = Query(...)):
    """Return follow recommendations for ``username``.

    Args:
        username: required query parameter naming the user.

    Returns:
        A dict with ``status`` and the list of ``recommendations``; the list
        is empty when the username is not part of the current graph.

    Raises:
        HTTPException: status 500 when no model has been built yet.
    """
    # Compare against None explicitly rather than relying on the truthiness
    # of the model object.
    if trained_model is None:
        raise HTTPException(status_code=500, detail="Model not initialized, please rebuild first.")

    recommendations = get_recommendations(username, trained_model, pyg_data, G, usernames)
    return {"status": "success", "recommendations": recommendations}
178
+
179
@app.get("/")
async def health_check():
    """Return a fixed payload indicating that the service is running."""
    payload = {
        "status": "success",
        "message": "Recommendation service operational",
    }
    return payload
182
+
183
# Build the initial model at import time.  A failure here (for example an
# unreachable database) is logged instead of aborting startup, so the service
# can still start and the model can be built later via POST /rebuild; until
# then the recommendation endpoint reports that the model is not initialized.
try:
    rebuild_model()
except Exception:  # top-level startup boundary: log and keep serving
    import logging

    logging.getLogger(__name__).exception(
        "Initial model build failed; POST /rebuild to retry"
    )
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ pandas
3
+ torch
4
+ networkx[default]
5
+ torch_geometric
6
+ supabase
7
+ fastapi
8
+ python-dotenv
9
+ uvicorn