Spaces:

andykrik
/

NetworkRecommender

Running

App Files Files Community

andykr1k committed on Mar 12, 2025

Commit

fd29a2f

1 Parent(s): c7d0291

Added application file and requirements.txt

Browse files

Files changed (4) hide show

.gitignore +436 -0
Dockerfile +5 -0
app.py +183 -0
requirements.txt +9 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,436 @@

+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+### react ###
+.DS_*
+logs
+**/*.backup.*
+**/*.back.*
+node_modules
+bower_components
+*.sublime*
+psd
+thumb
+sketch
+### ReactNative ###
+# React Native Stack Base
+.expo
+__generated__
+### ReactNative.Xcode Stack ###
+## User settings
+xcuserdata/
+## Xcode 8 and earlier
+*.xcscmblueprint
+*.xccheckout
+### ReactNative.Gradle Stack ###
+.gradle
+**/build/
+!src/**/build/
+# Ignore Gradle GUI config
+gradle-app.setting
+# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
+!gradle-wrapper.jar
+# Avoid ignoring Gradle wrapper properties
+!gradle-wrapper.properties
+# Cache of project
+.gradletasknamecache
+# Eclipse Gradle plugin generated files
+# Eclipse Core
+.project
+# JDT-specific (Eclipse Java Development Tools)
+.classpath
+### ReactNative.macOS Stack ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### ReactNative.Linux Stack ###
+*~
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+# KDE directory preferences
+.directory
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+### ReactNative.Node Stack ###
+# Logs
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+# nyc test coverage
+.nyc_output
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+# Bower dependency directory (https://bower.io/)
+# node-waf configuration
+.lock-wscript
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+# Dependency directories
+node_modules/
+jspm_packages/
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+# TypeScript cache
+*.tsbuildinfo
+# Optional npm cache directory
+.npm
+# Optional eslint cache
+.eslintcache
+# Optional stylelint cache
+.stylelintcache
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+# Optional REPL history
+.node_repl_history
+# Output of 'npm pack'
+*.tgz
+# Yarn Integrity file
+.yarn-integrity
+# dotenv environment variable files
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+# parcel-bundler cache (https://parceljs.org/)
+.parcel-cache
+# Next.js build output
+.next
+out
+# Nuxt.js build / generate output
+.nuxt
+dist
+# Gatsby files
+.cache/
+# Uncomment the "public" line below if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+# vuepress build output
+.vuepress/dist
+# vuepress v2.x temp and cache directory
+.temp
+# Docusaurus cache and generated files
+.docusaurus
+# Serverless directories
+.serverless/
+# FuseBox cache
+.fusebox/
+# DynamoDB Local files
+.dynamodb/
+# TernJS port file
+.tern-port
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
+### ReactNative.Buck Stack ###
+buck-out/
+.buckconfig.local
+.buckd/
+.buckversion
+.fakebuckversion
+### ReactNative.Android Stack ###
+# Gradle files
+.gradle/
+# Local configuration file (sdk path, etc)
+local.properties
+# Log/OS Files
+# Android Studio generated files and folders
+captures/
+.externalNativeBuild/
+.cxx/
+*.apk
+output.json
+# IntelliJ
+*.iml
+.idea/
+misc.xml
+deploymentTargetDropDown.xml
+render.experimental.xml
+# Keystore files
+*.jks
+*.keystore
+# Google Services (e.g. APIs or Firebase)
+google-services.json
+# Android Profiling
+*.hprof
+# Personal
+data
+package-lock.json

Dockerfile ADDED Viewed

	@@ -0,0 +1,5 @@

# Base image: official CPython 3.10.9.
FROM python:3.10.9
# Everything lives at the filesystem root; uvicorn resolves "app:app" from here.
WORKDIR /
# Copy only the dependency manifest first so the (slow) pip layer is reused
# from the build cache until requirements.txt itself changes.
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir --upgrade -r /requirements.txt
# Copy the rest of the build context (app.py, ...) into the image root.
COPY . .
# Serve the ASGI application `app` from app.py on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import os
+import random
+import itertools
+import numpy as np
+import pandas as pd
+import networkx as nx
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torch_geometric.utils import from_networkx
+from torch_geometric.nn import SAGEConv
+from supabase import create_client
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.middleware.cors import CORSMiddleware
+from dotenv import load_dotenv
# Load variables from a .env file (if present) before the environment is read below.
load_dotenv()

app = FastAPI()

# Enable CORS for every origin, method and header.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive; narrow allow_origins if this API ever relies on cookies or auth.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Seed every random number generator used in this module so that negative
# sampling and model initialisation are repeatable across restarts.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Module-level state, filled in by rebuild_model() and read by the request
# handlers.  (A `global` statement has no effect at module scope, so plain
# assignments are all that is needed here.)
G = None              # follow graph
features = None       # node feature matrix
usernames = None      # sorted node names; position == node index
pyg_data = None       # graph data object handed to the model
trained_model = None  # trained recommender; None until the first rebuild

# Supabase connection settings, read from the environment.
SUPABASE_ID = os.getenv('supabaseID')
SUPABASE_URL = os.getenv('supabaseUrl')
SUPABASE_KEY = os.getenv('supabaseAnonKey')
def get_supabase_client():
    """Create a Supabase client from the module-level URL and key.

    Returns:
        Whatever ``create_client(SUPABASE_URL, SUPABASE_KEY)`` returns.
    """
    client = create_client(SUPABASE_URL, SUPABASE_KEY)
    return client
def load_and_preprocess_data():
    """Fetch follow relations from Supabase and resolve user ids to usernames.

    Reads every row of the ``followers`` table and the ``id``/``username``
    columns of the ``profiles`` table, then joins them twice: once on the
    ``following`` column and once on the followers table's own ``id`` column.

    Returns:
        A DataFrame with the columns ``follower_username`` and
        ``followed_username``; rows where either name is missing or an empty
        string are removed.
    """
    client = get_supabase_client()
    follower_rows = client.table('followers').select('*').execute().data
    profile_rows = client.table('profiles').select('id, username').execute().data

    followers = pd.DataFrame(follower_rows)
    profiles = pd.DataFrame(profile_rows)
    lookup = profiles[['id', 'username']]

    # First join: username of the account referenced by `following`.  Both
    # frames have an `id` column, so pandas suffixes them as id_x / id_y.
    edges = followers.merge(lookup, left_on='following', right_on='id', how='left')
    edges = edges.rename(columns={'username': 'follower_username'})
    edges = edges.drop(columns=['id_y'])

    # Second join: username of the account referenced by the followers row's own id.
    edges = edges.merge(lookup, left_on='id_x', right_on='id', how='left')
    edges = edges.rename(columns={'username': 'followed_username'})

    edges = edges[['follower_username', 'followed_username']].dropna()
    has_both_names = (edges['follower_username'] != '') & (edges['followed_username'] != '')
    return edges[has_both_names]
def create_graph_dataframe(merged_df):
    """Build the directed follow graph plus one-hot node features.

    Args:
        merged_df: DataFrame with ``follower_username`` and
            ``followed_username`` columns; each row becomes one edge.

    Returns:
        A ``(graph, features, usernames)`` tuple: the ``nx.DiGraph``, an
        identity matrix with one row per node, and the sorted node names.
    """
    graph = nx.from_pandas_edgelist(
        merged_df,
        source='follower_username',
        target='followed_username',
        create_using=nx.DiGraph(),
    )
    node_names = sorted(graph.nodes())
    one_hot = torch.eye(len(node_names))
    return graph, one_hot, node_names
def prepare_training_data(G, usernames):
    """Build positive and negative edge-index tensors for link prediction.

    Args:
        G: Graph object whose ``edges()`` yields ``(source, target)`` name pairs.
        usernames: Sequence of node names; a name's position is its node index.

    Returns:
        ``(pos_edge_index, neg_edge_index)``, two ``torch.long`` tensors of
        shape ``(2, E)``.  The negatives are distinct ordered pairs ``u != v``
        that are not edges of ``G``, drawn without replacement via the
        ``random`` module.  There are as many negatives as positives, capped
        at the number of non-edges that actually exist, so dense or empty
        graphs no longer raise.
    """
    # One dict lookup per endpoint instead of a linear list.index() scan.
    index_of = {name: idx for idx, name in enumerate(usernames)}
    pos_edges = [(index_of[u], index_of[v]) for u, v in G.edges()]
    existing_edges = set(pos_edges)
    num_nodes = len(usernames)

    # Self-loops are never negative candidates, so they do not reduce the pool.
    self_loops = sum(1 for u, v in existing_edges if u == v)
    available = num_nodes * (num_nodes - 1) - (len(existing_edges) - self_loops)
    num_negatives = min(len(pos_edges), available)

    if 2 * num_negatives > available:
        # Dense graph: the complement is small relative to the request, so
        # enumerate it and sample from it directly.
        pool = [
            pair
            for pair in itertools.permutations(range(num_nodes), 2)
            if pair not in existing_edges
        ]
        negative_edges = random.sample(pool, num_negatives)
    else:
        # Sparse graph: rejection-sample pairs instead of materialising all
        # N*(N-1) candidates.  At least half of the non-edges are still free
        # at every step, so each draw succeeds with constant probability.
        negative_edges = []
        taken = set()
        while len(negative_edges) < num_negatives:
            pair = (random.randrange(num_nodes), random.randrange(num_nodes))
            if pair[0] == pair[1] or pair in existing_edges or pair in taken:
                continue
            taken.add(pair)
            negative_edges.append(pair)

    def _as_edge_index(pairs):
        # reshape(-1, 2) keeps the (2, E) layout even when there are no pairs.
        return torch.tensor(pairs, dtype=torch.long).reshape(-1, 2).t()

    return _as_edge_index(pos_edges), _as_edge_index(negative_edges)
class GraphRecommender(nn.Module):
    """Two-layer SAGEConv encoder that maps node features to embeddings."""

    def __init__(self, input_dim, hidden_dim=128, output_dim=64):
        """Create the two convolution layers and the dropout between them.

        Args:
            input_dim: Size of each input node feature vector.
            hidden_dim: Output size of the first convolution.
            output_dim: Size of the final node embeddings.
        """
        super().__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x, edge_index):
        """Return node embeddings for features ``x`` over ``edge_index``."""
        hidden = self.conv1(x, edge_index)
        hidden = F.relu(hidden)
        hidden = self.dropout(hidden)
        return self.conv2(hidden, edge_index)
def _link_loss(embeddings, edges, label):
    """Mean BCE-with-logits loss of dot-product scores for ``edges`` vs ``label``."""
    if edges.numel() == 0:
        # The mean over zero scores is NaN; an empty edge set contributes nothing.
        return embeddings.new_zeros(())
    scores = (embeddings[edges[0]] * embeddings[edges[1]]).sum(1)
    return F.binary_cross_entropy_with_logits(scores, torch.full_like(scores, label))


def train_model(model, data, pos_edges, neg_edges, epochs=200, patience=20):
    """Train ``model`` for link prediction with early stopping.

    Each epoch runs one full-batch step.  The loss is the binary cross-entropy
    of dot-product scores (target 1 for ``pos_edges``, 0 for ``neg_edges``)
    plus ``0.001`` times the L2 norm of all embeddings.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``.
        data: Object exposing ``x`` and ``edge_index``.
        pos_edges: ``(2, E)`` index tensor of edges that exist.
        neg_edges: ``(2, E)`` index tensor of sampled non-edges.
        epochs: Maximum number of optimisation steps.
        patience: Stop after this many consecutive epochs without a new best
            training loss.

    Returns:
        The same ``model`` instance, trained in place and switched to eval
        mode so that dropout is disabled for inference.
    """
    optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-4)
    best_loss = float('inf')
    stale_epochs = 0
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        embeddings = model(data.x, data.edge_index)
        pos_loss = _link_loss(embeddings, pos_edges, 1.0)
        neg_loss = _link_loss(embeddings, neg_edges, 0.0)
        reg_loss = torch.norm(embeddings, p=2)
        total_loss = pos_loss + neg_loss + 0.001 * reg_loss
        total_loss.backward()
        optimizer.step()

        # Track a plain float: holding on to the tensor would keep the whole
        # autograd graph of the best epoch alive.
        loss_value = total_loss.item()
        if loss_value < best_loss:
            best_loss = loss_value
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs >= patience:
                break
    model.eval()
    return model
def get_recommendations(username, model, data, G, usernames, top_k=10):
    """Rank accounts that ``username`` does not follow yet.

    Candidates are every name in ``usernames`` except the user and the user's
    current successors in ``G``.  They are scored by the dot product between
    their embedding and the user's embedding.

    Args:
        username: Name of the user to recommend for.
        model: Module called as ``model(data.x, data.edge_index)``.
        data: Object exposing ``x`` and ``edge_index``.
        G: Graph whose ``successors(username)`` yields the accounts the user
            already follows.
        usernames: Sequence of node names; a name's position is its row in
            the embedding matrix.
        top_k: Maximum number of recommendations to return.

    Returns:
        Up to ``top_k`` candidate names, best score first.  Empty when the
        user is unknown or there is nobody left to recommend.
    """
    # One dict lookup per name instead of repeated linear list.index() scans.
    index_of = {name: idx for idx, name in enumerate(usernames)}
    if username not in index_of:
        return []

    already_followed = set(G.successors(username))
    candidates = [
        name for name in usernames
        if name != username and name not in already_followed
    ]
    if not candidates:
        return []
    candidate_indices = [index_of[name] for name in candidates]

    # Run inference in eval mode so dropout cannot randomise the ranking, then
    # restore whatever mode the caller had the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            embeddings = model(data.x, data.edge_index)
    finally:
        model.train(was_training)

    user_embed = embeddings[index_of[username]]
    # Matrix-vector product always yields a 1-D score vector, including when
    # there is exactly one candidate.
    scores = embeddings[candidate_indices] @ user_embed
    top_indices = scores.argsort(descending=True)[:top_k].tolist()
    return [candidates[i] for i in top_indices]
def rebuild_model():
    """Reload the follow data, retrain the recommender and publish the result.

    Everything is built in local variables first.  The module-level ``G``,
    ``features``, ``usernames``, ``pyg_data`` and ``trained_model`` are replaced
    together only after training has succeeded, so a failure part-way through
    leaves the previously published state untouched and consistent.
    """
    global G, features, usernames, pyg_data, trained_model

    merged_df = load_and_preprocess_data()
    new_graph, new_features, new_usernames = create_graph_dataframe(merged_df)
    pos_edge_index, neg_edge_index = prepare_training_data(new_graph, new_usernames)

    new_data = from_networkx(new_graph)
    new_data.x = new_features
    # from_networkx numbers nodes in the graph's own iteration order, whereas
    # the features, the training edges and every username lookup use positions
    # in the sorted username list.  Use the sorted-order edge index for message
    # passing so that all three agree on what node i is.
    new_data.edge_index = pos_edge_index.contiguous()

    model = GraphRecommender(input_dim=len(new_usernames))
    new_model = train_model(model, new_data, pos_edge_index, neg_edge_index)

    G, features, usernames, pyg_data, trained_model = (
        new_graph,
        new_features,
        new_usernames,
        new_data,
        new_model,
    )
@app.post("/rebuild")
async def rebuild_handler():
    """POST /rebuild: reload the data, retrain the model, report success."""
    rebuild_model()
    response = {
        "status": "success",
        "message": "Model and data rebuilt successfully",
    }
    return response
@app.get("/recommend/network")
async def get_recommendations_handler(username: str = Query(...)):
    """GET /recommend/network: recommend accounts for ``username``.

    Args:
        username: Required query parameter naming the user.

    Returns:
        A dict with ``status`` and a ``recommendations`` list; the list is
        empty when the username is not part of the graph.

    Raises:
        HTTPException: 500 when no model has been built yet.
    """
    # Compare against None explicitly instead of relying on the truthiness of
    # the model object.
    if trained_model is None:
        raise HTTPException(status_code=500, detail="Model not initialized, please rebuild first.")
    recommendations = get_recommendations(username, trained_model, pyg_data, G, usernames)
    return {"status": "success", "recommendations": recommendations}
@app.get("/")
async def health_check():
    """GET /: report that the service is up."""
    payload = dict(
        status="success",
        message="Recommendation service operational",
    )
    return payload
# Build the graph and train the model once at import time so the service can
# answer /recommend/network requests immediately.
# NOTE(review): this runs the Supabase fetch and training during import; if it
# raises, the module fails to import -- confirm that is the intended behavior.
+rebuild_model()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+numpy
+pandas
+torch
+networkx[default]
+torch_geometric
+supabase
+fastapi
+python-dotenv
+uvicorn