andykr1k commited on
Commit
20b3c7f
·
1 Parent(s): 5ae989a

created feed recommender

Browse files
Files changed (4) hide show
  1. .gitignore +436 -0
  2. Dockerfile +5 -0
  3. app.py +256 -0
  4. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Python ###
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ ### Python Patch ###
163
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
164
+ poetry.toml
165
+
166
+ # ruff
167
+ .ruff_cache/
168
+
169
+ # LSP config files
170
+ pyrightconfig.json
171
+
172
+ ### react ###
173
+ .DS_*
174
+ logs
175
+ **/*.backup.*
176
+ **/*.back.*
177
+
178
+ node_modules
179
+ bower_components
180
+
181
+ *.sublime*
182
+
183
+ psd
184
+ thumb
185
+ sketch
186
+
187
+ ### ReactNative ###
188
+ # React Native Stack Base
189
+
190
+ .expo
191
+ __generated__
192
+
193
+ ### ReactNative.Xcode Stack ###
194
+ ## User settings
195
+ xcuserdata/
196
+
197
+ ## Xcode 8 and earlier
198
+ *.xcscmblueprint
199
+ *.xccheckout
200
+
201
+ ### ReactNative.Gradle Stack ###
202
+ .gradle
203
+ **/build/
204
+ !src/**/build/
205
+
206
+ # Ignore Gradle GUI config
207
+ gradle-app.setting
208
+
209
+ # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
210
+ !gradle-wrapper.jar
211
+
212
+ # Avoid ignoring Gradle wrapper properties
213
+ !gradle-wrapper.properties
214
+
215
+ # Cache of project
216
+ .gradletasknamecache
217
+
218
+ # Eclipse Gradle plugin generated files
219
+ # Eclipse Core
220
+ .project
221
+ # JDT-specific (Eclipse Java Development Tools)
222
+ .classpath
223
+
224
+ ### ReactNative.macOS Stack ###
225
+ # General
226
+ .DS_Store
227
+ .AppleDouble
228
+ .LSOverride
229
+
230
+ # Icon must end with two \r
231
+ Icon
232
+
233
+
234
+ # Thumbnails
235
+ ._*
236
+
237
+ # Files that might appear in the root of a volume
238
+ .DocumentRevisions-V100
239
+ .fseventsd
240
+ .Spotlight-V100
241
+ .TemporaryItems
242
+ .Trashes
243
+ .VolumeIcon.icns
244
+ .com.apple.timemachine.donotpresent
245
+
246
+ # Directories potentially created on remote AFP share
247
+ .AppleDB
248
+ .AppleDesktop
249
+ Network Trash Folder
250
+ Temporary Items
251
+ .apdisk
252
+
253
+ ### ReactNative.Linux Stack ###
254
+ *~
255
+
256
+ # temporary files which can be created if a process still has a handle open of a deleted file
257
+ .fuse_hidden*
258
+
259
+ # KDE directory preferences
260
+ .directory
261
+
262
+ # Linux trash folder which might appear on any partition or disk
263
+ .Trash-*
264
+
265
+ # .nfs files are created when an open file is removed but is still being accessed
266
+ .nfs*
267
+
268
+ ### ReactNative.Node Stack ###
269
+ # Logs
270
+ npm-debug.log*
271
+ yarn-debug.log*
272
+ yarn-error.log*
273
+ lerna-debug.log*
274
+ .pnpm-debug.log*
275
+
276
+ # Diagnostic reports (https://nodejs.org/api/report.html)
277
+ report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
278
+
279
+ # Runtime data
280
+ pids
281
+ *.pid
282
+ *.seed
283
+ *.pid.lock
284
+
285
+ # Directory for instrumented libs generated by jscoverage/JSCover
286
+ lib-cov
287
+
288
+ # Coverage directory used by tools like istanbul
289
+ coverage
290
+ *.lcov
291
+
292
+ # nyc test coverage
293
+ .nyc_output
294
+
295
+ # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
296
+ .grunt
297
+
298
+ # Bower dependency directory (https://bower.io/)
299
+
300
+ # node-waf configuration
301
+ .lock-wscript
302
+
303
+ # Compiled binary addons (https://nodejs.org/api/addons.html)
304
+ build/Release
305
+
306
+ # Dependency directories
307
+ node_modules/
308
+ jspm_packages/
309
+
310
+ # Snowpack dependency directory (https://snowpack.dev/)
311
+ web_modules/
312
+
313
+ # TypeScript cache
314
+ *.tsbuildinfo
315
+
316
+ # Optional npm cache directory
317
+ .npm
318
+
319
+ # Optional eslint cache
320
+ .eslintcache
321
+
322
+ # Optional stylelint cache
323
+ .stylelintcache
324
+
325
+ # Microbundle cache
326
+ .rpt2_cache/
327
+ .rts2_cache_cjs/
328
+ .rts2_cache_es/
329
+ .rts2_cache_umd/
330
+
331
+ # Optional REPL history
332
+ .node_repl_history
333
+
334
+ # Output of 'npm pack'
335
+ *.tgz
336
+
337
+ # Yarn Integrity file
338
+ .yarn-integrity
339
+
340
+ # dotenv environment variable files
341
+ .env.development.local
342
+ .env.test.local
343
+ .env.production.local
344
+ .env.local
345
+
346
+ # parcel-bundler cache (https://parceljs.org/)
347
+ .parcel-cache
348
+
349
+ # Next.js build output
350
+ .next
351
+ out
352
+
353
+ # Nuxt.js build / generate output
354
+ .nuxt
355
+ dist
356
+
357
+ # Gatsby files
358
+ .cache/
359
+ # Comment in the public line in if your project uses Gatsby and not Next.js
360
+ # https://nextjs.org/blog/next-9-1#public-directory-support
361
+ # public
362
+
363
+ # vuepress build output
364
+ .vuepress/dist
365
+
366
+ # vuepress v2.x temp and cache directory
367
+ .temp
368
+
369
+ # Docusaurus cache and generated files
370
+ .docusaurus
371
+
372
+ # Serverless directories
373
+ .serverless/
374
+
375
+ # FuseBox cache
376
+ .fusebox/
377
+
378
+ # DynamoDB Local files
379
+ .dynamodb/
380
+
381
+ # TernJS port file
382
+ .tern-port
383
+
384
+ # Stores VSCode versions used for testing VSCode extensions
385
+ .vscode-test
386
+
387
+ # yarn v2
388
+ .yarn/cache
389
+ .yarn/unplugged
390
+ .yarn/build-state.yml
391
+ .yarn/install-state.gz
392
+ .pnp.*
393
+
394
+ ### ReactNative.Buck Stack ###
395
+ buck-out/
396
+ .buckconfig.local
397
+ .buckd/
398
+ .buckversion
399
+ .fakebuckversion
400
+
401
+ ### ReactNative.Android Stack ###
402
+ # Gradle files
403
+ .gradle/
404
+
405
+ # Local configuration file (sdk path, etc)
406
+ local.properties
407
+
408
+ # Log/OS Files
409
+
410
+ # Android Studio generated files and folders
411
+ captures/
412
+ .externalNativeBuild/
413
+ .cxx/
414
+ *.apk
415
+ output.json
416
+
417
+ # IntelliJ
418
+ *.iml
419
+ .idea/
420
+ misc.xml
421
+ deploymentTargetDropDown.xml
422
+ render.experimental.xml
423
+
424
+ # Keystore files
425
+ *.jks
426
+ *.keystore
427
+
428
+ # Google Services (e.g. APIs or Firebase)
429
+ google-services.json
430
+
431
+ # Android Profiling
432
+ *.hprof
433
+
434
+ # Personal
435
+ data
436
+ package-lock.json
Dockerfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
FROM python:3.10.9
WORKDIR /
# Install dependencies before copying the application code so the pip
# layer is cached: code-only changes no longer re-download every package.
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir --upgrade -r /requirements.txt
COPY . .
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import random
import numpy as np
import pandas as pd
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.utils import from_networkx
from torch_geometric.nn import SAGEConv
from supabase import create_client
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from dotenv import load_dotenv

# Pull the supabase* variables below out of a local .env file, if present.
load_dotenv()

app = FastAPI()

# Enable CORS for all origins (adjust as needed)
# NOTE(review): wide-open CORS with credentials enabled — restrict
# allow_origins before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Seed every RNG in use (Python, NumPy, Torch, CUDA) so negative sampling
# and model training are reproducible across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Global variables for our GNN-based post recommender.
# NOTE(review): a `global` statement at module scope is a no-op; the
# assignments below are what actually create the module-level names.
global G, features, user_nodes, post_nodes, node2idx, pyg_data, trained_model
G = None              # Bipartite graph (users and posts)
features = None       # Node features (we use identity)
user_nodes = None     # Sorted list of user node IDs
post_nodes = None     # Sorted list of post node IDs
node2idx = None       # Mapping from node ID to index (for features)
pyg_data = None       # PyTorch Geometric data object
trained_model = None  # Trained GNN model

# Supabase connection settings, read from the environment / .env file.
# SUPABASE_ID is read but not used anywhere in this file.
SUPABASE_ID = os.getenv('supabaseID')
SUPABASE_URL = os.getenv('supabaseUrl')
SUPABASE_KEY = os.getenv('supabaseAnonKey')
50
+
51
def get_supabase_client():
    """Create a fresh Supabase client from the configured URL and anon key."""
    client = create_client(SUPABASE_URL, SUPABASE_KEY)
    return client
53
+
54
def load_and_preprocess_data_for_posts():
    """
    Build a bipartite directed graph from Supabase data:
      - Users: derived from profiles (via posts and likes)
      - Posts: from the posts table.
    Edges (user -> post):
      - the user created the post, or
      - the user liked the post.

    Returns:
        nx.DiGraph whose nodes carry a ``type`` attribute in {'user', 'post'}.
    """
    supabase = get_supabase_client()

    # Load profiles (users).  Explicit column lists keep the DataFrames
    # well-formed (no KeyError) even when a table is empty.
    profiles_response = supabase.table('profiles').select('id, username').execute()
    df_profiles = pd.DataFrame(profiles_response.data, columns=['id', 'username'])
    # Mapping from user id (uuid) to username.
    uuid_to_username = dict(zip(df_profiles['id'], df_profiles['username']))

    # Load posts (each with an author) and map authors to usernames.
    posts_response = supabase.table('posts').select('id, author').execute()
    df_posts = pd.DataFrame(posts_response.data, columns=['id', 'author'])
    df_posts['username'] = df_posts['author'].map(uuid_to_username)

    # Load likes: records of (user_id, post_id).
    likes_response = supabase.table('likes').select('user_id, post_id').execute()
    df_likes = pd.DataFrame(likes_response.data, columns=['user_id', 'post_id'])
    df_likes['username'] = df_likes['user_id'].map(uuid_to_username)

    # Build bipartite graph (directed: from user to post).
    bipartite = nx.DiGraph()

    # Users are only those who appear in posts or likes; posts by id.
    user_set = set(df_posts['username'].dropna()) | set(df_likes['username'].dropna())
    post_set = set(df_posts['id'].dropna())

    for user in user_set:
        if user:  # ensure non-empty
            bipartite.add_node(user, type='user')
    for post in post_set:
        bipartite.add_node(post, type='post')

    # Add edges from post creation: user -> post.
    # BUG FIX: authors missing from `profiles` map to NaN usernames, and
    # NaN is truthy, so the original `if user:` leaked NaN nodes/edges
    # into the graph.  Guard with pd.notna() on both endpoints.
    for _, row in df_posts.iterrows():
        user, post = row['username'], row['id']
        if pd.notna(user) and user and pd.notna(post):
            bipartite.add_edge(user, post)

    # Add edges from likes: user -> post (same NaN guard).
    for _, row in df_likes.iterrows():
        user, post = row['username'], row['post_id']
        if pd.notna(user) and user and pd.notna(post):
            bipartite.add_edge(user, post)

    return bipartite
113
+
114
+ # GNN Model using GraphSAGE
115
class GraphRecommender(nn.Module):
    """Two-layer GraphSAGE encoder that maps node features to embeddings
    used for dot-product link scoring."""

    def __init__(self, input_dim, hidden_dim=128, output_dim=64):
        super().__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x, edge_index):
        """Return ``output_dim``-dimensional embeddings for every node."""
        hidden = self.conv1(x, edge_index)
        hidden = F.relu(hidden)
        hidden = self.dropout(hidden)
        return self.conv2(hidden, edge_index)
127
+
128
def prepare_training_data(G, node2idx, user_nodes, post_nodes):
    """
    Build positive and negative edge-index tensors for link prediction.

    Positives are observed user -> post edges in ``G``; negatives are
    sampled uniformly from user/post pairs with no observed edge, at most
    one negative per positive.

    Returns:
        (pos_edge_index, neg_edge_index): two ``[2, E]`` long tensors.
    """
    # Only user -> post edges count as positive training examples.
    pos_edges = [
        (node2idx[u], node2idx[v])
        for u, v in G.edges()
        if G.nodes[u].get('type') == 'user' and G.nodes[v].get('type') == 'post'
    ]

    # All possible user->post pairs minus the positives are negative candidates.
    pos_set = set(pos_edges)
    neg_candidates = [
        (node2idx[u], node2idx[p])
        for u in user_nodes
        for p in post_nodes
        if (node2idx[u], node2idx[p]) not in pos_set
    ]

    # Sample as many negatives as positives (if available).
    neg_sample_size = min(len(pos_edges), len(neg_candidates))
    neg_edges = random.sample(neg_candidates, neg_sample_size)

    def _as_edge_index(pairs):
        # BUG FIX: torch.tensor([]).T is a float tensor of shape [0], not
        # [2, 0] long — an empty graph crashed downstream indexing.  Build
        # explicitly-shaped long tensors instead.
        if not pairs:
            return torch.empty((2, 0), dtype=torch.long)
        return torch.tensor(pairs, dtype=torch.long).T

    return _as_edge_index(pos_edges), _as_edge_index(neg_edges)
149
+
150
def train_model(model, data, pos_edges, neg_edges, epochs=200):
    """
    Train ``model`` on a link-prediction objective with early stopping.

    Positive/negative pairs are scored by the dot product of their node
    embeddings; both sides use binary cross-entropy with logits, plus a
    small L2 penalty on the embeddings.  Training stops early after 20
    consecutive epochs without improvement.

    Returns:
        The trained model (final weights — not a best-epoch snapshot).
    """
    optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-4)
    best_loss = float('inf')
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()

        embeddings = model(data.x, data.edge_index)

        # Compute scores for positive and negative edges via dot product.
        pos_scores = (embeddings[pos_edges[0]] * embeddings[pos_edges[1]]).sum(1)
        neg_scores = (embeddings[neg_edges[0]] * embeddings[neg_edges[1]]).sum(1)

        pos_loss = F.binary_cross_entropy_with_logits(pos_scores, torch.ones_like(pos_scores))
        neg_loss = F.binary_cross_entropy_with_logits(neg_scores, torch.zeros_like(neg_scores))
        reg_loss = torch.norm(embeddings, p=2)

        total_loss = pos_loss + neg_loss + 0.001 * reg_loss

        total_loss.backward()
        optimizer.step()

        # BUG FIX: compare/store a Python float, not the loss tensor —
        # `best_loss = total_loss` kept the best epoch's entire autograd
        # graph alive in memory for the rest of training.
        loss_value = total_loss.item()
        if loss_value < best_loss:
            best_loss = loss_value
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= 20:
                break

    return model
183
+
184
def rebuild_model():
    """
    Loads the bipartite user-post graph, computes node features,
    prepares training data, trains the GNN model, and updates the
    module-level globals used by the request handlers.
    """
    global G, features, user_nodes, post_nodes, node2idx, pyg_data, trained_model
    G = load_and_preprocess_data_for_posts()

    # Sorted lists of user and post nodes (used for candidate iteration).
    user_nodes = sorted([n for n, attr in G.nodes(data=True) if attr.get('type') == 'user'])
    post_nodes = sorted([n for n, attr in G.nodes(data=True) if attr.get('type') == 'post'])

    # BUG FIX: from_networkx() numbers nodes in G.nodes() iteration order,
    # NOT sorted order.  node2idx must use the same ordering, otherwise
    # feature rows / embeddings don't line up with edge_index and every
    # training edge and recommendation score is silently wrong.
    all_nodes = list(G.nodes())
    node2idx = {node: i for i, node in enumerate(all_nodes)}

    # Use identity features (one-hot) for all nodes.
    features = torch.eye(len(all_nodes))
    pyg_data = from_networkx(G)
    pyg_data.x = features

    pos_edge_index, neg_edge_index = prepare_training_data(G, node2idx, user_nodes, post_nodes)

    input_dim = features.shape[1]
    model = GraphRecommender(input_dim=input_dim, hidden_dim=128, output_dim=64)
    trained_model = train_model(model, pyg_data, pos_edge_index, neg_edge_index)
208
+
209
def get_recommendations(username, model, data, G, user_nodes, post_nodes, node2idx, top_k=10):
    """
    Rank candidate posts for ``username`` by embedding dot-product score.

    Posts the user already created or liked are excluded.  Returns the
    ``top_k`` highest-scoring post ids, or an empty list for unknown users.
    """
    if username not in user_nodes:
        return []
    user_idx = node2idx[username]

    # Posts the user already interacted with (outgoing edges from username).
    user_interacted = {
        v for _, v in G.out_edges(username)
        if G.nodes[v].get('type') == 'post'
    }

    # BUG FIX: put the model in eval mode so dropout is disabled —
    # otherwise inference is stochastic and the feed changes on every call.
    model.eval()
    with torch.no_grad():
        embeddings = model(data.x, data.edge_index)
        user_embed = embeddings[user_idx]

    candidate_scores = []
    for post in post_nodes:
        if post in user_interacted:
            continue
        score = torch.dot(user_embed, embeddings[node2idx[post]]).item()
        candidate_scores.append((post, score))

    candidate_scores.sort(key=lambda item: item[1], reverse=True)
    return [post for post, _ in candidate_scores[:top_k]]
237
+
238
# Endpoints
@app.post("/rebuild")
async def rebuild_handler():
    """Reload data from Supabase and retrain the recommendation model."""
    rebuild_model()
    return {"status": "success", "message": "Model and data rebuilt successfully"}
243
+
244
+ @app.get("/recommend/feed")
245
+ async def get_recommendations_handler(username: str = Query(...)):
246
+ if trained_model is None:
247
+ raise HTTPException(status_code=500, detail="Model not initialized, please rebuild first.")
248
+ recs = get_recommendations(username, trained_model, pyg_data, G, user_nodes, post_nodes, node2idx)
249
+ return {"status": "success", "recommendations": recs}
250
+
251
+ @app.get("/")
252
+ async def health_check():
253
+ return {"status": "success", "message": "Recommendation service operational"}
254
+
255
# Optionally, rebuild the model on startup
# NOTE(review): this runs at import time, so server startup blocks on (and
# fails with) any Supabase outage — consider moving it into a FastAPI
# startup hook with error handling.
rebuild_model()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ pandas
3
+ torch
4
+ networkx[default]
5
+ torch_geometric
6
+ supabase
7
+ fastapi
8
+ python-dotenv
9
+ uvicorn