Spaces:
Running
Running
Dashm committed on
Commit ·
f1ee860
1
Parent(s): 0448b2b
fix: replace direct psycopg DB connection with Supabase REST API
Browse files
Removes psycopg dependency and direct PostgreSQL TCP connection which
caused IPv6 connection failures on HuggingFace Spaces. All DB operations
now go through supabase-py over HTTPS via RPC calls.
- backend/database.py +36 -105
- backend/requirements.txt +0 -1
backend/database.py
CHANGED
|
@@ -1,15 +1,12 @@
|
|
| 1 |
"""
|
| 2 |
Database connection helpers for Supabase.
|
| 3 |
-
Uses
|
| 4 |
and supabase-py for standard CRUD operations.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
| 8 |
-
import psycopg
|
| 9 |
-
from psycopg.rows import dict_row
|
| 10 |
import numpy as np
|
| 11 |
from supabase import create_client, Client
|
| 12 |
-
from config import SUPABASE_URL, SUPABASE_KEY, DATABASE_URL
|
| 13 |
|
| 14 |
# --- Supabase Client (for CRUD operations) ---
|
| 15 |
|
|
@@ -20,21 +17,13 @@ def get_supabase() -> Client:
|
|
| 20 |
"""Get or create the Supabase client for standard CRUD operations."""
|
| 21 |
global _supabase_client
|
| 22 |
if _supabase_client is None:
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
return _supabase_client
|
| 27 |
|
| 28 |
|
| 29 |
-
# --- Direct PostgreSQL connection (for pgvector queries) ---
|
| 30 |
-
|
| 31 |
-
def get_db_connection():
|
| 32 |
-
"""Create a new psycopg3 connection for pgvector queries."""
|
| 33 |
-
db_url = (os.environ.get("DATABASE_URL") or DATABASE_URL).strip()
|
| 34 |
-
conn = psycopg.connect(db_url, row_factory=dict_row)
|
| 35 |
-
return conn
|
| 36 |
-
|
| 37 |
-
|
| 38 |
def close_db_pool():
|
| 39 |
"""Placeholder for shutdown compatibility."""
|
| 40 |
pass
|
|
@@ -53,37 +42,23 @@ def pgvector_to_embedding(pgvector_str: str) -> np.ndarray:
|
|
| 53 |
return np.array([float(v) for v in values], dtype=np.float32)
|
| 54 |
|
| 55 |
|
| 56 |
-
# --- pgvector similarity search ---
|
| 57 |
|
| 58 |
def search_similar_products(query_embedding: np.ndarray, top_k: int = 50):
|
| 59 |
"""
|
| 60 |
Find the top_k most similar products to the query embedding
|
| 61 |
-
using pgvector cosine similarity search.
|
| 62 |
-
Returns list of dicts with product info + embedding.
|
| 63 |
"""
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
1 - (embedding <=> %s::vector) as similarity
|
| 72 |
-
FROM products
|
| 73 |
-
WHERE is_active = true AND status = 'approved' AND stock > 0 AND embedding IS NOT NULL
|
| 74 |
-
ORDER BY embedding <=> %s::vector
|
| 75 |
-
LIMIT %s
|
| 76 |
-
"""
|
| 77 |
-
|
| 78 |
-
try:
|
| 79 |
-
with conn.cursor() as cur:
|
| 80 |
-
cur.execute(query, (embedding_str, embedding_str, top_k))
|
| 81 |
-
rows = cur.fetchall()
|
| 82 |
-
finally:
|
| 83 |
-
conn.close()
|
| 84 |
|
| 85 |
results = []
|
| 86 |
-
for row in
|
| 87 |
results.append({
|
| 88 |
"id": str(row["id"]),
|
| 89 |
"seller_id": str(row["seller_id"]),
|
|
@@ -108,61 +83,23 @@ def search_similar_products_filtered(
|
|
| 108 |
color: str = None,
|
| 109 |
):
|
| 110 |
"""
|
| 111 |
-
Find the top_k most similar products with optional structured filters
|
| 112 |
-
|
| 113 |
"""
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
if price_max is not None:
|
| 126 |
-
conditions.append(f"price < %s")
|
| 127 |
-
filter_params.append(price_max)
|
| 128 |
-
|
| 129 |
-
if brand:
|
| 130 |
-
conditions.append(f"(title ILIKE %s OR description ILIKE %s)")
|
| 131 |
-
filter_params.append(f"%{brand}%")
|
| 132 |
-
filter_params.append(f"%{brand}%")
|
| 133 |
-
|
| 134 |
-
if color:
|
| 135 |
-
conditions.append(f"(title ILIKE %s OR description ILIKE %s)")
|
| 136 |
-
filter_params.append(f"%{color}%")
|
| 137 |
-
filter_params.append(f"%{color}%")
|
| 138 |
-
|
| 139 |
-
where_clause = " AND ".join(conditions)
|
| 140 |
-
|
| 141 |
-
# Params must match SQL placeholder order:
|
| 142 |
-
# 1) embedding for SELECT similarity, 2) filter params for WHERE,
|
| 143 |
-
# 3) embedding for ORDER BY, 4) top_k for LIMIT
|
| 144 |
-
params = [embedding_str] + filter_params + [embedding_str, top_k]
|
| 145 |
-
|
| 146 |
-
query = f"""
|
| 147 |
-
SELECT
|
| 148 |
-
id, seller_id, title, description, price, stock, images,
|
| 149 |
-
embedding::text as embedding_text,
|
| 150 |
-
1 - (embedding <=> %s::vector) as similarity
|
| 151 |
-
FROM products
|
| 152 |
-
WHERE {where_clause}
|
| 153 |
-
ORDER BY embedding <=> %s::vector
|
| 154 |
-
LIMIT %s
|
| 155 |
-
"""
|
| 156 |
-
|
| 157 |
-
try:
|
| 158 |
-
with conn.cursor() as cur:
|
| 159 |
-
cur.execute(query, tuple(params))
|
| 160 |
-
rows = cur.fetchall()
|
| 161 |
-
finally:
|
| 162 |
-
conn.close()
|
| 163 |
|
| 164 |
results = []
|
| 165 |
-
for row in
|
| 166 |
results.append({
|
| 167 |
"id": str(row["id"]),
|
| 168 |
"seller_id": str(row["seller_id"]),
|
|
@@ -179,17 +116,11 @@ def search_similar_products_filtered(
|
|
| 179 |
|
| 180 |
|
| 181 |
def store_product_embedding(product_id: str, embedding: np.ndarray):
|
| 182 |
-
"""Store/update the BERT embedding for a product."""
|
| 183 |
-
|
| 184 |
embedding_str = embedding_to_pgvector(embedding)
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
try:
|
| 191 |
-
with conn.cursor() as cur:
|
| 192 |
-
cur.execute(query, (embedding_str, product_id))
|
| 193 |
-
conn.commit()
|
| 194 |
-
finally:
|
| 195 |
-
conn.close()
|
|
|
|
| 1 |
"""
|
| 2 |
Database connection helpers for Supabase.
|
| 3 |
+
Uses Supabase RPC for pgvector similarity search (works over HTTPS),
|
| 4 |
and supabase-py for standard CRUD operations.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
|
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
from supabase import create_client, Client
|
|
|
|
| 10 |
|
| 11 |
# --- Supabase Client (for CRUD operations) ---
|
| 12 |
|
|
|
|
| 17 |
"""Get or create the Supabase client for standard CRUD operations."""
|
| 18 |
global _supabase_client
|
| 19 |
if _supabase_client is None:
|
| 20 |
+
_supabase_client = create_client(
|
| 21 |
+
os.environ["SUPABASE_URL"].strip(),
|
| 22 |
+
os.environ["SUPABASE_KEY"].strip(),
|
| 23 |
+
)
|
| 24 |
return _supabase_client
|
| 25 |
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def close_db_pool():
|
| 28 |
"""Placeholder for shutdown compatibility."""
|
| 29 |
pass
|
|
|
|
| 42 |
return np.array([float(v) for v in values], dtype=np.float32)
|
| 43 |
|
| 44 |
|
| 45 |
+
# --- pgvector similarity search (via Supabase RPC) ---
|
| 46 |
|
| 47 |
def search_similar_products(query_embedding: np.ndarray, top_k: int = 50):
|
| 48 |
"""
|
| 49 |
Find the top_k most similar products to the query embedding
|
| 50 |
+
using pgvector cosine similarity search via Supabase RPC.
|
|
|
|
| 51 |
"""
|
| 52 |
+
sb = get_supabase()
|
| 53 |
+
embedding_list = query_embedding.tolist()
|
| 54 |
+
|
| 55 |
+
response = sb.rpc("search_products_by_embedding", {
|
| 56 |
+
"query_embedding": embedding_list,
|
| 57 |
+
"match_count": top_k,
|
| 58 |
+
}).execute()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
results = []
|
| 61 |
+
for row in response.data:
|
| 62 |
results.append({
|
| 63 |
"id": str(row["id"]),
|
| 64 |
"seller_id": str(row["seller_id"]),
|
|
|
|
| 83 |
color: str = None,
|
| 84 |
):
|
| 85 |
"""
|
| 86 |
+
Find the top_k most similar products with optional structured filters
|
| 87 |
+
via Supabase RPC.
|
| 88 |
"""
|
| 89 |
+
sb = get_supabase()
|
| 90 |
+
embedding_list = query_embedding.tolist()
|
| 91 |
+
|
| 92 |
+
response = sb.rpc("search_products_by_embedding_filtered", {
|
| 93 |
+
"query_embedding": embedding_list,
|
| 94 |
+
"match_count": top_k,
|
| 95 |
+
"filter_price_min": price_min,
|
| 96 |
+
"filter_price_max": price_max,
|
| 97 |
+
"filter_brand": brand,
|
| 98 |
+
"filter_color": color,
|
| 99 |
+
}).execute()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
results = []
|
| 102 |
+
for row in response.data:
|
| 103 |
results.append({
|
| 104 |
"id": str(row["id"]),
|
| 105 |
"seller_id": str(row["seller_id"]),
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
def store_product_embedding(product_id: str, embedding: np.ndarray):
|
| 119 |
+
"""Store/update the BERT embedding for a product via Supabase RPC."""
|
| 120 |
+
sb = get_supabase()
|
| 121 |
embedding_str = embedding_to_pgvector(embedding)
|
| 122 |
+
|
| 123 |
+
sb.rpc("update_product_embedding", {
|
| 124 |
+
"p_product_id": product_id,
|
| 125 |
+
"p_embedding": embedding_str,
|
| 126 |
+
}).execute()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/requirements.txt
CHANGED
|
@@ -7,7 +7,6 @@ python-dotenv==1.0.1
|
|
| 7 |
|
| 8 |
# Database
|
| 9 |
supabase==2.7.2
|
| 10 |
-
psycopg[binary]==3.3.2
|
| 11 |
|
| 12 |
# ML / AI
|
| 13 |
torch>=2.0.0
|
|
|
|
| 7 |
|
| 8 |
# Database
|
| 9 |
supabase==2.7.2
|
|
|
|
| 10 |
|
| 11 |
# ML / AI
|
| 12 |
torch>=2.0.0
|