Dashm committed on
Commit
f1ee860
·
1 Parent(s): 0448b2b

fix: replace direct psycopg DB connection with Supabase REST API

Browse files

Removes the psycopg dependency and the direct PostgreSQL TCP connection,
which caused IPv6 connection failures on Hugging Face Spaces. All DB
operations now go through supabase-py over HTTPS via RPC calls.

Files changed (2) hide show
  1. backend/database.py +36 -105
  2. backend/requirements.txt +0 -1
backend/database.py CHANGED
@@ -1,15 +1,12 @@
1
  """
2
  Database connection helpers for Supabase.
3
- Uses psycopg (v3) for direct PostgreSQL/pgvector queries (search),
4
  and supabase-py for standard CRUD operations.
5
  """
6
 
7
  import os
8
- import psycopg
9
- from psycopg.rows import dict_row
10
  import numpy as np
11
  from supabase import create_client, Client
12
- from config import SUPABASE_URL, SUPABASE_KEY, DATABASE_URL
13
 
14
  # --- Supabase Client (for CRUD operations) ---
15
 
@@ -20,21 +17,13 @@ def get_supabase() -> Client:
20
  """Get or create the Supabase client for standard CRUD operations."""
21
  global _supabase_client
22
  if _supabase_client is None:
23
- url = (os.environ.get("SUPABASE_URL") or SUPABASE_URL).strip()
24
- key = (os.environ.get("SUPABASE_KEY") or SUPABASE_KEY).strip()
25
- _supabase_client = create_client(url, key)
 
26
  return _supabase_client
27
 
28
 
29
- # --- Direct PostgreSQL connection (for pgvector queries) ---
30
-
31
- def get_db_connection():
32
- """Create a new psycopg3 connection for pgvector queries."""
33
- db_url = (os.environ.get("DATABASE_URL") or DATABASE_URL).strip()
34
- conn = psycopg.connect(db_url, row_factory=dict_row)
35
- return conn
36
-
37
-
38
  def close_db_pool():
39
  """Placeholder for shutdown compatibility."""
40
  pass
@@ -53,37 +42,23 @@ def pgvector_to_embedding(pgvector_str: str) -> np.ndarray:
53
  return np.array([float(v) for v in values], dtype=np.float32)
54
 
55
 
56
- # --- pgvector similarity search ---
57
 
58
  def search_similar_products(query_embedding: np.ndarray, top_k: int = 50):
59
  """
60
  Find the top_k most similar products to the query embedding
61
- using pgvector cosine similarity search.
62
- Returns list of dicts with product info + embedding.
63
  """
64
- conn = get_db_connection()
65
- embedding_str = embedding_to_pgvector(query_embedding)
66
-
67
- query = """
68
- SELECT
69
- id, seller_id, title, description, price, stock, images,
70
- embedding::text as embedding_text,
71
- 1 - (embedding <=> %s::vector) as similarity
72
- FROM products
73
- WHERE is_active = true AND status = 'approved' AND stock > 0 AND embedding IS NOT NULL
74
- ORDER BY embedding <=> %s::vector
75
- LIMIT %s
76
- """
77
-
78
- try:
79
- with conn.cursor() as cur:
80
- cur.execute(query, (embedding_str, embedding_str, top_k))
81
- rows = cur.fetchall()
82
- finally:
83
- conn.close()
84
 
85
  results = []
86
- for row in rows:
87
  results.append({
88
  "id": str(row["id"]),
89
  "seller_id": str(row["seller_id"]),
@@ -108,61 +83,23 @@ def search_similar_products_filtered(
108
  color: str = None,
109
  ):
110
  """
111
- Find the top_k most similar products with optional structured filters.
112
- Extends search_similar_products with WHERE clauses from query rewriting.
113
  """
114
- conn = get_db_connection()
115
- embedding_str = embedding_to_pgvector(query_embedding)
116
-
117
- # Build dynamic WHERE clause
118
- conditions = ["is_active = true", "status = 'approved'", "stock > 0", "embedding IS NOT NULL"]
119
- filter_params = []
120
-
121
- if price_min is not None:
122
- conditions.append(f"price > %s")
123
- filter_params.append(price_min)
124
-
125
- if price_max is not None:
126
- conditions.append(f"price < %s")
127
- filter_params.append(price_max)
128
-
129
- if brand:
130
- conditions.append(f"(title ILIKE %s OR description ILIKE %s)")
131
- filter_params.append(f"%{brand}%")
132
- filter_params.append(f"%{brand}%")
133
-
134
- if color:
135
- conditions.append(f"(title ILIKE %s OR description ILIKE %s)")
136
- filter_params.append(f"%{color}%")
137
- filter_params.append(f"%{color}%")
138
-
139
- where_clause = " AND ".join(conditions)
140
-
141
- # Params must match SQL placeholder order:
142
- # 1) embedding for SELECT similarity, 2) filter params for WHERE,
143
- # 3) embedding for ORDER BY, 4) top_k for LIMIT
144
- params = [embedding_str] + filter_params + [embedding_str, top_k]
145
-
146
- query = f"""
147
- SELECT
148
- id, seller_id, title, description, price, stock, images,
149
- embedding::text as embedding_text,
150
- 1 - (embedding <=> %s::vector) as similarity
151
- FROM products
152
- WHERE {where_clause}
153
- ORDER BY embedding <=> %s::vector
154
- LIMIT %s
155
- """
156
-
157
- try:
158
- with conn.cursor() as cur:
159
- cur.execute(query, tuple(params))
160
- rows = cur.fetchall()
161
- finally:
162
- conn.close()
163
 
164
  results = []
165
- for row in rows:
166
  results.append({
167
  "id": str(row["id"]),
168
  "seller_id": str(row["seller_id"]),
@@ -179,17 +116,11 @@ def search_similar_products_filtered(
179
 
180
 
181
  def store_product_embedding(product_id: str, embedding: np.ndarray):
182
- """Store/update the BERT embedding for a product."""
183
- conn = get_db_connection()
184
  embedding_str = embedding_to_pgvector(embedding)
185
-
186
- query = """
187
- UPDATE products SET embedding = %s::vector WHERE id = %s
188
- """
189
-
190
- try:
191
- with conn.cursor() as cur:
192
- cur.execute(query, (embedding_str, product_id))
193
- conn.commit()
194
- finally:
195
- conn.close()
 
1
  """
2
  Database connection helpers for Supabase.
3
+ Uses Supabase RPC for pgvector similarity search (works over HTTPS),
4
  and supabase-py for standard CRUD operations.
5
  """
6
 
7
  import os
 
 
8
  import numpy as np
9
  from supabase import create_client, Client
 
10
 
11
  # --- Supabase Client (for CRUD operations) ---
12
 
 
17
  """Get or create the Supabase client for standard CRUD operations."""
18
  global _supabase_client
19
  if _supabase_client is None:
20
+ _supabase_client = create_client(
21
+ os.environ["SUPABASE_URL"].strip(),
22
+ os.environ["SUPABASE_KEY"].strip(),
23
+ )
24
  return _supabase_client
25
 
26
 
 
 
 
 
 
 
 
 
 
27
  def close_db_pool():
28
  """Placeholder for shutdown compatibility."""
29
  pass
 
42
  return np.array([float(v) for v in values], dtype=np.float32)
43
 
44
 
45
+ # --- pgvector similarity search (via Supabase RPC) ---
46
 
47
  def search_similar_products(query_embedding: np.ndarray, top_k: int = 50):
48
  """
49
  Find the top_k most similar products to the query embedding
50
+ using pgvector cosine similarity search via Supabase RPC.
 
51
  """
52
+ sb = get_supabase()
53
+ embedding_list = query_embedding.tolist()
54
+
55
+ response = sb.rpc("search_products_by_embedding", {
56
+ "query_embedding": embedding_list,
57
+ "match_count": top_k,
58
+ }).execute()
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  results = []
61
+ for row in response.data:
62
  results.append({
63
  "id": str(row["id"]),
64
  "seller_id": str(row["seller_id"]),
 
83
  color: str = None,
84
  ):
85
  """
86
+ Find the top_k most similar products with optional structured filters
87
+ via Supabase RPC.
88
  """
89
+ sb = get_supabase()
90
+ embedding_list = query_embedding.tolist()
91
+
92
+ response = sb.rpc("search_products_by_embedding_filtered", {
93
+ "query_embedding": embedding_list,
94
+ "match_count": top_k,
95
+ "filter_price_min": price_min,
96
+ "filter_price_max": price_max,
97
+ "filter_brand": brand,
98
+ "filter_color": color,
99
+ }).execute()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  results = []
102
+ for row in response.data:
103
  results.append({
104
  "id": str(row["id"]),
105
  "seller_id": str(row["seller_id"]),
 
116
 
117
 
118
  def store_product_embedding(product_id: str, embedding: np.ndarray):
119
+ """Store/update the BERT embedding for a product via Supabase RPC."""
120
+ sb = get_supabase()
121
  embedding_str = embedding_to_pgvector(embedding)
122
+
123
+ sb.rpc("update_product_embedding", {
124
+ "p_product_id": product_id,
125
+ "p_embedding": embedding_str,
126
+ }).execute()
 
 
 
 
 
 
backend/requirements.txt CHANGED
@@ -7,7 +7,6 @@ python-dotenv==1.0.1
7
 
8
  # Database
9
  supabase==2.7.2
10
- psycopg[binary]==3.3.2
11
 
12
  # ML / AI
13
  torch>=2.0.0
 
7
 
8
  # Database
9
  supabase==2.7.2
 
10
 
11
  # ML / AI
12
  torch>=2.0.0