IJ-Reynolds HF Staff committed on
Commit
e3168e1
·
verified ·
1 Parent(s): f327fb5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +19 -0
main.py CHANGED
@@ -12,6 +12,11 @@ from pathlib import Path
12
  from dateutil import parser as date_parser
13
  from urllib.parse import urljoin
14
  from huggingface_hub import InferenceClient
 
 
 
 
 
15
 
16
  # --- CONFIGURATION & GLOBALS ---
17
  CONGRESS_API_KEY = os.getenv("CONGRESS_API_KEY")
@@ -539,6 +544,20 @@ def run():
539
 
540
  item["analysis"] = analysis
541
  item["keywords"] = keywords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  item["date_collected"] = datetime.now().strftime("%Y-%m-%d %H:%M")
543
  new_items.append(item)
544
  db.append(event_id)
 
12
  from dateutil import parser as date_parser
13
  from urllib.parse import urljoin
14
  from huggingface_hub import InferenceClient
15
+ from sentence_transformers import SentenceTransformer
16
+ import json
17
+
18
+ # Specifying model for efficient embedding + trend analysis
19
+ embedder = SentenceTransformer('all-MiniLM-L6-v2')
20
 
21
  # --- CONFIGURATION & GLOBALS ---
22
  CONGRESS_API_KEY = os.getenv("CONGRESS_API_KEY")
 
544
 
545
  item["analysis"] = analysis
546
  item["keywords"] = keywords
547
+
548
+ # --- NEW: GENERATE SEMANTIC EMBEDDING ---
549
+ try:
550
+ # Don't waste compute embedding error messages
551
+ if analysis and not analysis.startswith("Error") and not analysis.startswith("AI Triage disabled"):
552
+ vector = embedder.encode(analysis).tolist()
553
+ item["embedding"] = json.dumps(vector) # Stored as JSON string for CSV compatibility
554
+ else:
555
+ item["embedding"] = None
556
+ except Exception as e:
557
+ print(f" -> Embedding error: {e}")
558
+ item["embedding"] = None
559
+ # ----------------------------------------
560
+
561
  item["date_collected"] = datetime.now().strftime("%Y-%m-%d %H:%M")
562
  new_items.append(item)
563
  db.append(event_id)