Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -12,6 +12,11 @@ from pathlib import Path
|
|
| 12 |
from dateutil import parser as date_parser
|
| 13 |
from urllib.parse import urljoin
|
| 14 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# --- CONFIGURATION & GLOBALS ---
|
| 17 |
CONGRESS_API_KEY = os.getenv("CONGRESS_API_KEY")
|
|
@@ -539,6 +544,20 @@ def run():
|
|
| 539 |
|
| 540 |
item["analysis"] = analysis
|
| 541 |
item["keywords"] = keywords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
item["date_collected"] = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 543 |
new_items.append(item)
|
| 544 |
db.append(event_id)
|
|
|
|
| 12 |
from dateutil import parser as date_parser
|
| 13 |
from urllib.parse import urljoin
|
| 14 |
from huggingface_hub import InferenceClient
|
| 15 |
+
from sentence_transformers import SentenceTransformer
|
| 16 |
+
import json
|
| 17 |
+
|
| 18 |
+
# Specifying model for efficient embedding + trend analysis
|
| 19 |
+
embedder = SentenceTransformer('all-MiniLM-L6-v2')
|
| 20 |
|
| 21 |
# --- CONFIGURATION & GLOBALS ---
|
| 22 |
CONGRESS_API_KEY = os.getenv("CONGRESS_API_KEY")
|
|
|
|
| 544 |
|
| 545 |
item["analysis"] = analysis
|
| 546 |
item["keywords"] = keywords
|
| 547 |
+
|
| 548 |
+
# --- NEW: GENERATE SEMANTIC EMBEDDING ---
|
| 549 |
+
try:
|
| 550 |
+
# Don't waste compute embedding error messages
|
| 551 |
+
if analysis and not analysis.startswith("Error") and not analysis.startswith("AI Triage disabled"):
|
| 552 |
+
vector = embedder.encode(analysis).tolist()
|
| 553 |
+
item["embedding"] = json.dumps(vector) # Stored as JSON string for CSV compatibility
|
| 554 |
+
else:
|
| 555 |
+
item["embedding"] = None
|
| 556 |
+
except Exception as e:
|
| 557 |
+
print(f" -> Embedding error: {e}")
|
| 558 |
+
item["embedding"] = None
|
| 559 |
+
# ----------------------------------------
|
| 560 |
+
|
| 561 |
item["date_collected"] = datetime.now().strftime("%Y-%m-%d %H:%M")
|
| 562 |
new_items.append(item)
|
| 563 |
db.append(event_id)
|