Spaces:

IJ-Reynolds
/

AI_Intel_Tracker

Sleeping

App Files Files Community

IJ-Reynolds HF Staff committed 17 days ago

Commit

344bb2e

verified ·

1 Parent(s): 2f5126c

Update main.py

Browse files

Files changed (1) hide show

main.py +49 -22

main.py CHANGED Viewed

@@ -219,13 +219,26 @@ def fetch_federal_register():
     print("Scanning Federal Register API...")
     results = []
     url = "https://www.federalregister.gov/api/v1/documents.json"
-    params = {"conditions[term]": "artificial intelligence", "order": "newest", "per_page": 10}
     try:
         r = requests.get(url, params=params, timeout=15)
         if r.status_code == 200:
             for doc in r.json().get("results", []):
                 title = doc.get("title", "No Title")
                 summary = doc.get("abstract", "No summary provided.")
                 pub_date = doc.get("publication_date")
                 fmt_date = pd.to_datetime(pub_date).tz_localize(None).to_pydatetime() if pub_date else datetime.now()
@@ -263,29 +276,43 @@ def fetch_legislation(target=1000):
     headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
     BILL_MAP = {"HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution"}
-    for offset in range(0, target, 250):
-        try:
-            r = requests.get(f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}", params={"limit": 250, "offset": offset, "format": "json", "sort": "updateDate desc"}, headers=headers, timeout=20)
-            if r.status_code != 200: break
-            bills = r.json().get("bills", [])
-            if not bills: break
-            for b in bills:
-                if not is_relevant(b.get("title", "")): continue
-                action_data = b.get("latestAction", {})
-                action_date_raw = action_data.get("actionDate") or b.get("updateDate")
-                fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime() if action_date_raw else datetime.now()
-                raw_type = b.get("type", "HR").upper()
-                proper_link = f"https://www.congress.gov/bill/{CURRENT_CONGRESS}th-congress/{BILL_MAP.get(raw_type, 'house-bill')}/{b.get('number')}"
-                results.append({
-                    "source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
-                    "time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {b.get('title')}",
-                    "latest_action": action_data.get("text", "Active"), "link": proper_link,
-                    "summary": "Legislative movement tracked via API.", "bill_type": b.get("type", "HR"), "bill_number": b.get("number")
-                })
-            time.sleep(1.5)
-        except Exception as e: break
     return results
 # --- MAIN RUNNER ---
 def run():

     print("Scanning Federal Register API...")
     results = []
     url = "https://www.federalregister.gov/api/v1/documents.json"
+    # We pull a larger batch (50) because we are going to heavily filter them locally
+    params = {"conditions[term]": "artificial intelligence", "order": "newest", "per_page": 50}
     try:
         r = requests.get(url, params=params, timeout=15)
         if r.status_code == 200:
             for doc in r.json().get("results", []):
                 title = doc.get("title", "No Title")
                 summary = doc.get("abstract", "No summary provided.")
+                # --- THE LOCAL RELEVANCE FILTER ---
+                # Only keep it if the AI keywords are in the Title or Abstract (ignores full-text matches)
+                if not is_relevant(title, str(summary)):
+                    continue
+                # Explicitly block noisy SEC stock exchange filings
+                if "Self-Regulatory Organizations" in title:
+                    continue
                 pub_date = doc.get("publication_date")
                 fmt_date = pd.to_datetime(pub_date).tz_localize(None).to_pydatetime() if pub_date else datetime.now()
     headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
     BILL_MAP = {"HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution"}
+    # We split the scan in half: target // 2 newest introduced, AND target // 2 most recently updated (500 each at the default target of 1000)
+    scan_strategies = ["introducedDate desc", "updateDate desc"]
+    for sort_method in scan_strategies:
+        print(f"  -> Pulling by {sort_method}...")
+        # target // 2 caps each strategy at half the target, fetched in pages of 250 (500 per strategy at the default target of 1000)
+        for offset in range(0, target // 2, 250):
+            try:
+                r = requests.get(
+                    f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}",
+                    params={"limit": 250, "offset": offset, "format": "json", "sort": sort_method},
+                    headers=headers, timeout=20
+                )
+                if r.status_code != 200: break
+                bills = r.json().get("bills", [])
+                if not bills: break
+                for b in bills:
+                    if not is_relevant(b.get("title", "")): continue
+                    action_data = b.get("latestAction", {})
+                    action_date_raw = action_data.get("actionDate") or b.get("updateDate")
+                    fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime() if action_date_raw else datetime.now()
+                    raw_type = b.get("type", "HR").upper()
+                    proper_link = f"https://www.congress.gov/bill/{CURRENT_CONGRESS}th-congress/{BILL_MAP.get(raw_type, 'house-bill')}/{b.get('number')}"
+                    results.append({
+                        "source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
+                        "time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {b.get('title')}",
+                        "latest_action": action_data.get("text", "Active"), "link": proper_link,
+                        "summary": "Legislative movement tracked via API.", "bill_type": b.get("type", "HR"), "bill_number": b.get("number")
+                    })
+                time.sleep(1.5) # Polite delay
+            except Exception as e: break
     return results
 # --- MAIN RUNNER ---
 def run():