IJ-Reynolds HF Staff committed on
Commit
c2159e4
·
verified ·
1 Parent(s): c53a8ee

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +21 -50
main.py CHANGED
@@ -74,6 +74,24 @@ NEWS_FEEDS = {
74
  "The Hill Tech": "https://thehill.com/policy/technology/feed/"
75
  }
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  GOV_FEEDS = {
78
  "White House OSTP": "https://www.whitehouse.gov/ostp/feed/",
79
  "White House Briefing Room": "https://www.whitehouse.gov/briefing-room/feed/",
@@ -399,56 +417,10 @@ def run():
399
  raw_data = []
400
  raw_data.extend(fetch_rss(NEWS_FEEDS, "News/Media"))
401
  raw_data.extend(fetch_rss(GOV_FEEDS, "Federal/Exec Action"))
402
- raw_data.extend(fetch_rss(CALENDAR_FEEDS, "Schedule/Hearing"))
403
- raw_data.extend(fetch_master_schedules())
404
- raw_data.extend(fetch_legislation())
405
-
406
- new_items = []
407
- for item in raw_data:
408
- # Check against the composite ID (URL + Status)
409
- if is_new_event(item, db):
410
- print(f"Triaging new item: {item['title'][:40]}...")
411
-
412
- bill_text = ""
413
- if item.get("type") == "Legislation":
414
- bill_text = fetch_bill_text(CURRENT_CONGRESS, item.get("bill_type"), item.get("bill_number"))
415
-
416
- analysis, keywords = analyze_with_ai(item["title"], item["summary"], item["source"], bill_text=bill_text)
417
-
418
- item["analysis"] = analysis
419
- item["keywords"] = keywords
420
- item["date_collected"] = datetime.now().strftime("%Y-%m-%d %H:%M")
421
- new_items.append(item)
422
-
423
- # Store the composite fingerprint in the seen database
424
- db.append(get_event_id(item))
425
-
426
- if new_items:
427
- df_new = pd.DataFrame(new_items)
428
- if CSV_PATH.exists():
429
- # Standardize date parsing on load to prevent concat errors
430
- df_existing = pd.read_csv(CSV_PATH, parse_dates=["event_date"])
431
- df_combined = pd.concat([df_existing, df_new], ignore_index=True)
432
- else:
433
- df_combined = df_new
434
-
435
- # 🛡️ THE SAFETY SHIELD: Force deduplication on the combined dataset before saving
436
- # This kills any 'ghost twins' if the scraper accidentally pulls them twice
437
- df_combined = df_combined.drop_duplicates(subset=['link', 'latest_action'], keep='first')
438
-
439
- df_combined.to_csv(CSV_PATH, index=False)
440
- save_db(db)
441
- print(f"Added {len(new_items)} new items.")
442
- else:
443
- print("Sweep complete. No new items.")
444
-
445
- return len(new_items)# --- MAIN EXECUTION ---
446
- def run():
447
- db = load_db()
448
 
449
- raw_data = []
450
- raw_data.extend(fetch_rss(NEWS_FEEDS, "News/Media"))
451
- raw_data.extend(fetch_rss(GOV_FEEDS, "Federal/Exec Action"))
452
  raw_data.extend(fetch_rss(CALENDAR_FEEDS, "Schedule/Hearing"))
453
  raw_data.extend(fetch_master_schedules())
454
  raw_data.extend(fetch_legislation())
@@ -483,7 +455,6 @@ def run():
483
  df_combined = df_new
484
 
485
  # 🛡️ THE SAFETY SHIELD: Force deduplication on the combined dataset before saving
486
- # This kills any 'ghost twins' if the scraper accidentally pulls them twice
487
  df_combined = df_combined.drop_duplicates(subset=['link', 'latest_action'], keep='first')
488
 
489
  df_combined.to_csv(CSV_PATH, index=False)
 
74
  "The Hill Tech": "https://thehill.com/policy/technology/feed/"
75
  }
76
 
77
+ CONGRESS_PRESS_FEEDS = {
78
+ # Senate Commerce & AI Leaders
79
+ "Sen. Cruz (Commerce Chair)": "https://www.cruz.senate.gov/rss/press.xml",
80
+ "Sen. Schatz (AI Lead)": "https://www.schatz.senate.gov/rss/press.xml",
81
+ "Sen. Schumer (AI Lead)": "https://www.schumer.senate.gov/rss/press.xml",
82
+ "Sen. Young (AI Caucus)": "https://www.young.senate.gov/rss/press.xml",
83
+
84
+ # Tech/Foreign Policy Nexus
85
+ "Sen. Andy Kim (Tech/Export Lead)": "https://www.kim.senate.gov/rss/press.xml",
86
+ "Sen. Ricketts (Tech/Foreign Lead)": "https://www.ricketts.senate.gov/rss/press.xml",
87
+
88
+ # House Science & Tech Leaders
89
+ "Rep. Babin (Science Chair)": "https://babin.house.gov/rss.xml",
90
+ "Rep. Obernolte (Science/Tech Chair)": "https://obernolte.house.gov/rss.xml",
91
+ "Rep. Lieu (AI Task Force)": "https://lieu.house.gov/rss.xml",
92
+ "Rep. Beyer (AI Caucus)": "https://beyer.house.gov/rss.xml"
93
+ }
94
+
95
  GOV_FEEDS = {
96
  "White House OSTP": "https://www.whitehouse.gov/ostp/feed/",
97
  "White House Briefing Room": "https://www.whitehouse.gov/briefing-room/feed/",
 
417
  raw_data = []
418
  raw_data.extend(fetch_rss(NEWS_FEEDS, "News/Media"))
419
  raw_data.extend(fetch_rss(GOV_FEEDS, "Federal/Exec Action"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
+ # 🛑 ADDED: The new congressional press feeds with the custom category
422
+ raw_data.extend(fetch_rss(CONGRESS_PRESS_FEEDS, "Legislative Office Press Release"))
423
+
424
  raw_data.extend(fetch_rss(CALENDAR_FEEDS, "Schedule/Hearing"))
425
  raw_data.extend(fetch_master_schedules())
426
  raw_data.extend(fetch_legislation())
 
455
  df_combined = df_new
456
 
457
  # 🛡️ THE SAFETY SHIELD: Force deduplication on the combined dataset before saving
 
458
  df_combined = df_combined.drop_duplicates(subset=['link', 'latest_action'], keep='first')
459
 
460
  df_combined.to_csv(CSV_PATH, index=False)