Spaces:

IJ-Reynolds
/

AI_Intel_Tracker

Running

App Files Files Community

IJ-Reynolds HF Staff committed 11 days ago

Commit

5bbdd4b

verified ·

1 Parent(s): 3f35b16

Update main.py

Browse files

Files changed (1) hide show

main.py +69 -29

main.py CHANGED Viewed

@@ -5,6 +5,7 @@ from bs4 import BeautifulSoup
 import feedparser
 import json
 import re
 from datetime import datetime
 from pathlib import Path
 from dateutil import parser as date_parser
@@ -102,15 +103,24 @@ else:
 def analyze_with_ai(title, summary, source):
     if not hf_client:
         return "LOW - MONITOR", "AI Triage disabled (No API Key).", "N/A"
     prompt = f"""
-    You are a D.C. AI policy analyst. Review this update:
     Source: {source}
     Title: {title}
     Summary: {summary}
-    Categorize priority as exactly: "HIGH - ACTION REQUIRED", "MEDIUM - REVIEW", or "LOW - MONITOR".
-    Provide a 1-sentence analysis.
     Extract 3 comma-separated keywords.
     Format output EXACTLY as:
     PRIORITY: [Flag]
@@ -125,6 +135,10 @@ def analyze_with_ai(title, summary, source):
         priority = re.search(r'PRIORITY:\s*(.*)', text).group(1) if re.search(r'PRIORITY:\s*(.*)', text) else "LOW - MONITOR"
         analysis = re.search(r'ANALYSIS:\s*(.*)', text).group(1) if re.search(r'ANALYSIS:\s*(.*)', text) else "Could not generate analysis."
         keywords = re.search(r'KEYWORDS:\s*(.*)', text).group(1) if re.search(r'KEYWORDS:\s*(.*)', text) else "AI, Tech, Policy"
         return priority.strip(), analysis.strip(), keywords.strip()
     except Exception as e:
         print(f"AI Error: {e}")
@@ -144,7 +158,7 @@ def save_db(db):
 def is_new_event(link, db):
     return link not in db
-# --- PRO DATE EXTRACTOR ---
 def extract_robust_date(text_blocks):
     date_patterns = [
         r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?(?:,)?\s+\d{4}\b',
@@ -215,7 +229,6 @@ def fetch_specific_committees():
                 if len(title) < 15: continue
                 href_lower = a['href'].lower()
-                # Tighter filter: require specific event-related paths
                 if any(x in href_lower for x in ["hearing", "event", "markup"]):
                     if not is_relevant(title):
@@ -227,8 +240,6 @@ def fetch_specific_committees():
                     time_text = time_node["datetime"] if time_node and time_node.has_attr("datetime") else ""
                     fmt_date = extract_robust_date([time_text, title, container_text])
-                    # STRICT RULE: If no valid date is extracted, it's likely a generic link, drop it.
                     if not fmt_date:
                         continue
@@ -252,11 +263,12 @@ def fetch_committee_meetings():
         r = requests.get(url, params={"limit": 100, "format": "json"}, headers=headers, timeout=20)
         if r.status_code == 200:
             for m in r.json().get("committeeMeetings", []):
-                # We removed the keyword bouncer here because API titles are too generic
-                # (e.g., "Business Meeting"). We will let the AI Triage figure out if it's important.
                 title = m.get("title", "Committee Meeting")
                 summary = f"Location: {m.get('room', 'TBD')} {m.get('building', '')}"
                 raw_date = m.get("date")
                 if raw_date:
                     fmt_date = pd.to_datetime(raw_date).tz_localize(None).to_pydatetime()
@@ -273,36 +285,64 @@ def fetch_committee_meetings():
     except Exception as e:
         print(f"API Error: {e}")
     return results
-def fetch_legislation():
     print("Scanning Legislation...")
-    results = []
     if not CONGRESS_API_KEY: return []
-    url = f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}"
-    headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
-    try:
-        # Bumped limit from 50 to 250 to look further back in time for AI bills
-        r = requests.get(url, params={"limit": 250, "format": "json"}, headers=headers, timeout=20)
-        if r.status_code == 200:
-            for b in r.json().get("bills", []):
                 title = b.get("title", "")
-                # We keep the bouncer here, otherwise you get 200 post-office renamings.
                 if not is_relevant(title):
                     continue
-                raw_date = b.get("updateDate")
-                fmt_date = pd.to_datetime(raw_date).tz_localize(None).to_pydatetime() if raw_date else datetime.now().replace(tzinfo=None)
                 results.append({
-                    "source": "Congress.gov API", "type": "Legislation",
-                    "event_date": fmt_date, "time": "N/A",
-                    "title": f"{b.get('type')} {b.get('number')}: {title}",
-                    "latest_action": b.get("latestAction", {}).get("text", "Introduced"),
-                    "link": b.get("url", "https://www.congress.gov"), "summary": "AI related legislation."
                 })
-    except Exception as e:
-        print(f"Bill API Error: {e}")
     return results
 # --- MAIN EXECUTION ---

 import feedparser
 import json
 import re
+import time
 from datetime import datetime
 from pathlib import Path
 from dateutil import parser as date_parser
 def analyze_with_ai(title, summary, source):
     if not hf_client:
         return "LOW - MONITOR", "AI Triage disabled (No API Key).", "N/A"
+    # Hard Filter: If the title is generic and has no AI keywords, don't ask the AI.
+    if not is_relevant(title, summary) and "Committee API" in source:
+        return "LOW - MONITOR", "Administrative update with no specific tech policy markers.", "Admin"
     prompt = f"""
+    You are a ruthless D.C. AI policy analyst. Review this update:
     Source: {source}
     Title: {title}
     Summary: {summary}
+    RULES:
+    1. Categorize priority as exactly: "HIGH - ACTION REQUIRED", "MEDIUM - REVIEW", or "LOW - MONITOR".
+    2. HIGH priority is ONLY for major AI legislation advancing, executive orders, or finalized rules.
+    3. DO NOT flag generic meetings, "TBD" locations, or administrative updates as HIGH.
+    4. If the update lacks specific AI or Tech policy details, it MUST be "LOW - MONITOR".
+    Provide a 1-sentence analysis explaining the actual policy impact.
     Extract 3 comma-separated keywords.
     Format output EXACTLY as:
     PRIORITY: [Flag]
         priority = re.search(r'PRIORITY:\s*(.*)', text).group(1) if re.search(r'PRIORITY:\s*(.*)', text) else "LOW - MONITOR"
         analysis = re.search(r'ANALYSIS:\s*(.*)', text).group(1) if re.search(r'ANALYSIS:\s*(.*)', text) else "Could not generate analysis."
         keywords = re.search(r'KEYWORDS:\s*(.*)', text).group(1) if re.search(r'KEYWORDS:\s*(.*)', text) else "AI, Tech, Policy"
+        if "TBD" in summary and "HIGH" in priority:
+            priority = "LOW - MONITOR"
         return priority.strip(), analysis.strip(), keywords.strip()
     except Exception as e:
         print(f"AI Error: {e}")
 def is_new_event(link, db):
     """Return True when *link* is not yet present in *db*.

     Uses the container's own membership test, so for a dict this checks
     the keys.
     """
     already_recorded = link in db
     return not already_recorded
+# --- DATE EXTRACTOR ---
 def extract_robust_date(text_blocks):
     date_patterns = [
         r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?(?:,)?\s+\d{4}\b',
                 if len(title) < 15: continue
                 href_lower = a['href'].lower()
                 if any(x in href_lower for x in ["hearing", "event", "markup"]):
                     if not is_relevant(title):
                     time_text = time_node["datetime"] if time_node and time_node.has_attr("datetime") else ""
                     fmt_date = extract_robust_date([time_text, title, container_text])
                     if not fmt_date:
                         continue
         r = requests.get(url, params={"limit": 100, "format": "json"}, headers=headers, timeout=20)
         if r.status_code == 200:
             for m in r.json().get("committeeMeetings", []):
                 title = m.get("title", "Committee Meeting")
                 summary = f"Location: {m.get('room', 'TBD')} {m.get('building', '')}"
+                if not is_relevant(title, summary):
+                    continue
                 raw_date = m.get("date")
                 if raw_date:
                     fmt_date = pd.to_datetime(raw_date).tz_localize(None).to_pydatetime()
     except Exception as e:
         print(f"API Error: {e}")
     return results
# Congress.gov API bill-type codes -> URL path segment used on congress.gov.
_BILL_TYPE_SLUGS = {
    "HR": "house-bill",
    "S": "senate-bill",
    "HRES": "house-resolution",
    "SRES": "senate-resolution",
    "HJRES": "house-joint-resolution",
    "SJRES": "senate-joint-resolution",
    "HCONRES": "house-concurrent-resolution",
    "SCONRES": "senate-concurrent-resolution",
}


def _congress_ordinal(congress):
    """Return *congress* with its English ordinal suffix (119 -> "119th")."""
    try:
        n = int(congress)
    except (TypeError, ValueError):
        # Not a number: fall back to the old behaviour of always using "th".
        return f"{congress}th"
    if 10 <= n % 100 <= 20:
        suffix = "th"  # 11th, 12th, 13th are irregular
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


def _legislation_entry(b, congress_slug):
    """Turn one bill record into a tracker entry, or None if it is filtered out."""
    title = b.get("title") or ""
    # Keyword filter so only tech-policy bills are stored.
    if not is_relevant(title):
        return None

    # "latestAction" may be missing or null; treat both as "no action data".
    action_data = b.get("latestAction") or {}
    action_text = action_data.get("text", "Active")
    # Prefer the date of the latest action, then the record's update date.
    action_date_raw = action_data.get("actionDate") or b.get("updateDate")

    # Strip any timezone so event_date is always a naive datetime.
    if action_date_raw:
        fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime()
    else:
        fmt_date = datetime.now().replace(tzinfo=None)

    # `or "HR"` also covers an explicit null, which a .get() default does not.
    raw_type = str(b.get("type") or "HR").upper()
    url_type = _BILL_TYPE_SLUGS.get(raw_type, "house-bill")
    proper_link = f"https://www.congress.gov/bill/{congress_slug}/{url_type}/{b.get('number')}"

    return {
        "source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
        "time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {title}",
        "latest_action": action_text, "link": proper_link,
        "summary": "Legislative movement tracked via Congress.gov API.",
    }


def fetch_legislation(target=2000):
    """Page through the bill listing of the current Congress and collect relevant bills.

    Requests pages of 250 bills (sorted by ``updateDate desc``) at offsets
    ``0, 250, ...`` below *target*, keeps the bills whose title passes
    ``is_relevant`` and converts each into a tracker entry dict.

    Args:
        target: Upper bound on the offsets requested; pages are fetched
            for ``range(0, target, 250)``.

    Returns:
        A list of entry dicts with the keys ``source``, ``type``,
        ``event_date``, ``time``, ``title``, ``latest_action``, ``link`` and
        ``summary``. Empty when no API key is configured.

    Paging stops early on a non-200 response, an empty page, or a
    request/JSON error. A single bill that cannot be converted is logged and
    skipped instead of ending the scan.
    """
    print("Scanning Legislation...")
    if not CONGRESS_API_KEY:
        return []

    page_size = 250
    results = []
    headers = {"Accept": "application/json"}
    congress_slug = f"{_congress_ordinal(CURRENT_CONGRESS)}-congress"

    for offset in range(0, target, page_size):
        # Throttle between consecutive requests only, not after the last one.
        if offset:
            time.sleep(1.5)

        try:
            # NOTE(review): the key is sent in the query string, so it can end
            # up in the exception text printed below; consider header auth.
            params = {
                "api_key": CONGRESS_API_KEY, "limit": page_size, "offset": offset,
                "format": "json", "sort": "updateDate desc",
            }
            r = requests.get(
                f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}",
                params=params, headers=headers, timeout=20,
            )
            if r.status_code != 200:
                print(f"Legislation API returned HTTP {r.status_code} at offset {offset}")
                break
            bills = r.json().get("bills", [])
        except Exception as e:
            print(f"Legislation API Error at offset {offset}: {e}")
            break

        if not bills:
            break

        for b in bills:
            try:
                entry = _legislation_entry(b, congress_slug)
            except Exception as e:
                # Best effort: one bad record must not discard the whole scan.
                print(f"Legislation API Error at offset {offset}: skipping bill ({e})")
                continue
            if entry is not None:
                results.append(entry)

    return results
 # --- MAIN EXECUTION ---