IJ-Reynolds HF Staff committed on
Commit
5bbdd4b
·
verified ·
1 Parent(s): 3f35b16

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +69 -29
main.py CHANGED
@@ -5,6 +5,7 @@ from bs4 import BeautifulSoup
5
  import feedparser
6
  import json
7
  import re
 
8
  from datetime import datetime
9
  from pathlib import Path
10
  from dateutil import parser as date_parser
@@ -102,15 +103,24 @@ else:
102
  def analyze_with_ai(title, summary, source):
103
  if not hf_client:
104
  return "LOW - MONITOR", "AI Triage disabled (No API Key).", "N/A"
 
 
 
 
105
 
106
  prompt = f"""
107
- You are a D.C. AI policy analyst. Review this update:
108
  Source: {source}
109
  Title: {title}
110
  Summary: {summary}
111
 
112
- Categorize priority as exactly: "HIGH - ACTION REQUIRED", "MEDIUM - REVIEW", or "LOW - MONITOR".
113
- Provide a 1-sentence analysis.
 
 
 
 
 
114
  Extract 3 comma-separated keywords.
115
  Format output EXACTLY as:
116
  PRIORITY: [Flag]
@@ -125,6 +135,10 @@ def analyze_with_ai(title, summary, source):
125
  priority = re.search(r'PRIORITY:\s*(.*)', text).group(1) if re.search(r'PRIORITY:\s*(.*)', text) else "LOW - MONITOR"
126
  analysis = re.search(r'ANALYSIS:\s*(.*)', text).group(1) if re.search(r'ANALYSIS:\s*(.*)', text) else "Could not generate analysis."
127
  keywords = re.search(r'KEYWORDS:\s*(.*)', text).group(1) if re.search(r'KEYWORDS:\s*(.*)', text) else "AI, Tech, Policy"
 
 
 
 
128
  return priority.strip(), analysis.strip(), keywords.strip()
129
  except Exception as e:
130
  print(f"AI Error: {e}")
@@ -144,7 +158,7 @@ def save_db(db):
144
  def is_new_event(link, db):
145
  return link not in db
146
 
147
- # --- PRO DATE EXTRACTOR ---
148
  def extract_robust_date(text_blocks):
149
  date_patterns = [
150
  r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?(?:,)?\s+\d{4}\b',
@@ -215,7 +229,6 @@ def fetch_specific_committees():
215
  if len(title) < 15: continue
216
 
217
  href_lower = a['href'].lower()
218
- # Tighter filter: require specific event-related paths
219
  if any(x in href_lower for x in ["hearing", "event", "markup"]):
220
 
221
  if not is_relevant(title):
@@ -227,8 +240,6 @@ def fetch_specific_committees():
227
  time_text = time_node["datetime"] if time_node and time_node.has_attr("datetime") else ""
228
 
229
  fmt_date = extract_robust_date([time_text, title, container_text])
230
-
231
- # STRICT RULE: If no valid date is extracted, it's likely a generic link, drop it.
232
  if not fmt_date:
233
  continue
234
 
@@ -252,11 +263,12 @@ def fetch_committee_meetings():
252
  r = requests.get(url, params={"limit": 100, "format": "json"}, headers=headers, timeout=20)
253
  if r.status_code == 200:
254
  for m in r.json().get("committeeMeetings", []):
255
- # We removed the keyword bouncer here because API titles are too generic
256
- # (e.g., "Business Meeting"). We will let the AI Triage figure out if it's important.
257
  title = m.get("title", "Committee Meeting")
258
  summary = f"Location: {m.get('room', 'TBD')} {m.get('building', '')}"
259
 
 
 
 
260
  raw_date = m.get("date")
261
  if raw_date:
262
  fmt_date = pd.to_datetime(raw_date).tz_localize(None).to_pydatetime()
@@ -273,36 +285,64 @@ def fetch_committee_meetings():
273
  except Exception as e:
274
  print(f"API Error: {e}")
275
  return results
276
-
277
- def fetch_legislation():
278
  print("Scanning Legislation...")
279
- results = []
280
  if not CONGRESS_API_KEY: return []
281
- url = f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}"
282
- headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
283
- try:
284
- # Bumped limit from 50 to 250 to look further back in time for AI bills
285
- r = requests.get(url, params={"limit": 250, "format": "json"}, headers=headers, timeout=20)
286
- if r.status_code == 200:
287
- for b in r.json().get("bills", []):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  title = b.get("title", "")
289
 
290
- # We keep the bouncer here, otherwise you get 200 post-office renamings.
291
  if not is_relevant(title):
292
  continue
293
 
294
- raw_date = b.get("updateDate")
295
- fmt_date = pd.to_datetime(raw_date).tz_localize(None).to_pydatetime() if raw_date else datetime.now().replace(tzinfo=None)
 
 
 
 
296
 
 
 
 
 
 
 
 
 
 
 
297
  results.append({
298
- "source": "Congress.gov API", "type": "Legislation",
299
- "event_date": fmt_date, "time": "N/A",
300
- "title": f"{b.get('type')} {b.get('number')}: {title}",
301
- "latest_action": b.get("latestAction", {}).get("text", "Introduced"),
302
- "link": b.get("url", "https://www.congress.gov"), "summary": "AI related legislation."
303
  })
304
- except Exception as e:
305
- print(f"Bill API Error: {e}")
 
 
 
306
  return results
307
 
308
  # --- MAIN EXECUTION ---
 
5
  import feedparser
6
  import json
7
  import re
8
+ import time
9
  from datetime import datetime
10
  from pathlib import Path
11
  from dateutil import parser as date_parser
 
103
  def analyze_with_ai(title, summary, source):
104
  if not hf_client:
105
  return "LOW - MONITOR", "AI Triage disabled (No API Key).", "N/A"
106
+
107
+ # Hard Filter: If the title is generic and has no AI keywords, don't ask the AI.
108
+ if not is_relevant(title, summary) and "Committee API" in source:
109
+ return "LOW - MONITOR", "Administrative update with no specific tech policy markers.", "Admin"
110
 
111
  prompt = f"""
112
+ You are a ruthless D.C. AI policy analyst. Review this update:
113
  Source: {source}
114
  Title: {title}
115
  Summary: {summary}
116
 
117
+ RULES:
118
+ 1. Categorize priority as exactly: "HIGH - ACTION REQUIRED", "MEDIUM - REVIEW", or "LOW - MONITOR".
119
+ 2. HIGH priority is ONLY for major AI legislation advancing, executive orders, or finalized rules.
120
+ 3. DO NOT flag generic meetings, "TBD" locations, or administrative updates as HIGH.
121
+ 4. If the update lacks specific AI or Tech policy details, it MUST be "LOW - MONITOR".
122
+
123
+ Provide a 1-sentence analysis explaining the actual policy impact.
124
  Extract 3 comma-separated keywords.
125
  Format output EXACTLY as:
126
  PRIORITY: [Flag]
 
135
  priority = re.search(r'PRIORITY:\s*(.*)', text).group(1) if re.search(r'PRIORITY:\s*(.*)', text) else "LOW - MONITOR"
136
  analysis = re.search(r'ANALYSIS:\s*(.*)', text).group(1) if re.search(r'ANALYSIS:\s*(.*)', text) else "Could not generate analysis."
137
  keywords = re.search(r'KEYWORDS:\s*(.*)', text).group(1) if re.search(r'KEYWORDS:\s*(.*)', text) else "AI, Tech, Policy"
138
+
139
+ if "TBD" in summary and "HIGH" in priority:
140
+ priority = "LOW - MONITOR"
141
+
142
  return priority.strip(), analysis.strip(), keywords.strip()
143
  except Exception as e:
144
  print(f"AI Error: {e}")
 
158
  def is_new_event(link, db):
159
  return link not in db
160
 
161
+ # --- DATE EXTRACTOR ---
162
  def extract_robust_date(text_blocks):
163
  date_patterns = [
164
  r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?(?:,)?\s+\d{4}\b',
 
229
  if len(title) < 15: continue
230
 
231
  href_lower = a['href'].lower()
 
232
  if any(x in href_lower for x in ["hearing", "event", "markup"]):
233
 
234
  if not is_relevant(title):
 
240
  time_text = time_node["datetime"] if time_node and time_node.has_attr("datetime") else ""
241
 
242
  fmt_date = extract_robust_date([time_text, title, container_text])
 
 
243
  if not fmt_date:
244
  continue
245
 
 
263
  r = requests.get(url, params={"limit": 100, "format": "json"}, headers=headers, timeout=20)
264
  if r.status_code == 200:
265
  for m in r.json().get("committeeMeetings", []):
 
 
266
  title = m.get("title", "Committee Meeting")
267
  summary = f"Location: {m.get('room', 'TBD')} {m.get('building', '')}"
268
 
269
+ if not is_relevant(title, summary):
270
+ continue
271
+
272
  raw_date = m.get("date")
273
  if raw_date:
274
  fmt_date = pd.to_datetime(raw_date).tz_localize(None).to_pydatetime()
 
285
  except Exception as e:
286
  print(f"API Error: {e}")
287
  return results
288
+
289
def _congress_ordinal(congress):
    """Return the congress number with its English ordinal suffix (119 -> "119th")."""
    try:
        n = int(congress)
    except (TypeError, ValueError):
        # Non-numeric configuration: keep the historical "<value>th" rendering.
        return f"{congress}th"
    if 10 <= n % 100 <= 20:
        suffix = "th"  # 11th, 12th, 13th, ... are irregular
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


def fetch_legislation(target=2000):
    """Page through the bill list for CURRENT_CONGRESS and keep the relevant bills.

    Bills are requested 250 at a time, sorted by ``updateDate desc``, until
    ``target`` records have been scanned, the API returns a short or empty
    page, a non-200 status is received, or an exception occurs.

    Args:
        target: Upper bound on the number of bills to scan (not to return).

    Returns:
        A list of dicts with the keys ``source``, ``type``, ``event_date``
        (naive ``datetime``), ``time``, ``title``, ``latest_action``, ``link``
        and ``summary``; one per bill whose title passes ``is_relevant``.
        Returns ``[]`` when CONGRESS_API_KEY is not set.
    """
    print("Scanning Legislation...")
    if not CONGRESS_API_KEY:
        return []

    page_size = 250
    results = []
    headers = {"Accept": "application/json"}
    BILL_TYPE_MAP = {
        "HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution",
        "HJRES": "house-joint-resolution", "SJRES": "senate-joint-resolution",
        "HCONRES": "house-concurrent-resolution", "SCONRES": "senate-concurrent-resolution"
    }
    # Build the "<n><suffix>-congress" URL segment once; a fixed "th" suffix
    # would be wrong for numbers such as 121, 122 or 123.
    congress_slug = f"{_congress_ordinal(CURRENT_CONGRESS)}-congress"

    for offset in range(0, target, page_size):
        try:
            params = {
                "api_key": CONGRESS_API_KEY, "limit": page_size, "offset": offset,
                "format": "json", "sort": "updateDate desc"
            }
            r = requests.get(f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}", params=params, headers=headers, timeout=20)
            if r.status_code != 200:
                # Report why the scan stopped instead of ending silently.
                print(f"Legislation API returned HTTP {r.status_code} at offset {offset}; stopping.")
                break

            bills = r.json().get("bills", [])
            if not bills:
                break

            for b in bills:
                # "or" guards against an explicit null title in the payload.
                title = b.get("title") or ""

                # Apply our keyword filter so we only store tech policy bills
                if not is_relevant(title):
                    continue

                action_data = b.get("latestAction") or {}
                action_text = action_data.get("text", "Active")

                # Prefer the date of the latest action; fall back to the record's update date.
                action_date_raw = action_data.get("actionDate") or b.get("updateDate")

                # Timezone-safe parsing to avoid UI crashes
                if action_date_raw:
                    fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime()
                else:
                    fmt_date = datetime.now()  # already naive

                # A null "type" must not raise and abort the whole scan.
                raw_type = (b.get("type") or "HR").upper()
                url_type = BILL_TYPE_MAP.get(raw_type, "house-bill")
                proper_link = f"https://www.congress.gov/bill/{congress_slug}/{url_type}/{b.get('number')}"

                results.append({
                    "source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
                    "time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {title}",
                    "latest_action": action_text, "link": proper_link, "summary": "Legislative movement tracked via Congress.gov API."
                })

            # A short page is the last page, and the final offset needs no
            # follow-up: skip the extra request and the pointless delay.
            if len(bills) < page_size or offset + page_size >= target:
                break
            time.sleep(1.5)  # pause between pages to stay polite to the API
        except Exception as e:
            # Best-effort boundary: keep what was collected so far.
            print(f"Legislation API Error at offset {offset}: {e}")
            break

    return results
347
 
348
  # --- MAIN EXECUTION ---