IJ-Reynolds HF Staff committed on
Commit
344bb2e
·
verified ·
1 Parent(s): 2f5126c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +49 -22
main.py CHANGED
@@ -219,13 +219,26 @@ def fetch_federal_register():
219
  print("Scanning Federal Register API...")
220
  results = []
221
  url = "https://www.federalregister.gov/api/v1/documents.json"
222
- params = {"conditions[term]": "artificial intelligence", "order": "newest", "per_page": 10}
 
 
 
223
  try:
224
  r = requests.get(url, params=params, timeout=15)
225
  if r.status_code == 200:
226
  for doc in r.json().get("results", []):
227
  title = doc.get("title", "No Title")
228
  summary = doc.get("abstract", "No summary provided.")
 
 
 
 
 
 
 
 
 
 
229
  pub_date = doc.get("publication_date")
230
  fmt_date = pd.to_datetime(pub_date).tz_localize(None).to_pydatetime() if pub_date else datetime.now()
231
 
@@ -263,29 +276,43 @@ def fetch_legislation(target=1000):
263
  headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
264
  BILL_MAP = {"HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution"}
265
 
266
- for offset in range(0, target, 250):
267
- try:
268
- r = requests.get(f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}", params={"limit": 250, "offset": offset, "format": "json", "sort": "updateDate desc"}, headers=headers, timeout=20)
269
- if r.status_code != 200: break
270
- bills = r.json().get("bills", [])
271
- if not bills: break
272
- for b in bills:
273
- if not is_relevant(b.get("title", "")): continue
274
- action_data = b.get("latestAction", {})
275
- action_date_raw = action_data.get("actionDate") or b.get("updateDate")
276
- fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime() if action_date_raw else datetime.now()
277
- raw_type = b.get("type", "HR").upper()
278
- proper_link = f"https://www.congress.gov/bill/{CURRENT_CONGRESS}th-congress/{BILL_MAP.get(raw_type, 'house-bill')}/{b.get('number')}"
 
 
 
279
 
280
- results.append({
281
- "source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
282
- "time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {b.get('title')}",
283
- "latest_action": action_data.get("text", "Active"), "link": proper_link,
284
- "summary": "Legislative movement tracked via API.", "bill_type": b.get("type", "HR"), "bill_number": b.get("number")
285
- })
286
- time.sleep(1.5)
287
- except Exception as e: break
 
 
 
 
 
 
 
 
 
 
288
  return results
 
289
 
290
  # --- MAIN RUNNER ---
291
  def run():
 
219
  print("Scanning Federal Register API...")
220
  results = []
221
  url = "https://www.federalregister.gov/api/v1/documents.json"
222
+
223
+ # We pull a larger batch (50) because we are going to heavily filter them locally
224
+ params = {"conditions[term]": "artificial intelligence", "order": "newest", "per_page": 50}
225
+
226
  try:
227
  r = requests.get(url, params=params, timeout=15)
228
  if r.status_code == 200:
229
  for doc in r.json().get("results", []):
230
  title = doc.get("title", "No Title")
231
  summary = doc.get("abstract", "No summary provided.")
232
+
233
+ # --- THE LOCAL RELEVANCE FILTER ---
234
+ # Only keep it if the AI keywords are in the Title or Abstract (ignores full-text matches)
235
+ if not is_relevant(title, str(summary)):
236
+ continue
237
+
238
+ # Explicitly block noisy SEC stock exchange filings
239
+ if "Self-Regulatory Organizations" in title:
240
+ continue
241
+
242
  pub_date = doc.get("publication_date")
243
  fmt_date = pd.to_datetime(pub_date).tz_localize(None).to_pydatetime() if pub_date else datetime.now()
244
 
 
276
  headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
277
  BILL_MAP = {"HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution"}
278
 
279
+ # We split the scan: half of `target` by newest introduced, AND half by most recently updated (500 each at the default target=1000)
280
+ scan_strategies = ["introducedDate desc", "updateDate desc"]
281
+
282
+ for sort_method in scan_strategies:
283
+ print(f" -> Pulling by {sort_method}...")
284
+ # target // 2 gives each strategy half of the scan budget (500 each at the default target=1000)
285
+ for offset in range(0, target // 2, 250):
286
+ try:
287
+ r = requests.get(
288
+ f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}",
289
+ params={"limit": 250, "offset": offset, "format": "json", "sort": sort_method},
290
+ headers=headers, timeout=20
291
+ )
292
+ if r.status_code != 200: break
293
+ bills = r.json().get("bills", [])
294
+ if not bills: break
295
 
296
+ for b in bills:
297
+ if not is_relevant(b.get("title", "")): continue
298
+
299
+ action_data = b.get("latestAction", {})
300
+ action_date_raw = action_data.get("actionDate") or b.get("updateDate")
301
+ fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime() if action_date_raw else datetime.now()
302
+ raw_type = b.get("type", "HR").upper()
303
+ proper_link = f"https://www.congress.gov/bill/{CURRENT_CONGRESS}th-congress/{BILL_MAP.get(raw_type, 'house-bill')}/{b.get('number')}"
304
+
305
+ results.append({
306
+ "source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
307
+ "time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {b.get('title')}",
308
+ "latest_action": action_data.get("text", "Active"), "link": proper_link,
309
+ "summary": "Legislative movement tracked via API.", "bill_type": b.get("type", "HR"), "bill_number": b.get("number")
310
+ })
311
+ time.sleep(1.5) # Polite delay
312
+ except Exception as e: break
313
+
314
  return results
315
+
316
 
317
  # --- MAIN RUNNER ---
318
  def run():