Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -219,13 +219,26 @@ def fetch_federal_register():
|
|
| 219 |
print("Scanning Federal Register API...")
|
| 220 |
results = []
|
| 221 |
url = "https://www.federalregister.gov/api/v1/documents.json"
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
| 223 |
try:
|
| 224 |
r = requests.get(url, params=params, timeout=15)
|
| 225 |
if r.status_code == 200:
|
| 226 |
for doc in r.json().get("results", []):
|
| 227 |
title = doc.get("title", "No Title")
|
| 228 |
summary = doc.get("abstract", "No summary provided.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
pub_date = doc.get("publication_date")
|
| 230 |
fmt_date = pd.to_datetime(pub_date).tz_localize(None).to_pydatetime() if pub_date else datetime.now()
|
| 231 |
|
|
@@ -263,29 +276,43 @@ def fetch_legislation(target=1000):
|
|
| 263 |
headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
|
| 264 |
BILL_MAP = {"HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution"}
|
| 265 |
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
return results
|
|
|
|
| 289 |
|
| 290 |
# --- MAIN RUNNER ---
|
| 291 |
def run():
|
|
|
|
| 219 |
print("Scanning Federal Register API...")
|
| 220 |
results = []
|
| 221 |
url = "https://www.federalregister.gov/api/v1/documents.json"
|
| 222 |
+
|
| 223 |
+
# We pull a larger batch (50) because we are going to heavily filter them locally
|
| 224 |
+
params = {"conditions[term]": "artificial intelligence", "order": "newest", "per_page": 50}
|
| 225 |
+
|
| 226 |
try:
|
| 227 |
r = requests.get(url, params=params, timeout=15)
|
| 228 |
if r.status_code == 200:
|
| 229 |
for doc in r.json().get("results", []):
|
| 230 |
title = doc.get("title", "No Title")
|
| 231 |
summary = doc.get("abstract", "No summary provided.")
|
| 232 |
+
|
| 233 |
+
# --- THE LOCAL RELEVANCE FILTER ---
|
| 234 |
+
# Only keep it if the AI keywords are in the Title or Abstract (ignores full-text matches)
|
| 235 |
+
if not is_relevant(title, str(summary)):
|
| 236 |
+
continue
|
| 237 |
+
|
| 238 |
+
# Explicitly block noisy SEC stock exchange filings
|
| 239 |
+
if "Self-Regulatory Organizations" in title:
|
| 240 |
+
continue
|
| 241 |
+
|
| 242 |
pub_date = doc.get("publication_date")
|
| 243 |
fmt_date = pd.to_datetime(pub_date).tz_localize(None).to_pydatetime() if pub_date else datetime.now()
|
| 244 |
|
|
|
|
| 276 |
headers = {"X-API-Key": CONGRESS_API_KEY, "Accept": "application/json"}
|
| 277 |
BILL_MAP = {"HR": "house-bill", "S": "senate-bill", "HRES": "house-resolution", "SRES": "senate-resolution"}
|
| 278 |
|
| 279 |
+
# We split the scan into two passes of roughly target // 2 bills each (500 with the default target=1000): newest introduced, AND most recently updated
|
| 280 |
+
scan_strategies = ["introducedDate desc", "updateDate desc"]
|
| 281 |
+
|
| 282 |
+
for sort_method in scan_strategies:
|
| 283 |
+
print(f" -> Pulling by {sort_method}...")
|
| 284 |
+
# target // 2 gives each strategy half of the budget (500 each when target is the default 1000), fetched in pages of 250
|
| 285 |
+
for offset in range(0, target // 2, 250):
|
| 286 |
+
try:
|
| 287 |
+
r = requests.get(
|
| 288 |
+
f"{CONGRESS_API_BASE}/bill/{CURRENT_CONGRESS}",
|
| 289 |
+
params={"limit": 250, "offset": offset, "format": "json", "sort": sort_method},
|
| 290 |
+
headers=headers, timeout=20
|
| 291 |
+
)
|
| 292 |
+
if r.status_code != 200: break
|
| 293 |
+
bills = r.json().get("bills", [])
|
| 294 |
+
if not bills: break
|
| 295 |
|
| 296 |
+
for b in bills:
|
| 297 |
+
if not is_relevant(b.get("title", "")): continue
|
| 298 |
+
|
| 299 |
+
action_data = b.get("latestAction", {})
|
| 300 |
+
action_date_raw = action_data.get("actionDate") or b.get("updateDate")
|
| 301 |
+
fmt_date = pd.to_datetime(action_date_raw).tz_localize(None).to_pydatetime() if action_date_raw else datetime.now()
|
| 302 |
+
raw_type = b.get("type", "HR").upper()
|
| 303 |
+
proper_link = f"https://www.congress.gov/bill/{CURRENT_CONGRESS}th-congress/{BILL_MAP.get(raw_type, 'house-bill')}/{b.get('number')}"
|
| 304 |
+
|
| 305 |
+
results.append({
|
| 306 |
+
"source": "Congress.gov", "type": "Legislation", "event_date": fmt_date,
|
| 307 |
+
"time": "API Verified", "title": f"{b.get('type')}{b.get('number')}: {b.get('title')}",
|
| 308 |
+
"latest_action": action_data.get("text", "Active"), "link": proper_link,
|
| 309 |
+
"summary": "Legislative movement tracked via API.", "bill_type": b.get("type", "HR"), "bill_number": b.get("number")
|
| 310 |
+
})
|
| 311 |
+
time.sleep(1.5) # Polite delay
|
| 312 |
+
except Exception as e: break
|
| 313 |
+
|
| 314 |
return results
|
| 315 |
+
|
| 316 |
|
| 317 |
# --- MAIN RUNNER ---
|
| 318 |
def run():
|