Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -159,31 +159,44 @@ def fetch_congress_scraped():
|
|
| 159 |
if len(title) < 15 or not is_relevant(title): continue
|
| 160 |
seen_links.add(full_url)
|
| 161 |
|
| 162 |
-
# ---
|
| 163 |
fmt_date = None
|
| 164 |
-
current_node = a_tag
|
| 165 |
|
| 166 |
-
#
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
# If still no date, check previous text nodes entirely
|
| 177 |
if not fmt_date:
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
results.append({
|
| 185 |
"source": name, "type": "Legislative Office Press Release",
|
| 186 |
-
"event_date": fmt_date,
|
|
|
|
| 187 |
"latest_action": "Web Publication", "link": full_url, "summary": "HTML Scrape"
|
| 188 |
})
|
| 189 |
time.sleep(1)
|
|
|
|
| 159 |
if len(title) < 15 or not is_relevant(title): continue
|
| 160 |
seen_links.add(full_url)
|
| 161 |
|
| 162 |
+
# --- UPGRADED DATE HUNTING (Container Search) ---
|
| 163 |
fmt_date = None
|
|
|
|
| 164 |
|
| 165 |
+
# 1. Look for the entire row/article container (Catches sibling dates in Drupal!)
|
| 166 |
+
container = a_tag.find_parent(["article", "tr", "li"])
|
| 167 |
+
if not container:
|
| 168 |
+
container = a_tag.find_parent("div", class_=re.compile(r"views-row|item|post", re.I))
|
| 169 |
+
|
| 170 |
+
if container:
|
| 171 |
+
fmt_date = extract_robust_date([container.get_text(" ", strip=True)])
|
| 172 |
+
|
| 173 |
+
# 2. Fallback DOM climb
|
|
|
|
|
|
|
| 174 |
if not fmt_date:
|
| 175 |
+
current_node = a_tag
|
| 176 |
+
for _ in range(5):
|
| 177 |
+
if current_node.parent:
|
| 178 |
+
current_node = current_node.parent
|
| 179 |
+
found_date = extract_robust_date([current_node.get_text(" ", strip=True)])
|
| 180 |
+
if found_date:
|
| 181 |
+
fmt_date = found_date
|
| 182 |
+
break
|
| 183 |
+
|
| 184 |
+
# --- THE USER-FACING MISSING DATE FLAG ---
|
| 185 |
+
if not fmt_date:
|
| 186 |
+
# We stop defaulting to today! Mark it explicitly for the user.
|
| 187 |
+
display_time = "⚠️ DATE UNKNOWN"
|
| 188 |
+
display_title = f"[DATE MISSING] {title}"
|
| 189 |
+
else:
|
| 190 |
+
# If we found a date, run the age gate
|
| 191 |
+
days_old = (datetime.now() - fmt_date).days
|
| 192 |
+
if days_old > 60: continue
|
| 193 |
+
display_time = "Published"
|
| 194 |
+
display_title = title
|
| 195 |
+
|
| 196 |
results.append({
|
| 197 |
"source": name, "type": "Legislative Office Press Release",
|
| 198 |
+
"event_date": fmt_date, # This will be passed as None (Blank) instead of today
|
| 199 |
+
"time": display_time, "title": display_title,
|
| 200 |
"latest_action": "Web Publication", "link": full_url, "summary": "HTML Scrape"
|
| 201 |
})
|
| 202 |
time.sleep(1)
|