| """Allow-listed web retrieval.""" | |
| from __future__ import annotations | |
| from urllib.parse import urlparse | |
| import requests | |
def _host_is_allowed(candidates: tuple[str, ...], allowed_domains: list[str]) -> bool:
    """Return True when a candidate equals, or is a subdomain of, an allow-listed entry."""
    for raw_entry in allowed_domains:
        entry = raw_entry.strip().lower()
        if not entry.strip("."):
            # A blank entry would suffix-match every host, silently disabling the allow-list.
            continue
        # An entry written with a leading dot keeps its plain suffix meaning.
        # Otherwise the match must land on a label boundary, so that
        # "evilexample.com" cannot pass for "example.com".
        suffix = entry if entry.startswith(".") else f".{entry}"
        if any(c == entry or c.endswith(suffix) for c in candidates):
            return True
    return False


def fetch_url(url: str, allowed_domains: list[str]) -> str:
    """Fetch ``url`` over HTTP and return the response body as text.

    The URL's hostname must equal an entry of ``allowed_domains`` or be a
    subdomain of one. Userinfo (``user:pass@``) is ignored for matching, and a
    port in the URL does not prevent a match; entries that themselves carry a
    port (``"example.com:8443"``) are compared against ``host:port``.

    Args:
        url: Absolute URL to retrieve.
        allowed_domains: Domain names that may be contacted. Matching is
            case-insensitive; blank entries are ignored.

    Returns:
        The decoded response body.

    Raises:
        ValueError: If the host is missing, malformed, or not allow-listed.
        RuntimeError: If the request fails for any reason; the message has the
            form ``web_fetch_failed:<host>:<cause>``.
    """
    parsed = urlparse(url)
    # ``hostname`` excludes userinfo and port, so neither can be used to sneak
    # past the check or leak credentials into error messages.
    host = (parsed.hostname or "").rstrip(".")
    try:
        port = parsed.port
    except ValueError as exc:
        # A malformed port means the URL cannot be trusted; reject it outright.
        raise ValueError(f"Domain not allow-listed: {host}") from exc

    candidates: tuple[str, ...] = ()
    if host:
        candidates = (host,) if port is None else (host, f"{host}:{port}")
    if not _host_is_allowed(candidates, allowed_domains):
        raise ValueError(f"Domain not allow-listed: {host}")

    try:
        # NOTE(review): requests follows redirects by default, so the final
        # response may come from a host outside the allow-list — confirm
        # whether that is acceptable for callers.
        response = requests.get(url, timeout=20)
        response.raise_for_status()
        return response.text
    except Exception as exc:  # noqa: BLE001
        # Explicit failure message makes offline-first behavior easier to reason about upstream.
        raise RuntimeError(f"web_fetch_failed:{host}:{exc}") from exc