# Source: polyguard-openenv/app/dataops/web_agent.py
# Uploaded by TheJackBright — commit "Deploy PolyGuard OpenEnv Space" (877add7, verified)
"""Allow-listed web retrieval."""
from __future__ import annotations
from urllib.parse import urlparse
import requests
def _host_is_allowed(host: str, allowed_domains: list[str]) -> bool:
    """Return True when *host* equals an allowed domain or is a subdomain of one."""
    if not host:
        return False
    for entry in allowed_domains:
        domain = entry.strip().lower()
        # A blank (or dots-only) entry would otherwise match every host,
        # because ``str.endswith("")`` is always true.
        if not domain.strip("."):
            continue
        if domain.startswith("."):
            # Explicit ".example.com" form: the dot is already the boundary.
            if host.endswith(domain):
                return True
        elif host == domain or host.endswith("." + domain):
            # Require a label boundary so "evilexample.com" does not pass
            # for an allowed "example.com".
            return True
    return False


def fetch_url(url: str, allowed_domains: list[str]) -> str:
    """Fetch *url* and return the response body as text, if its host is allow-listed.

    The host is taken from the URL's parsed hostname (port and userinfo are
    ignored) and compared case-insensitively. It is accepted when it equals
    an entry of *allowed_domains* or is a subdomain of one.

    Args:
        url: Absolute URL to retrieve.
        allowed_domains: Domain names that may be contacted, e.g.
            ``["example.com"]``. Empty entries are ignored.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        ValueError: If the URL's host is not covered by *allowed_domains*.
        RuntimeError: If the request fails or returns an error status; the
            message has the form ``web_fetch_failed:<host>:<cause>`` and the
            original exception is chained as ``__cause__``.
    """
    host = (urlparse(url).hostname or "").lower()
    if not _host_is_allowed(host, allowed_domains):
        raise ValueError(f"Domain not allow-listed: {host}")
    try:
        # NOTE(review): requests follows redirects by default, so the final
        # response may come from a host outside the allow-list — confirm
        # whether that is acceptable for callers.
        response = requests.get(url, timeout=20)
        response.raise_for_status()
        return response.text
    except Exception as exc:  # noqa: BLE001
        # Explicit failure message makes offline-first behavior easier to reason about upstream.
        raise RuntimeError(f"web_fetch_failed:{host}:{exc}") from exc