#!/usr/bin/env python3 """Offline-first source ingestion entrypoint.""" from __future__ import annotations import json import os from pathlib import Path from app.dataops.source_manager import DataAcquisitionAgent def main() -> None: root = Path(__file__).resolve().parents[1] allow_domains = ["who.int", "nih.gov", "fda.gov", "ema.europa.eu"] agent = DataAcquisitionAgent(root=root, allow_domains=allow_domains) records = agent.acquire_local_knowledge() out_dir = root / "data" / "processed" out_dir.mkdir(parents=True, exist_ok=True) payload: dict[str, object] = {"local_records": records} optional_url = os.getenv("POLYGUARD_OPTIONAL_LABEL_URL") if optional_url: payload["web_record"] = agent.acquire_web_knowledge(optional_url, offline_first=True) (out_dir / "ingested_sources.json").write_text(json.dumps(payload, ensure_ascii=True, indent=2), encoding="utf-8") print(f"ingested_records={len(records)}") if __name__ == "__main__": main()