| """Persist the forecast SQLite DB across HF Space rebuilds. |
| |
| HF Spaces' free tier has ephemeral storage — every `git push` rebuilds the |
| container and wipes any local files. We back the forecast log with a |
| private HF Dataset: |
| |
| - On startup: pull the latest forecasts.db from the dataset (if any). |
| - After every refresh: push the current forecasts.db back. |
| |
| Environment: |
| HF_TOKEN must have write access to the dataset |
| LOG_DATASET_REPO override the default dataset repo id |
| """ |

from __future__ import annotations

import os
import shutil
import threading
import time
import traceback

DEFAULT_REPO = "bitsofchris/toto-weather-forecast-log"
PATH_IN_REPO = "forecasts.db"
DEFAULT_LOCAL = "data/forecasts.db"

# Push throttle state: at most one upload per PUSH_MIN_INTERVAL seconds.
_push_lock = threading.Lock()
_last_push_at = 0.0
PUSH_MIN_INTERVAL = 60.0


def _repo_id() -> str:
    return os.environ.get("LOG_DATASET_REPO", DEFAULT_REPO)


def _token() -> str | None:
    return os.environ.get("HF_TOKEN")


def pull_db(local_path: str = DEFAULT_LOCAL) -> bool:
    """Download the latest DB from the dataset, overwriting any local copy.

    Returns True on success.
    """
    tok = _token()
    if not tok:
        print("[persist] HF_TOKEN not set — skipping pull")
        return False
    try:
        from huggingface_hub import hf_hub_download

        downloaded = hf_hub_download(
            repo_id=_repo_id(),
            repo_type="dataset",
            filename=PATH_IN_REPO,
            token=tok,
        )
        os.makedirs(os.path.dirname(local_path) or ".", exist_ok=True)
        shutil.copyfile(downloaded, local_path)
        print(f"[persist] pulled DB from {_repo_id()} ({os.path.getsize(local_path)} bytes)")
        return True
    except Exception:
        print("[persist] pull skipped (no remote DB or network error):")
        traceback.print_exc()
        return False


def push_db(local_path: str = DEFAULT_LOCAL) -> bool:
    """Upload the local DB to the dataset.

    Coalesced and lock-protected so overlapping refreshes don't issue
    redundant uploads.
    """
    global _last_push_at
    tok = _token()
    if not tok or not os.path.exists(local_path):
        return False
    # Throttle: skip if we pushed recently or another push is in flight.
    if time.time() - _last_push_at < PUSH_MIN_INTERVAL:
        return False
    if not _push_lock.acquire(blocking=False):
        return False
    try:
        from huggingface_hub import HfApi

        api = HfApi(token=tok)
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=PATH_IN_REPO,
            repo_id=_repo_id(),
            repo_type="dataset",
            commit_message="forecast log update",
        )
        _last_push_at = time.time()
        print(f"[persist] pushed DB to {_repo_id()} ({os.path.getsize(local_path)} bytes)")
        return True
    except Exception:
        print("[persist] push failed:")
        traceback.print_exc()
        return False
    finally:
        _push_lock.release()


def push_db_async(local_path: str = DEFAULT_LOCAL) -> None:
    """Fire-and-forget push so refresh() returns to the user immediately."""
    threading.Thread(
        target=push_db, args=(local_path,), daemon=True, name="persist-push"
    ).start()


# Second persisted DB: the Ecowitt archive, pushed alongside the forecast
# log by push_all/pull_all below. It gets its own throttle state so the
# single-DB and multi-DB paths don't starve each other.
ARCHIVE_LOCAL = "data/ecowitt.db"
ARCHIVE_PATH_IN_REPO = "ecowitt.db"
_multi_lock = threading.Lock()
_multi_last = 0.0


def push_all(
    forecast_local: str = DEFAULT_LOCAL,
    archive_local: str = ARCHIVE_LOCAL,
) -> bool:
    """Upload both DBs in a single dataset commit."""
    global _multi_last
    tok = _token()
    if not tok:
        return False
    # Same throttle pattern as push_db, with separate state.
    if time.time() - _multi_last < PUSH_MIN_INTERVAL:
        return False
    if not _multi_lock.acquire(blocking=False):
        return False
    try:
        from huggingface_hub import CommitOperationAdd, HfApi

        api = HfApi(token=tok)
        ops = []
        pushed = []  # (local_path, path_in_repo) pairs that exist on disk
        for local, in_repo in (
            (forecast_local, PATH_IN_REPO),
            (archive_local, ARCHIVE_PATH_IN_REPO),
        ):
            if os.path.exists(local):
                ops.append(CommitOperationAdd(path_in_repo=in_repo, path_or_fileobj=local))
                pushed.append((local, in_repo))
        if not ops:
            return False
        api.create_commit(
            repo_id=_repo_id(),
            repo_type="dataset",
            operations=ops,
            commit_message="forecast log + archive update",
        )
        _multi_last = time.time()
        sizes = ", ".join(
            f"{in_repo}={os.path.getsize(local)}B" for local, in_repo in pushed
        )
        print(f"[persist] pushed multi to {_repo_id()} ({sizes})")
        return True
    except Exception:
        print("[persist] push_all failed:")
        traceback.print_exc()
        return False
    finally:
        _multi_lock.release()


def push_all_async(
    forecast_local: str = DEFAULT_LOCAL,
    archive_local: str = ARCHIVE_LOCAL,
) -> None:
    """Fire-and-forget variant of push_all()."""
    threading.Thread(
        target=push_all, args=(forecast_local, archive_local),
        daemon=True, name="persist-push-all",
    ).start()


def pull_all(
    forecast_local: str = DEFAULT_LOCAL,
    archive_local: str = ARCHIVE_LOCAL,
) -> None:
    """Pull both DBs from the dataset on startup.

    Each missing file is silently skipped.
    """
    pull_db(forecast_local)

    tok = _token()
    if not tok:
        return
    try:
        from huggingface_hub import hf_hub_download

        downloaded = hf_hub_download(
            repo_id=_repo_id(),
            repo_type="dataset",
            filename=ARCHIVE_PATH_IN_REPO,
            token=tok,
        )
        os.makedirs(os.path.dirname(archive_local) or ".", exist_ok=True)
        shutil.copyfile(downloaded, archive_local)
        print(f"[persist] pulled archive ({os.path.getsize(archive_local)} bytes)")
    except Exception:
        # No remote archive yet (e.g. first run) or a transient network error.
        pass
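

# Manual smoke test, a sketch only: it assumes HF_TOKEN is exported with
# write access and that the dataset repo already exists. Invoke the module
# directly (the actual filename may differ) to round-trip both DBs.
if __name__ == "__main__":
    pull_all()
    print("push_all ->", push_all())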