# Scraped web-viewer header removed; original commit: 7e7a097
# "Switch to 15-min autorefresh, drop manual button, sync all cycle archives"
"""Persist the forecast SQLite DB across HF Space rebuilds.
HF Spaces' free tier has ephemeral storage — every `git push` rebuilds the
container and wipes any local files. We back the forecast log with a
private HF Dataset:
- On startup: pull the latest forecasts.db from the dataset (if any).
- After every refresh: push the current forecasts.db back.
Environment:
HF_TOKEN must have write access to the dataset
LOG_DATASET_REPO override the default dataset repo id
"""
from __future__ import annotations
import os
import shutil
import threading
import time
import traceback
# Private HF Dataset used as the backing store for the SQLite files.
DEFAULT_REPO = "bitsofchris/toto-weather-forecast-log"
# File name inside the dataset repo for the forecast log.
PATH_IN_REPO = "forecasts.db"
# Local path of the SQLite forecast log.
DEFAULT_LOCAL = "data/forecasts.db"
# Serializes uploads so overlapping refreshes don't issue redundant pushes.
_push_lock = threading.Lock()
# Wall-clock time (time.time()) of the last successful push; 0.0 = never pushed.
_last_push_at = 0.0
PUSH_MIN_INTERVAL = 60.0 # seconds — coalesce rapid pushes
def _repo_id() -> str:
    """Return the dataset repo id, honoring the LOG_DATASET_REPO override."""
    override = os.environ.get("LOG_DATASET_REPO")
    if override is None:
        return DEFAULT_REPO
    return override
def _token() -> str | None:
return os.environ.get("HF_TOKEN")
def pull_db(local_path: str = DEFAULT_LOCAL) -> bool:
    """Download the latest DB from the dataset, overwriting any local copy.

    Args:
        local_path: Destination path for the downloaded SQLite file.

    Returns:
        True on success; False when HF_TOKEN is unset, the remote file is
        missing, or a network error occurs (all non-fatal — the app starts
        with an empty DB in that case).
    """
    tok = _token()
    if not tok:
        print("[persist] HF_TOKEN not set — skipping pull")
        return False
    try:
        # Imported lazily so the module loads even without huggingface_hub.
        from huggingface_hub import hf_hub_download  # noqa: PLC0415

        downloaded = hf_hub_download(
            repo_id=_repo_id(),
            repo_type="dataset",
            filename=PATH_IN_REPO,
            token=tok,
        )
        os.makedirs(os.path.dirname(local_path) or ".", exist_ok=True)
        shutil.copyfile(downloaded, local_path)
        print(f"[persist] pulled DB from {_repo_id()} ({os.path.getsize(local_path)} bytes)")
        return True
    except Exception:  # noqa: BLE001 — best-effort: a missing remote DB is expected on first run
        # Fix: was an f-string with no placeholders (F541).
        print("[persist] pull skipped (no remote DB or network error):")
        traceback.print_exc()
        return False
def push_db(local_path: str = DEFAULT_LOCAL) -> bool:
    """Upload the local DB to the dataset.

    Coalesced (at most one push per PUSH_MIN_INTERVAL seconds) and
    lock-protected so overlapping refreshes don't issue redundant uploads.

    Returns:
        True when the upload happened; False when skipped or failed.
    """
    global _last_push_at
    tok = _token()
    if not tok or not os.path.exists(local_path):
        return False
    # Coalesce: skip entirely if a push just happened.
    elapsed = time.time() - _last_push_at
    if elapsed < PUSH_MIN_INTERVAL:
        return False
    # Non-blocking acquire — a concurrent push makes ours redundant.
    acquired = _push_lock.acquire(blocking=False)
    if not acquired:
        return False
    try:
        from huggingface_hub import HfApi  # noqa: PLC0415

        HfApi(token=tok).upload_file(
            path_or_fileobj=local_path,
            path_in_repo=PATH_IN_REPO,
            repo_id=_repo_id(),
            repo_type="dataset",
            commit_message="forecast log update",
        )
        _last_push_at = time.time()
        size = os.path.getsize(local_path)
        print(f"[persist] pushed DB to {_repo_id()} ({size} bytes)")
        return True
    except Exception:  # noqa: BLE001
        print("[persist] push failed:")
        traceback.print_exc()
        return False
    finally:
        _push_lock.release()
def push_db_async(local_path: str = DEFAULT_LOCAL) -> None:
    """Fire-and-forget push so refresh() returns to the user immediately."""
    worker = threading.Thread(
        target=push_db,
        args=(local_path,),
        daemon=True,
        name="persist-push",
    )
    worker.start()
# --- multi-file push (forecast log + Ecowitt archive in one commit) ------
# Local path of the Ecowitt sensor archive DB.
ARCHIVE_LOCAL = "data/ecowitt.db"
# Its file name inside the dataset repo.
ARCHIVE_PATH_IN_REPO = "ecowitt.db"
# Separate lock/timestamp so multi-file pushes coalesce independently of push_db.
_multi_lock = threading.Lock()
_multi_last = 0.0
def push_all(
    forecast_local: str = DEFAULT_LOCAL,
    archive_local: str = ARCHIVE_LOCAL,
) -> bool:
    """Upload both DBs in a single dataset commit.

    Coalesced (at most once per PUSH_MIN_INTERVAL) and lock-protected like
    push_db, but with its own lock/timestamp. Missing local files are
    skipped from the commit.

    Args:
        forecast_local: local path of the forecast log DB.
        archive_local: local path of the Ecowitt archive DB.

    Returns:
        True when the commit was created; False when skipped (no token,
        coalesced, lock busy, nothing to push) or on error.
    """
    global _multi_last
    tok = _token()
    if not tok:
        return False
    if time.time() - _multi_last < PUSH_MIN_INTERVAL:
        return False
    if not _multi_lock.acquire(blocking=False):
        return False
    try:
        from huggingface_hub import CommitOperationAdd, HfApi  # noqa: PLC0415

        ops = [
            CommitOperationAdd(path_in_repo=in_repo, path_or_fileobj=local)
            for local, in_repo in (
                (forecast_local, PATH_IN_REPO),
                (archive_local, ARCHIVE_PATH_IN_REPO),
            )
            if os.path.exists(local)
        ]
        if not ops:
            return False
        api = HfApi(token=tok)
        api.create_commit(
            repo_id=_repo_id(),
            repo_type="dataset",
            operations=ops,
            commit_message="forecast log + archive update",
        )
        _multi_last = time.time()
        # Report each size from the op's own local path rather than re-deriving
        # it from path_in_repo (the old inline conditional was fragile and could
        # stat a file that wasn't part of the commit).
        sizes = ", ".join(
            f"{op.path_in_repo}={os.path.getsize(op.path_or_fileobj)}B" for op in ops
        )
        print(f"[persist] pushed multi to {_repo_id()} ({sizes})")
        return True
    except Exception:  # noqa: BLE001
        print("[persist] push_all failed:")
        traceback.print_exc()
        return False
    finally:
        _multi_lock.release()
def push_all_async(
    forecast_local: str = DEFAULT_LOCAL,
    archive_local: str = ARCHIVE_LOCAL,
) -> None:
    """Fire-and-forget multi-file push; the caller is never blocked."""
    worker = threading.Thread(
        target=push_all,
        args=(forecast_local, archive_local),
        daemon=True,
        name="persist-push-all",
    )
    worker.start()
def pull_all(
    forecast_local: str = DEFAULT_LOCAL,
    archive_local: str = ARCHIVE_LOCAL,
) -> None:
    """Pull both DBs from the dataset on startup.

    Each missing file is silently skipped (a 404 on first run is expected).
    """
    pull_db(forecast_local)
    # Also fetch the Ecowitt archive, best-effort.
    tok = _token()
    if not tok:
        return
    try:
        from huggingface_hub import hf_hub_download  # noqa: PLC0415

        fetched = hf_hub_download(
            repo_id=_repo_id(),
            repo_type="dataset",
            filename=ARCHIVE_PATH_IN_REPO,
            token=tok,
        )
        target_dir = os.path.dirname(archive_local) or "."
        os.makedirs(target_dir, exist_ok=True)
        shutil.copyfile(fetched, archive_local)
        size = os.path.getsize(archive_local)
        print(f"[persist] pulled archive ({size} bytes)")
    except Exception:  # noqa: BLE001
        # Expected on first run: the archive doesn't exist remotely yet.
        pass