Spaces:

bitsofchris
/

time-series-ai-weather-forecast

Running

bitsofchris Claude Opus 4.7 (1M context) committed 12 days ago

Commit

7e7a097

1 Parent(s): bb2faab

Switch to 15-min autorefresh, drop manual button, sync all cycle archives

- app.py: AUTO_REFRESH_SECONDS = 15 min, CACHE_TTL = 14 min so the next
autorefresh always misses the cache.
- app.py: remove the 'Refresh forecast' button and replace with a status
line. Dropdowns still trigger re-render (cheap — they hit the cache).
- app.py: each autorefresh tick now also calls sync.sync_cycle for every
cycle_type (5min / 30min / 4hour) so the all-channel Ecowitt archive
in data/ecowitt.db stays current. Rate-limit aware.
- src/persist.py: push_all / pull_all / push_all_async — multi-file
commit that ships forecasts.db + ecowitt.db together to the HF
Dataset. push_db_async on user-triggered refresh stays as before
for snappier round-trips; the autorefresh thread does the full push.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

app.py +37 -8
src/persist.py +86 -0

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ from datetime import datetime, timedelta, timezone
 import gradio as gr
 import pandas as pd
-from src import ecowitt, forecast_log, nws, persist
 from src.forecast import forecast_series
 from src.weather_ui import (
     aligned_comparison_markdown,
@@ -26,8 +26,8 @@ from src.weather_ui import (
     hero_markdown,
 )
-CACHE_TTL_SECONDS = 60 * 60
-AUTO_REFRESH_SECONDS = 60 * 60
 DISPLAY_TZ = os.environ.get("DISPLAY_TZ", "America/New_York")
 PLACE_NAME = os.environ.get("PLACE_NAME", "Yaphank, NY")
@@ -158,8 +158,9 @@ def refresh(cycle_label: str = "Hourly", horizon_label: str = "24 h"):
     strip = emoji_strip_markdown(nws_df_raw, DISPLAY_TZ, n=12)
     scoreboard = render_scoreboard(log_conn)
-    # Backup the SQLite log to the HF dataset (non-blocking).
     persist.push_db_async()
     return hero, comparison_md, strip, fig, scoreboard
@@ -198,10 +199,37 @@ def render_scoreboard(conn) -> str:
 # --- auto-refresh background thread --------------------------------------
 def _autorefresh_loop():
     while True:
         try:
-            refresh()
         except Exception:  # noqa: BLE001
             print("[autorefresh] error during refresh:")
             traceback.print_exc()
@@ -243,7 +271,9 @@ with gr.Blocks(title="Toto Weather Forecast", theme=gr.themes.Soft()) as demo:
             choices=list(HORIZON_CONFIG.keys()), value="24 h",
             label="Forecast horizon", scale=1,
         )
-        refresh_btn = gr.Button("Refresh forecast", variant="primary", scale=1)
     scoreboard_md = gr.Markdown()
     plot = gr.Plot(label="Forecast")
@@ -251,12 +281,11 @@ with gr.Blocks(title="Toto Weather Forecast", theme=gr.themes.Soft()) as demo:
     outputs = [hero_md, comparison_md, strip_md, plot, scoreboard_md]
     inputs = [cycle_dd, horizon_dd]
     demo.load(refresh, inputs=inputs, outputs=outputs)
-    refresh_btn.click(refresh, inputs=inputs, outputs=outputs)
     cycle_dd.change(refresh, inputs=inputs, outputs=outputs)
     horizon_dd.change(refresh, inputs=inputs, outputs=outputs)
 if __name__ == "__main__":
-    persist.pull_db()  # bootstrap the forecast log from the HF Dataset
     _start_autorefresh()
     demo.launch()

 import gradio as gr
 import pandas as pd
+from src import ecowitt, forecast_log, nws, persist, storage, sync
 from src.forecast import forecast_series
 from src.weather_ui import (
     aligned_comparison_markdown,
     hero_markdown,
 )
+AUTO_REFRESH_SECONDS = 15 * 60          # background tick + archive sync
+CACHE_TTL_SECONDS = AUTO_REFRESH_SECONDS - 60  # so autorefresh always refetches
 DISPLAY_TZ = os.environ.get("DISPLAY_TZ", "America/New_York")
 PLACE_NAME = os.environ.get("PLACE_NAME", "Yaphank, NY")
     strip = emoji_strip_markdown(nws_df_raw, DISPLAY_TZ, n=12)
     scoreboard = render_scoreboard(log_conn)
+    # Backup forecast log to HF Dataset (non-blocking).
     persist.push_db_async()
+    # The full archive sync + push happens in the autorefresh thread.
     return hero, comparison_md, strip, fig, scoreboard
 # --- auto-refresh background thread --------------------------------------
+# Reuse the path persist.push_all / pull_all back up, so the file we sync
+# and the file we ship to the HF Dataset can never drift apart.
+ECOWITT_ARCHIVE_DB = persist.ARCHIVE_LOCAL
+def _sync_archive_all_cycles() -> None:
+    """Sync the SQLite archive at ECOWITT_ARCHIVE_DB for every cycle in sync.CYCLES.
+    Best-effort, never raises for a per-cycle failure:
+    - If EcowittConfig.from_env() raises RuntimeError the sync is skipped
+      silently (presumably missing credentials — confirm in src/ecowitt.py).
+    - An EcowittRateLimitError stops the remaining cycles for this tick.
+    - Any other per-cycle exception is logged and the next cycle is tried.
+    The connection opened here is always closed before returning.
+    """
+    try:
+        cfg = ecowitt.EcowittConfig.from_env()
+    except RuntimeError:
+        return
+    conn = storage.connect(ECOWITT_ARCHIVE_DB)
+    try:
+        for cycle in sync.CYCLES:
+            try:
+                sync.sync_cycle(cfg, conn, cycle, verbose=False)
+            except ecowitt.EcowittRateLimitError as err:
+                # Later cycles would hit the same limit; leave them for the next tick.
+                print(f"[autorefresh] rate-limited on {cycle.name}: {err} — skipping rest")
+                break
+            except Exception:  # noqa: BLE001
+                # One failing cycle must not prevent the others from syncing.
+                print(f"[autorefresh] sync error on {cycle.name}:")
+                traceback.print_exc()
+    finally:
+        conn.close()
 def _autorefresh_loop():
     while True:
         try:
+            refresh()                  # live forecast + forecasts.db log
+            _sync_archive_all_cycles() # 5min/30min/4hour raw archive
+            persist.push_all_async()   # back up both DBs to HF Dataset
         except Exception:  # noqa: BLE001
             print("[autorefresh] error during refresh:")
             traceback.print_exc()
             choices=list(HORIZON_CONFIG.keys()), value="24 h",
             label="Forecast horizon", scale=1,
         )
+    gr.Markdown(
+        "<span style='opacity:0.55'>🔄 Live data + forecast auto-refresh every 15 minutes.</span>"
+    )
     scoreboard_md = gr.Markdown()
     plot = gr.Plot(label="Forecast")
     outputs = [hero_md, comparison_md, strip_md, plot, scoreboard_md]
     inputs = [cycle_dd, horizon_dd]
     demo.load(refresh, inputs=inputs, outputs=outputs)
     cycle_dd.change(refresh, inputs=inputs, outputs=outputs)
     horizon_dd.change(refresh, inputs=inputs, outputs=outputs)
 if __name__ == "__main__":
+    persist.pull_all()  # bootstrap forecast log + archive from the HF Dataset
     _start_autorefresh()
     demo.launch()

src/persist.py CHANGED Viewed

@@ -101,3 +101,89 @@ def push_db_async(local_path: str = DEFAULT_LOCAL) -> None:
     threading.Thread(
         target=push_db, args=(local_path,), daemon=True, name="persist-push"
     ).start()

     threading.Thread(
         target=push_db, args=(local_path,), daemon=True, name="persist-push"
     ).start()
+# --- multi-file push (forecast log + Ecowitt archive in one commit) ------
+ARCHIVE_LOCAL = "data/ecowitt.db"
+ARCHIVE_PATH_IN_REPO = "ecowitt.db"
+_multi_lock = threading.Lock()
+_multi_last = 0.0
+def push_all(
+    forecast_local: str = DEFAULT_LOCAL,
+    archive_local: str = ARCHIVE_LOCAL,
+) -> bool:
+    """Upload both DBs in a single dataset commit.
+    Args:
+        forecast_local: Local path of the forecast-log DB (stored as PATH_IN_REPO).
+        archive_local: Local path of the Ecowitt archive DB (stored as
+            ARCHIVE_PATH_IN_REPO).
+    Returns:
+        True when a commit was created. False when there is no token, another
+        push_all is in flight, the last successful push was less than
+        PUSH_MIN_INTERVAL seconds ago, neither file exists, or the upload
+        failed (the failure is logged, never raised).
+    """
+    global _multi_last
+    tok = _token()
+    if not tok:
+        return False
+    # Non-blocking: if a push is already running, drop this request.
+    if not _multi_lock.acquire(blocking=False):
+        return False
+    try:
+        # Throttle under the lock so the check always sees the timestamp
+        # written by a push that finished just before we acquired it.
+        if time.time() - _multi_last < PUSH_MIN_INTERVAL:
+            return False
+        from huggingface_hub import CommitOperationAdd, HfApi  # noqa: PLC0415
+        ops = []
+        sizes = []
+        for local, in_repo in (
+            (forecast_local, PATH_IN_REPO),
+            (archive_local, ARCHIVE_PATH_IN_REPO),
+        ):
+            if not os.path.exists(local):
+                continue  # a missing DB is skipped; the other is still pushed
+            ops.append(CommitOperationAdd(path_in_repo=in_repo, path_or_fileobj=local))
+            # Record the size now so a later stat failure cannot turn a
+            # successful push into a reported failure.
+            sizes.append(f"{in_repo}={os.path.getsize(local)}B")
+        if not ops:
+            return False
+        repo_id = _repo_id()
+        HfApi(token=tok).create_commit(
+            repo_id=repo_id,
+            repo_type="dataset",
+            operations=ops,
+            commit_message="forecast log + archive update",
+        )
+        _multi_last = time.time()
+        print(f"[persist] pushed multi to {repo_id} ({', '.join(sizes)})")
+        return True
+    except Exception:  # noqa: BLE001
+        # Backup is best-effort: log and report failure instead of raising.
+        print("[persist] push_all failed:")
+        traceback.print_exc()
+        return False
+    finally:
+        _multi_lock.release()
+def push_all_async(
+    forecast_local: str = DEFAULT_LOCAL,
+    archive_local: str = ARCHIVE_LOCAL,
+) -> None:
+    """Run push_all(forecast_local, archive_local) on a daemon thread and return at once."""
+    worker = threading.Thread(
+        name="persist-push-all",
+        target=push_all,
+        args=(forecast_local, archive_local),
+        daemon=True,
+    )
+    worker.start()
+def pull_all(
+    forecast_local: str = DEFAULT_LOCAL,
+    archive_local: str = ARCHIVE_LOCAL,
+) -> None:
+    """Pull both DBs from the dataset on startup.
+    The forecast log is fetched via pull_db(). The archive is then downloaded
+    from ARCHIVE_PATH_IN_REPO and copied to archive_local. Best-effort: this
+    never raises for a download problem. An archive that does not exist in the
+    dataset yet (first run) is skipped silently; any other failure is logged
+    so auth, network and disk errors are not hidden.
+    """
+    pull_db(forecast_local)
+    # Pull the archive too if it exists.
+    tok = _token()
+    if not tok:
+        return
+    try:
+        from huggingface_hub import hf_hub_download  # noqa: PLC0415
+        from huggingface_hub.utils import EntryNotFoundError  # noqa: PLC0415
+    except ImportError:
+        print("[persist] huggingface_hub not available; skipping archive pull")
+        return
+    try:
+        downloaded = hf_hub_download(
+            repo_id=_repo_id(),
+            repo_type="dataset",
+            filename=ARCHIVE_PATH_IN_REPO,
+            token=tok,
+        )
+        os.makedirs(os.path.dirname(archive_local) or ".", exist_ok=True)
+        shutil.copyfile(downloaded, archive_local)
+        print(f"[persist] pulled archive ({os.path.getsize(archive_local)} bytes)")
+    except EntryNotFoundError:
+        # 404 on first run is expected: the archive has not been pushed yet.
+        return
+    except Exception:  # noqa: BLE001
+        # Stay best-effort at startup, but make the failure visible.
+        print("[persist] pull_all failed:")
+        traceback.print_exc()