Spaces:

bitsofchris
/

time-series-ai-weather-forecast

Running

bitsofchris Claude Opus 4.7 (1M context) committed 13 days ago

Commit

2f0437f

1 Parent(s): 06ff1aa

Toto fed hourly while chart stays 5-min; drop past overlay entirely

Two changes that fix the 'forecasts look awful at 5-min' regression
and clean up the chart:

1. Decouple display cadence from inference cadence. The chart still
renders at 5-min (display_resample='5min') but Toto consumes an
hourly-resampled view of the same history (forecast_resample='1h').
That brings the input back to a 168-point context + 48-step horizon,
the sequence size where the 4M model produced clean forecasts.

The 5-min view was sending Toto-2.0-4m a 2016-point context plus a
576-step horizon — well outside its sweet spot — and the long
auto-regressive rollout compounded the error. On top of that, many of
the older 5-min points were really 30-min source rows linearly
interpolated, which gave the model fake high-frequency structure to
anchor on.

2. Remove the past Toto overlay from the chart. The sawtooth came from
sparse forecast snapshots in some hours, and the smoothing knobs
(fixed-lag, etc.) didn't fully fix it. The scoreboard MAE still
captures the same information as a single honest number.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

app.py +20 -28
src/weather_ui.py +0 -16

app.py CHANGED Viewed

@@ -41,9 +41,10 @@ PLACE_NAME = os.environ.get("PLACE_NAME", "Yaphank, NY")
 # keeps the forecast informed by the full week without making the chart
 # noisy.
 VIEW_WEEK = {
-    "label": "Past 5 days · 48 h forecast (5-min cadence, Toto sees 7d)",
     "cycle_type": "5min",
-    "resample": "5min",
     "display_days": 5,
     "context_days": 7,
     "horizon_hours": 48,
@@ -177,10 +178,13 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
     """Fetch + forecast for one view config. Returns intermediate pieces so
     the caller can stitch the page together."""
     cycle_type = view["cycle_type"]
-    resample = view["resample"]
-    step_hours = _resample_hours(resample)
     horizon_hours = view["horizon_hours"]
-    horizon_steps = max(1, int(round(horizon_hours / step_hours)))
     # context_hours = what we feed Toto; display_hours = what we show on
     # the chart. Fall back to old keys for backward compatibility.
@@ -195,7 +199,14 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
         or view.get("history_hours", 0),
     )
-    history = fetch_history(cycle_type, resample, context_hours)
     nws_df_raw = fetch_nws(horizon_hours)
     nws_df = _resample_nws_to(nws_df_raw, resample)
     last_actual = history.dropna(how="all").index.max()
@@ -207,7 +218,7 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
     totos: dict[str, object] = {}
     nws_aligned: dict[str, pd.Series] = {}
     for m in METRICS:
-        series = history[m["col"]].dropna()
         if series.empty:
             continue
         toto = forecast_series(series, horizon=horizon_steps)
@@ -220,35 +231,16 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
             if log_to_scoreboard:
                 forecast_log.record_nws(log_conn, m["col"], ns)
-    now = pd.Timestamp.now(tz="UTC").floor(resample)
-    visible_steps = int(round(display_hours / step_hours))
     visible_history = history.tail(visible_steps)
-    # Past Toto forecasts: for each past hour visible on the chart, the
-    # most-recent forecast we issued *before* that hour. Strictly capped at
-    # the most recent Ecowitt actual so the overlay never bleeds into the
-    # future portion of the chart.
-    since_unix = (
-        int(visible_history.index.min().timestamp()) if not visible_history.empty else None
-    )
-    until_unix = int(last_actual.timestamp()) if last_actual is not None else None
-    past_toto: dict[str, pd.DataFrame] = {}
-    for m in METRICS:
-        col = m["col"]
-        pt = forecast_log.historical_predictions(
-            log_conn, "toto", col,
-            since_unix=since_unix, until_unix=until_unix,
-        )
-        if not pt.empty:
-            past_toto[col] = pt
     fig = combined_figure(
         history=visible_history,
         totos=totos,
         nws_df=nws_future,
         metrics=METRICS,
         now=now,
-        past_toto=past_toto,
     )
     return {
         "fig": fig,

 # keeps the forecast informed by the full week without making the chart
 # noisy.
 VIEW_WEEK = {
+    "label": "Past 5 days · 48 h forecast (5-min display, Toto fed hourly)",
     "cycle_type": "5min",
+    "resample": "5min",            # display cadence on the chart
+    "forecast_resample": "1h",     # cadence Toto actually consumes
     "display_days": 5,
     "context_days": 7,
     "horizon_hours": 48,
     """Fetch + forecast for one view config. Returns intermediate pieces so
     the caller can stitch the page together."""
     cycle_type = view["cycle_type"]
+    display_resample = view["resample"]
+    forecast_resample = view.get("forecast_resample", display_resample)
+    display_step_hours = _resample_hours(display_resample)
+    forecast_step_hours = _resample_hours(forecast_resample)
     horizon_hours = view["horizon_hours"]
+    horizon_steps = max(1, int(round(horizon_hours / forecast_step_hours)))
     # context_hours = what we feed Toto; display_hours = what we show on
     # the chart. Fall back to old keys for backward compatibility.
         or view.get("history_hours", 0),
     )
+    history = fetch_history(cycle_type, display_resample, context_hours)
+    # Coarser series for Toto inference: keeps the input length and
+    # forecast horizon short enough for the 4M model to predict cleanly,
+    # while the chart still shows the full 5-min granularity.
+    if forecast_resample != display_resample:
+        history_for_toto = history.resample(forecast_resample).mean()
+    else:
+        history_for_toto = history
     nws_df_raw = fetch_nws(horizon_hours)
     nws_df = _resample_nws_to(nws_df_raw, resample)
     last_actual = history.dropna(how="all").index.max()
     totos: dict[str, object] = {}
     nws_aligned: dict[str, pd.Series] = {}
     for m in METRICS:
+        series = history_for_toto[m["col"]].dropna()
         if series.empty:
             continue
         toto = forecast_series(series, horizon=horizon_steps)
             if log_to_scoreboard:
                 forecast_log.record_nws(log_conn, m["col"], ns)
+    now = pd.Timestamp.now(tz="UTC").floor(display_resample)
+    visible_steps = int(round(display_hours / display_step_hours))
     visible_history = history.tail(visible_steps)
     fig = combined_figure(
         history=visible_history,
         totos=totos,
         nws_df=nws_future,
         metrics=METRICS,
         now=now,
     )
     return {
         "fig": fig,

src/weather_ui.py CHANGED Viewed

@@ -187,7 +187,6 @@ def combined_figure(
     nws_df: pd.DataFrame | None,
     metrics: list[dict],
     now: pd.Timestamp | None = None,
-    past_toto: dict[str, pd.DataFrame] | None = None,
 ) -> go.Figure:
     """Three stacked subplots sharing the x-axis."""
     fig = make_subplots(
@@ -213,21 +212,6 @@ def combined_figure(
             ),
             row=i, col=1,
         )
-        # Past Toto forecasts overlaid on actuals (historical side only).
-        # Each point is Toto's prediction issued at a fixed lag before its
-        # target hour (default 6h-ahead) — so the line shows model error at
-        # a consistent forecast horizon, not a mix of lags.
-        if past_toto and col in past_toto:
-            pt = past_toto[col]
-            fig.add_trace(
-                go.Scatter(
-                    x=pt.index, y=pt["p50"].values,
-                    name="🤖 Toto (6h-ahead, past)", mode="lines",
-                    line=dict(color="rgba(31,119,180,0.55)", width=1.5),
-                    showlegend=showlegend, legendgroup="toto-past",
-                ),
-                row=i, col=1,
-            )
         if toto is not None:
             fig.add_trace(
                 go.Scatter(

     nws_df: pd.DataFrame | None,
     metrics: list[dict],
     now: pd.Timestamp | None = None,
 ) -> go.Figure:
     """Three stacked subplots sharing the x-axis."""
     fig = make_subplots(
             ),
             row=i, col=1,
         )
         if toto is not None:
             fig.add_trace(
                 go.Scatter(