Toto fed hourly while chart stays 5-min; drop past overlay entirely
Browse files
Two changes that fix the 'forecasts look awful at 5-min' regression
and clean up the chart:
1. Decouple display cadence from inference cadence. The chart still
renders at 5-min (display_resample='5min') but Toto consumes an
hourly-resampled view of the same history (forecast_resample='1h').
That brings the input back to a 168-point context + 48-step horizon,
the sequence size where the 4M model produced clean forecasts.
The 5-min view was sending Toto-2.0-4m a 2016-point context + 576-step
horizon — well outside its sweet spot — and the long auto-regressive
rollout compounded error. On top of that, many of the older 5-min points
were really 30-min source rows linearly interpolated, which gave the
model fake high-frequency structure to anchor on.
2. Remove the past Toto overlay from the chart. The overlay's sawtooth
came from sparse forecast snapshots in some hours, and the smoothing
knobs (fixed-lag, etc.) didn't fully fix it. The scoreboard MAE still
captures the same information as a single honest number.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- app.py +20 -28
- src/weather_ui.py +0 -16
|
@@ -41,9 +41,10 @@ PLACE_NAME = os.environ.get("PLACE_NAME", "Yaphank, NY")
|
|
| 41 |
# keeps the forecast informed by the full week without making the chart
|
| 42 |
# noisy.
|
| 43 |
VIEW_WEEK = {
|
| 44 |
-
"label": "Past 5 days · 48 h forecast (5-min
|
| 45 |
"cycle_type": "5min",
|
| 46 |
-
"resample": "5min",
|
|
|
|
| 47 |
"display_days": 5,
|
| 48 |
"context_days": 7,
|
| 49 |
"horizon_hours": 48,
|
|
@@ -177,10 +178,13 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
|
|
| 177 |
"""Fetch + forecast for one view config. Returns intermediate pieces so
|
| 178 |
the caller can stitch the page together."""
|
| 179 |
cycle_type = view["cycle_type"]
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
| 182 |
horizon_hours = view["horizon_hours"]
|
| 183 |
-
horizon_steps = max(1, int(round(horizon_hours /
|
| 184 |
|
| 185 |
# context_hours = what we feed Toto; display_hours = what we show on
|
| 186 |
# the chart. Fall back to old keys for backward compatibility.
|
|
@@ -195,7 +199,14 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
|
|
| 195 |
or view.get("history_hours", 0),
|
| 196 |
)
|
| 197 |
|
| 198 |
-
history = fetch_history(cycle_type,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
nws_df_raw = fetch_nws(horizon_hours)
|
| 200 |
nws_df = _resample_nws_to(nws_df_raw, resample)
|
| 201 |
last_actual = history.dropna(how="all").index.max()
|
|
@@ -207,7 +218,7 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
|
|
| 207 |
totos: dict[str, object] = {}
|
| 208 |
nws_aligned: dict[str, pd.Series] = {}
|
| 209 |
for m in METRICS:
|
| 210 |
-
series =
|
| 211 |
if series.empty:
|
| 212 |
continue
|
| 213 |
toto = forecast_series(series, horizon=horizon_steps)
|
|
@@ -220,35 +231,16 @@ def _build_view(view: dict, log_conn, log_to_scoreboard: bool) -> dict:
|
|
| 220 |
if log_to_scoreboard:
|
| 221 |
forecast_log.record_nws(log_conn, m["col"], ns)
|
| 222 |
|
| 223 |
-
now = pd.Timestamp.now(tz="UTC").floor(
|
| 224 |
-
visible_steps = int(round(display_hours /
|
| 225 |
visible_history = history.tail(visible_steps)
|
| 226 |
|
| 227 |
-
# Past Toto forecasts: for each past hour visible on the chart, the
|
| 228 |
-
# most-recent forecast we issued *before* that hour. Strictly capped at
|
| 229 |
-
# the most recent Ecowitt actual so the overlay never bleeds into the
|
| 230 |
-
# future portion of the chart.
|
| 231 |
-
since_unix = (
|
| 232 |
-
int(visible_history.index.min().timestamp()) if not visible_history.empty else None
|
| 233 |
-
)
|
| 234 |
-
until_unix = int(last_actual.timestamp()) if last_actual is not None else None
|
| 235 |
-
past_toto: dict[str, pd.DataFrame] = {}
|
| 236 |
-
for m in METRICS:
|
| 237 |
-
col = m["col"]
|
| 238 |
-
pt = forecast_log.historical_predictions(
|
| 239 |
-
log_conn, "toto", col,
|
| 240 |
-
since_unix=since_unix, until_unix=until_unix,
|
| 241 |
-
)
|
| 242 |
-
if not pt.empty:
|
| 243 |
-
past_toto[col] = pt
|
| 244 |
-
|
| 245 |
fig = combined_figure(
|
| 246 |
history=visible_history,
|
| 247 |
totos=totos,
|
| 248 |
nws_df=nws_future,
|
| 249 |
metrics=METRICS,
|
| 250 |
now=now,
|
| 251 |
-
past_toto=past_toto,
|
| 252 |
)
|
| 253 |
return {
|
| 254 |
"fig": fig,
|
|
|
|
| 41 |
# keeps the forecast informed by the full week without making the chart
|
| 42 |
# noisy.
|
| 43 |
VIEW_WEEK = {
|
| 44 |
+
"label": "Past 5 days · 48 h forecast (5-min display, Toto fed hourly)",
|
| 45 |
"cycle_type": "5min",
|
| 46 |
+
"resample": "5min", # display cadence on the chart
|
| 47 |
+
"forecast_resample": "1h", # cadence Toto actually consumes
|
| 48 |
"display_days": 5,
|
| 49 |
"context_days": 7,
|
| 50 |
"horizon_hours": 48,
|
|
|
|
| 178 |
"""Fetch + forecast for one view config. Returns intermediate pieces so
|
| 179 |
the caller can stitch the page together."""
|
| 180 |
cycle_type = view["cycle_type"]
|
| 181 |
+
display_resample = view["resample"]
|
| 182 |
+
forecast_resample = view.get("forecast_resample", display_resample)
|
| 183 |
+
|
| 184 |
+
display_step_hours = _resample_hours(display_resample)
|
| 185 |
+
forecast_step_hours = _resample_hours(forecast_resample)
|
| 186 |
horizon_hours = view["horizon_hours"]
|
| 187 |
+
horizon_steps = max(1, int(round(horizon_hours / forecast_step_hours)))
|
| 188 |
|
| 189 |
# context_hours = what we feed Toto; display_hours = what we show on
|
| 190 |
# the chart. Fall back to old keys for backward compatibility.
|
|
|
|
| 199 |
or view.get("history_hours", 0),
|
| 200 |
)
|
| 201 |
|
| 202 |
+
history = fetch_history(cycle_type, display_resample, context_hours)
|
| 203 |
+
# Coarser series for Toto inference: keeps the input length and
|
| 204 |
+
# forecast horizon short enough for the 4M model to predict cleanly,
|
| 205 |
+
# while the chart still shows the full 5-min granularity.
|
| 206 |
+
if forecast_resample != display_resample:
|
| 207 |
+
history_for_toto = history.resample(forecast_resample).mean()
|
| 208 |
+
else:
|
| 209 |
+
history_for_toto = history
|
| 210 |
nws_df_raw = fetch_nws(horizon_hours)
|
| 211 |
nws_df = _resample_nws_to(nws_df_raw, resample)
|
| 212 |
last_actual = history.dropna(how="all").index.max()
|
|
|
|
| 218 |
totos: dict[str, object] = {}
|
| 219 |
nws_aligned: dict[str, pd.Series] = {}
|
| 220 |
for m in METRICS:
|
| 221 |
+
series = history_for_toto[m["col"]].dropna()
|
| 222 |
if series.empty:
|
| 223 |
continue
|
| 224 |
toto = forecast_series(series, horizon=horizon_steps)
|
|
|
|
| 231 |
if log_to_scoreboard:
|
| 232 |
forecast_log.record_nws(log_conn, m["col"], ns)
|
| 233 |
|
| 234 |
+
now = pd.Timestamp.now(tz="UTC").floor(display_resample)
|
| 235 |
+
visible_steps = int(round(display_hours / display_step_hours))
|
| 236 |
visible_history = history.tail(visible_steps)
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
fig = combined_figure(
|
| 239 |
history=visible_history,
|
| 240 |
totos=totos,
|
| 241 |
nws_df=nws_future,
|
| 242 |
metrics=METRICS,
|
| 243 |
now=now,
|
|
|
|
| 244 |
)
|
| 245 |
return {
|
| 246 |
"fig": fig,
|
|
@@ -187,7 +187,6 @@ def combined_figure(
|
|
| 187 |
nws_df: pd.DataFrame | None,
|
| 188 |
metrics: list[dict],
|
| 189 |
now: pd.Timestamp | None = None,
|
| 190 |
-
past_toto: dict[str, pd.DataFrame] | None = None,
|
| 191 |
) -> go.Figure:
|
| 192 |
"""Three stacked subplots sharing the x-axis."""
|
| 193 |
fig = make_subplots(
|
|
@@ -213,21 +212,6 @@ def combined_figure(
|
|
| 213 |
),
|
| 214 |
row=i, col=1,
|
| 215 |
)
|
| 216 |
-
# Past Toto forecasts overlaid on actuals (historical side only).
|
| 217 |
-
# Each point is Toto's prediction issued at a fixed lag before its
|
| 218 |
-
# target hour (default 6h-ahead) — so the line shows model error at
|
| 219 |
-
# a consistent forecast horizon, not a mix of lags.
|
| 220 |
-
if past_toto and col in past_toto:
|
| 221 |
-
pt = past_toto[col]
|
| 222 |
-
fig.add_trace(
|
| 223 |
-
go.Scatter(
|
| 224 |
-
x=pt.index, y=pt["p50"].values,
|
| 225 |
-
name="🤖 Toto (6h-ahead, past)", mode="lines",
|
| 226 |
-
line=dict(color="rgba(31,119,180,0.55)", width=1.5),
|
| 227 |
-
showlegend=showlegend, legendgroup="toto-past",
|
| 228 |
-
),
|
| 229 |
-
row=i, col=1,
|
| 230 |
-
)
|
| 231 |
if toto is not None:
|
| 232 |
fig.add_trace(
|
| 233 |
go.Scatter(
|
|
|
|
| 187 |
nws_df: pd.DataFrame | None,
|
| 188 |
metrics: list[dict],
|
| 189 |
now: pd.Timestamp | None = None,
|
|
|
|
| 190 |
) -> go.Figure:
|
| 191 |
"""Three stacked subplots sharing the x-axis."""
|
| 192 |
fig = make_subplots(
|
|
|
|
| 212 |
),
|
| 213 |
row=i, col=1,
|
| 214 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
if toto is not None:
|
| 216 |
fig.add_trace(
|
| 217 |
go.Scatter(
|