Commit ·
06ff1aa
1
Parent(s): 6519ca2
Past-Toto overlay: fixed 6h-ahead horizon instead of mixed lags
Browse files
The sawtooth on the past-forecast overlay came from the 'latest-pre-
target' rule: each past hour was scored against whatever forecast
happened to be most-recent before it, so target_ts=13:00 might use a
15-min-ahead prediction while target_ts=14:00 used a 45-min-ahead
prediction. Different lags → different prediction quality → sawtooth.
historical_predictions now defaults to picking the forecast whose
forecast_made_at is closest to (target_ts − 6 h). Constant 6-hour
lookback gives a consistent forecast horizon and a smooth overlay.
Legacy 'latest-pre-target' mode still available via lag_hours=None.
Chart legend updated to '🤖 Toto (6h-ahead, past)' for clarity.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- src/forecast_log.py +72 -25
- src/weather_ui.py +4 -1
src/forecast_log.py
CHANGED
|
@@ -191,37 +191,84 @@ def historical_predictions(
|
|
| 191 |
metric: str,
|
| 192 |
since_unix: int | None = None,
|
| 193 |
until_unix: int | None = None,
|
|
|
|
| 194 |
) -> pd.DataFrame:
|
| 195 |
-
"""For each target_ts in [since, until], return
|
| 196 |
-
issued *before* that hour.
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
"""
|
| 201 |
import time as _time # noqa: PLC0415
|
| 202 |
if until_unix is None:
|
| 203 |
until_unix = int(_time.time())
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
df = pd.read_sql_query(sql, conn, params=params)
|
| 226 |
if df.empty:
|
| 227 |
return df
|
|
|
|
| 191 |
metric: str,
|
| 192 |
since_unix: int | None = None,
|
| 193 |
until_unix: int | None = None,
|
| 194 |
+
lag_hours: float | None = 6.0,
|
| 195 |
) -> pd.DataFrame:
|
| 196 |
+
"""For each target_ts in [since, until], return one historical forecast row.
|
|
|
|
| 197 |
|
| 198 |
+
Two modes:
|
| 199 |
+
|
| 200 |
+
- `lag_hours=None`: legacy 'latest-pre-target' behavior — for each
|
| 201 |
+
target hour, return the most-recent forecast issued before it. This
|
| 202 |
+
mixes different forecast lags depending on autorefresh timing, which
|
| 203 |
+
visually produces a sawtooth on the overlay.
|
| 204 |
+
|
| 205 |
+
- `lag_hours=N` (default 6.0): for each target hour, return the
|
| 206 |
+
forecast whose `forecast_made_at` is closest to `target_ts − N
|
| 207 |
+
hours`. Constant lag = consistent prediction difficulty = smooth
|
| 208 |
+
line on the chart. Semantics: 'what did Toto predict for this hour,
|
| 209 |
+
N hours before it happened?'.
|
| 210 |
+
|
| 211 |
+
`until_unix` defaults to now and caps the overlay so it never crosses
|
| 212 |
+
into the future side of the chart.
|
| 213 |
"""
|
| 214 |
import time as _time # noqa: PLC0415
|
| 215 |
if until_unix is None:
|
| 216 |
until_unix = int(_time.time())
|
| 217 |
+
|
| 218 |
+
if lag_hours is None:
|
| 219 |
+
# Original 'latest before target' query.
|
| 220 |
+
params: list = [source, metric, until_unix]
|
| 221 |
+
where_extra = ""
|
| 222 |
+
if since_unix is not None:
|
| 223 |
+
where_extra = " AND target_ts >= ?"
|
| 224 |
+
params.append(since_unix)
|
| 225 |
+
sql = f"""
|
| 226 |
+
WITH latest AS (
|
| 227 |
+
SELECT source, target_ts, metric,
|
| 228 |
+
MAX(forecast_made_at) AS forecast_made_at
|
| 229 |
+
FROM forecast_snapshots
|
| 230 |
+
WHERE source = ? AND metric = ?
|
| 231 |
+
AND forecast_made_at <= target_ts
|
| 232 |
+
AND target_ts <= ?
|
| 233 |
+
{where_extra}
|
| 234 |
+
GROUP BY source, target_ts, metric
|
| 235 |
+
)
|
| 236 |
+
SELECT f.target_ts, f.p10, f.p50, f.p90
|
| 237 |
+
FROM forecast_snapshots f
|
| 238 |
+
JOIN latest l USING (source, target_ts, metric, forecast_made_at)
|
| 239 |
+
ORDER BY f.target_ts
|
| 240 |
+
"""
|
| 241 |
+
else:
|
| 242 |
+
# Fixed-horizon pick: forecast_made_at closest to target_ts − lag.
|
| 243 |
+
lag_seconds = int(lag_hours * 3600)
|
| 244 |
+
params = [lag_seconds, source, metric, until_unix]
|
| 245 |
+
where_extra = ""
|
| 246 |
+
if since_unix is not None:
|
| 247 |
+
where_extra = " AND target_ts >= ?"
|
| 248 |
+
params.append(since_unix)
|
| 249 |
+
sql = f"""
|
| 250 |
+
WITH ranked AS (
|
| 251 |
+
SELECT target_ts, forecast_made_at, p10, p50, p90,
|
| 252 |
+
ABS(forecast_made_at - (target_ts - ?)) AS lag_err,
|
| 253 |
+
ROW_NUMBER() OVER (
|
| 254 |
+
PARTITION BY target_ts
|
| 255 |
+
ORDER BY ABS(forecast_made_at - (target_ts - ?))
|
| 256 |
+
) AS rk
|
| 257 |
+
FROM forecast_snapshots
|
| 258 |
+
WHERE source = ? AND metric = ?
|
| 259 |
+
AND forecast_made_at <= target_ts
|
| 260 |
+
AND target_ts <= ?
|
| 261 |
+
{where_extra}
|
| 262 |
+
)
|
| 263 |
+
SELECT target_ts, p10, p50, p90
|
| 264 |
+
FROM ranked
|
| 265 |
+
WHERE rk = 1
|
| 266 |
+
ORDER BY target_ts
|
| 267 |
+
"""
|
| 268 |
+
# The window function references the lag twice — easier to pass it
|
| 269 |
+
# twice than juggle indexes in the prepared statement.
|
| 270 |
+
params.insert(1, lag_seconds)
|
| 271 |
+
|
| 272 |
df = pd.read_sql_query(sql, conn, params=params)
|
| 273 |
if df.empty:
|
| 274 |
return df
|
src/weather_ui.py
CHANGED
|
@@ -214,12 +214,15 @@ def combined_figure(
|
|
| 214 |
row=i, col=1,
|
| 215 |
)
|
| 216 |
# Past Toto forecasts overlaid on actuals (historical side only).
|
|
|
|
|
|
|
|
|
|
| 217 |
if past_toto and col in past_toto:
|
| 218 |
pt = past_toto[col]
|
| 219 |
fig.add_trace(
|
| 220 |
go.Scatter(
|
| 221 |
x=pt.index, y=pt["p50"].values,
|
| 222 |
-
name="🤖 Toto (
|
| 223 |
line=dict(color="rgba(31,119,180,0.55)", width=1.5),
|
| 224 |
showlegend=showlegend, legendgroup="toto-past",
|
| 225 |
),
|
|
|
|
| 214 |
row=i, col=1,
|
| 215 |
)
|
| 216 |
# Past Toto forecasts overlaid on actuals (historical side only).
|
| 217 |
+
# Each point is Toto's prediction issued at a fixed lag before its
|
| 218 |
+
# target hour (default 6h-ahead) — so the line shows model error at
|
| 219 |
+
# a consistent forecast horizon, not a mix of lags.
|
| 220 |
if past_toto and col in past_toto:
|
| 221 |
pt = past_toto[col]
|
| 222 |
fig.add_trace(
|
| 223 |
go.Scatter(
|
| 224 |
x=pt.index, y=pt["p50"].values,
|
| 225 |
+
name="🤖 Toto (6h-ahead, past)", mode="lines",
|
| 226 |
line=dict(color="rgba(31,119,180,0.55)", width=1.5),
|
| 227 |
showlegend=showlegend, legendgroup="toto-past",
|
| 228 |
),
|