bitsofchris Claude Opus 4.7 (1M context) committed on
Commit
06ff1aa
·
1 Parent(s): 6519ca2

Past-Toto overlay: fixed 6h-ahead horizon instead of mixed lags

Browse files

The sawtooth on the past-forecast overlay came from the 'latest-pre-
target' rule: each past hour was scored against whatever forecast
happened to be most-recent before it, so target_ts=13:00 might use a
15-min-ahead prediction while target_ts=14:00 used a 45-min-ahead
prediction. Different lags → different prediction quality → sawtooth.

historical_predictions now defaults to picking the forecast whose
forecast_made_at is closest to (target_ts − 6 h). Constant 6-hour
lookback gives a consistent forecast horizon and a smooth overlay.
Legacy 'latest-pre-target' mode still available via lag_hours=None.

Chart legend updated to '🤖 Toto (6h-ahead, past)' for clarity.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. src/forecast_log.py +72 -25
  2. src/weather_ui.py +4 -1
src/forecast_log.py CHANGED
@@ -191,37 +191,84 @@ def historical_predictions(
191
  metric: str,
192
  since_unix: int | None = None,
193
  until_unix: int | None = None,
 
194
  ) -> pd.DataFrame:
195
- """For each target_ts in [since, until], return the most-recent forecast
196
- issued *before* that hour.
197
 
198
- `until_unix` defaults to now β€” pass it to cap the overlay so it doesn't
199
- bleed into the future portion of the chart.
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  """
201
  import time as _time # noqa: PLC0415
202
  if until_unix is None:
203
  until_unix = int(_time.time())
204
- params: list = [source, metric, until_unix]
205
- where_extra = ""
206
- if since_unix is not None:
207
- where_extra = " AND target_ts >= ?"
208
- params.append(since_unix)
209
- sql = f"""
210
- WITH latest AS (
211
- SELECT source, target_ts, metric,
212
- MAX(forecast_made_at) AS forecast_made_at
213
- FROM forecast_snapshots
214
- WHERE source = ? AND metric = ?
215
- AND forecast_made_at <= target_ts
216
- AND target_ts <= ?
217
- {where_extra}
218
- GROUP BY source, target_ts, metric
219
- )
220
- SELECT f.target_ts, f.p10, f.p50, f.p90
221
- FROM forecast_snapshots f
222
- JOIN latest l USING (source, target_ts, metric, forecast_made_at)
223
- ORDER BY f.target_ts
224
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  df = pd.read_sql_query(sql, conn, params=params)
226
  if df.empty:
227
  return df
 
191
  metric: str,
192
  since_unix: int | None = None,
193
  until_unix: int | None = None,
194
+ lag_hours: float | None = 6.0,
195
  ) -> pd.DataFrame:
196
+ """For each target_ts in [since, until], return one historical forecast row.
 
197
 
198
+ Two modes:
199
+
200
+ - `lag_hours=None`: legacy 'latest-pre-target' behavior β€” for each
201
+ target hour, return the most-recent forecast issued before it. This
202
+ mixes different forecast lags depending on autorefresh timing, which
203
+ visually produces a sawtooth on the overlay.
204
+
205
+ - `lag_hours=N` (default 6.0): for each target hour, return the
206
+ forecast whose `forecast_made_at` is closest to `target_ts βˆ’ N
207
+ hours`. Constant lag = consistent prediction difficulty = smooth
208
+ line on the chart. Semantics: 'what did Toto predict for this hour,
209
+ N hours before it happened?'.
210
+
211
+ `until_unix` defaults to now and caps the overlay so it never crosses
212
+ into the future side of the chart.
213
  """
214
  import time as _time # noqa: PLC0415
215
  if until_unix is None:
216
  until_unix = int(_time.time())
217
+
218
+ if lag_hours is None:
219
+ # Original 'latest before target' query.
220
+ params: list = [source, metric, until_unix]
221
+ where_extra = ""
222
+ if since_unix is not None:
223
+ where_extra = " AND target_ts >= ?"
224
+ params.append(since_unix)
225
+ sql = f"""
226
+ WITH latest AS (
227
+ SELECT source, target_ts, metric,
228
+ MAX(forecast_made_at) AS forecast_made_at
229
+ FROM forecast_snapshots
230
+ WHERE source = ? AND metric = ?
231
+ AND forecast_made_at <= target_ts
232
+ AND target_ts <= ?
233
+ {where_extra}
234
+ GROUP BY source, target_ts, metric
235
+ )
236
+ SELECT f.target_ts, f.p10, f.p50, f.p90
237
+ FROM forecast_snapshots f
238
+ JOIN latest l USING (source, target_ts, metric, forecast_made_at)
239
+ ORDER BY f.target_ts
240
+ """
241
+ else:
242
+ # Fixed-horizon pick: forecast_made_at closest to target_ts βˆ’ lag.
243
+ lag_seconds = int(lag_hours * 3600)
244
+ params = [lag_seconds, source, metric, until_unix]
245
+ where_extra = ""
246
+ if since_unix is not None:
247
+ where_extra = " AND target_ts >= ?"
248
+ params.append(since_unix)
249
+ sql = f"""
250
+ WITH ranked AS (
251
+ SELECT target_ts, forecast_made_at, p10, p50, p90,
252
+ ABS(forecast_made_at - (target_ts - ?)) AS lag_err,
253
+ ROW_NUMBER() OVER (
254
+ PARTITION BY target_ts
255
+ ORDER BY ABS(forecast_made_at - (target_ts - ?))
256
+ ) AS rk
257
+ FROM forecast_snapshots
258
+ WHERE source = ? AND metric = ?
259
+ AND forecast_made_at <= target_ts
260
+ AND target_ts <= ?
261
+ {where_extra}
262
+ )
263
+ SELECT target_ts, p10, p50, p90
264
+ FROM ranked
265
+ WHERE rk = 1
266
+ ORDER BY target_ts
267
+ """
268
+ # The window function references the lag twice β€” easier to pass it
269
+ # twice than juggle indexes in the prepared statement.
270
+ params.insert(1, lag_seconds)
271
+
272
  df = pd.read_sql_query(sql, conn, params=params)
273
  if df.empty:
274
  return df
src/weather_ui.py CHANGED
@@ -214,12 +214,15 @@ def combined_figure(
214
  row=i, col=1,
215
  )
216
  # Past Toto forecasts overlaid on actuals (historical side only).
 
 
 
217
  if past_toto and col in past_toto:
218
  pt = past_toto[col]
219
  fig.add_trace(
220
  go.Scatter(
221
  x=pt.index, y=pt["p50"].values,
222
- name="πŸ€– Toto (past forecasts)", mode="lines",
223
  line=dict(color="rgba(31,119,180,0.55)", width=1.5),
224
  showlegend=showlegend, legendgroup="toto-past",
225
  ),
 
214
  row=i, col=1,
215
  )
216
  # Past Toto forecasts overlaid on actuals (historical side only).
217
+ # Each point is Toto's prediction issued at a fixed lag before its
218
+ # target hour (default 6h-ahead) β€” so the line shows model error at
219
+ # a consistent forecast horizon, not a mix of lags.
220
  if past_toto and col in past_toto:
221
  pt = past_toto[col]
222
  fig.add_trace(
223
  go.Scatter(
224
  x=pt.index, y=pt["p50"].values,
225
+ name="πŸ€– Toto (6h-ahead, past)", mode="lines",
226
  line=dict(color="rgba(31,119,180,0.55)", width=1.5),
227
  showlegend=showlegend, legendgroup="toto-past",
228
  ),