bitsofchris Claude Opus 4.7 (1M context) committed on
Commit
ba5aabf
·
1 Parent(s): 40e92a0

Add residual chart: Toto vs NWS error for past 48h temp predictions

Browse files

We log every forecast snapshot, so we already have everything needed to
visualize accuracy over time — not just an aggregate MAE. New chart
under the scoreboard headline:

- For each hourly target in the last 48 h, pick each model's prediction
whose forecast_made_at is closest to (target − 3 h). Fixed lag = like-
for-like comparison.
- Plot signed residual (prediction − actual). Zero line = perfect.

Two lines (Toto blue, NWS red dashed) so the viewer can read 'Toto was
+5°F off at 3 AM, NWS was +1°F' at a glance.

- forecast_log.residuals(metric, window_hours, lag_hours)
- weather_ui.residual_figure(df)
- app.py wires it under the scoreboard

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (3) hide show
  1. app.py +8 -2
  2. src/forecast_log.py +55 -0
  3. src/weather_ui.py +35 -0
app.py CHANGED
@@ -23,6 +23,7 @@ from src.weather_ui import (
23
  aligned_comparison_markdown,
24
  combined_figure,
25
  hero_markdown,
 
26
  )
27
 
28
  AUTO_REFRESH_SECONDS = 15 * 60 # background tick + archive sync
@@ -280,8 +281,12 @@ def refresh():
280
  comparison_md = ""
281
  scoreboard = render_scoreboard(log_conn)
282
 
 
 
 
 
283
  persist.push_db_async()
284
- return hero, comparison_md, week["fig"], scoreboard
285
 
286
 
287
  # --- scoreboard ----------------------------------------------------------
@@ -418,6 +423,7 @@ with gr.Blocks(title="Toto Weather Forecast", theme=gr.themes.Soft()) as demo:
418
  )
419
 
420
  scoreboard_md = gr.Markdown()
 
421
 
422
  gr.Markdown(f"### 📅 {VIEW_WEEK['label']}")
423
  week_plot = gr.Plot(label="Weekly")
@@ -464,7 +470,7 @@ with gr.Blocks(title="Toto Weather Forecast", theme=gr.themes.Soft()) as demo:
464
  "Full spec: [`docs/toto-inference.md`](https://huggingface.co/spaces/bitsofchris/time-series-ai-weather-forecast/blob/main/docs/toto-inference.md)."
465
  )
466
 
467
- outputs = [hero_md, comparison_md, week_plot, scoreboard_md]
468
  demo.load(refresh, outputs=outputs)
469
 
470
 
 
23
  aligned_comparison_markdown,
24
  combined_figure,
25
  hero_markdown,
26
+ residual_figure,
27
  )
28
 
29
  AUTO_REFRESH_SECONDS = 15 * 60 # background tick + archive sync
 
281
  comparison_md = ""
282
  scoreboard = render_scoreboard(log_conn)
283
 
284
+ # Residual chart — past 48h of 3h-ahead temperature predictions vs actual.
285
+ resid_df = forecast_log.residuals(log_conn, metric="temp_f", window_hours=48, lag_hours=3.0)
286
+ resid_fig = residual_figure(resid_df) if not resid_df.empty else None
287
+
288
  persist.push_db_async()
289
+ return hero, comparison_md, week["fig"], scoreboard, resid_fig
290
 
291
 
292
  # --- scoreboard ----------------------------------------------------------
 
423
  )
424
 
425
  scoreboard_md = gr.Markdown()
426
+ residual_plot = gr.Plot(label="Forecast residual")
427
 
428
  gr.Markdown(f"### 📅 {VIEW_WEEK['label']}")
429
  week_plot = gr.Plot(label="Weekly")
 
470
  "Full spec: [`docs/toto-inference.md`](https://huggingface.co/spaces/bitsofchris/time-series-ai-weather-forecast/blob/main/docs/toto-inference.md)."
471
  )
472
 
473
+ outputs = [hero_md, comparison_md, week_plot, scoreboard_md, residual_plot]
474
  demo.load(refresh, outputs=outputs)
475
 
476
 
src/forecast_log.py CHANGED
@@ -277,6 +277,61 @@ def historical_predictions(
277
  return df
278
 
279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  def scoreboard_summary(
281
  conn: sqlite3.Connection,
282
  metric: str = "temp_f",
 
277
  return df
278
 
279
 
280
def residuals(
    conn: sqlite3.Connection,
    metric: str,
    window_hours: int = 48,
    lag_hours: float = 3.0,
) -> pd.DataFrame:
    """Return per-target predictions, actuals and signed residuals.

    For every ``actuals`` row of ``metric`` whose ``target_ts`` falls in the
    last ``window_hours``, pick for each source ('toto', 'nws') the
    ``forecast_snapshots`` row whose ``forecast_made_at`` is closest to
    ``target_ts - lag_hours`` (and not later than ``target_ts``). Using the
    same lag for both sources keeps the comparison like-for-like.

    Args:
        conn: Open SQLite connection holding the ``forecast_snapshots`` and
            ``actuals`` tables.
        metric: Metric name to filter both tables on (e.g. ``"temp_f"``).
        window_hours: How far back from "now" to look for targets.
        lag_hours: Desired lead time between forecast issue and target.

    Returns:
        DataFrame indexed by UTC ``target_ts`` with float columns
        ``toto_p50``, ``nws_p50``, ``actual``, ``toto_residual`` and
        ``nws_residual`` (residual = prediction - actual). A source with no
        snapshot for a target yields NaN. The frame is empty, but has the
        same columns, when no actuals exist in the window.
    """
    import time as _time  # noqa: PLC0415

    value_cols = ["toto_p50", "nws_p50", "actual"]
    residual_cols = ["toto_residual", "nws_residual"]

    now = int(_time.time())
    cutoff = now - window_hours * 3600
    lag_seconds = int(lag_hours * 3600)
    # The secondary sort key makes the pick deterministic when two snapshots
    # are equally far from the desired issue time: the earlier one wins, so a
    # tie never gives a model a shorter lead time than requested.
    sql = """
        WITH ranked AS (
            SELECT source, target_ts, p50,
                   ROW_NUMBER() OVER (
                       PARTITION BY source, target_ts
                       ORDER BY ABS(forecast_made_at - (target_ts - ?)),
                                forecast_made_at
                   ) AS rk
            FROM forecast_snapshots
            WHERE metric = ?
              AND forecast_made_at <= target_ts
              AND target_ts BETWEEN ? AND ?
        ),
        picked AS (
            SELECT source, target_ts, p50 FROM ranked WHERE rk = 1
        )
        SELECT a.target_ts,
               MAX(CASE WHEN p.source='toto' THEN p.p50 END) AS toto_p50,
               MAX(CASE WHEN p.source='nws'  THEN p.p50 END) AS nws_p50,
               a.value AS actual
        FROM actuals a
        LEFT JOIN picked p USING (target_ts)
        WHERE a.metric = ?
          AND a.target_ts BETWEEN ? AND ?
        GROUP BY a.target_ts
        ORDER BY a.target_ts
    """
    params = [lag_seconds, metric, cutoff, now, metric, cutoff, now]
    df = pd.read_sql_query(sql, conn, params=params)
    if df.empty:
        # Same schema as the populated result so callers can rely on the
        # residual columns existing even when there is nothing to plot.
        return pd.DataFrame(
            columns=value_cols + residual_cols,
            index=pd.DatetimeIndex([], tz="UTC", name="target_ts"),
            dtype="float64",
        )
    df.index = pd.to_datetime(df["target_ts"], unit="s", utc=True)
    df = df.drop(columns=["target_ts"])
    # A source with no snapshots in the window comes back as an all-NULL
    # column, which may be loaded as object dtype; force float so the
    # residuals are numeric NaN rather than object values.
    df[value_cols] = df[value_cols].apply(pd.to_numeric, errors="coerce")
    df["toto_residual"] = df["toto_p50"] - df["actual"]
    df["nws_residual"] = df["nws_p50"] - df["actual"]
    return df
333
+
334
+
335
  def scoreboard_summary(
336
  conn: sqlite3.Connection,
337
  metric: str = "temp_f",
src/weather_ui.py CHANGED
@@ -181,6 +181,41 @@ def emoji_strip_markdown(nws_df: pd.DataFrame, tz: str, n: int = 12) -> str:
181
  return f"| {hours} |\n{sep}\n| {glyphs} |\n| {temps} |"
182
 
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  def combined_figure(
185
  history: pd.DataFrame,
186
  totos: dict[str, TotoForecast],
 
181
  return f"| {hours} |\n{sep}\n| {glyphs} |\n| {temps} |"
182
 
183
 
184
def residual_figure(
    df: pd.DataFrame,
    title: str = "Forecast residual — 3h-ahead prediction minus Ecowitt actual (°F)",
) -> go.Figure:
    """Build a line chart of signed forecast errors for Toto and NWS.

    Reads the ``toto_residual`` and ``nws_residual`` columns of ``df`` and
    plots each against ``df.index``. A horizontal line at y=0 marks a
    perfect prediction.
    """
    # (column, legend label, line styling) for each plotted series, in draw order.
    series_specs = (
        ("toto_residual", "🤖 Toto residual", {"color": "#1f77b4", "width": 2}),
        (
            "nws_residual",
            "🌎 NWS residual",
            {"color": "#d62728", "width": 2, "dash": "dash"},
        ),
    )

    fig = go.Figure()
    fig.add_hline(y=0, line={"color": "#888", "width": 1})
    for column, label, line_style in series_specs:
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            name=label,
            mode="lines+markers",
            line=line_style,
            marker={"size": 5},
        )
        fig.add_trace(trace)

    legend_layout = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.04,
        "xanchor": "right",
        "x": 1,
    }
    fig.update_layout(
        title=title,
        height=320,
        hovermode="x unified",
        yaxis_title="°F (signed error)",
        margin={"l": 50, "r": 20, "t": 50, "b": 50},
        legend=legend_layout,
    )
    fig.update_xaxes(tickformat="%b %-d\n%-I %p", showgrid=True)
    return fig
217
+
218
+
219
  def combined_figure(
220
  history: pd.DataFrame,
221
  totos: dict[str, TotoForecast],