Commit ·
ba5aabf
1
Parent(s): 40e92a0
Add residual chart: Toto vs NWS error for past 48h temp predictions
Browse files
We log every forecast snapshot, so we already have everything needed to
visualize accuracy over time — not just an aggregate MAE. New chart
under the scoreboard headline:
- For each hourly target in the last 48 h, pick each model's prediction
whose forecast_made_at is closest to (target − 3 h). Fixed lag = like-
for-like comparison.
- Plot signed residual (prediction − actual). Zero line = perfect.
Two lines (Toto blue, NWS red dashed) so the viewer can read 'Toto was
+5°F off at 3 AM, NWS was +1°F' at a glance.
- forecast_log.residuals(metric, window_hours, lag_hours)
- weather_ui.residual_figure(df)
- app.py wires it under the scoreboard
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- app.py +8 -2
- src/forecast_log.py +55 -0
- src/weather_ui.py +35 -0
app.py
CHANGED
|
@@ -23,6 +23,7 @@ from src.weather_ui import (
|
|
| 23 |
aligned_comparison_markdown,
|
| 24 |
combined_figure,
|
| 25 |
hero_markdown,
|
|
|
|
| 26 |
)
|
| 27 |
|
| 28 |
AUTO_REFRESH_SECONDS = 15 * 60 # background tick + archive sync
|
|
@@ -280,8 +281,12 @@ def refresh():
|
|
| 280 |
comparison_md = ""
|
| 281 |
scoreboard = render_scoreboard(log_conn)
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
persist.push_db_async()
|
| 284 |
-
return hero, comparison_md, week["fig"], scoreboard
|
| 285 |
|
| 286 |
|
| 287 |
# --- scoreboard ----------------------------------------------------------
|
|
@@ -418,6 +423,7 @@ with gr.Blocks(title="Toto Weather Forecast", theme=gr.themes.Soft()) as demo:
|
|
| 418 |
)
|
| 419 |
|
| 420 |
scoreboard_md = gr.Markdown()
|
|
|
|
| 421 |
|
| 422 |
gr.Markdown(f"### 📅 {VIEW_WEEK['label']}")
|
| 423 |
week_plot = gr.Plot(label="Weekly")
|
|
@@ -464,7 +470,7 @@ with gr.Blocks(title="Toto Weather Forecast", theme=gr.themes.Soft()) as demo:
|
|
| 464 |
"Full spec: [`docs/toto-inference.md`](https://huggingface.co/spaces/bitsofchris/time-series-ai-weather-forecast/blob/main/docs/toto-inference.md)."
|
| 465 |
)
|
| 466 |
|
| 467 |
-
outputs = [hero_md, comparison_md, week_plot, scoreboard_md]
|
| 468 |
demo.load(refresh, outputs=outputs)
|
| 469 |
|
| 470 |
|
|
|
|
| 23 |
aligned_comparison_markdown,
|
| 24 |
combined_figure,
|
| 25 |
hero_markdown,
|
| 26 |
+
residual_figure,
|
| 27 |
)
|
| 28 |
|
| 29 |
AUTO_REFRESH_SECONDS = 15 * 60 # background tick + archive sync
|
|
|
|
| 281 |
comparison_md = ""
|
| 282 |
scoreboard = render_scoreboard(log_conn)
|
| 283 |
|
| 284 |
+
# Residual chart — past 48h of 3h-ahead temperature predictions vs actual.
|
| 285 |
+
resid_df = forecast_log.residuals(log_conn, metric="temp_f", window_hours=48, lag_hours=3.0)
|
| 286 |
+
resid_fig = residual_figure(resid_df) if not resid_df.empty else None
|
| 287 |
+
|
| 288 |
persist.push_db_async()
|
| 289 |
+
return hero, comparison_md, week["fig"], scoreboard, resid_fig
|
| 290 |
|
| 291 |
|
| 292 |
# --- scoreboard ----------------------------------------------------------
|
|
|
|
| 423 |
)
|
| 424 |
|
| 425 |
scoreboard_md = gr.Markdown()
|
| 426 |
+
residual_plot = gr.Plot(label="Forecast residual")
|
| 427 |
|
| 428 |
gr.Markdown(f"### 📅 {VIEW_WEEK['label']}")
|
| 429 |
week_plot = gr.Plot(label="Weekly")
|
|
|
|
| 470 |
"Full spec: [`docs/toto-inference.md`](https://huggingface.co/spaces/bitsofchris/time-series-ai-weather-forecast/blob/main/docs/toto-inference.md)."
|
| 471 |
)
|
| 472 |
|
| 473 |
+
outputs = [hero_md, comparison_md, week_plot, scoreboard_md, residual_plot]
|
| 474 |
demo.load(refresh, outputs=outputs)
|
| 475 |
|
| 476 |
|
src/forecast_log.py
CHANGED
|
@@ -277,6 +277,61 @@ def historical_predictions(
|
|
| 277 |
return df
|
| 278 |
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
def scoreboard_summary(
|
| 281 |
conn: sqlite3.Connection,
|
| 282 |
metric: str = "temp_f",
|
|
|
|
| 277 |
return df
|
| 278 |
|
| 279 |
|
| 280 |
+
def residuals(
    conn: sqlite3.Connection,
    metric: str,
    window_hours: int = 48,
    lag_hours: float = 3.0,
) -> pd.DataFrame:
    """Return per-target predictions, actuals and signed residuals.

    For every row of ``actuals`` for ``metric`` whose ``target_ts`` lies in
    the last ``window_hours``, pick, per source, the single
    ``forecast_snapshots`` row whose ``forecast_made_at`` is closest to
    ``target_ts - lag_hours`` (and not later than ``target_ts``). Selecting
    both models at the same lag keeps the two residual series like-for-like:
    'how far off was each model's N-hours-ahead prediction for this hour?'.

    Args:
        conn: Open SQLite connection holding the ``forecast_snapshots`` and
            ``actuals`` tables.
        metric: Metric name to filter both tables on (e.g. ``"temp_f"``).
        window_hours: How far back from "now" to look, in hours.
        lag_hours: Desired forecast lead time, in hours.

    Returns:
        A DataFrame indexed by UTC ``target_ts`` with float columns
        ``toto_p50``, ``nws_p50``, ``actual``, ``toto_residual`` and
        ``nws_residual`` (residual = prediction − actual). A source without
        a usable prediction for a target yields NaN there. If no actuals
        fall in the window, the empty query result is returned unchanged.
    """
    import time as _time  # noqa: PLC0415

    now = int(_time.time())
    cutoff = now - window_hours * 3600
    lag_seconds = int(lag_hours * 3600)
    # The secondary sort key (forecast_made_at DESC) makes the pick
    # deterministic when two snapshots are equally far from the desired
    # issue time; the more recent one wins.
    sql = """
        WITH ranked AS (
            SELECT source, target_ts, p50,
                   ROW_NUMBER() OVER (
                       PARTITION BY source, target_ts
                       ORDER BY ABS(forecast_made_at - (target_ts - ?)),
                                forecast_made_at DESC
                   ) AS rk
            FROM forecast_snapshots
            WHERE metric = ?
              AND forecast_made_at <= target_ts
              AND target_ts BETWEEN ? AND ?
        ),
        picked AS (
            SELECT source, target_ts, p50 FROM ranked WHERE rk = 1
        )
        SELECT a.target_ts,
               MAX(CASE WHEN p.source='toto' THEN p.p50 END) AS toto_p50,
               MAX(CASE WHEN p.source='nws' THEN p.p50 END) AS nws_p50,
               a.value AS actual
        FROM actuals a
        LEFT JOIN picked p USING (target_ts)
        WHERE a.metric = ?
          AND a.target_ts BETWEEN ? AND ?
        GROUP BY a.target_ts
        ORDER BY a.target_ts
    """
    params = [lag_seconds, metric, cutoff, now, metric, cutoff, now]
    df = pd.read_sql_query(sql, conn, params=params)
    if df.empty:
        return df

    df.index = pd.to_datetime(df["target_ts"], unit="s", utc=True)
    df = df.drop(columns=["target_ts"])

    # A column that is NULL for every row (e.g. one model logged nothing in
    # the window) is loaded as object dtype holding None, and None - float
    # raises TypeError. Coerce to float so missing predictions become NaN.
    for col in ("toto_p50", "nws_p50", "actual"):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    df["toto_residual"] = df["toto_p50"] - df["actual"]
    df["nws_residual"] = df["nws_p50"] - df["actual"]
    return df
|
| 333 |
+
|
| 334 |
+
|
| 335 |
def scoreboard_summary(
|
| 336 |
conn: sqlite3.Connection,
|
| 337 |
metric: str = "temp_f",
|
src/weather_ui.py
CHANGED
|
@@ -181,6 +181,41 @@ def emoji_strip_markdown(nws_df: pd.DataFrame, tz: str, n: int = 12) -> str:
|
|
| 181 |
return f"| {hours} |\n{sep}\n| {glyphs} |\n| {temps} |"
|
| 182 |
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
def combined_figure(
|
| 185 |
history: pd.DataFrame,
|
| 186 |
totos: dict[str, TotoForecast],
|
|
|
|
| 181 |
return f"| {hours} |\n{sep}\n| {glyphs} |\n| {temps} |"
|
| 182 |
|
| 183 |
|
| 184 |
+
def residual_figure(
    df: pd.DataFrame,
    title: str = "Forecast residual — 3h-ahead prediction minus Ecowitt actual (°F)",
) -> go.Figure:
    """Build a line chart of signed forecast residuals for Toto and NWS.

    Reads the ``toto_residual`` and ``nws_residual`` columns of ``df`` and
    plots each against ``df.index``. A horizontal line at y=0 marks a
    perfect prediction.

    Args:
        df: Frame with ``toto_residual`` and ``nws_residual`` columns; its
            index supplies the x values.
        title: Figure title.

    Returns:
        The assembled Plotly figure.
    """
    # (source column, legend label, line styling) for each model, in draw order.
    series_specs = (
        ("toto_residual", "🤖 Toto residual", {"color": "#1f77b4", "width": 2}),
        (
            "nws_residual",
            "🌎 NWS residual",
            {"color": "#d62728", "width": 2, "dash": "dash"},
        ),
    )

    fig = go.Figure()
    # Zero-error reference line.
    fig.add_hline(y=0, line={"color": "#888", "width": 1})

    for column, label, line_style in series_specs:
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            name=label,
            mode="lines+markers",
            line=line_style,
            marker={"size": 5},
        )
        fig.add_trace(trace)

    layout_options = {
        "title": title,
        "height": 320,
        "hovermode": "x unified",
        "yaxis_title": "°F (signed error)",
        "margin": {"l": 50, "r": 20, "t": 50, "b": 50},
        "legend": {
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.04,
            "xanchor": "right",
            "x": 1,
        },
    }
    fig.update_layout(**layout_options)
    fig.update_xaxes(tickformat="%b %-d\n%-I %p", showgrid=True)
    return fig
|
| 217 |
+
|
| 218 |
+
|
| 219 |
def combined_figure(
|
| 220 |
history: pd.DataFrame,
|
| 221 |
totos: dict[str, TotoForecast],
|