Spaces:

bitsofchris
/

time-series-ai-weather-forecast

Running

bitsofchris Claude Opus 4.7 (1M context) committed 4 days ago

Commit

06ff1aa

1 Parent(s): 6519ca2

Past-Toto overlay: fixed 6h-ahead horizon instead of mixed lags

The sawtooth on the past-forecast overlay came from the
'latest-pre-target' rule: each past hour was scored against whatever
forecast happened to be most-recent before it, so target_ts=13:00 might
use a 15-min-ahead prediction while target_ts=14:00 used a 45-min-ahead
prediction. Different lags → different prediction quality → sawtooth.

historical_predictions now defaults to picking the forecast whose
forecast_made_at is closest to (target_ts − 6 h). Constant 6-hour
lookback gives a consistent forecast horizon and a smooth overlay.
Legacy 'latest-pre-target' mode still available via lag_hours=None.

Chart legend updated to '🤖 Toto (6h-ahead, past)' for clarity.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

src/forecast_log.py +72 -25
src/weather_ui.py +4 -1

src/forecast_log.py CHANGED Viewed

@@ -191,37 +191,84 @@ def historical_predictions(
     metric: str,
     since_unix: int | None = None,
     until_unix: int | None = None,
 ) -> pd.DataFrame:
-    """For each target_ts in [since, until], return the most-recent forecast
-    issued *before* that hour.
-    `until_unix` defaults to now — pass it to cap the overlay so it doesn't
-    bleed into the future portion of the chart.
     """
     import time as _time  # noqa: PLC0415
     if until_unix is None:
         until_unix = int(_time.time())
-    params: list = [source, metric, until_unix]
-    where_extra = ""
-    if since_unix is not None:
-        where_extra = " AND target_ts >= ?"
-        params.append(since_unix)
-    sql = f"""
-    WITH latest AS (
-        SELECT source, target_ts, metric,
-               MAX(forecast_made_at) AS forecast_made_at
-        FROM forecast_snapshots
-        WHERE source = ? AND metric = ?
-          AND forecast_made_at <= target_ts
-          AND target_ts <= ?
-          {where_extra}
-        GROUP BY source, target_ts, metric
-    )
-    SELECT f.target_ts, f.p10, f.p50, f.p90
-    FROM forecast_snapshots f
-    JOIN latest l USING (source, target_ts, metric, forecast_made_at)
-    ORDER BY f.target_ts
-    """
     df = pd.read_sql_query(sql, conn, params=params)
     if df.empty:
         return df

     metric: str,
     since_unix: int | None = None,
     until_unix: int | None = None,
+    lag_hours: float | None = 6.0,
 ) -> pd.DataFrame:
+    """For each target_ts in [since, until], return one historical forecast row.
+    Two modes:
+    - `lag_hours=None`: legacy 'latest-pre-target' behavior — for each
+      target hour, return the most-recent forecast issued before it. This
+      mixes different forecast lags depending on autorefresh timing, which
+      visually produces a sawtooth on the overlay.
+    - `lag_hours=N` (default 6.0): for each target hour, return the
+      forecast whose `forecast_made_at` is closest to `target_ts − N
+      hours`. Constant lag = consistent prediction difficulty = smooth
+      line on the chart. Semantics: 'what did Toto predict for this hour,
+      N hours before it happened?'.
+    `until_unix` defaults to now and caps the overlay so it never crosses
+    into the future side of the chart.
     """
     import time as _time  # noqa: PLC0415
     if until_unix is None:
         until_unix = int(_time.time())
+    if lag_hours is None:
+        # Original 'latest before target' query.
+        params: list = [source, metric, until_unix]
+        where_extra = ""
+        if since_unix is not None:
+            where_extra = " AND target_ts >= ?"
+            params.append(since_unix)
+        sql = f"""
+        WITH latest AS (
+            SELECT source, target_ts, metric,
+                   MAX(forecast_made_at) AS forecast_made_at
+            FROM forecast_snapshots
+            WHERE source = ? AND metric = ?
+              AND forecast_made_at <= target_ts
+              AND target_ts <= ?
+              {where_extra}
+            GROUP BY source, target_ts, metric
+        )
+        SELECT f.target_ts, f.p10, f.p50, f.p90
+        FROM forecast_snapshots f
+        JOIN latest l USING (source, target_ts, metric, forecast_made_at)
+        ORDER BY f.target_ts
+        """
+    else:
+        # Fixed-horizon pick: forecast_made_at closest to target_ts − lag.
+        lag_seconds = int(lag_hours * 3600)
+        params = [lag_seconds, source, metric, until_unix]
+        where_extra = ""
+        if since_unix is not None:
+            where_extra = " AND target_ts >= ?"
+            params.append(since_unix)
+        sql = f"""
+        WITH ranked AS (
+            SELECT target_ts, forecast_made_at, p10, p50, p90,
+                   ABS(forecast_made_at - (target_ts - ?)) AS lag_err,
+                   ROW_NUMBER() OVER (
+                       PARTITION BY target_ts
+                       ORDER BY ABS(forecast_made_at - (target_ts - ?))
+                   ) AS rk
+            FROM forecast_snapshots
+            WHERE source = ? AND metric = ?
+              AND forecast_made_at <= target_ts
+              AND target_ts <= ?
+              {where_extra}
+        )
+        SELECT target_ts, p10, p50, p90
+        FROM ranked
+        WHERE rk = 1
+        ORDER BY target_ts
+        """
+        # The window function references the lag twice — easier to pass it
+        # twice than juggle indexes in the prepared statement.
+        params.insert(1, lag_seconds)
     df = pd.read_sql_query(sql, conn, params=params)
     if df.empty:
         return df

src/weather_ui.py CHANGED Viewed

@@ -214,12 +214,15 @@ def combined_figure(
             row=i, col=1,
         )
         # Past Toto forecasts overlaid on actuals (historical side only).
         if past_toto and col in past_toto:
             pt = past_toto[col]
             fig.add_trace(
                 go.Scatter(
                     x=pt.index, y=pt["p50"].values,
-                    name="🤖 Toto (past forecasts)", mode="lines",
                     line=dict(color="rgba(31,119,180,0.55)", width=1.5),
                     showlegend=showlegend, legendgroup="toto-past",
                 ),

             row=i, col=1,
         )
         # Past Toto forecasts overlaid on actuals (historical side only).
+        # Each point is Toto's prediction issued at a fixed lag before its
+        # target hour (default 6h-ahead) — so the line shows model error at
+        # a consistent forecast horizon, not a mix of lags.
         if past_toto and col in past_toto:
             pt = past_toto[col]
             fig.add_trace(
                 go.Scatter(
                     x=pt.index, y=pt["p50"].values,
+                    name="🤖 Toto (6h-ahead, past)", mode="lines",
                     line=dict(color="rgba(31,119,180,0.55)", width=1.5),
                     showlegend=showlegend, legendgroup="toto-past",
                 ),