Update README.md
Browse files
README.md
CHANGED
|
@@ -1,22 +1,31 @@
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
tags:
|
| 4 |
-
-
|
| 5 |
- weather
|
| 6 |
- lstm
|
| 7 |
- classification
|
| 8 |
- regression
|
| 9 |
-
- weather-
|
| 10 |
- multitask
|
| 11 |
- harley-ml
|
| 12 |
---
|
| 13 |
|
| 14 |
# Hweh-6M
|
| 15 |
|
|
|
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
However, this model was primarily trained to serve as a teacher for [Hweh-446k](https://huggingface.co/Harley-ml/Hweh-446k).
|
| 21 |
|
| 22 |
We would also like to give a shoutout to [**Open-Meteo**](https://open-meteo.com/) for providing a **free-to-use weather forecasting API**.
|
|
@@ -33,7 +42,7 @@ The model uses a multitask LSTM setup:
|
|
| 33 |
| `hidden_dim` | `384` |
|
| 34 |
| `num_layers` | `6` |
|
| 35 |
| `dropout` | `0.1` |
|
| 36 |
-
| `encoder_type` | `lstm`
|
| 37 |
| `num_locations` | `82` |
|
| 38 |
| `location_emb_dim` | `32` |
|
| 39 |
| `num_weather_classes` | `7` |
|
|
@@ -112,6 +121,16 @@ We trained Hweh-6M on 4.06 million rows of weather data from 82 locations for on
|
|
| 112 |
|
| 113 |
## Generation Examples
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
City=Seattle
|
| 116 |
```
|
| 117 |
{
|
|
@@ -232,7 +251,7 @@ City=Nuuk
|
|
| 232 |
"precipitation_mm": 0.9192219972610474,
|
| 233 |
"pressure_msl_hpa": 999.2293090820312,
|
| 234 |
"surface_pressure_hpa": 984.041015625,
|
| 235 |
-
"cloud_cover_pct": 100.
|
| 236 |
"wind_speed_10m_kmh": 22.042539596557617,
|
| 237 |
"rain_probability": 0.9964759945869446,
|
| 238 |
"weather_class": 5,
|
|
@@ -257,7 +276,7 @@ City=Nuuk
|
|
| 257 |
"precipitation_mm": 0.8211548924446106,
|
| 258 |
"pressure_msl_hpa": 998.41796875,
|
| 259 |
"surface_pressure_hpa": 983.3368530273438,
|
| 260 |
-
"cloud_cover_pct": 100.
|
| 261 |
"wind_speed_10m_kmh": 21.754901885986328,
|
| 262 |
"rain_probability": 0.9918462634086609,
|
| 263 |
"weather_class": 5,
|
|
@@ -282,7 +301,7 @@ City=Nuuk
|
|
| 282 |
"precipitation_mm": 0.7267112731933594,
|
| 283 |
"pressure_msl_hpa": 997.7637939453125,
|
| 284 |
"surface_pressure_hpa": 982.8118286132812,
|
| 285 |
-
"cloud_cover_pct": 100.
|
| 286 |
"wind_speed_10m_kmh": 21.31927490234375,
|
| 287 |
"rain_probability": 0.9851851463317871,
|
| 288 |
"weather_class": 5,
|
|
@@ -307,7 +326,7 @@ City=Nuuk
|
|
| 307 |
"precipitation_mm": 0.6503503322601318,
|
| 308 |
"pressure_msl_hpa": 997.3221435546875,
|
| 309 |
"surface_pressure_hpa": 982.2531127929688,
|
| 310 |
-
"cloud_cover_pct": 100.
|
| 311 |
"wind_speed_10m_kmh": 20.908214569091797,
|
| 312 |
"rain_probability": 0.9797365069389343,
|
| 313 |
"weather_class": 5,
|
|
@@ -332,7 +351,7 @@ City=Nuuk
|
|
| 332 |
"precipitation_mm": 0.5726789832115173,
|
| 333 |
"pressure_msl_hpa": 997.1259155273438,
|
| 334 |
"surface_pressure_hpa": 982.1145629882812,
|
| 335 |
-
"cloud_cover_pct":
|
| 336 |
"wind_speed_10m_kmh": 20.37297821044922,
|
| 337 |
"rain_probability": 0.9752851724624634,
|
| 338 |
"weather_class": 5,
|
|
@@ -357,7 +376,7 @@ City=Nuuk
|
|
| 357 |
"precipitation_mm": 0.4950953722000122,
|
| 358 |
"pressure_msl_hpa": 997.0792236328125,
|
| 359 |
"surface_pressure_hpa": 981.837646484375,
|
| 360 |
-
"cloud_cover_pct":
|
| 361 |
"wind_speed_10m_kmh": 19.884090423583984,
|
| 362 |
"rain_probability": 0.9711479544639587,
|
| 363 |
"weather_class": 5,
|
|
@@ -382,7 +401,7 @@ City=Nuuk
|
|
| 382 |
"precipitation_mm": 0.43851515650749207,
|
| 383 |
"pressure_msl_hpa": 997.214111328125,
|
| 384 |
"surface_pressure_hpa": 981.5133666992188,
|
| 385 |
-
"cloud_cover_pct":
|
| 386 |
"wind_speed_10m_kmh": 19.454288482666016,
|
| 387 |
"rain_probability": 0.9665488600730896,
|
| 388 |
"weather_class": 5,
|
|
@@ -407,7 +426,7 @@ City=Nuuk
|
|
| 407 |
"precipitation_mm": 0.38016656041145325,
|
| 408 |
"pressure_msl_hpa": 997.3843994140625,
|
| 409 |
"surface_pressure_hpa": 981.6067504882812,
|
| 410 |
-
"cloud_cover_pct":
|
| 411 |
"wind_speed_10m_kmh": 19.01665496826172,
|
| 412 |
"rain_probability": 0.9600462913513184,
|
| 413 |
"weather_class": 5,
|
|
@@ -432,7 +451,7 @@ City=Nuuk
|
|
| 432 |
"precipitation_mm": 0.3401757478713989,
|
| 433 |
"pressure_msl_hpa": 997.632568359375,
|
| 434 |
"surface_pressure_hpa": 981.5086059570312,
|
| 435 |
-
"cloud_cover_pct":
|
| 436 |
"wind_speed_10m_kmh": 18.69092559814453,
|
| 437 |
"rain_probability": 0.9514879584312439,
|
| 438 |
"weather_class": 5,
|
|
@@ -457,7 +476,7 @@ City=Nuuk
|
|
| 457 |
"precipitation_mm": 0.29920822381973267,
|
| 458 |
"pressure_msl_hpa": 997.88671875,
|
| 459 |
"surface_pressure_hpa": 981.637451171875,
|
| 460 |
-
"cloud_cover_pct":
|
| 461 |
"wind_speed_10m_kmh": 18.297332763671875,
|
| 462 |
"rain_probability": 0.9422094821929932,
|
| 463 |
"weather_class": 5,
|
|
@@ -482,7 +501,7 @@ City=Nuuk
|
|
| 482 |
"precipitation_mm": 0.2678143382072449,
|
| 483 |
"pressure_msl_hpa": 998.099853515625,
|
| 484 |
"surface_pressure_hpa": 981.798583984375,
|
| 485 |
-
"cloud_cover_pct": 100.
|
| 486 |
"wind_speed_10m_kmh": 17.996307373046875,
|
| 487 |
"rain_probability": 0.9368607401847839,
|
| 488 |
"weather_class": 5,
|
|
@@ -507,7 +526,7 @@ City=Nuuk
|
|
| 507 |
"precipitation_mm": 0.23492039740085602,
|
| 508 |
"pressure_msl_hpa": 998.2453002929688,
|
| 509 |
"surface_pressure_hpa": 981.8583374023438,
|
| 510 |
-
"cloud_cover_pct": 100.
|
| 511 |
"wind_speed_10m_kmh": 17.61905860900879,
|
| 512 |
"rain_probability": 0.9265281558036804,
|
| 513 |
"weather_class": 5,
|
|
@@ -533,9 +552,34 @@ City=Nuuk
|
|
| 533 |
}
|
| 534 |
```
|
| 535 |
|
| 536 |
-
## Note
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
|
| 538 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
# Inference
|
| 541 |
|
|
@@ -801,6 +845,10 @@ def cyc(x: np.ndarray, period: float) -> tuple[np.ndarray, np.ndarray]:
|
|
| 801 |
return np.sin(angle), np.cos(angle)
|
| 802 |
|
| 803 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
def request_with_backoff(session: requests.Session, url: str, params: dict[str, Any]) -> dict[str, Any]:
|
| 805 |
last_exc: Exception | None = None
|
| 806 |
for attempt in range(MAX_RETRIES):
|
|
@@ -922,6 +970,13 @@ def build_single_sequence(df: pd.DataFrame) -> np.ndarray:
|
|
| 922 |
visibility = np.nan_to_num(df["visibility"].astype(float).to_numpy(), nan=0.0)
|
| 923 |
wind = np.nan_to_num(df["wind_speed_10m"].astype(float).to_numpy(), nan=0.0)
|
| 924 |
wind_dir = np.nan_to_num(df["wind_direction_10m"].astype(float).to_numpy(), nan=0.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 925 |
wind_dir_sin, wind_dir_cos = cyc(wind_dir, 360.0)
|
| 926 |
weather_bucket = df["weather_code"].fillna(1).apply(weather_code_to_bucket).to_numpy(dtype=np.int64)
|
| 927 |
|
|
@@ -1070,6 +1125,12 @@ def predict():
|
|
| 1070 |
weather_probs = torch.softmax(weather_logits, dim=-1).squeeze(0).detach().cpu().numpy()
|
| 1071 |
weather_idx = np.argmax(weather_probs, axis=-1).astype(np.int64)
|
| 1072 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1073 |
context_start = df["time"].iloc[0]
|
| 1074 |
context_end = df["time"].iloc[-1]
|
| 1075 |
requested_at_utc = pd.Timestamp.now(tz="UTC")
|
|
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
tags:
|
| 4 |
+
- forecast
|
| 5 |
- weather
|
| 6 |
- lstm
|
| 7 |
- classification
|
| 8 |
- regression
|
| 9 |
+
- weather-forecast
|
| 10 |
- multitask
|
| 11 |
- harley-ml
|
| 12 |
---
|
| 13 |
|
| 14 |
# Hweh-6M
|
| 15 |
|
| 16 |
+
## Summary
|
| 17 |
|
| 18 |
+
Task: Weather Forecasting
|
| 19 |
+
Inputs: 72-hour weather time series
|
| 20 |
+
Outputs: 12-hour multivariate forecast
|
| 21 |
+
Params: 6M
|
| 22 |
+
Framework: PyTorch & Transformers
|
| 23 |
+
Author: Paul Courneya (Harley-ml)
|
| 24 |
|
| 25 |
+
## Description
|
| 26 |
+
|
| 27 |
+
**Hweh-6M** is a **6-million-parameter LSTM model** trained to predict the next **12 hours of weather**, including temperature, humidity, pressure, precipitation, and more, using the previous **72 hours of weather context**.
|
| 28 |
+
We recommend using this model as a backup to a weather API or for offline forecasting when internet access is unavailable.
|
| 29 |
However, this model was primarily trained to serve as a teacher for [Hweh-446k](https://huggingface.co/Harley-ml/Hweh-446k).
|
| 30 |
|
| 31 |
We would also like to give a shoutout to [**Open-Meteo**](https://open-meteo.com/) for providing a **free-to-use weather forecasting API**.
|
|
|
|
| 42 |
| `hidden_dim` | `384` |
|
| 43 |
| `num_layers` | `6` |
|
| 44 |
| `dropout` | `0.1` |
|
| 45 |
+
| `encoder_type` | `lstm` |
|
| 46 |
| `num_locations` | `82` |
|
| 47 |
| `location_emb_dim` | `32` |
|
| 48 |
| `num_weather_classes` | `7` |
|
|
|
|
| 121 |
|
| 122 |
## Generation Examples
|
| 123 |
|
| 124 |
+
| ID | Class |
|
| 125 |
+
| -- | ------------ |
|
| 126 |
+
| 0 | clear |
|
| 127 |
+
| 1 | cloudy |
|
| 128 |
+
| 2 | fog |
|
| 129 |
+
| 3 | drizzle |
|
| 130 |
+
| 4 | rain |
|
| 131 |
+
| 5 | snow |
|
| 132 |
+
| 6 | thunderstorm |
|
| 133 |
+
|
| 134 |
City=Seattle
|
| 135 |
```
|
| 136 |
{
|
|
|
|
| 251 |
"precipitation_mm": 0.9192219972610474,
|
| 252 |
"pressure_msl_hpa": 999.2293090820312,
|
| 253 |
"surface_pressure_hpa": 984.041015625,
|
| 254 |
+
"cloud_cover_pct": 100.0,
|
| 255 |
"wind_speed_10m_kmh": 22.042539596557617,
|
| 256 |
"rain_probability": 0.9964759945869446,
|
| 257 |
"weather_class": 5,
|
|
|
|
| 276 |
"precipitation_mm": 0.8211548924446106,
|
| 277 |
"pressure_msl_hpa": 998.41796875,
|
| 278 |
"surface_pressure_hpa": 983.3368530273438,
|
| 279 |
+
"cloud_cover_pct": 100.0,
|
| 280 |
"wind_speed_10m_kmh": 21.754901885986328,
|
| 281 |
"rain_probability": 0.9918462634086609,
|
| 282 |
"weather_class": 5,
|
|
|
|
| 301 |
"precipitation_mm": 0.7267112731933594,
|
| 302 |
"pressure_msl_hpa": 997.7637939453125,
|
| 303 |
"surface_pressure_hpa": 982.8118286132812,
|
| 304 |
+
"cloud_cover_pct": 100.0,
|
| 305 |
"wind_speed_10m_kmh": 21.31927490234375,
|
| 306 |
"rain_probability": 0.9851851463317871,
|
| 307 |
"weather_class": 5,
|
|
|
|
| 326 |
"precipitation_mm": 0.6503503322601318,
|
| 327 |
"pressure_msl_hpa": 997.3221435546875,
|
| 328 |
"surface_pressure_hpa": 982.2531127929688,
|
| 329 |
+
"cloud_cover_pct": 100.0,
|
| 330 |
"wind_speed_10m_kmh": 20.908214569091797,
|
| 331 |
"rain_probability": 0.9797365069389343,
|
| 332 |
"weather_class": 5,
|
|
|
|
| 351 |
"precipitation_mm": 0.5726789832115173,
|
| 352 |
"pressure_msl_hpa": 997.1259155273438,
|
| 353 |
"surface_pressure_hpa": 982.1145629882812,
|
| 354 |
+
"cloud_cover_pct": 100.0,
|
| 355 |
"wind_speed_10m_kmh": 20.37297821044922,
|
| 356 |
"rain_probability": 0.9752851724624634,
|
| 357 |
"weather_class": 5,
|
|
|
|
| 376 |
"precipitation_mm": 0.4950953722000122,
|
| 377 |
"pressure_msl_hpa": 997.0792236328125,
|
| 378 |
"surface_pressure_hpa": 981.837646484375,
|
| 379 |
+
"cloud_cover_pct": 100.0,
|
| 380 |
"wind_speed_10m_kmh": 19.884090423583984,
|
| 381 |
"rain_probability": 0.9711479544639587,
|
| 382 |
"weather_class": 5,
|
|
|
|
| 401 |
"precipitation_mm": 0.43851515650749207,
|
| 402 |
"pressure_msl_hpa": 997.214111328125,
|
| 403 |
"surface_pressure_hpa": 981.5133666992188,
|
| 404 |
+
"cloud_cover_pct": 100.0,
|
| 405 |
"wind_speed_10m_kmh": 19.454288482666016,
|
| 406 |
"rain_probability": 0.9665488600730896,
|
| 407 |
"weather_class": 5,
|
|
|
|
| 426 |
"precipitation_mm": 0.38016656041145325,
|
| 427 |
"pressure_msl_hpa": 997.3843994140625,
|
| 428 |
"surface_pressure_hpa": 981.6067504882812,
|
| 429 |
+
"cloud_cover_pct": 100.0,
|
| 430 |
"wind_speed_10m_kmh": 19.01665496826172,
|
| 431 |
"rain_probability": 0.9600462913513184,
|
| 432 |
"weather_class": 5,
|
|
|
|
| 451 |
"precipitation_mm": 0.3401757478713989,
|
| 452 |
"pressure_msl_hpa": 997.632568359375,
|
| 453 |
"surface_pressure_hpa": 981.5086059570312,
|
| 454 |
+
"cloud_cover_pct": 100.0,
|
| 455 |
"wind_speed_10m_kmh": 18.69092559814453,
|
| 456 |
"rain_probability": 0.9514879584312439,
|
| 457 |
"weather_class": 5,
|
|
|
|
| 476 |
"precipitation_mm": 0.29920822381973267,
|
| 477 |
"pressure_msl_hpa": 997.88671875,
|
| 478 |
"surface_pressure_hpa": 981.637451171875,
|
| 479 |
+
"cloud_cover_pct": 100.0,
|
| 480 |
"wind_speed_10m_kmh": 18.297332763671875,
|
| 481 |
"rain_probability": 0.9422094821929932,
|
| 482 |
"weather_class": 5,
|
|
|
|
| 501 |
"precipitation_mm": 0.2678143382072449,
|
| 502 |
"pressure_msl_hpa": 998.099853515625,
|
| 503 |
"surface_pressure_hpa": 981.798583984375,
|
| 504 |
+
"cloud_cover_pct": 100.0,
|
| 505 |
"wind_speed_10m_kmh": 17.996307373046875,
|
| 506 |
"rain_probability": 0.9368607401847839,
|
| 507 |
"weather_class": 5,
|
|
|
|
| 526 |
"precipitation_mm": 0.23492039740085602,
|
| 527 |
"pressure_msl_hpa": 998.2453002929688,
|
| 528 |
"surface_pressure_hpa": 981.8583374023438,
|
| 529 |
+
"cloud_cover_pct": 100.0,
|
| 530 |
"wind_speed_10m_kmh": 17.61905860900879,
|
| 531 |
"rain_probability": 0.9265281558036804,
|
| 532 |
"weather_class": 5,
|
|
|
|
| 552 |
}
|
| 553 |
```
|
| 554 |
|
| 555 |
+
### Note
|
| 556 |
+
In observed outputs, the model's temperature predictions are often within **0.3°C** of the actual value.
|
| 557 |
+
|
| 558 |
+
Furthermore, you can pass locations that are not present in the model’s location embedding table. We’ve observed that the model can generalize to out-of-distribution (OOD) cities, with an estimated accuracy drop of only about 2–5%. However, this figure is an estimate and does not reflect a true ground-truth measurement.
|
| 559 |
+
|
| 560 |
+
## Use Cases
|
| 561 |
+
|
| 562 |
+
Intended for:
|
| 563 |
+
|
| 564 |
+
1. Backup to API
|
| 565 |
+
2. Offline forecasting, provided you already have the previous 72 hours of weather data
|
| 566 |
+
3. Research
|
| 567 |
+
4. Or more simply, for fun
|
| 568 |
+
|
| 569 |
+
Not intended for:
|
| 570 |
|
| 571 |
+
1. Safety-critical forecasting (aviation, emergency response)
|
| 572 |
+
2. Replacing professional meteorological services or weather APIs
|
| 573 |
+
|
| 574 |
+
## Limitations
|
| 575 |
+
|
| 576 |
+
1. The model is not perfectly accurate and will produce approximate forecasts rather than exact real-world weather conditions.
|
| 577 |
+
2. Prediction accuracy decreases as the forecast horizon increases, up to the 12-hour maximum.
|
| 578 |
+
3. Performance may degrade on unseen or underrepresented geographic regions and climate types.
|
| 579 |
+
4. The model does not enforce physical laws of atmospheric dynamics and may produce physically inconsistent outputs.
|
| 580 |
+
5. Forecast quality is sensitive to the quality and completeness of input weather data.
|
| 581 |
+
6. Rare or extreme weather events are underrepresented in training data and may be poorly predicted.
|
| 582 |
+
7. Weather class outputs are simplified and do not capture fine-grained meteorological distinctions.
|
| 583 |
|
| 584 |
# Inference
|
| 585 |
|
|
|
|
| 845 |
return np.sin(angle), np.cos(angle)
|
| 846 |
|
| 847 |
|
| 848 |
+
def clamp_array(x: np.ndarray, lo: float | None = None, hi: float | None = None) -> np.ndarray:
|
| 849 |
+
return np.clip(x, lo, hi)
|
| 850 |
+
|
| 851 |
+
|
| 852 |
def request_with_backoff(session: requests.Session, url: str, params: dict[str, Any]) -> dict[str, Any]:
|
| 853 |
last_exc: Exception | None = None
|
| 854 |
for attempt in range(MAX_RETRIES):
|
|
|
|
| 970 |
visibility = np.nan_to_num(df["visibility"].astype(float).to_numpy(), nan=0.0)
|
| 971 |
wind = np.nan_to_num(df["wind_speed_10m"].astype(float).to_numpy(), nan=0.0)
|
| 972 |
wind_dir = np.nan_to_num(df["wind_direction_10m"].astype(float).to_numpy(), nan=0.0)
|
| 973 |
+
|
| 974 |
+
humidity = clamp_array(humidity, 0.0, 100.0)
|
| 975 |
+
cloud_cover = clamp_array(cloud_cover, 0.0, 100.0)
|
| 976 |
+
precip = clamp_array(precip, 0.0, None)
|
| 977 |
+
wind = clamp_array(wind, 0.0, None)
|
| 978 |
+
visibility = clamp_array(visibility, 0.0, None)
|
| 979 |
+
|
| 980 |
wind_dir_sin, wind_dir_cos = cyc(wind_dir, 360.0)
|
| 981 |
weather_bucket = df["weather_code"].fillna(1).apply(weather_code_to_bucket).to_numpy(dtype=np.int64)
|
| 982 |
|
|
|
|
| 1125 |
weather_probs = torch.softmax(weather_logits, dim=-1).squeeze(0).detach().cpu().numpy()
|
| 1126 |
weather_idx = np.argmax(weather_probs, axis=-1).astype(np.int64)
|
| 1127 |
|
| 1128 |
+
humidity_pred = np.clip(humidity_pred, 0.0, 100.0)
|
| 1129 |
+
cloud_cover_pred = np.clip(cloud_cover_pred, 0.0, 100.0)
|
| 1130 |
+
precip_pred = np.clip(precip_pred, 0.0, None)
|
| 1131 |
+
wind_pred = np.clip(wind_pred, 0.0, None)
|
| 1132 |
+
rain_prob = np.clip(rain_prob, 0.0, 1.0)
|
| 1133 |
+
|
| 1134 |
context_start = df["time"].iloc[0]
|
| 1135 |
context_end = df["time"].iloc[-1]
|
| 1136 |
requested_at_utc = pd.Timestamp.now(tz="UTC")
|