| import openmeteo_requests | |
| import pandas as pd | |
| import requests | |
| import requests_cache | |
| from retry_requests import retry | |
# Hourly variables requested from the archive API. The response exposes them
# by position in this exact order, so the same tuple also names the output
# columns and the two can never drift apart.
_HOURLY_VARIABLES = (
    "temperature_2m",
    "relative_humidity_2m",
    "precipitation",
    "et0_fao_evapotranspiration",
    "wind_speed_10m",
    "soil_temperature_0_to_7cm",
    "soil_moisture_0_to_7cm",
    "direct_radiation",
)


def _hourly_frame(response):
    """Build the hourly weather DataFrame for one location's API response."""
    hourly = response.Hourly()
    data = {
        "datetime": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        ),
    }
    # Variables are addressed by their position in the request.
    for position, name in enumerate(_HOURLY_VARIABLES):
        data[name] = hourly.Variables(position).ValuesAsNumpy()
    return pd.DataFrame(data)


def get_historical_weather_data(
    location_ids_df, start_date="2023-01-01", end_date="2024-12-31"
):
    """
    Fetch historical weather data for multiple locations (sensor locations).

    Each distinct (x, y) coordinate pair is requested once from the
    Open-Meteo archive API; the resulting hourly series is then repeated for
    every sensor registered at that location.

    Parameters:
    -----------
    location_ids_df : pd.DataFrame
        DataFrame with columns: ['datastream_name', 'datastream_id', 'x', 'y']
        where x = latitude, y = longitude. Rows with a missing x or y are
        ignored.
    start_date : str
        Start date in 'YYYY-MM-DD' format
    end_date : str
        End date in 'YYYY-MM-DD' format

    Returns:
    --------
    pd.DataFrame
        One row per (sensor, hour) with the columns 'datetime', the hourly
        weather variables and 'datastream_name'. 'datastream_id' and the
        coordinates are not part of the output. An empty DataFrame with the
        same columns is returned when no sensor has a usable location.

    Raises:
    -------
    RuntimeError
        If the API returns a different number of responses than locations
        requested, since responses are matched to locations by position.
    """
    # avoid failing due to sensors with no location
    located = location_ids_df.dropna(subset=["x", "y"])

    # mapping: (lat, lon) -> names of the sensors at that location; dict
    # insertion order doubles as the order of locations sent to the API
    sensors_by_location = {}
    for lat, lon, name in zip(
        located["x"], located["y"], located["datastream_name"]
    ):
        sensors_by_location.setdefault((lat, lon), []).append(name)

    if not sensors_by_location:
        # Nothing to request: skip the API call (which would be sent empty
        # coordinate lists) and hand back a correctly shaped empty result.
        return pd.DataFrame(
            columns=["datetime", *_HOURLY_VARIABLES, "datastream_name"]
        )

    # setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    # API call with comma-separated coordinate lists (x is latitude,
    # y is longitude)
    locations = list(sensors_by_location)
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": ",".join(f"{lat}" for lat, _ in locations),
        "longitude": ",".join(f"{lon}" for _, lon in locations),
        "start_date": start_date,
        "end_date": end_date,
        "hourly": list(_HOURLY_VARIABLES),
    }
    responses = openmeteo.weather_api(url, params=params)

    # Responses are paired with locations by position, so a count mismatch
    # would silently drop sensors or attach weather to the wrong ones.
    if len(responses) != len(locations):
        raise RuntimeError(
            f"Open-Meteo returned {len(responses)} responses "
            f"for {len(locations)} requested locations"
        )

    # print info from first response (keep previous prints)
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    # Explode data by sensors: each sensor at a location gets its own copy
    # of that location's hourly rows
    per_sensor_frames = []
    for response, sensor_names in zip(responses, sensors_by_location.values()):
        location_weather = _hourly_frame(response)
        per_sensor_frames.extend(
            location_weather.assign(datastream_name=name)
            for name in sensor_names
        )

    return pd.concat(per_sensor_frames, ignore_index=True)
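| # NOTE: the commented-out functions below are older single-location variants, kept for now because the forecast path is not sensor-level. The first one references `lats`/`lons`, which it never defines, so it needs fixing before reuse. | |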
| # def get_historical_weather_data_old(latitude, longitude, start_date="2023-01-01", end_date="2024-12-31"): | |
| # # Setup the Open-Meteo API client with cache and retry on error | |
| # cache_session = requests_cache.CachedSession('.cache', expire_after=-1) | |
| # retry_session = retry(cache_session, retries=5, backoff_factor=0.2) | |
| # openmeteo = openmeteo_requests.Client(session=retry_session) | |
| # | |
| # # Make sure all required weather variables are listed here | |
| # # The order of variables in hourly or daily is important to assign them correctly below | |
| # url = "https://archive-api.open-meteo.com/v1/archive" | |
| # params = { | |
| # "latitude": latitude, | |
| # "longitude": longitude, | |
| # "start_date": start_date, | |
| # "end_date": end_date, | |
| # "hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "et0_fao_evapotranspiration", | |
| # "wind_speed_10m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm", "direct_radiation"] | |
| # } | |
| # responses = openmeteo.weather_api(url, params=params) | |
| # | |
| # # kept only to preserve the earlier diagnostic prints | |
| # response = responses[0] | |
| # print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E") | |
| # print(f"Elevation {response.Elevation()} m asl") | |
| # print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}") | |
| # print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s") | |
| # | |
| # all_dfs = [] | |
| # | |
| # for i, (response, lat, lon) in enumerate(zip(responses, lats, lons)): | |
| # hourly = response.Hourly() | |
| # df = pd.DataFrame({ | |
| # "datetime": pd.date_range( | |
| # start=pd.to_datetime(hourly.Time(), unit="s", utc=True), | |
| # end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True), | |
| # freq=pd.Timedelta(seconds=hourly.Interval()), | |
| # inclusive="left" | |
| # ), | |
| # "temperature_2m": hourly.Variables(0).ValuesAsNumpy(), | |
| # "precipitation": hourly.Variables(2).ValuesAsNumpy(), | |
| # "wind_speed_10m": hourly.Variables(4).ValuesAsNumpy(), | |
| # "relative_humidity_2m": hourly.Variables(1).ValuesAsNumpy(), | |
| # "et0_fao_evapotranspiration": hourly.Variables(3).ValuesAsNumpy(), | |
| # "soil_temperature_0_to_7cm": hourly.Variables(5).ValuesAsNumpy(), | |
| # "soil_moisture_0_to_7cm": hourly.Variables(6).ValuesAsNumpy(), | |
| # "direct_radiation": hourly.Variables(7).ValuesAsNumpy(), | |
| # "latitude": lat, | |
| # "longitude": lon, | |
| # "location_id": i | |
| # }) | |
| # all_dfs.append(df) | |
| # | |
| # final_df = pd.concat(all_dfs, ignore_index=True) | |
| # | |
| # return final_df | |
| # def get_historical_weather_daily(latitude, longitude, start_date, end_date): | |
| # # Setup the Open-Meteo API client with cache and retry on error | |
| # #cache_session = requests_cache.CachedSession('.cache', expire_after=-1) | |
| # #retry_session = retry(cache_session, retries=5, backoff_factor=0.2) | |
| # | |
| # session = requests.Session() | |
| # retry_session = retry(session, retries=5, backoff_factor=0.2) | |
| # openmeteo = openmeteo_requests.Client(session=retry_session) | |
| # | |
| # url = "https://archive-api.open-meteo.com/v1/archive" | |
| # params = { | |
| # "latitude": latitude, | |
| # "longitude": longitude, | |
| # "start_date": start_date, | |
| # "end_date": end_date, | |
| # "daily": ["temperature_2m_min", "temperature_2m_max", "precipitation_sum", "et0_fao_evapotranspiration"] | |
| # } | |
| # | |
| # responses = openmeteo.weather_api(url, params=params) | |
| # response = responses[0] | |
| # | |
| # print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E") | |
| # print(f"Elevation {response.Elevation()} m asl") | |
| # print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}") | |
| # print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s") | |
| # | |
| # # Extract daily variables | |
| # daily = response.Daily() | |
| # daily_temperature_2m_min = daily.Variables(0).ValuesAsNumpy() | |
| # daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy() | |
| # daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy() | |
| # daily_et0_fao_evapotranspiration = daily.Variables(3).ValuesAsNumpy() | |
| # | |
| # # Build dataframe | |
| # daily_data = { | |
| # "Date": pd.date_range( | |
| # start=pd.to_datetime(daily.Time(), unit="s", utc=True), | |
| # end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True), | |
| # freq=pd.Timedelta(seconds=daily.Interval()), | |
| # inclusive="left" | |
| # ), | |
| # "MinTemp": daily_temperature_2m_min, | |
| # "MaxTemp": daily_temperature_2m_max, | |
| # "Precipitation": daily_precipitation_sum, | |
| # "ReferenceET": daily_et0_fao_evapotranspiration | |
| # } | |
| # | |
| # final_df = pd.DataFrame(data=daily_data) | |
| # final_df["Date"] = final_df["Date"].dt.strftime("%Y-%m-%d") | |
| # | |
| # cols = [c for c in final_df.columns if c != "Date"] + ["Date"] | |
| # final_df = final_df[cols] | |
| # | |
| # return final_df | |