import openmeteo_requests
import pandas as pd
import requests
import requests_cache
from retry_requests import retry

# Hourly variables requested from the Open-Meteo archive API.
# The order of variables in the request is what lets us assign the returned
# series to the right column, so this tuple is the single source of truth for
# both the request parameters and the response-to-column mapping.
_HOURLY_VARIABLES = (
    "temperature_2m",
    "relative_humidity_2m",
    "precipitation",
    "et0_fao_evapotranspiration",
    "wind_speed_10m",
    "soil_temperature_0_to_7cm",
    "soil_moisture_0_to_7cm",
    "direct_radiation",
)


def _hourly_to_frame(hourly):
    """Convert the hourly block of one API response into a DataFrame.

    The result has a UTC ``datetime`` column followed by one column per entry
    of ``_HOURLY_VARIABLES``, read from the response by position.
    """
    columns = {
        "datetime": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        )
    }
    for position, variable in enumerate(_HOURLY_VARIABLES):
        columns[variable] = hourly.Variables(position).ValuesAsNumpy()
    return pd.DataFrame(columns)


def get_historical_weather_data(
    location_ids_df, start_date="2023-01-01", end_date="2024-12-31"
):
    """
    Fetch historical hourly weather data for multiple sensor locations.

    Each distinct (x, y) location is requested once from the Open-Meteo
    archive API; the resulting weather rows are then repeated for every
    sensor that sits at that location.

    Parameters:
    -----------
    location_ids_df : pd.DataFrame
        DataFrame with columns: ['datastream_name', 'datastream_id', 'x', 'y']
        where x = latitude, y = longitude. Rows with a missing x or y are
        ignored. Only 'datastream_name', 'x' and 'y' are read here.
    start_date : str
        Start date in 'YYYY-MM-DD' format
    end_date : str
        End date in 'YYYY-MM-DD' format

    Returns:
    --------
    pd.DataFrame
        One row per (sensor, hour) with columns 'datetime', the hourly
        weather variables, and 'datastream_name'. Rows are grouped by
        location in order of first appearance, then by sensor in input order.
        If no sensor has a location, an empty DataFrame with those columns is
        returned and no request is made.
    """
    # avoid failing due to sensors with no location
    located = location_ids_df.dropna(subset=["x", "y"])

    # (lat, lon) -> names of the sensors at that location. A dict keeps
    # insertion order, so its keys double as the ordered list of unique
    # locations sent to the API.
    sensors_by_location = {}
    for lat, lon, name in zip(
        located["x"], located["y"], located["datastream_name"]
    ):
        sensors_by_location.setdefault((lat, lon), []).append(name)

    if not sensors_by_location:
        # Nothing to request: an empty coordinate list is not a valid query.
        return pd.DataFrame(
            columns=["datetime", *_HOURLY_VARIABLES, "datastream_name"]
        )

    # setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    locations = list(sensors_by_location)

    # API call: all locations in one request, as comma-separated strings
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": ",".join(f"{lat}" for lat, _ in locations),
        "longitude": ",".join(f"{lon}" for _, lon in locations),
        "start_date": start_date,
        "end_date": end_date,
        "hourly": list(_HOURLY_VARIABLES),
    }
    responses = openmeteo.weather_api(url, params=params)

    # print info from first response (keep previous prints)
    first = responses[0]
    print(f"Coordinates {first.Latitude()}°N {first.Longitude()}°E")
    print(f"Elevation {first.Elevation()} m asl")
    print(f"Timezone {first.Timezone()}{first.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {first.UtcOffsetSeconds()} s")

    # Responses are paired with the requested locations by position; each
    # location's weather is then tagged once per sensor at that location.
    sensor_frames = []
    for response, location in zip(responses, locations):
        location_weather = _hourly_to_frame(response.Hourly())
        for name in sensors_by_location[location]:
            sensor_frames.append(location_weather.assign(datastream_name=name))

    return pd.concat(sensor_frames, ignore_index=True)


# for now used for forecast as forecast is not sensor level
# def get_historical_weather_data_old(latitude, longitude, start_date="2023-01-01", end_date="2024-12-31"):
#     # Setup the Open-Meteo API client with cache and retry on error
#     cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
#     retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
#     openmeteo = openmeteo_requests.Client(session=retry_session)
#
#     # Make sure all required weather variables are listed here
#     # The order of variables in hourly or daily is important to assign them correctly below
#     url = "https://archive-api.open-meteo.com/v1/archive"
#     params = {
#         "latitude": latitude,
#         "longitude": longitude,
#         "start_date": start_date,
#         "end_date": end_date,
#         "hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "et0_fao_evapotranspiration",
#                    "wind_speed_10m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm", "direct_radiation"]
#     }
#     responses = openmeteo.weather_api(url, params=params)
#
#     # keep just to keep previous prints
#     response = responses[0]
#     print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
#     print(f"Elevation {response.Elevation()} m asl")
#     print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
#     print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
#
#     all_dfs = []
#
#     for i, (response, lat, lon) in enumerate(zip(responses, lats, lons)):
#         hourly = response.Hourly()
#         df = pd.DataFrame({
#             "datetime": pd.date_range(
#                 start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
#                 end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
#                 freq=pd.Timedelta(seconds=hourly.Interval()),
#                 inclusive="left"
#             ),
#             "temperature_2m": hourly.Variables(0).ValuesAsNumpy(),
#             "precipitation": hourly.Variables(2).ValuesAsNumpy(),
#             "wind_speed_10m": hourly.Variables(4).ValuesAsNumpy(),
#             "relative_humidity_2m": hourly.Variables(1).ValuesAsNumpy(),
#             "et0_fao_evapotranspiration": hourly.Variables(3).ValuesAsNumpy(),
#             "soil_temperature_0_to_7cm": hourly.Variables(5).ValuesAsNumpy(),
#             "soil_moisture_0_to_7cm": hourly.Variables(6).ValuesAsNumpy(),
#             "direct_radiation": hourly.Variables(7).ValuesAsNumpy(),
#             "latitude": lat,
#             "longitude": lon,
#             "location_id": i
#         })
#         all_dfs.append(df)
#
#     final_df = pd.concat(all_dfs, ignore_index=True)
#
#     return final_df


# def get_historical_weather_daily(latitude, longitude, start_date, end_date):
#     # Setup the Open-Meteo API client with cache and retry on error
#     #cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
#     #retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
#
#     session = requests.Session()
#     retry_session = retry(session, retries=5, backoff_factor=0.2)
#     openmeteo = openmeteo_requests.Client(session=retry_session)
#
#     url = "https://archive-api.open-meteo.com/v1/archive"
#     params = {
#         "latitude": latitude,
#         "longitude": longitude,
#         "start_date": start_date,
#         "end_date": end_date,
#         "daily": ["temperature_2m_min", "temperature_2m_max", "precipitation_sum", "et0_fao_evapotranspiration"]
#     }
#
#     responses = openmeteo.weather_api(url, params=params)
#     response = responses[0]
#
#     print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
#     print(f"Elevation {response.Elevation()} m asl")
#     print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
#     print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
#
#     # Extract daily variables
#     daily = response.Daily()
#     daily_temperature_2m_min = daily.Variables(0).ValuesAsNumpy()
#     daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
#     daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()
#     daily_et0_fao_evapotranspiration = daily.Variables(3).ValuesAsNumpy()
#
#     # Build dataframe
#     daily_data = {
#         "Date": pd.date_range(
#             start=pd.to_datetime(daily.Time(), unit="s", utc=True),
#             end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
#             freq=pd.Timedelta(seconds=daily.Interval()),
#             inclusive="left"
#         ),
#         "MinTemp": daily_temperature_2m_min,
#         "MaxTemp": daily_temperature_2m_max,
#         "Precipitation": daily_precipitation_sum,
#         "ReferenceET": daily_et0_fao_evapotranspiration
#     }
#
#     final_df = pd.DataFrame(data=daily_data)
#     final_df["Date"] = final_df["Date"].dt.strftime("%Y-%m-%d")
#
#     cols = [c for c in final_df.columns if c != "Date"] + ["Date"]
#     final_df = final_df[cols]
#
#     return final_df