| |
| |
| |
| |
| |
|
|
| import os |
| import datetime |
| import logging |
| os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' |
| import IPython |
| import IPython.display |
| import matplotlib as mpl |
| import matplotlib.pyplot as plt |
| import numpy as np |
| import pandas as pd |
| import seaborn as sns |
| import tensorflow as tf |
| import datetime as dt |
| from sklearn.preprocessing import MinMaxScaler |
| mpl.rcParams['figure.figsize'] = (8, 6) |
| mpl.rcParams['axes.grid'] = False |
|
|
| import warnings |
| warnings.filterwarnings("ignore") |
| from tensorflow.python.client import device_lib |
|
|
| |
| strategy = tf.distribute.MirroredStrategy() |
| print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU'))) |
| print(device_lib.list_local_devices()) |
| tf.keras.backend.set_floatx('float64') |
|
|
| for chunk in pd.read_csv("smartmeter.csv", chunksize= 10**6): |
| print(chunk) |
|
|
| data = pd.DataFrame(chunk) |
| data = data.drop(['device_id', 'device_name', 'property'], axis = 1) |
|
|
| |
| def time_d(x): |
| k = datetime.datetime.strptime(x, "%H:%M:%S") |
| y = k - datetime.datetime(1900, 1, 1) |
| return y.total_seconds() |
|
|
| daytime = data['timestamp'].str.slice(start = 11 ,stop=19) |
| secondsperday = daytime.map(lambda i: time_d(i)) |
| data['timestamp'] = data['timestamp'].str.slice(stop=19) |
| data['timestamp'] = data['timestamp'].map(lambda i: dt.datetime.strptime(i, '%Y-%m-%d %H:%M:%S')) |
| parse_dates = [data['timestamp']] |
|
|
| |
| wd_input = np.array(data['timestamp'].map(lambda i: int(i.weekday()))) |
|
|
| |
| seconds_in_day = 24*60*60 |
| data_seconds = np.array(data['timestamp'].map(lambda i: i.weekday())) |
| input_sin = np.array(np.sin(2*np.pi*secondsperday/seconds_in_day)) |
| input_cos = np.array(np.cos(2*np.pi*secondsperday/seconds_in_day)) |
|
|
| |
| df = pd.DataFrame(data = {'value':data['value'], 'input_sin':input_sin, 'input_cos':input_cos, 'input_wd': wd_input}) |
| column_indices = {name: i for i, name in enumerate(data.columns)} |
| n = len(df) |
| train_df = pd.DataFrame(df[0:int(n*0.7)]) |
| val_df = pd.DataFrame(df[int(n*0.7):int(n*0.9)]) |
| test_df = pd.DataFrame(df[int(n*0.9):]) |
| num_features = df.shape[1] |
|
|
| |
| train_mean = train_df['value'].mean() |
| train_std = train_df['value'].std() |
| train_df['value'] = (train_df['value'] - train_mean) / train_std |
| val_df['value'] = (val_df['value'] - train_mean) / train_std |
| test_df['value'] = (test_df['value'] - train_mean) / train_std |
|
|
| |
| train_df['value'] = train_df['value'] - train_df['value'].shift() |
|
|
| |
| train_df['value'] = train_df['value'].map(lambda i: abs(i)) |
| train_df.loc[train_df.value <= 0, 'value'] = 0.000000001 |
| train_df['value'] = train_df['value'].map(lambda i: np.log(i)) |
| train_df = train_df.replace(np.nan, 0.000000001) |
|
|
| |
| val_df['value'] = val_df['value'] - val_df['value'].shift() |
|
|
| |
| val_df['value'] = val_df['value'].map(lambda i: abs(i)) |
| val_df.loc[val_df.value <= 0, 'value'] = 0.000000001 |
| val_df['value'] = val_df['value'].map(lambda i: np.log(i)) |
| val_df = val_df.replace(np.nan, 0.000000001) |
|
|
| |
| test_df['value'] = test_df['value'] - test_df['value'].shift() |
|
|
| |
| test_df['value'] = test_df['value'].map(lambda i: abs(i)) |
| test_df.loc[test_df.value <= 0, 'value'] = 0.000000001 |
| test_df['value'] = test_df['value'].map(lambda i: np.log(i)) |
| test_df = test_df.replace(np.nan, 0.000000001) |
|
|
| |
|
|
| class WindowGenerator(): |
| def __init__(self, input_width, label_width, shift, |
| train_df=train_df, val_df=val_df, test_df=test_df, |
| label_columns=None): |
| |
| self.train_df = train_df |
| self.val_df = val_df |
| self.test_df = test_df |
|
|
| |
| self.label_columns = label_columns |
| if label_columns is not None: |
| self.label_columns_indices = {name: i for i, name in |
| enumerate(label_columns)} |
| self.column_indices = {name: i for i, name in |
| enumerate(train_df.columns)} |
|
|
| |
| self.input_width = input_width |
| self.label_width = label_width |
| self.shift = shift |
|
|
| self.total_window_size = input_width + shift |
|
|
| self.input_slice = slice(0, input_width) |
| self.input_indices = np.arange(self.total_window_size)[self.input_slice] |
|
|
| self.label_start = self.total_window_size - self.label_width |
| self.labels_slice = slice(self.label_start, None) |
| self.label_indices = np.arange(self.total_window_size)[self.labels_slice] |
|
|
| def __repr__(self): |
| return '\n'.join([ |
| f'Total window size: {self.total_window_size}', |
| f'Input indices: {self.input_indices}', |
| f'Label indices: {self.label_indices}', |
| f'Label column name(s): {self.label_columns}']) |
|
|
| def split_window(self, features): |
| inputs = features[:, self.input_slice, :] |
| labels = features[:, self.labels_slice, :] |
| if self.label_columns is not None: |
| labels = tf.stack( |
| [labels[:, :, self.column_indices[name]] for name in self.label_columns], |
| axis=-1) |
|
|
| |
| |
| inputs.set_shape([None, self.input_width, None]) |
| labels.set_shape([None, self.label_width, None]) |
|
|
| return inputs, labels |
|
|
| WindowGenerator.split_window = split_window |
|
|
| |
| def plot(self, model=None, plot_col='value', max_subplots=3): |
| inputs, labels = self.example |
| plt.figure(figsize=(12, 8)) |
| plot_col_index = self.column_indices[plot_col] |
| max_n = min(max_subplots, len(inputs)) |
| for n in range(max_n): |
| plt.subplot(3, 1, n+1) |
| plt.ylabel(f'{plot_col} [normed]') |
| plt.plot(self.input_indices, inputs[n, :, plot_col_index], |
| label='Inputs', marker='.', zorder=-10) |
| if self.label_columns: |
| label_col_index = self.label_columns_indices.get(plot_col, None) |
| else: |
| label_col_index = plot_col_index |
| if label_col_index is None: |
| continue |
| plt.scatter(self.label_indices, labels[n, :, label_col_index], |
| edgecolors='k', label='Labels', c='#2ca02c', s=64) |
| if model is not None: |
| predictions = model(inputs) |
| plt.scatter(self.label_indices, predictions[n, :, label_col_index], |
| marker='X', edgecolors='k', label='Predictions', |
| c='#ff7f0e', s=64) |
| if n == 0: |
| plt.legend() |
| plt.xlabel('Time [h]') |
| WindowGenerator.plot = plot |
|
|
| |
| def make_dataset(self, data): |
| data = np.array(data, dtype=np.float64) |
| ds = tf.keras.preprocessing.timeseries_dataset_from_array( |
| data=data, |
| targets=None, |
| sequence_length=self.total_window_size, |
| sequence_stride=1, |
| shuffle=True, |
| batch_size=32,) |
|
|
| ds = ds.map(self.split_window) |
| return ds |
| WindowGenerator.make_dataset = make_dataset |
|
|
| @property |
| def train(self): |
| return self.make_dataset(self.train_df) |
|
|
| @property |
| def val(self): |
| return self.make_dataset(self.val_df) |
|
|
| @property |
| def test(self): |
| return self.make_dataset(self.test_df) |
|
|
| @property |
| def example(self): |
| """Get and cache an example batch of `inputs, labels` for plotting.""" |
| result = getattr(self, '_example', None) |
| if result is None: |
| |
| result = next(iter(self.train)) |
| |
| self._example = result |
| return result |
| WindowGenerator.train = train |
| WindowGenerator.val = val |
| WindowGenerator.test = test |
| WindowGenerator.example = example |
| single_step_window = WindowGenerator( |
| input_width=1, label_width=1, shift=1, |
| label_columns=['value']) |
|
|
| |
| class Baseline(tf.keras.Model): |
| def __init__(self, label_index=None): |
| super().__init__() |
| self.label_index = label_index |
|
|
| def call(self, inputs): |
| if self.label_index is None: |
| return inputs |
| result = inputs[:, :, self.label_index] |
| return result[:, :, tf.newaxis] |
|
|
| baseline = Baseline(label_index=column_indices['value']) |
| baseline.compile(loss=tf.losses.MeanSquaredError(), |
| metrics=[tf.metrics.MeanAbsoluteError()]) |
| val_performance = {} |
| performance = {} |
| val_performance['Baseline'] = baseline.evaluate(single_step_window.val) |
| performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0) |
| wide_window = WindowGenerator( |
| input_width=25, label_width=25, shift=1, |
| label_columns=['value']) |
| wide_window.plot(baseline) |
|
|
| |
| MAX_EPOCHS = 20 |
| def compile_and_fit(model, window, patience=2): |
| early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', |
| patience=patience, |
| mode='min') |
|
|
| model.compile(loss=tf.losses.MeanSquaredError(), |
| optimizer=tf.optimizers.SGD(), |
| metrics=[tf.metrics.MeanAbsoluteError()]) |
|
|
| history = model.fit(window.train, epochs=MAX_EPOCHS, |
| validation_data=window.val, |
| callbacks=[early_stopping]) |
| return history |
|
|
| |
| |
| lstm_model = tf.keras.models.Sequential([ |
| |
| tf.keras.layers.LSTM(32, return_sequences=True), |
| |
| tf.keras.layers.Dense(units=1) |
| ]) |
| wide_window = WindowGenerator( |
| input_width=50, label_width=50, shift=1, |
| label_columns=['value']) |
| history = compile_and_fit(lstm_model, wide_window) |
| IPython.display.clear_output() |
| val_performance['LSTM'] = lstm_model.evaluate(wide_window.val) |
| performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0) |
| wide_window.plot(lstm_model) |
|
|