Upload run_full_experiment.py with huggingface_hub
Browse files- run_full_experiment.py +249 -0
run_full_experiment.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Full EQL two-phase training experiment.
|
| 3 |
+
Usage:
|
| 4 |
+
python run_full_experiment.py --city Roskilde --steps_ahead 6 --feature wind_speed
|
| 5 |
+
"""
|
| 6 |
+
import os, sys, time
|
| 7 |
+
import numpy as np
|
| 8 |
+
from collections import OrderedDict
|
| 9 |
+
from tqdm import tqdm
|
| 10 |
+
from scipy.io import loadmat
|
| 11 |
+
import tensorflow as tf
|
| 12 |
+
import h5py
|
| 13 |
+
import matplotlib
|
| 14 |
+
matplotlib.use('Agg')
|
| 15 |
+
from matplotlib import pyplot as plt
|
| 16 |
+
|
| 17 |
+
from reproduce_eql import (
|
| 18 |
+
SymbolicNet, Constant, Identity, Square, Sin, Sigmoid, Product,
|
| 19 |
+
count_double, l12_smooth, get_folders_started, record_base_info,
|
| 20 |
+
append_text_to_summary, plot_train_vs_validation, plot_histogram,
|
| 21 |
+
plot_descaled_real_vs_prediction, generate_all_data, generate_variable_list,
|
| 22 |
+
save_weights, network
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def run_experiment(target_city, steps_ahead=6, feature="wind_speed"):
    """Run the full two-phase EQL training experiment for one city/horizon/feature.

    Phase 1 trains a SymbolicNet with a smoothed L0.5 sparsity regularizer.
    Phase 2 masks out near-zero weights from the best phase-1 model and
    fine-tunes the survivors without the regularizer, then extracts a
    symbolic formula and a final MAE plot. All artifacts (weights, summary
    text, plots) are written via the reproduce_eql helpers under a fresh
    experiment folder.

    Args:
        target_city: One of "Esbjerg", "Odense", "Roskilde".
        steps_ahead: Forecast horizon; must be one of 6, 12, 18, 24
            (mapped to the step1..step4 .mat data files; any other value
            raises KeyError below).
        feature: Data sub-directory / feature name, e.g. "wind_speed".

    Returns:
        Tuple (experiment_number, best_phase1_val_mse, best_phase2_val_mse);
        the last element is None when phase 2 is disabled in the config.
    """
    target_cities = ["Esbjerg", "Odense", "Roskilde"]
    # Column index of the target city in the .mat arrays.
    target_city_index = target_cities.index(target_city)

    # Horizon -> file suffix; unsupported horizons raise KeyError here.
    filename = "{}/step{}.mat".format(feature, {6: "1", 12: "2", 18: "3", 24: "4"}[steps_ahead])
    data = loadmat('Denmark_data/{}'.format(filename))
    # Per-city training min/max used to undo the target's scaling when
    # computing the rescaled loss and the final descaled plots.
    y_min = data["y_min_tr"][0][target_city_index]
    y_max = data["y_max_tr"][0][target_city_index]

    # Per-city hyperparameters: regularizer weight, L0.5 smoothing
    # constant `a`, and the phase-2 pruning threshold.
    city_configs = {
        "Esbjerg": {"lambda_reg": 5.0, "a": 5e-3, "threshold": 8.0e-3},
        "Odense": {"lambda_reg": 5.0, "a": 5e-4, "threshold": 7.5e-3},
        "Roskilde": {"lambda_reg": 3.0, "a": 5e-3, "threshold": 7.5e-3},
    }
    city_cfg = city_configs[target_city]

    # Full experiment configuration; also recorded verbatim to the
    # experiment summary via record_base_info below.
    config = {
        "use_rescaled_MSE": True,
        "a_L_0.5": city_cfg["a"],
        "threshold_value": city_cfg["threshold"],
        "lambda_reg": city_cfg["lambda_reg"],
        "steps_ahead": steps_ahead,
        "feature": feature,
        "target_city": target_city,
        "epochs1": 100,
        "epochs2": 100,
        "use_phase2": True,
        "batch_size": 200,
        "phase1_lr": 1e-4,
        "phase2_lr": 1e-5,
        "eql_number_layers": 2,
        "optimizer": "rmsprop",
    }

    use_rescaled_loss = config["use_rescaled_MSE"]
    number_epochs1 = config["epochs1"]
    number_epochs2 = config["epochs2"]
    threshold_value = config["threshold_value"]
    a_L_05 = config["a_L_0.5"]
    lambda_reg = config["lambda_reg"]
    batch_size = config["batch_size"]
    first_phase_lr = config["phase1_lr"]
    second_phase_lr = config["phase2_lr"]

    # Input dimensionality of the flattened feature vector.
    x_dim = 80
    # Activation function bank of the EQL hidden layer; Product units
    # consume two inputs each (hence the n_double adjustment below).
    activation_funcs = [
        *[Constant()] * 2,
        *[Identity()] * 4,
        *[Square()] * 4,
        *[Sin()] * 2,
        *[Sigmoid()] * 2,
        *[Product()] * 2
    ]
    n_layers = 2
    n_double = count_double(activation_funcs)
    width = len(activation_funcs)

    # One weight matrix per layer; hidden layers need width + n_double
    # output columns to feed the two-argument units.
    init_weights = [
        tf.random.truncated_normal([x_dim, width + n_double], stddev=0.1),
        tf.random.truncated_normal([width, width + n_double], stddev=0.5),
        tf.random.truncated_normal([width, 1], stddev=1.0)
    ]
    model = SymbolicNet(n_layers, funcs=activation_funcs, initial_weights=init_weights)
    phase1_optimizer = tf.keras.optimizers.RMSprop(learning_rate=first_phase_lr)

    # Allocate the experiment folder / number and log the configuration.
    experiment_number = get_folders_started()
    print("\n\n" + "*" * 10 + " Beginning of Experiment {} ".format(experiment_number) + "*" * 10 + "\n\n")
    print("Feature : {}, City : {}, Steps ahead : {}".format(feature, target_city, steps_ahead))
    record_base_info(experiment_number, **config)

    x_train, y_train = generate_all_data("train", steps_ahead, feature, target_city_index)
    x_test, y_test = generate_all_data("test", steps_ahead, feature, target_city_index)
    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
    # NOTE(review): the "test" split doubles as the validation set for
    # early stopping and model selection — confirm this is intended.
    val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

    # Phase 1
    best_val_loss = float('inf')
    best_val_weights = None
    train_loss_results = []
    valid_loss_results = []
    # Counts consecutive epochs without validation improvement.
    count_loss_stagnation = 0

    for epoch in range(number_epochs1):
        # Tracks the *scaled* train MSE for logging (the optimized loss
        # may instead be the rescaled MSE plus the regularizer).
        epoch_loss_avg = tf.keras.metrics.MeanSquaredError()
        for x_batch, y_batch in tqdm(train_dataset, desc=f"P1 Epoch {epoch+1}", leave=False):
            with tf.GradientTape() as tape:
                y_pred = model(x_batch)
                if use_rescaled_loss:
                    # Undo the min/max scaling so the MSE is in the
                    # original physical units.
                    y_pred_r = y_pred * (y_max - y_min) + y_min
                    y_real_r = y_batch * (y_max - y_min) + y_min
                    error = tf.keras.losses.MeanSquaredError()(y_real_r, y_pred_r)
                else:
                    error = tf.keras.losses.MeanSquaredError()(y_batch, y_pred)
                # Smoothed L0.5 penalty driving weights toward zero.
                reg_loss = l12_smooth(model.get_weights(), a_L_05)
                loss = error + lambda_reg * reg_loss
            # NOTE(review): gradients are taken w.r.t. model.get_weights();
            # this assumes SymbolicNet.get_weights() returns the live
            # tf.Variable objects (unlike Keras's numpy copies) — confirm
            # against reproduce_eql.
            grads = tape.gradient(loss, model.get_weights())
            phase1_optimizer.apply_gradients(zip(grads, model.get_weights()))
            epoch_loss_avg.update_state(y_batch, y_pred)

        train_mse = epoch_loss_avg.result().numpy()
        train_loss_results.append(train_mse)

        # Full validation pass (scaled MSE, no regularizer).
        val_loss_avg = tf.keras.metrics.MeanSquaredError()
        for x_val, y_val in val_dataset:
            y_pred_val = model(x_val)
            val_loss_avg.update_state(y_val, y_pred_val)
        val_mse = val_loss_avg.result().numpy()
        valid_loss_results.append(val_mse)

        # Keep a snapshot of the best-so-far weights for phase 2.
        if val_mse < best_val_loss:
            best_val_loss = val_mse
            best_val_weights = [w.numpy() for w in model.get_weights()]
            print(f"P1 Epoch {epoch+1}: val MSE improved to {val_mse:.4e} (train {train_mse:.4e})")
            count_loss_stagnation = 0
        else:
            count_loss_stagnation += 1
            print(f"P1 Epoch {epoch+1}: val MSE {val_mse:.4e} (train {train_mse:.4e})")

        # Early stop only late in training (after epoch 70).
        if count_loss_stagnation >= 4 and epoch > 70:
            print("No improvement over 4 epochs, epoch > 70. Exiting phase 1...")
            break

    save_weights(best_val_weights, experiment_number, "phase1", best_val_loss)
    append_text_to_summary(experiment_number, "phase 1 best MSE validation: {}\n".format(best_val_loss))
    # NOTE(review): number_epochs1 is passed even if the loop broke early,
    # so the x-axis may be longer than the recorded losses — confirm
    # plot_train_vs_validation handles that.
    plot_train_vs_validation(experiment_number, number_epochs1, train_loss_results, valid_loss_results, "phase1")
    plot_histogram(experiment_number, best_val_weights[0], "phase1", "weights1", a_L_05)
    plot_histogram(experiment_number, best_val_weights[1], "phase1", "weights2", a_L_05)
    plot_histogram(experiment_number, best_val_weights[2], "phase1", "weights3", a_L_05)

    # Phase 2
    if not config["use_phase2"]:
        return experiment_number, best_val_loss, None

    print("\n--- Starting Phase 2 ---\n")
    # Hard-prune: zero every weight with magnitude <= threshold_value.
    masked_weights = []
    for w_i in best_val_weights:
        mask = tf.cast(tf.constant(tf.abs(w_i) > threshold_value), tf.float32)
        masked_weights.append(tf.multiply(w_i, mask))

    # Percentage of zeroed entries per layer, logged to the summary.
    sparsity1 = 100 * np.count_nonzero(masked_weights[0] == 0) / masked_weights[0].size
    sparsity2 = 100 * np.count_nonzero(masked_weights[1] == 0) / masked_weights[1].size
    sparsity3 = 100 * np.count_nonzero(masked_weights[2] == 0) / masked_weights[2].size
    append_text_to_summary(experiment_number, f"sparsity1 phase2 after masking: {sparsity1}\n")
    append_text_to_summary(experiment_number, f"sparsity2 phase2 after masking: {sparsity2}\n")
    append_text_to_summary(experiment_number, f"sparsity3 phase2 after masking: {sparsity3}\n")

    masked_model = SymbolicNet(n_layers, funcs=activation_funcs, initial_weights=masked_weights)
    phase2_optimizer = tf.keras.optimizers.RMSprop(learning_rate=second_phase_lr)

    train_loss_results2 = []
    valid_loss_results2 = []
    best_val_loss2 = float('inf')
    best_val_weights2 = None
    count_loss_stagnation = 0

    # Fine-tune without the regularizer.
    # NOTE(review): the mask is not re-applied after updates, so pruned
    # weights can drift away from zero during phase 2 — confirm this
    # matches the intended EQL procedure.
    for epoch in range(number_epochs2):
        epoch_loss_avg = tf.keras.metrics.MeanSquaredError()
        for x_batch, y_batch in tqdm(train_dataset, desc=f"P2 Epoch {epoch+1}", leave=False):
            with tf.GradientTape() as tape:
                y_pred = masked_model(x_batch)
                if use_rescaled_loss:
                    y_pred_r = y_pred * (y_max - y_min) + y_min
                    y_real_r = y_batch * (y_max - y_min) + y_min
                    error = tf.keras.losses.MeanSquaredError()(y_real_r, y_pred_r)
                else:
                    error = tf.keras.losses.MeanSquaredError()(y_batch, y_pred)
                loss = error
            grads = tape.gradient(loss, masked_model.get_weights())
            phase2_optimizer.apply_gradients(zip(grads, masked_model.get_weights()))
            epoch_loss_avg.update_state(y_batch, y_pred)

        train_mse = epoch_loss_avg.result().numpy()
        train_loss_results2.append(train_mse)

        val_loss_avg = tf.keras.metrics.MeanSquaredError()
        for x_val, y_val in val_dataset:
            y_pred_val = masked_model(x_val)
            val_loss_avg.update_state(y_val, y_pred_val)
        val_mse = val_loss_avg.result().numpy()
        valid_loss_results2.append(val_mse)

        if val_mse < best_val_loss2:
            best_val_loss2 = val_mse
            best_val_weights2 = [w.numpy() for w in masked_model.get_weights()]
            print(f"P2 Epoch {epoch+1}: val MSE improved to {val_mse:.4e} (train {train_mse:.4e})")
            count_loss_stagnation = 0
        else:
            count_loss_stagnation += 1
            print(f"P2 Epoch {epoch+1}: val MSE {val_mse:.4e} (train {train_mse:.4e})")

        # Phase 2 stops late too, but only after epoch 80.
        if count_loss_stagnation >= 4 and epoch > 80:
            print("No improvement over 4 epochs, epoch > 80. Exiting phase 2...")
            break

    save_weights(best_val_weights2, experiment_number, "phase2", best_val_loss2)
    append_text_to_summary(experiment_number, "phase 2 best MSE validation: {}\n".format(best_val_loss2))
    plot_train_vs_validation(experiment_number, number_epochs2, train_loss_results2, valid_loss_results2, "phase2")

    # Extract formula
    # NOTE(review): generate_variable_list(26, 80) presumably produces at
    # least x_dim symbolic variable names — confirm in reproduce_eql.
    var_names = generate_variable_list(26, 80)
    # threshold=0: the weights were already pruned above, so no further
    # thresholding during symbolic extraction.
    expr = network(best_val_weights2, activation_funcs, var_names[:x_dim], threshold=0)
    print("\nExtracted formula:\n", expr)
    append_text_to_summary(experiment_number, "Formula after phase2: {}\n".format(expr))

    # Final prediction plot
    # Rebuild a fresh network from the best phase-2 snapshot and plot
    # descaled predictions against ground truth on the test set.
    best_model = SymbolicNet(n_layers, funcs=activation_funcs,
                             initial_weights=[tf.constant(w) for w in best_val_weights2])
    y_predicted = best_model(x_test)
    mae = plot_descaled_real_vs_prediction(experiment_number, y_test, y_predicted, y_min, y_max)
    append_text_to_summary(experiment_number, "MAE: {}\n".format(mae))
    print("\nMAE: {}".format(mae))

    print("\n\n" + "*" * 10 + " End of Experiment {} ".format(experiment_number) + "*" * 10 + "\n\n")
    return experiment_number, best_val_loss, best_val_loss2
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
if __name__ == "__main__":
    # CLI entry point: parse the experiment parameters and run once.
    import argparse
    parser = argparse.ArgumentParser(
        description="Run the full EQL two-phase training experiment.")
    parser.add_argument("--city", type=str, default="Roskilde",
                        choices=["Esbjerg", "Odense", "Roskilde"],
                        help="Target city whose series is predicted.")
    # Restrict to the supported horizons: run_experiment maps
    # {6, 12, 18, 24} -> step1..step4 .mat files, and any other value
    # would crash with an opaque KeyError deep inside the run.
    parser.add_argument("--steps_ahead", type=int, default=6,
                        choices=[6, 12, 18, 24],
                        help="Forecast horizon (maps to stepN.mat data files).")
    parser.add_argument("--feature", type=str, default="wind_speed",
                        help="Feature/data sub-directory under Denmark_data.")
    args = parser.parse_args()
    run_experiment(args.city, args.steps_ahead, args.feature)
|