Mengqinxue committed
Commit 17dd685 · verified · 1 Parent(s): e21a093

Upload run_full_experiment.py with huggingface_hub

Files changed (1)
  1. run_full_experiment.py +249 -0
run_full_experiment.py ADDED
@@ -0,0 +1,249 @@
+ """
+ Full EQL two-phase training experiment.
+
+ Usage:
+     python run_full_experiment.py --city Roskilde --steps_ahead 6 --feature wind_speed
+ """
+ import os, sys, time
+ import numpy as np
+ from collections import OrderedDict
+ from tqdm import tqdm
+ from scipy.io import loadmat
+ import tensorflow as tf
+ import h5py
+ import matplotlib
+ matplotlib.use('Agg')  # headless backend: plots are written to files, never shown
+ from matplotlib import pyplot as plt
+
+ from reproduce_eql import (
+     SymbolicNet, Constant, Identity, Square, Sin, Sigmoid, Product,
+     count_double, l12_smooth, get_folders_started, record_base_info,
+     append_text_to_summary, plot_train_vs_validation, plot_histogram,
+     plot_descaled_real_vs_prediction, generate_all_data, generate_variable_list,
+     save_weights, network
+ )
+
+
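+ # This script trains an EQL (Equation Learner)-style network in two phases:
+ # phase 1 fits all weights under a smoothed L0.5 sparsity penalty, phase 2
+ # zeroes weights below a per-city threshold and fine-tunes the survivors on
+ # plain MSE, and a closed-form expression is then read off the final weights.
+ # Helper semantics are assumed from reproduce_eql.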
+ def run_experiment(target_city, steps_ahead=6, feature="wind_speed"):
+     target_cities = ["Esbjerg", "Odense", "Roskilde"]
+     target_city_index = target_cities.index(target_city)
+
+     # steps_ahead selects the pre-built .mat file (6 -> step1, 12 -> step2, ...).
+     # y_min/y_max are the per-city bounds later used to descale the normalised
+     # targets back to physical units.
+     filename = "{}/step{}.mat".format(feature, {6: "1", 12: "2", 18: "3", 24: "4"}[steps_ahead])
+     data = loadmat('Denmark_data/{}'.format(filename))
+     y_min = data["y_min_tr"][0][target_city_index]
+     y_max = data["y_max_tr"][0][target_city_index]
+
+     # Per-city hyperparameters: lambda_reg scales the sparsity penalty, "a" is
+     # the smoothing constant of the smoothed L0.5 regulariser, and "threshold"
+     # is the magnitude cut-off used to prune weights before phase 2.
+     city_configs = {
+         "Esbjerg": {"lambda_reg": 5.0, "a": 5e-3, "threshold": 8.0e-3},
+         "Odense": {"lambda_reg": 5.0, "a": 5e-4, "threshold": 7.5e-3},
+         "Roskilde": {"lambda_reg": 3.0, "a": 5e-3, "threshold": 7.5e-3},
+     }
+     city_cfg = city_configs[target_city]
+
+     config = {
+         "use_rescaled_MSE": True,
+         "a_L_0.5": city_cfg["a"],
+         "threshold_value": city_cfg["threshold"],
+         "lambda_reg": city_cfg["lambda_reg"],
+         "steps_ahead": steps_ahead,
+         "feature": feature,
+         "target_city": target_city,
+         "epochs1": 100,
+         "epochs2": 100,
+         "use_phase2": True,
+         "batch_size": 200,
+         "phase1_lr": 1e-4,
+         "phase2_lr": 1e-5,
+         "eql_number_layers": 2,
+         "optimizer": "rmsprop",
+     }
+
+     use_rescaled_loss = config["use_rescaled_MSE"]
+     number_epochs1 = config["epochs1"]
+     number_epochs2 = config["epochs2"]
+     threshold_value = config["threshold_value"]
+     a_L_05 = config["a_L_0.5"]
+     lambda_reg = config["lambda_reg"]
+     batch_size = config["batch_size"]
+     first_phase_lr = config["phase1_lr"]
+     second_phase_lr = config["phase2_lr"]
+
+     x_dim = 80
+     # Function library used by the EQL hidden layers.
+     activation_funcs = [
+         *[Constant()] * 2,
+         *[Identity()] * 4,
+         *[Square()] * 4,
+         *[Sin()] * 2,
+         *[Sigmoid()] * 2,
+         *[Product()] * 2,
+     ]
+     n_layers = 2  # kept in sync with config["eql_number_layers"] by hand
+     n_double = count_double(activation_funcs)
+     width = len(activation_funcs)
+
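+     # Each weight matrix feeding a hidden layer has width + n_double output
+     # columns: one pre-activation per single-input unit and two per Product
+     # unit, which is what count_double() accounts for.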
+     init_weights = [
+         tf.random.truncated_normal([x_dim, width + n_double], stddev=0.1),
+         tf.random.truncated_normal([width, width + n_double], stddev=0.5),
+         tf.random.truncated_normal([width, 1], stddev=1.0)
+     ]
+     model = SymbolicNet(n_layers, funcs=activation_funcs, initial_weights=init_weights)
+     phase1_optimizer = tf.keras.optimizers.RMSprop(learning_rate=first_phase_lr)
+
+     experiment_number = get_folders_started()
+     print("\n\n" + "*" * 10 + " Beginning of Experiment {} ".format(experiment_number) + "*" * 10 + "\n\n")
+     print("Feature : {}, City : {}, Steps ahead : {}".format(feature, target_city, steps_ahead))
+     record_base_info(experiment_number, **config)
+
+     x_train, y_train = generate_all_data("train", steps_ahead, feature, target_city_index)
+     x_test, y_test = generate_all_data("test", steps_ahead, feature, target_city_index)
+     # Note: the test split doubles as the validation set throughout this script.
+     train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
+     val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)
+
+     # Phase 1: fit the full network with the smoothed L0.5 sparsity penalty.
+     best_val_loss = float('inf')
+     best_val_weights = None
+     train_loss_results = []
+     valid_loss_results = []
+     count_loss_stagnation = 0
+
+     for epoch in range(number_epochs1):
+         epoch_loss_avg = tf.keras.metrics.MeanSquaredError()
+         for x_batch, y_batch in tqdm(train_dataset, desc=f"P1 Epoch {epoch+1}", leave=False):
+             with tf.GradientTape() as tape:
+                 y_pred = model(x_batch)
+                 if use_rescaled_loss:
+                     # Descale both prediction and target so the MSE is computed
+                     # in physical units rather than on the normalised scale.
+                     y_pred_r = y_pred * (y_max - y_min) + y_min
+                     y_real_r = y_batch * (y_max - y_min) + y_min
+                     error = tf.keras.losses.MeanSquaredError()(y_real_r, y_pred_r)
+                 else:
+                     error = tf.keras.losses.MeanSquaredError()(y_batch, y_pred)
+                 reg_loss = l12_smooth(model.get_weights(), a_L_05)
+                 loss = error + lambda_reg * reg_loss
+             # model.get_weights() is assumed to return trainable tf.Variables,
+             # so gradients can be taken and applied to them directly.
+             grads = tape.gradient(loss, model.get_weights())
+             phase1_optimizer.apply_gradients(zip(grads, model.get_weights()))
+             epoch_loss_avg.update_state(y_batch, y_pred)
+
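+         # Note: the epoch metric tracks MSE on the normalised scale even when
+         # the training loss itself is computed on descaled values.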
+         train_mse = epoch_loss_avg.result().numpy()
+         train_loss_results.append(train_mse)
+
+         val_loss_avg = tf.keras.metrics.MeanSquaredError()
+         for x_val, y_val in val_dataset:
+             y_pred_val = model(x_val)
+             val_loss_avg.update_state(y_val, y_pred_val)
+         val_mse = val_loss_avg.result().numpy()
+         valid_loss_results.append(val_mse)
+
+         if val_mse < best_val_loss:
+             best_val_loss = val_mse
+             best_val_weights = [w.numpy() for w in model.get_weights()]
+             print(f"P1 Epoch {epoch+1}: val MSE improved to {val_mse:.4e} (train {train_mse:.4e})")
+             count_loss_stagnation = 0
+         else:
+             count_loss_stagnation += 1
+             print(f"P1 Epoch {epoch+1}: val MSE {val_mse:.4e} (train {train_mse:.4e})")
+
+         # Patience-based early stopping, enabled only after epoch 70.
+         if count_loss_stagnation >= 4 and epoch > 70:
+             print("No improvement over 4 epochs and epoch > 70. Exiting phase 1...")
+             break
+
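+     # Persist the best phase-1 weights and plot diagnostics: train/validation
+     # curves plus one histogram per weight matrix, which shows how much weight
+     # mass the L0.5 penalty has pushed toward zero.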
+     save_weights(best_val_weights, experiment_number, "phase1", best_val_loss)
+     append_text_to_summary(experiment_number, "phase 1 best MSE validation: {}\n".format(best_val_loss))
+     plot_train_vs_validation(experiment_number, number_epochs1, train_loss_results, valid_loss_results, "phase1")
+     plot_histogram(experiment_number, best_val_weights[0], "phase1", "weights1", a_L_05)
+     plot_histogram(experiment_number, best_val_weights[1], "phase1", "weights2", a_L_05)
+     plot_histogram(experiment_number, best_val_weights[2], "phase1", "weights3", a_L_05)
+
+     # Phase 2: prune near-zero weights, then fine-tune without the penalty.
+     if not config["use_phase2"]:
+         return experiment_number, best_val_loss, None
+
+     print("\n--- Starting Phase 2 ---\n")
+     # Zero out every weight whose magnitude is at or below the pruning
+     # threshold. Masking in NumPy keeps the sparsity statistics below simple
+     # (tf.Tensor objects have no .size attribute).
+     masked_weights = []
+     for w_i in best_val_weights:
+         mask = (np.abs(w_i) > threshold_value).astype(np.float32)
+         masked_weights.append(w_i * mask)
+
+     for i, w in enumerate(masked_weights, start=1):
+         sparsity = 100 * np.count_nonzero(w == 0) / w.size
+         append_text_to_summary(experiment_number, f"sparsity{i} phase2 after masking: {sparsity}\n")
+
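+     # Note: the mask is applied once, at re-initialisation. Unless SymbolicNet
+     # freezes exact-zero weights internally, phase-2 gradient updates are free
+     # to revive pruned connections.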
+     masked_model = SymbolicNet(n_layers, funcs=activation_funcs,
+                                initial_weights=[tf.constant(w) for w in masked_weights])
+     phase2_optimizer = tf.keras.optimizers.RMSprop(learning_rate=second_phase_lr)
+
+     train_loss_results2 = []
+     valid_loss_results2 = []
+     best_val_loss2 = float('inf')
+     best_val_weights2 = None
+     count_loss_stagnation = 0
+
+     for epoch in range(number_epochs2):
+         epoch_loss_avg = tf.keras.metrics.MeanSquaredError()
+         for x_batch, y_batch in tqdm(train_dataset, desc=f"P2 Epoch {epoch+1}", leave=False):
+             with tf.GradientTape() as tape:
+                 y_pred = masked_model(x_batch)
+                 if use_rescaled_loss:
+                     y_pred_r = y_pred * (y_max - y_min) + y_min
+                     y_real_r = y_batch * (y_max - y_min) + y_min
+                     error = tf.keras.losses.MeanSquaredError()(y_real_r, y_pred_r)
+                 else:
+                     error = tf.keras.losses.MeanSquaredError()(y_batch, y_pred)
+                 # No regularisation term in phase 2: only the data fit is optimised.
+                 loss = error
+             grads = tape.gradient(loss, masked_model.get_weights())
+             phase2_optimizer.apply_gradients(zip(grads, masked_model.get_weights()))
+             epoch_loss_avg.update_state(y_batch, y_pred)
+
+         train_mse = epoch_loss_avg.result().numpy()
+         train_loss_results2.append(train_mse)
+
+         val_loss_avg = tf.keras.metrics.MeanSquaredError()
+         for x_val, y_val in val_dataset:
+             y_pred_val = masked_model(x_val)
+             val_loss_avg.update_state(y_val, y_pred_val)
+         val_mse = val_loss_avg.result().numpy()
+         valid_loss_results2.append(val_mse)
+
+         if val_mse < best_val_loss2:
+             best_val_loss2 = val_mse
+             best_val_weights2 = [w.numpy() for w in masked_model.get_weights()]
+             print(f"P2 Epoch {epoch+1}: val MSE improved to {val_mse:.4e} (train {train_mse:.4e})")
+             count_loss_stagnation = 0
+         else:
+             count_loss_stagnation += 1
+             print(f"P2 Epoch {epoch+1}: val MSE {val_mse:.4e} (train {train_mse:.4e})")
+
+         if count_loss_stagnation >= 4 and epoch > 80:
+             print("No improvement over 4 epochs and epoch > 80. Exiting phase 2...")
+             break
+
+     save_weights(best_val_weights2, experiment_number, "phase2", best_val_loss2)
+     append_text_to_summary(experiment_number, "phase 2 best MSE validation: {}\n".format(best_val_loss2))
+     plot_train_vs_validation(experiment_number, number_epochs2, train_loss_results2, valid_loss_results2, "phase2")
+
+     # Extract the symbolic formula from the best phase-2 weights. threshold=0
+     # keeps every surviving weight in the expression, since pruning has
+     # already been applied.
+     var_names = generate_variable_list(26, 80)
+     expr = network(best_val_weights2, activation_funcs, var_names[:x_dim], threshold=0)
+     print("\nExtracted formula:\n", expr)
+     append_text_to_summary(experiment_number, "Formula after phase2: {}\n".format(expr))
+
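+     # generate_variable_list(26, 80) is assumed to yield names for the 80
+     # network inputs; only the first x_dim names reach the formula printer.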
+     # Final evaluation: rebuild the network from the best phase-2 weights and
+     # report the descaled MAE on the test set.
+     best_model = SymbolicNet(n_layers, funcs=activation_funcs,
+                              initial_weights=[tf.constant(w) for w in best_val_weights2])
+     y_predicted = best_model(x_test)
+     mae = plot_descaled_real_vs_prediction(experiment_number, y_test, y_predicted, y_min, y_max)
+     append_text_to_summary(experiment_number, "MAE: {}\n".format(mae))
+     print("\nMAE: {}".format(mae))
+
+     print("\n\n" + "*" * 10 + " End of Experiment {} ".format(experiment_number) + "*" * 10 + "\n\n")
+     return experiment_number, best_val_loss, best_val_loss2
+
+
+ if __name__ == "__main__":
+     import argparse
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--city", type=str, default="Roskilde", choices=["Esbjerg", "Odense", "Roskilde"])
+     parser.add_argument("--steps_ahead", type=int, default=6, choices=[6, 12, 18, 24])
+     parser.add_argument("--feature", type=str, default="wind_speed")
+     args = parser.parse_args()
+     run_experiment(args.city, args.steps_ahead, args.feature)
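+
+ # Example sweep (sketch): every city/horizon pair for one feature, assuming
+ # the corresponding Denmark_data .mat files exist.
+ #
+ #     for city in ["Esbjerg", "Odense", "Roskilde"]:
+ #         for steps in [6, 12, 18, 24]:
+ #             run_experiment(city, steps_ahead=steps, feature="wind_speed")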