"""Medium-length EQL experiment (10+10 epochs) to verify full pipeline.""" import os, sys, time import numpy as np from tqdm import tqdm from scipy.io import loadmat import tensorflow as tf import matplotlib matplotlib.use('Agg') from matplotlib import pyplot as plt from reproduce_eql import ( SymbolicNet, Constant, Identity, Square, Sin, Sigmoid, Product, count_double, l12_smooth, get_folders_started, record_base_info, append_text_to_summary, plot_train_vs_validation, plot_histogram, plot_descaled_real_vs_prediction, generate_all_data, generate_variable_list, save_weights, network ) if __name__ == "__main__": target_city = "Roskilde" steps_ahead = 6 feature = "wind_speed" target_cities = ["Esbjerg", "Odense", "Roskilde"] city_index = target_cities.index(target_city) data = loadmat('Denmark_data/wind_speed/step1.mat') y_min = data["y_min_tr"][0][city_index] y_max = data["y_max_tr"][0][city_index] config = { "use_rescaled_MSE": True, "a_L_0.5": 5e-3, "threshold_value": 7.5e-3, "lambda_reg": 3.0, "steps_ahead": 6, "feature": feature, "target_city": target_city, "epochs1": 10, "epochs2": 10, "use_phase2": True, "batch_size": 200, "phase1_lr": 1e-4, "phase2_lr": 1e-5, "eql_number_layers": 2, "optimizer": "rmsprop", } x_dim = 80 activation_funcs = [ *[Constant()] * 2, *[Identity()] * 4, *[Square()] * 4, *[Sin()] * 2, *[Sigmoid()] * 2, *[Product()] * 2 ] n_layers = 2 n_double = count_double(activation_funcs) width = len(activation_funcs) init_weights = [ tf.random.truncated_normal([x_dim, width + n_double], stddev=0.1), tf.random.truncated_normal([width, width + n_double], stddev=0.5), tf.random.truncated_normal([width, 1], stddev=1.0) ] model = SymbolicNet(n_layers, funcs=activation_funcs, initial_weights=init_weights) phase1_optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-4) experiment_number = get_folders_started() print("\n" + "*" * 10 + " Experiment {} ".format(experiment_number) + "*" * 10 + "\n") record_base_info(experiment_number, **config) x_train, y_train = generate_all_data("train", steps_ahead, feature, city_index) x_test, y_test = generate_all_data("test", steps_ahead, feature, city_index) train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(200) val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(200) # Phase 1 best_val_loss = float('inf') best_val_weights = None train_loss_results = [] valid_loss_results = [] for epoch in range(config["epochs1"]): epoch_loss_avg = tf.keras.metrics.MeanSquaredError() for xb, yb in tqdm(train_dataset, desc=f"P1 Epoch {epoch+1}", leave=False): with tf.GradientTape() as tape: yp = model(xb) err = tf.keras.losses.MeanSquaredError()(yb * (y_max - y_min) + y_min, yp * (y_max - y_min) + y_min) reg = l12_smooth(model.get_weights(), 5e-3) loss = err + 3.0 * reg grads = tape.gradient(loss, model.get_weights()) phase1_optimizer.apply_gradients(zip(grads, model.get_weights())) epoch_loss_avg.update_state(yb, yp) train_mse = epoch_loss_avg.result().numpy() val_mse = tf.keras.metrics.MeanSquaredError() for xv, yv in val_dataset: val_mse.update_state(yv, model(xv)) val_mse = val_mse.result().numpy() train_loss_results.append(train_mse) valid_loss_results.append(val_mse) if val_mse < best_val_loss: best_val_loss = val_mse best_val_weights = [w.numpy() for w in model.get_weights()] print(f"P1 Epoch {epoch+1}: val MSE improved to {val_mse:.4e} (train {train_mse:.4e})") else: print(f"P1 Epoch {epoch+1}: val MSE {val_mse:.4e} (train {train_mse:.4e})") save_weights(best_val_weights, experiment_number, "phase1", best_val_loss) append_text_to_summary(experiment_number, f"phase 1 best MSE validation: {best_val_loss}\n") plot_train_vs_validation(experiment_number, config["epochs1"], train_loss_results, valid_loss_results, "phase1") plot_histogram(experiment_number, best_val_weights[0], "phase1", "weights1", 5e-3) plot_histogram(experiment_number, best_val_weights[1], "phase1", "weights2", 5e-3) plot_histogram(experiment_number, best_val_weights[2], "phase1", "weights3", 5e-3) # Phase 2 masked_weights = [] for w_i in best_val_weights: mask = tf.cast(tf.constant(tf.abs(w_i) > 7.5e-3), tf.float32) masked_weights.append(tf.multiply(w_i, mask)) model2 = SymbolicNet(n_layers, funcs=activation_funcs, initial_weights=masked_weights) opt2 = tf.keras.optimizers.RMSprop(learning_rate=1e-5) train_loss_results2 = [] valid_loss_results2 = [] best_val_loss2 = float('inf') best_val_weights2 = None for epoch in range(config["epochs2"]): epoch_loss_avg = tf.keras.metrics.MeanSquaredError() for xb, yb in tqdm(train_dataset, desc=f"P2 Epoch {epoch+1}", leave=False): with tf.GradientTape() as tape: yp = model2(xb) err = tf.keras.losses.MeanSquaredError()(yb * (y_max - y_min) + y_min, yp * (y_max - y_min) + y_min) grads = tape.gradient(err, model2.get_weights()) opt2.apply_gradients(zip(grads, model2.get_weights())) epoch_loss_avg.update_state(yb, yp) train_mse = epoch_loss_avg.result().numpy() val_mse = tf.keras.metrics.MeanSquaredError() for xv, yv in val_dataset: val_mse.update_state(yv, model2(xv)) val_mse = val_mse.result().numpy() train_loss_results2.append(train_mse) valid_loss_results2.append(val_mse) if val_mse < best_val_loss2: best_val_loss2 = val_mse best_val_weights2 = [w.numpy() for w in model2.get_weights()] print(f"P2 Epoch {epoch+1}: val MSE improved to {val_mse:.4e} (train {train_mse:.4e})") else: print(f"P2 Epoch {epoch+1}: val MSE {val_mse:.4e} (train {train_mse:.4e})") save_weights(best_val_weights2, experiment_number, "phase2", best_val_loss2) plot_train_vs_validation(experiment_number, config["epochs2"], train_loss_results2, valid_loss_results2, "phase2") expr = network(best_val_weights2, activation_funcs, generate_variable_list(26, 80)[:80], threshold=0) append_text_to_summary(experiment_number, f"Formula after phase2: {expr}\n") print(f"\nExtracted formula: {expr}") best_model = SymbolicNet(n_layers, funcs=activation_funcs, initial_weights=[tf.constant(w) for w in best_val_weights2]) yp = best_model(x_test) mae = plot_descaled_real_vs_prediction(experiment_number, y_test, yp, y_min, y_max) append_text_to_summary(experiment_number, f"MAE: {mae}\n") print(f"\nMAE: {mae}") print("\nMedium experiment complete!")