pdppo / code /Lot-sizing /generate_tables.py
leokana's picture
include experiment results and updated code
ff58990
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 6 16:30:32 2023
@author: leona
"""
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind
def get_max_rewards(experiment_names, sample_size, methods):
results = pd.DataFrame(columns=['Environment', 'Method', 'Max Reward', 'Standard Deviation'])
for experiment_name in experiment_names:
for method in methods:
env_name = experiment_name
max_rewards = []
for run_num in range(1, sample_size+1):
log_f_name = f'logs/{experiment_name}_{method}/{method}_{env_name}_log_{run_num}.csv'
data = pd.read_csv(log_f_name)
max_reward = data['reward'].max()
max_rewards.append(max_reward)
mean_max_reward = np.mean(max_rewards)
std_max_reward = np.std(max_rewards)
results = results.append({'Environment': env_name, 'Method': method, 'Max Reward': mean_max_reward, 'Standard Deviation': std_max_reward}, ignore_index=True)
return results
def get_first_rewards(experiment_names, sample_size, methods):
results = pd.DataFrame(columns=['Environment', 'Method', 'Max Reward', 'Standard Deviation'])
for experiment_name in experiment_names:
for method in methods:
env_name = experiment_name
max_rewards = []
for run_num in range(1, sample_size+1):
log_f_name = f'logs/{experiment_name}_{method}/{method}_{env_name}_log_{run_num}.csv'
data = pd.read_csv(log_f_name)
reward_500000 = data[data['timestep'] == 500000]['reward'].values
max_rewards.append(reward_500000)
mean_max_reward = np.mean(max_rewards)
std_max_reward = np.std(max_rewards)
results = results.append({'Environment': env_name, 'Method': method, 'First Reward': mean_max_reward, 'Standard Deviation': std_max_reward}, ignore_index=True)
return results
def get_learning_metrics(experiment_names, sample_size=10, methods = ['PDPPO', 'PPO']):
results = pd.DataFrame(columns=['Environment', 'Method', 'Max Reward', 'Standard Deviation MR', 'Cummulative Reward', 'Standard Deviation CR', 'Time to Threshold'])
for experiment_name in experiment_names:
for method in methods:
env_name = experiment_name
rewards_over_time = []
max_rewards = []
time_to_threshold = float('inf')
threshold = 200 # Define your threshold based on your domain knowledge
for run_num in range(1, sample_size + 1):
log_f_name = f'logs/{experiment_name}_{method}/{method}_{env_name}_log_{run_num}.csv'
data = pd.read_csv(log_f_name)
if not data.empty:
cum_rewards = data['reward'].cumsum() # AUC calculation
if 'timestep' in data.columns:
first_above_threshold = data[data['reward'] >= threshold]['timestep'].min()
if pd.notna(first_above_threshold):
time_to_threshold = min(time_to_threshold, first_above_threshold)
max_rewards.append(data['reward'].max())
rewards_over_time.append(cum_rewards.iloc[-1]) # using the last value as AUC
mean_max_reward = np.mean(max_rewards)
std_max_reward = np.std(max_rewards)
mean_auc = np.mean(rewards_over_time)
std_auc = np.std(rewards_over_time)
results = results.append({
'Environment': env_name,
'Method': method,
'Max Reward': mean_max_reward,
'Standard Deviation MR': std_max_reward,
'Cummulative Reward': mean_auc,
'Standard Deviation CR': std_auc,
'Time to Threshold': time_to_threshold if time_to_threshold != float('inf') else None,
}, ignore_index=True)
return results
def get_steps_reward_threshold(experiment_names, sample_size, methods):
reward_thresholds = [-1900, -5500, -3700]
results = pd.DataFrame(columns=['Environment', 'Method', 'Steps', 'Standard Deviation'])
for i, experiment_name in enumerate(experiment_names):
for j, method in enumerate(methods):
env_name = experiment_name
reward_steps = []
for run_num in range(1, sample_size+1):
log_f_name = f'logs/{experiment_name}_{method}/{method}_{env_name}_log_{run_num}.csv'
data = pd.read_csv(log_f_name)
reward_threshold_value = reward_thresholds[i]
reward_steps.append(data[data['reward'] >= reward_threshold_value]['timestep'].iloc[0])
mean_reward_steps = np.mean(reward_steps) if reward_steps else np.nan
std_reward_steps = np.std(reward_steps) if reward_steps else np.nan
results = results.append({'Environment': env_name, 'Method': method, 'Steps': mean_reward_steps, 'Standard Deviation': std_reward_steps}, ignore_index=True)
return results
def perform_t_tests(experiment_names, sample_size=10, metric='max', methods = ['PPO', 'PDPPO']):
results = []
for experiment_name in experiment_names:
data_collection = {method: [] for method in methods}
for method in methods:
for run_num in range(1, sample_size + 1):
env_name = experiment_name
log_f_name = f'logs/{experiment_name}_{method}/{method}_{env_name}_log_{run_num}.csv'
try:
data = pd.read_csv(log_f_name)
if 'reward' in data.columns:
if metric == 'max':
reward_value = data['reward'].max() # Focus on max reward
elif metric == 'cumulative':
reward_value = data['reward'].sum() # Focus on cumulative reward
else:
raise ValueError("Invalid metric type specified. Use 'max' or 'cumulative'.")
data_collection[method].append(reward_value)
except FileNotFoundError:
print(f"File not found: {log_f_name}")
except pd.errors.EmptyDataError:
print(f"No data in file: {log_f_name}")
# Perform t-tests between each pair of methods
for i in range(len(methods)):
for j in range(i + 1, len(methods)):
method1 = methods[i]
method2 = methods[j]
rewards1 = data_collection[method1]
rewards2 = data_collection[method2]
if rewards1 and rewards2: # Ensure there is data to compare
t_stat, p_value = ttest_ind(rewards1, rewards2, equal_var=False) # Using Welch's t-test for unequal variances
results.append((experiment_name, method1, method2, t_stat, p_value))
# Print results
for environment, method1, method2, t_stat, p_value in results:
print(f"Environment: {environment}, Comparison: {method1} vs {method2}")
print(f" T-Statistic: {t_stat:.3f}, P-Value: {p_value:.3f}\n")
if __name__ == '__main__':
#experiment_names = ['15items_5machines_t100_i100' , '20items_10machines_t100_i100' , '25items_10machines_t100_i100']
experiment_names = ['20items_10machines_t100_i100' , '25items_10machines_t100_i100', '25items_15machines_t100_i100']
#experiment_names = ['15items_5machines_t100_i100']
methods = ['PDPPO', 'PPO', 'PDPPO_one_critic']
iterations = 20
# print(get_max_rewards(experiment_names, sample_size = iterations, methods=methods))
# print(get_first_rewards(experiment_names, sample_size = iterations, methods=methods))
print('===========Learning Metrics============')
print(get_learning_metrics(experiment_names, iterations, methods=methods))
methods = ['PDPPO', 'PPO']
print('===========Maximums============')
perform_t_tests(experiment_names, sample_size=iterations, metric='max', methods=methods) # For maximum reward
print('===========Cummulatives============')
perform_t_tests(experiment_names, sample_size=iterations, metric='cumulative', methods=methods) # For cumulative rewards
#print(get_steps_reward_threshold())