| |
| |
| import os |
| import gc |
| import sys |
| import json |
| import random |
| import gym |
| import torch |
|
|
| BASE_DIR = os.path.dirname(os.path.abspath('__file__')) |
| AGENTS_DIR = os.path.join(BASE_DIR,'agents') |
| sys.path.append(AGENTS_DIR) |
|
|
| from agents.PDPPOAgent import PDPPOAgent |
| from agents.PPOAgent import PPOAgent |
|
|
| import numpy as np |
| from agents import * |
|
|
| |
|
|
def train_and_reload_best(agent_cls, model_name, env, settings, n_episodes,
                          experiment_name):
    """Train one agent on ``env`` and reload its best saved checkpoint.

    Args:
        agent_cls: Agent class, called as ``agent_cls(env, settings)``.
        model_name: Label used in the checkpoint directory name
            (``logs/best_<model_name>_<experiment_name>/best_model``).
        env: Environment handed to the agent constructor.
        settings: Settings dict handed to the agent constructor.
        n_episodes: Forwarded as ``agent.learn(n_episodes=...)``.
        experiment_name: Experiment label used in the checkpoint directory
            name.

    Returns:
        The agent instance after ``learn`` and ``load_agent`` were called.
    """
    agent = agent_cls(env, settings)
    agent.learn(n_episodes=n_episodes)

    # NOTE(review): '__file__' is a quoted string, so abspath() resolves it
    # against the current working directory, not this script's location.
    # Kept unchanged; presumably the agents write their logs relative to the
    # same base -- confirm before switching to the real __file__.
    best_model_dir = os.path.join(
        os.path.dirname(os.path.abspath('__file__')),
        'logs',
        f'best_{model_name}_{experiment_name}',
        'best_model',
    )
    agent.load_agent(best_model_dir)
    return agent


if __name__ == '__main__':
    # Imported once here instead of on every pass of the seed loop.
    from gym.envs.toy_text.frozen_lake import generate_random_map

    experiment_name = 'frozen_lake'
    training_epochs_RL = 200000

    # One full PPO + PDPPO training run per seed.
    for i in range(27, 32):
        # Seed every RNG in use before the random map is generated.
        np.random.seed(i)
        random.seed(i)
        torch.manual_seed(i)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(i)
            torch.cuda.manual_seed_all(i)

        # Fresh random 10x10 slippery map for this seed.
        env = gym.make(
            'FrozenLake-v1',
            desc=generate_random_map(size=10),
            is_slippery=True,
        )

        # Both agents receive this same dict object.
        # NOTE(review): 'model_name' stays 'PPO' for the PDPPO run as well --
        # confirm whether PDPPOAgent reads this key.
        setting_sol_method = {
            'discount_rate': 0.99,
            'experiment_name': experiment_name,
            'parallelization': False,
            'model_name': 'PPO',
            'branching_factors': [4, 2, 2],
            'dict_obs': False,
            'regressor_name': 'plain_matrix_I2xM1',
            'run': i,
        }

        try:
            # PPO is trained first, then PDPPO, on the same environment.
            ppo_agent = train_and_reload_best(
                PPOAgent, 'PPO', env, setting_sol_method,
                training_epochs_RL, experiment_name,
            )
            pdppo_agent = train_and_reload_best(
                PDPPOAgent, 'PDPPO', env, setting_sol_method,
                training_epochs_RL, experiment_name,
            )
        finally:
            # Release environment resources even if training fails.
            env.close()

        del env
        gc.collect()
|
|