leokana commited on
Commit
a241478
·
1 Parent(s): 55e578d

first commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +148 -2
  2. code/Lake application/__pycache__/PDPPO.cpython-38.pyc +0 -0
  3. code/Lake application/__pycache__/PPO.cpython-38.pyc +0 -0
  4. code/Lake application/agents/PDPPO one critic.py +321 -0
  5. code/Lake application/agents/PDPPO two critics.py +345 -0
  6. code/Lake application/agents/PDPPO.py +301 -0
  7. code/Lake application/agents/PDPPOAgent two critics.py +394 -0
  8. code/Lake application/agents/PDPPOAgent.py +402 -0
  9. code/Lake application/agents/PDPPO_two_actors.py +353 -0
  10. code/Lake application/agents/PDPPO_two_critics_two_actors.py +377 -0
  11. code/Lake application/agents/PDPPO_v0.py +328 -0
  12. code/Lake application/agents/PPO.py +248 -0
  13. code/Lake application/agents/__init__.py +8 -0
  14. code/Lake application/envs/frozen_lake.py +301 -0
  15. code/Lake application/experiments.py +117 -0
  16. code/Lake application/generate_tables.py +78 -0
  17. code/Lake application/logs/.gitkeep +0 -0
  18. code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_0_0.pth +3 -0
  19. code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_1.csv +501 -0
  20. code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_2.csv +501 -0
  21. code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_3.csv +501 -0
  22. code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_4.csv +501 -0
  23. code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_5.csv +501 -0
  24. code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_0_0.pth +3 -0
  25. code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_1.csv +501 -0
  26. code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_2.csv +501 -0
  27. code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_3.csv +501 -0
  28. code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_4.csv +501 -0
  29. code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_5.csv +501 -0
  30. code/Lake application/logs/results_1/PDPPO_frozen_lake_log_1.csv +501 -0
  31. code/Lake application/logs/results_1/PDPPO_frozen_lake_log_2.csv +501 -0
  32. code/Lake application/logs/results_1/PDPPO_frozen_lake_log_3.csv +501 -0
  33. code/Lake application/logs/results_1/PDPPO_frozen_lake_log_4.csv +501 -0
  34. code/Lake application/logs/results_1/PDPPO_frozen_lake_log_5.csv +501 -0
  35. code/Lake application/logs/results_1/PPO_frozen_lake_log_1.csv +501 -0
  36. code/Lake application/logs/results_1/PPO_frozen_lake_log_2.csv +501 -0
  37. code/Lake application/logs/results_1/PPO_frozen_lake_log_3.csv +501 -0
  38. code/Lake application/logs/results_1/PPO_frozen_lake_log_4.csv +501 -0
  39. code/Lake application/logs/results_1/PPO_frozen_lake_log_5.csv +501 -0
  40. code/Lake application/logs/results_2/PDPPO_frozen_lake_log_1.csv +501 -0
  41. code/Lake application/logs/results_2/PDPPO_frozen_lake_log_2.csv +501 -0
  42. code/Lake application/logs/results_2/PDPPO_frozen_lake_log_3.csv +501 -0
  43. code/Lake application/logs/results_2/PDPPO_frozen_lake_log_4.csv +501 -0
  44. code/Lake application/logs/results_2/PDPPO_frozen_lake_log_5.csv +501 -0
  45. code/Lake application/logs/results_2/PPO_frozen_lake_log_1.csv +501 -0
  46. code/Lake application/logs/results_2/PPO_frozen_lake_log_2.csv +501 -0
  47. code/Lake application/logs/results_2/PPO_frozen_lake_log_3.csv +501 -0
  48. code/Lake application/logs/results_2/PPO_frozen_lake_log_4.csv +501 -0
  49. code/Lake application/logs/results_2/PPO_frozen_lake_log_5.csv +501 -0
  50. code/Lake application/plot_figure.py +157 -0
README.md CHANGED
@@ -1,2 +1,148 @@
1
- # pdppo
2
- Post-Decision Proximal Policy Optimization with Dual Critic Networks for Accelerated Learning
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reinforcement Learning for Stochastic Discrete Lot-Sizing Problem and Frozen-Lake game
2
+
3
+ This repository contains code and resources from the research developed at University of São Paulo (USP) and Politecnico di Torino (Polito) on using reinforcement learning, particularly Post-Decision Proximal Policy Optimization (PDPPO), for the Stochastic Discrete Lot-Sizing Problem and a Frozen-Lake game.
4
+
5
+ ## Project Structure
6
+ This repository consists of two main directories: `Lot-sizing` and `Lake application`, each containing the related files and folders.
7
+
8
+ ### Lot-Sizing
9
+
10
+ `Lot-sizing` directory holds the following subdirectories:
11
+
12
+ - **agents**: Holds various versions of PDPPO agent implementations and utility functions.
13
+
14
+ - **cfg_env**: Includes environment settings and configuration files for the project in JSON format. Additionally, `generate_setting.py` is used for generating new environment settings.
15
+
16
+ - **cfg_sol**: Stores the solution settings in `sol_setting.json`.
17
+
18
+ - **envs**: Contains different environment definitions for the problem, like `simplePlant.py` and `singleSequenceDependentMachinePlant.py`.
19
+
20
+ - **logs**: Keeps the log files for the model training and evaluation.
21
+
22
+ - **models**: Stores various optimization models.
23
+
24
+ - **results**: After executing the experiments, the results are saved in this directory.
25
+
26
+ - **scenarioManager**: Manages different scenario setups.
27
+
28
+ - **test_functions**: Stores functions to validate the models and generate plots and tables.
29
+
30
+ ### Lake Application
31
+
32
+ `Lake application` directory holds the following subdirectories:
33
+
34
+ - **agents**: Contains various versions of PDPPO agent implementations for the Lake problem.
35
+
36
+ - **envs**: Contains environment definitions, like `frozen_lake.py`.
37
+
38
+ - **logs**: Contains the log files and results from the model training and evaluation for different scenarios.
39
+
40
+ - **results**: Stores the output from experiments and relevant figures.
41
+
42
+ - Root level scripts `experiments.py`, `generate_tables.py` and `plot_figure.py` are used for running experiments, generating output tables and plotting results respectively.
43
+
44
+
45
+ ## Repository structure:
46
+
47
+ The main components of the repository are as follows:
48
+
49
+ ```graphql
50
+
51
+ ├───Lake application
52
+ │ ├───agents # contains the implementations of various agents
53
+ │ ├───envs # contains the FrozenLake environment implementation
54
+ │ ├───logs # contains the logs of the agent's performance
55
+ │ │ ├───frozen_lake_PDPPO
56
+ │ │ ├───frozen_lake_PPO
57
+ │ │ ├───results_1
58
+ │ │ └───results_2
59
+ │ └───results # contains the results of the agent's performance
60
+ │ └───frozen_lake_PPO
61
+ └───Lot-sizing
62
+ ├───.vscode
63
+ ├───agents # contains the implementations of various agents
64
+ │ ├───utils # utility functions for the agents
65
+ │ │ └───__pycache__
66
+ │ └───__pycache__
67
+ ├───cfg_env # contains the settings for the Lot-sizing environment
68
+ │ └───setting file
69
+ ├───cfg_sol
70
+ ├───envs # contains the Lot-sizing environment implementation
71
+ ├───logs # contains the logs of the agent's performance
72
+ ├───models # contains the models for the optimization problems
73
+ ├───results # contains the results of the agent's performance
74
+ ├───scenarioManager # manages different scenarios for the Lot-sizing environment
75
+ └───test_functions # contains test functions for the Lot-sizing environment
76
+
77
+ ```
78
+
79
+ ## Requirements:
80
+
81
+ This project uses the following main dependencies:
82
+
83
+ - [Python 3.8](https://www.python.org/downloads/)
84
+ - [numpy](https://numpy.org/)
85
+ - [gym](https://gym.openai.com/)
86
+ - [matplotlib](https://matplotlib.org/)
87
+ - [torch](https://pytorch.org/)
88
+ - gurobipy (not included in `requirements.txt` due to separate licensing)
89
+
90
+
91
+ ## How to Reproduce
92
+
93
+ 1. Clone the repository:
94
+
95
+ ```
96
+ git clone https://github.com/username/repository.git
97
+
98
+ ```
99
+
100
+ 2. Navigate into the project directory:
101
+
102
+ ```
103
+ cd repository
104
+
105
+ ```
106
+
107
+ Install the required Python packages. This project was developed with Python 3.8. Substitute requirements.txt with your actual requirements file:
108
+
109
+ ```
110
+ pip install -r requirements.txt
111
+ ```
112
+
113
+ NOTE: You might need to replace the Frozen-Lake environment file in your Python environment's installation path with the `frozen_lake.py` provided in this repository (`code/Lake application/envs/frozen_lake.py`) for the Lake application to work properly.
114
+
115
+
116
+ Run the experiments:
117
+
118
+ ```
119
+ python ./code/Lot-sizing/experiments.py
120
+ python "./code/Lake application/experiments.py"
121
+ ```
122
+
123
+ Generate the tables:
124
+
125
+
126
+ ```
127
+ python ./code/Lot-sizing/generate_tables.py
128
+ python "./code/Lake application/generate_tables.py"
129
+ ```
130
+
131
+ Plot the figures:
132
+
133
+ ```
134
+ python ./code/Lot-sizing/plot_figure.py
135
+ python "./code/Lake application/plot_figure.py"
136
+ ```
137
+
138
+
139
+ You can find the results of the experiments in the results directories in both Lot-sizing and Lake application directories.
140
+
141
+
142
+ ## Reproducing Results
143
+ To reproduce the results in the logs and results folders, you would need to run the experiments with the same hyperparameters and seeds.
144
+
145
+ Please note that due to the stochastic nature of the environments and training process, the results might not be identical, but they should be within a similar range.
146
+
147
+ ## Contact
148
+ For any additional questions, you can reach me at email@example.com
code/Lake application/__pycache__/PDPPO.cpython-38.pyc ADDED
Binary file (7.56 kB). View file
 
code/Lake application/__pycache__/PPO.cpython-38.pyc ADDED
Binary file (7.19 kB). View file
 
code/Lake application/agents/PDPPO one critic.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 1 00:43:49 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.init as init
12
+ from torch.distributions import MultivariateNormal
13
+ from torch.distributions import Categorical
14
+
15
################################## set device ##################################
# Choose the torch device once, at import time: the first CUDA GPU when CUDA is
# available, otherwise the CPU. The choice is reported on stdout between two rulers.
print("============================================================================================")
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    # release cached GPU memory before anything is allocated by this module
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
26
+
27
+
28
+ ################################## PDPPO Policy ##################################
29
class RolloutBuffer:
    """Plain-list storage for the transitions gathered between two policy updates.

    Each attribute is an independent Python list; the buffer itself does not
    check that the lists have equal length.
    """

    # Names of the lists owned by the buffer, in creation order.
    _FIELDS = (
        "actions",
        "states",
        "post_states",
        "logprobs",
        "rewards",
        "state_values",
        "state_values_post",
        "is_terminals",
    )

    def __init__(self):
        for field in self._FIELDS:
            setattr(self, field, [])

    def clear(self):
        """Empty every list in place, so outside references to the lists stay valid."""
        for field in self._FIELDS:
            del getattr(self, field)[:]
49
+
50
+
51
class ActorCritic(nn.Module):
    """Actor network plus a single critic, for continuous or multi-discrete actions.

    Continuous mode: ``actor`` is a tanh MLP producing the mean of a Gaussian
    whose diagonal covariance is the fixed vector ``action_var``.

    Discrete mode: ``fc1`` -> ``fc2`` -> ``actor`` produce one flat logit
    vector of length ``action_dim.nvec.sum()``. It is softmaxed and reshaped
    into ``len(action_dim.nvec)`` rows of equal width, one categorical
    distribution per row.

    ``critic`` maps a state-sized vector to a scalar value; ``evaluate`` feeds
    it the post-decision state.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        """Build the networks.

        Args:
            state_dim: size of the state vector.
            action_dim: number of action components (continuous mode), or an
                object exposing ``nvec`` (discrete mode) -- presumably a gym
                ``MultiDiscrete`` space; confirm against the caller.
            has_continuous_action_space: selects the actor architecture.
            action_std_init: initial standard deviation of the Gaussian policy
                (read in continuous mode only).
        """
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        if has_continuous_action_space:
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            # actor
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh(),
            )
        else:
            # actor: two hidden layers followed by one logit per (sub-action, choice)
            self.fc1 = nn.Linear(state_dim, 128)
            self.fc2 = nn.Linear(128, 128)
            self.actor = nn.Linear(128, self.action_dim.nvec.sum())

        # critic
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1),
        )

    def forward(self, state):
        """Not used; call ``act`` or ``evaluate`` instead."""
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the Gaussian variance to ``new_action_std ** 2`` (continuous mode only)."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def _action_probs(self, state):
        """Return the softmax over the flat logit vector of the discrete actor."""
        hidden = torch.relu(self.fc1(state))
        hidden = torch.relu(self.fc2(hidden))
        return torch.softmax(self.actor(hidden), dim=-1)

    def act(self, state, tau):
        """Sample an action for one state.

        ``tau`` is accepted for interface compatibility and is not used here.

        Returns:
            ``(action, log_prob)``, both detached from the graph.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            covariance = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(mean, covariance)
        else:
            probs = self._action_probs(state)
            # one row per sub-action; Categorical renormalises each row
            dist = Categorical(probs.view(len(self.action_dim.nvec), -1))

        action = dist.sample()
        return action.detach(), dist.log_prob(action).detach()

    def evaluate(self, state, post_state, action, tau):
        """Score a batch of stored actions under the current parameters.

        ``tau`` is accepted for interface compatibility and is not used here.

        Returns:
            ``(action_logprobs, state_values, dist_entropy)`` where
            ``state_values`` is the critic output for ``post_state``.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            covariance = torch.diag_embed(self.action_var.expand_as(mean)).to(device)
            dist = MultivariateNormal(mean, covariance)

            # For Single Action Environments.
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            probs = self._action_probs(state)
            dist = Categorical(probs.view(state.shape[0], len(self.action_dim.nvec), -1))

        return dist.log_prob(action), self.critic(post_state), dist.entropy()
145
+
146
+
147
class PDPPO:
    """Post-Decision PPO agent with one critic evaluated on post-decision states.

    Two copies of the network are kept: ``policy`` is trained by ``update``
    and ``policy_old`` is used by ``select_action`` to collect data; the
    weights are synchronised at the end of every ``update``.

    For discrete actions the post-decision state is built by
    ``get_post_state``, which applies the setup change and production of the
    chosen action to the machine-setup and inventory parts of the state.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):
        """Create the networks, optimizer and rollout buffer.

        Args:
            state_dim: size of the state vector.
            action_dim: forwarded to ``ActorCritic`` (int for continuous
                actions, object with ``nvec`` for discrete ones).
            lr_actor: learning rate of the actor parameters.
            lr_critic: learning rate of the critic parameters.
            gamma: discount factor used for the Monte Carlo returns.
            K_epochs: number of optimisation passes per ``update`` call.
            eps_clip: PPO clipping range for the probability ratio.
            env: environment object; ``n_items``, ``n_machines``,
                ``setup_costs``, ``setup_loss`` and
                ``machine_production_matrix`` are read from it.
            has_continuous_action_space: selects Gaussian vs categorical policy.
            tau: stored and forwarded to ``ActorCritic.evaluate``.
            action_std_init: initial std of the Gaussian policy.
        """
        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.tau = tau
        self.env = env
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)

        # In discrete mode ``policy.actor`` is only the output layer; the hidden
        # layers live in ``fc1``/``fc2`` and must be handed to the optimizer too,
        # otherwise they keep their random initial weights forever.
        actor_params = list(self.policy.actor.parameters())
        if not has_continuous_action_space:
            actor_params = (
                list(self.policy.fc1.parameters())
                + list(self.policy.fc2.parameters())
                + actor_params
            )

        self.optimizer = torch.optim.Adam([
            {'params': actor_params, 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
        ], weight_decay=0.001)

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the Gaussian std on both networks (continuous mode only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the Gaussian std by ``action_std_decay_rate``, never below ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            # rounding keeps repeated subtraction from accumulating float noise
            self.action_std = round(self.action_std, 4)
            if self.action_std <= min_action_std:
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply the setup/production effect of ``action`` to the given state parts.

        ``machine_setup`` and ``inventory_level`` are modified IN PLACE and
        also returned; pass copies if the originals must be kept.

        Args:
            action: one entry per machine; 0 means idle, ``k > 0`` means
                produce item ``k - 1``.
            machine_setup: current setup of every machine.
            inventory_level: current inventory of every item.

        Returns:
            ``(machine_setup, inventory_level, setup_costs)`` where
            ``setup_costs`` is a float array with one entry per machine.
        """
        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        for m in range(self.env.n_machines):
            # plain int so the value can index lists, numpy arrays and tensors
            # regardless of the device the action tensor lives on
            item = int(action[m])
            if item == 0:  # idle machine
                machine_setup[m] = 0
                continue
            # 1. IF NEEDED CHANGE SETUP
            if machine_setup[m] != item:
                setup_costs[m] = self.env.setup_costs[m][item - 1]
                setup_loss[m] = self.env.setup_loss[m][item - 1]
            machine_setup[m] = item
            # 2. PRODUCTION (reduced by what the setup change consumed)
            production = self.env.machine_production_matrix[m][item - 1] - setup_loss[m]
            inventory_level[item - 1] += production
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state, tau):
        """Sample an action with ``policy_old`` and record the step in the buffer.

        Stores state, post-decision state, action, log-probability and the
        critic value of the post-decision state. Rewards and terminal flags
        are expected to be appended to the buffer by the caller.

        Returns:
            The action as a numpy array.
        """
        if self.has_continuous_action_space:
            with torch.no_grad():
                state = torch.FloatTensor(state).to(device)
                # act() returns (action, logprob) only; the value comes from the critic
                action, action_logprob = self.policy_old.act(state, tau)
                state_val = self.policy_old.critic(state)

            self.buffer.states.append(state)
            # NOTE(review): get_post_state only models discrete setup actions, so the
            # pre-decision state is stored as the post-state here to keep update()
            # working in continuous mode -- confirm this is the intended fallback.
            self.buffer.post_states.append(state)
            self.buffer.actions.append(action)
            self.buffer.logprobs.append(action_logprob)
            self.buffer.state_values.append(state_val)

            return action.detach().cpu().numpy().flatten()

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state, tau)

        # state layout used here: [inventory (n_items) | machine setup (n_machines) | ...]
        n_items = self.env.n_items
        setup_slice = slice(n_items, n_items + self.env.n_machines)
        machine_setup, inventory_level, _ = self.get_post_state(
            action, state[setup_slice].clone(), state[:n_items].clone())

        post_state = state.clone()
        post_state[setup_slice] = machine_setup
        post_state[:n_items] = inventory_level

        self.buffer.states.append(state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(post_state)

        self.buffer.state_values.append(state_val)

        # .cpu() keeps this working when the policy runs on a GPU
        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` clipped-PPO steps on the buffered rollout, then clear it."""
        # Monte Carlo estimate of returns, accumulated backwards through the rollout
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()

        # Normalizing the rewards
        rewards = torch.tensor(returns, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)

        # calculate advantages
        advantages = rewards.detach() - old_state_values.detach()

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_post_states, old_actions, self.tau)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # One advantage per step: add a trailing axis only when the ratios carry
            # one entry per sub-action, otherwise (N,) * (N, 1) would broadcast to (N, N).
            step_advantages = advantages.unsqueeze(1) if ratios.dim() > 1 else advantages

            # Finding Surrogate Loss
            surr1 = ratios * step_advantages
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * step_advantages

            # final loss of clipped objective PDPPO
            loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(state_values, rewards) - 0.012 * dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the weights of ``policy_old`` to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both networks (tensors mapped to CPU storage)."""
        # read the file once; load_state_dict copies values, so sharing the dict is safe
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
code/Lake application/agents/PDPPO two critics.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 1 00:43:49 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.init as init
12
+ from torch.distributions import MultivariateNormal
13
+ from torch.distributions import Categorical
14
+
15
################################## set device ##################################
# The module-wide torch device is fixed at import time: 'cuda:0' when CUDA is
# available, 'cpu' otherwise. The result is printed between two separator lines.
print("============================================================================================")
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
print("============================================================================================")
26
+
27
+
28
+ ################################## PDPPO Policy ##################################
29
class RolloutBuffer:
    """Holds the per-step data of one rollout as eight separate Python lists."""

    def __init__(self):
        self.actions, self.states, self.post_states = [], [], []
        self.logprobs, self.rewards = [], []
        self.state_values, self.state_values_post = [], []
        self.is_terminals = []

    def clear(self):
        """Remove all stored entries; the list objects themselves are kept."""
        storages = (
            self.actions,
            self.states,
            self.post_states,
            self.logprobs,
            self.rewards,
            self.state_values,
            self.state_values_post,
            self.is_terminals,
        )
        for storage in storages:
            storage.clear()
49
+
50
+
51
class ActorCritic(nn.Module):
    """Actor network with two critics: one for states, one for post-decision states.

    ``critic`` scores the pre-decision state and ``critic_post`` the
    post-decision state; both are returned by ``evaluate``. The actor is a
    Gaussian tanh MLP in continuous mode, or ``fc1`` -> ``fc2`` -> ``actor``
    producing a flat logit vector of length ``action_dim.nvec.sum()`` in
    discrete mode.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        # actor
        if not has_continuous_action_space:
            # action_dim must expose ``nvec`` (presumably a gym MultiDiscrete space -- confirm)
            self.fc1 = nn.Linear(state_dim, 128)
            self.fc2 = nn.Linear(128, 128)
            self.actor = nn.Linear(128, self.action_dim.nvec.sum())
        else:
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh()
            )

        # critics (identical architecture, independent weights)
        self.critic = self._value_net(state_dim)
        self.critic_post = self._value_net(state_dim)

    @staticmethod
    def _value_net(state_dim):
        """Return a fresh state -> scalar tanh MLP with two hidden layers of 128 units."""
        return nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Replace the Gaussian variance by ``new_action_std ** 2``; warns in discrete mode."""
        if self.has_continuous_action_space:
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
            return
        print("--------------------------------------------------------------------------------------------")
        print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def _distribution(self, state, batched):
        """Build the action distribution for one state (``batched=False``) or a batch."""
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            if batched:
                cov_mat = torch.diag_embed(self.action_var.expand_as(action_mean)).to(device)
            else:
                cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
            return MultivariateNormal(action_mean, cov_mat)

        features = nn.functional.relu(self.fc1(state))
        features = nn.functional.relu(self.fc2(features))
        action_probs = nn.functional.softmax(self.actor(features), dim=-1)
        # one row of probabilities per sub-action; rows are assumed equally wide
        n_heads = len(self.action_dim.nvec)
        if batched:
            return Categorical(action_probs.view(state.shape[0], n_heads, -1))
        return Categorical(action_probs.view(n_heads, -1))

    def act(self, state, tau):
        """Sample an action for a single state; ``tau`` is unused.

        Returns:
            ``(action, action_logprob)``, both detached.
        """
        dist = self._distribution(state, batched=False)
        action = dist.sample()
        action_logprob = dist.log_prob(action)
        return action.detach(), action_logprob.detach()

    def evaluate(self, state, post_state, action, tau):
        """Evaluate stored actions on a batch; ``tau`` is unused.

        Returns:
            ``(action_logprobs, state_values, state_values_post, dist_entropy)``.
        """
        dist = self._distribution(state, batched=True)

        # For Single Action Environments.
        if self.has_continuous_action_space and self.action_dim == 1:
            action = action.reshape(-1, self.action_dim)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values = self.critic(state)
        state_values_post = self.critic_post(post_state)

        return action_logprobs, state_values, state_values_post, dist_entropy
165
+
166
+
167
+ class PDPPO:
168
def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):
    """Create the two-critic networks, the optimizer and the rollout buffer.

    Args:
        state_dim: size of the state vector.
        action_dim: forwarded to ``ActorCritic`` (int for continuous
            actions, object with ``nvec`` for discrete ones).
        lr_actor: learning rate of the actor parameters.
        lr_critic: learning rate shared by ``critic`` and ``critic_post``.
        gamma: discount factor (stored as ``self.gamma``).
        K_epochs: stored as ``self.K_epochs``.
        eps_clip: stored as ``self.eps_clip``.
        env: environment object (stored as ``self.env``).
        has_continuous_action_space: selects Gaussian vs categorical policy.
        tau: stored as ``self.tau``.
        action_std_init: initial std of the Gaussian policy.
    """
    self.has_continuous_action_space = has_continuous_action_space

    if has_continuous_action_space:
        self.action_std = action_std_init

    self.tau = tau
    self.env = env
    self.gamma = gamma
    self.eps_clip = eps_clip
    self.K_epochs = K_epochs

    self.buffer = RolloutBuffer()

    self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)

    # In discrete mode ``policy.actor`` is only the output layer; the hidden
    # layers ``fc1``/``fc2`` are separate attributes and have to be registered
    # with the optimizer as well, otherwise they are never updated.
    actor_params = list(self.policy.actor.parameters())
    if not has_continuous_action_space:
        actor_params = (
            list(self.policy.fc1.parameters())
            + list(self.policy.fc2.parameters())
            + actor_params
        )

    self.optimizer = torch.optim.Adam([
        {'params': actor_params, 'lr': lr_actor},
        {'params': self.policy.critic.parameters(), 'lr': lr_critic},
        {'params': self.policy.critic_post.parameters(), 'lr': lr_critic}
    ], weight_decay=0.001)

    # data-collection copy of the network, starting from identical weights
    self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
    self.policy_old.load_state_dict(self.policy.state_dict())

    self.MseLoss = nn.MSELoss()
194
+
195
def set_action_std(self, new_action_std):
    """Store ``new_action_std`` and push it to both networks (continuous mode only).

    In discrete mode nothing is changed and a warning is printed.
    """
    if not self.has_continuous_action_space:
        print("--------------------------------------------------------------------------------------------")
        print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")
        return

    self.action_std = new_action_std
    for network in (self.policy, self.policy_old):
        network.set_action_std(new_action_std)
204
+
205
def decay_action_std(self, action_std_decay_rate, min_action_std):
    """Subtract ``action_std_decay_rate`` from the action std, floored at ``min_action_std``.

    The new value is rounded to 4 decimals, printed, and applied through
    ``self.set_action_std``. In discrete mode only a warning is printed.
    """
    print("--------------------------------------------------------------------------------------------")
    if not self.has_continuous_action_space:
        print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
    else:
        decayed = round(self.action_std - action_std_decay_rate, 4)
        if decayed <= min_action_std:
            self.action_std = min_action_std
            print("setting actor output action_std to min_action_std : ", self.action_std)
        else:
            self.action_std = decayed
            print("setting actor output action_std to : ", self.action_std)
        self.set_action_std(self.action_std)
    print("--------------------------------------------------------------------------------------------")
220
+
221
def get_post_state(self, action, machine_setup, inventory_level):
    """Apply the deterministic effect of ``action`` to setup and inventory.

    For every machine ``m`` of ``self.env``:

    * ``action[m] == 0`` marks the machine idle (its setup becomes 0);
    * otherwise the machine produces item ``action[m] - 1``. If that differs
      from its current setup, the setup cost and setup loss are read from
      ``self.env.setup_costs`` / ``self.env.setup_loss`` and the production
      taken from ``self.env.machine_production_matrix`` is reduced by the
      setup loss before being added to the inventory.

    ``machine_setup`` and ``inventory_level`` are modified in place and also
    returned, so callers that need the originals must pass copies.

    Returns
    -------
    (machine_setup, inventory_level, setup_costs)
    """
    env = self.env
    n_machines = env.n_machines
    setup_loss = np.zeros(n_machines, dtype=int)
    setup_costs = np.zeros(n_machines)

    for m in range(n_machines):
        if action[m] == 0:
            # Idle machine: clear its setup, nothing is produced.
            machine_setup[m] = 0
            continue

        item = action[m] - 1
        # 1. Change the setup when the requested item differs from the current one.
        if machine_setup[m] != action[m]:
            setup_costs[m] = env.setup_costs[m][item]
            setup_loss[m] = env.setup_loss[m][item]
            machine_setup[m] = action[m]
        # 2. Production, net of whatever was lost to the setup change.
        produced = env.machine_production_matrix[m][item] - setup_loss[m]
        inventory_level[item] += produced

    return machine_setup, inventory_level, setup_costs
239
+
240
def select_action(self, state, tau):
    """Sample an action from ``policy_old`` and record the step in the buffer.

    Parameters
    ----------
    state : observation accepted by ``torch.FloatTensor``. In the discrete
        branch the first ``env.n_items`` entries are read as inventory levels
        and the next ``env.n_machines`` entries as machine setups.
    tau : forwarded unchanged to ``policy_old.act``.

    Returns
    -------
    numpy array holding the sampled action (flattened in the continuous
    branch).

    Side effects
    ------------
    Appends state, action and log-probability to ``self.buffer``; the
    continuous branch also appends the state value returned by ``act``. The
    discrete branch appends the post-decision state and the outputs of
    ``critic`` (state) and ``critic_post`` (post-decision state).
    """
    if self.has_continuous_action_space:
        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob, state_val = self.policy_old.act(state, tau)

        self.buffer.states.append(state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)
        self.buffer.state_values.append(state_val)

        return action.detach().cpu().numpy().flatten()

    with torch.no_grad():
        state = torch.FloatTensor(state).to(device)
        action, action_logprob = self.policy_old.act(state, tau)

    n_items = self.env.n_items
    n_machines = self.env.n_machines

    # get_post_state mutates its arguments, so hand it clones of the slices.
    machine_setup, inventory_level, _ = self.get_post_state(
        action,
        state[n_items:n_items + n_machines].clone(),
        state[0:n_items].clone(),
    )

    post_state = state.clone()
    post_state[n_items:n_items + n_machines] = machine_setup.clone()
    post_state[0:n_items] = inventory_level.clone()
    # The clone already lives on `device`; cast in place of re-wrapping it with
    # torch.FloatTensor(...), which builds a CPU tensor type.
    post_state = post_state.float().to(device)

    self.buffer.states.append(state)
    self.buffer.post_states.append(post_state)
    self.buffer.actions.append(action)
    self.buffer.logprobs.append(action_logprob)

    with torch.no_grad():
        state_val = self.policy_old.critic(state)
        state_val_post = self.policy_old.critic_post(post_state)

    self.buffer.state_values.append(state_val)
    self.buffer.state_values_post.append(state_val_post)

    # .cpu() is required before .numpy() when the tensor is on a CUDA device.
    return action.cpu().numpy()
280
+
281
def update(self):
    """Run ``K_epochs`` clipped-surrogate optimisation steps on the buffer.

    Steps:

    1. Build discounted Monte Carlo returns from ``buffer.rewards``, resetting
       the running sum wherever ``buffer.is_terminals`` is true, then
       normalise them to zero mean / unit std.
    2. Compute advantages as the normalised return minus the element-wise
       minimum of the stored state value and post-decision state value.
    3. For each epoch, re-evaluate the stored transitions with ``self.policy``
       and minimise the clipped policy loss plus 0.5 * MSE value loss minus
       0.012 * entropy, with the gradient norm clipped to 1.
    4. Copy the updated weights into ``policy_old`` and clear the buffer.
    """
    # Monte Carlo estimate of returns. Accumulate back-to-front with append()
    # and reverse once; list.insert(0, ...) would be quadratic in buffer size.
    returns = []
    discounted_reward = 0
    for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
        if is_terminal:
            discounted_reward = 0
        discounted_reward = reward + (self.gamma * discounted_reward)
        returns.append(discounted_reward)
    returns.reverse()

    # Normalizing the rewards
    rewards = torch.tensor(returns, dtype=torch.float32).to(device)
    rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

    # convert list to tensor
    old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
    old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
    old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
    old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
    old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
    old_state_values_post = torch.squeeze(torch.stack(self.buffer.state_values_post, dim=0)).detach().to(device)

    # calculate advantages against the more pessimistic of the two critics
    advantages = rewards.detach() - torch.min(old_state_values.detach(), old_state_values_post.detach()).detach()

    # Optimize policy for K epochs
    for _ in range(self.K_epochs):

        # Evaluating old actions and values
        logprobs, state_values, state_values_post, dist_entropy = self.policy.evaluate(
            old_states, old_post_states, old_actions, self.tau
        )

        # match state_values tensor dimensions with rewards tensor
        state_values = torch.squeeze(state_values)

        # Finding the ratio (pi_theta / pi_theta__old)
        ratios = torch.exp(logprobs - old_logprobs.detach())

        # Finding Surrogate Loss
        surr1 = ratios * advantages.unsqueeze(1)
        surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages.unsqueeze(1)

        # final loss of clipped objective PDPPO
        loss = (
            -torch.min(surr1, surr2)
            + 0.5 * self.MseLoss(torch.min(state_values, state_values_post.squeeze()), rewards)
            - 0.012 * dist_entropy
        )

        # take gradient step
        self.optimizer.zero_grad()
        loss.mean().backward()
        torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
        self.optimizer.step()

    # Copy new weights into old policy
    self.policy_old.load_state_dict(self.policy.state_dict())

    # clear buffer
    self.buffer.clear()
339
+
340
def save(self, checkpoint_path):
    """Serialize the ``policy_old`` state dict to ``checkpoint_path`` with ``torch.save``."""
    weights = self.policy_old.state_dict()
    torch.save(weights, checkpoint_path)
342
+
343
def load(self, checkpoint_path):
    """Load a saved state dict into both ``policy_old`` and ``policy``.

    The checkpoint is deserialized once and reused for both networks
    (``load_state_dict`` copies values into each module's own parameters), so
    the file is read a single time and one-shot file-like objects also work.
    ``map_location`` keeps every storage where it was deserialized rather
    than moving it to the device it was saved from.
    """
    state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
    self.policy_old.load_state_dict(state_dict)
    self.policy.load_state_dict(state_dict)
code/Lake application/agents/PDPPO.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 1 00:43:49 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.init as init
12
+ from torch.distributions import MultivariateNormal
13
+ from torch.distributions import Categorical
14
+
15
################################## set device ##################################
# Module-level `device` is read by every network and tensor created in this
# file: the first CUDA device when one is available, otherwise the CPU.
print("============================================================================================")
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
26
+
27
+
28
+ ################################## PDPPO Policy ##################################
29
class RolloutBuffer:
    """Plain container of parallel lists holding one batch of rollout data.

    One list per recorded quantity: actions, states, post-decision states,
    log-probabilities, rewards, both critics' value estimates and the
    terminal flags.
    """

    # Names of the list attributes, in the order they are created.
    _FIELDS = (
        "actions",
        "states",
        "post_states",
        "logprobs",
        "rewards",
        "state_values",
        "state_values_post",
        "is_terminals",
    )

    def __init__(self):
        for field in self._FIELDS:
            setattr(self, field, [])

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for field in self._FIELDS:
            getattr(self, field).clear()
49
+
50
+
51
class ActorCritic(nn.Module):
    """Actor network plus two critics (state and post-decision state).

    The actor is a two-hidden-layer tanh MLP: 64 units wide with a final tanh
    for continuous actions, 128 units wide with raw logits for discrete
    actions. ``critic`` and ``critic_post`` are identical 128-unit tanh MLPs
    with a single scalar output.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_dim = action_dim
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            # actor: bounded mean of the action distribution
            self.actor = self._mlp(state_dim, 64, action_dim, squash_output=True)
        else:
            # actor: unnormalised logits over the discrete actions
            self.actor = self._mlp(state_dim, 128, action_dim)

        # critics
        self.critic = self._mlp(state_dim, 128, 1)
        self.critic_post = self._mlp(state_dim, 128, 1)

    @staticmethod
    def _mlp(in_dim, hidden_dim, out_dim, squash_output=False):
        """Build Linear-Tanh-Linear-Tanh-Linear, optionally followed by Tanh."""
        layers = [
            nn.Linear(in_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, out_dim),
        ]
        if squash_output:
            layers.append(nn.Tanh())
        return nn.Sequential(*layers)

    def forward(self, state):
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Rebuild ``action_var`` from a new std (continuous policies only)."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def act(self, state, tau):
        """Sample an action for ``state``; ``tau`` is accepted but not used here.

        Returns the detached action and its detached log-probability.
        """
        actor_out = self.actor(state)
        if self.has_continuous_action_space:
            cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(actor_out, cov_mat)
        else:
            action_probs = nn.functional.softmax(actor_out, dim=-1)
            dist = Categorical(action_probs)

        sampled = dist.sample()
        sampled_logprob = dist.log_prob(sampled)

        return sampled.detach(), sampled_logprob.detach()

    def evaluate(self, state, post_state, action, tau):
        """Score stored transitions under the current parameters.

        Always builds a categorical distribution from the actor logits;
        ``tau`` is accepted but not used here.

        Returns ``(action_logprobs, state_values, state_values_post,
        dist_entropy)`` where the values come from ``critic(state)`` and
        ``critic_post(post_state)``.
        """
        probs = nn.functional.softmax(self.actor(state), dim=-1)
        dist = Categorical(probs)

        # Transposing lets a column of actions broadcast against the batch
        # of distributions; the result is transposed back to a column.
        action_logprobs = dist.log_prob(action.T).T
        dist_entropy = dist.entropy()
        state_values = self.critic(state)
        state_values_post = self.critic_post(post_state)

        return action_logprobs, state_values, state_values_post, dist_entropy
143
+
144
+
145
class PDPPO:
    """PPO-style agent with a second critic for post-decision states.

    Holds a trainable ``policy`` and a frozen copy ``policy_old`` used for
    acting, a ``RolloutBuffer`` of collected transitions, and one Adam
    optimizer over the actor and both critics.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):
        """Create the networks, optimizer and buffer.

        ``env`` must provide ``get_post_decision_state(state_index, action)``,
        which ``select_action`` uses to build the post-decision state. ``tau``
        is stored and forwarded to ``policy.evaluate`` during ``update``.
        """
        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.tau = tau
        self.env = env
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic}
        ], weight_decay=0.0002)

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action std on both policies (continuous action spaces only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Reduce the action std by ``action_std_decay_rate``, floored at ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def select_action(self, state, tau):
        """Sample an action from ``policy_old`` and record the step in the buffer.

        Parameters
        ----------
        state : array-like with a ``copy()`` method. ``np.argmax(state)`` is
            passed to the environment as the current state index
            (presumably a one-hot encoding -- confirm against the caller).
        tau : forwarded unchanged to ``policy_old.act``.

        Returns
        -------
        numpy array holding the sampled action.

        Side effects
        ------------
        Appends the state, the one-hot post-decision state, the action, its
        log-probability and both critics' value estimates to ``self.buffer``.
        """
        state_int = state.copy()

        with torch.no_grad():
            state = torch.tensor(state).to(device)
            state = state.float()
            # Turn the flat observation into a row vector of shape (1, state_dim).
            state = torch.unsqueeze(state, 1).T
            action, action_logprob = self.policy_old.act(state, tau)

        post_state = self.env.get_post_decision_state(np.argmax(state_int), action.clone())

        # One-hot encode the post-decision state index returned by the env.
        binary_array = np.zeros(state.shape[1], dtype=int)
        binary_array[post_state] = 1

        post_state = torch.tensor(binary_array).to(device)
        post_state = post_state.float()
        post_state = torch.unsqueeze(post_state, 1).T

        self.buffer.states.append(state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(state)
            # Post-decision states are scored by the dedicated critic, the same
            # network ActorCritic.evaluate applies to them during update().
            state_val_post = self.policy_old.critic_post(post_state)

        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` clipped-surrogate optimisation steps on the buffer.

        Builds normalised discounted returns, takes advantages against the
        element-wise minimum of the two stored value estimates, optimises
        policy loss + 0.5 * value MSE - 0.012 * entropy with the gradient norm
        clipped to 1, then syncs ``policy_old`` and clears the buffer.
        """
        # Monte Carlo estimate of returns. Accumulate back-to-front with
        # append() and reverse once; insert(0, ...) would be quadratic.
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()

        # Normalizing the rewards
        rewards = torch.tensor(returns, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=1)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0).detach().to(device), 1)
        old_actions = torch.stack(self.buffer.actions, dim=0).detach().to(device)
        old_logprobs = torch.stack(self.buffer.logprobs, dim=0).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
        old_state_values_post = torch.squeeze(torch.stack(self.buffer.state_values_post, dim=0)).detach().to(device)

        # calculate advantages against the more pessimistic of the two critics
        advantages = rewards.detach() - torch.min(old_state_values.detach(), old_state_values_post.detach()).detach()

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, state_values_post, dist_entropy = self.policy.evaluate(
                old_states, old_post_states, old_actions, self.tau
            )

            # match value tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)
            state_values_post = torch.squeeze(state_values_post)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages.unsqueeze(1)

            # Final loss of the clipped objective. Each term is reduced to a
            # scalar before summing: adding the (N, 1) surrogate to the (N,)
            # entropy would broadcast to an N x N tensor whose mean is this
            # same value, only far more expensive to build and backpropagate.
            policy_loss = -torch.min(surr1, surr2).mean()
            value_loss = self.MseLoss(torch.min(state_values, state_values_post), rewards)
            entropy = dist_entropy.mean()
            loss = policy_loss + 0.5 * value_loss - 0.012 * entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Serialize the ``policy_old`` state dict to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load a saved state dict into both ``policy_old`` and ``policy``.

        The checkpoint is deserialized once; ``load_state_dict`` copies the
        values into each network's own parameters.
        """
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
code/Lake application/agents/PDPPOAgent two critics.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os # Provides a way of interacting with the file system
2
+ import sys
3
+ import glob # Helps find all the pathnames matching a specified pattern according to the rules used by the Unix shell
4
+ import time # Provides various time-related functions
5
+ from datetime import datetime # Module that supplies classes for working with dates and times
6
+
7
+ import numpy as np # A library for the Python programming language, adding support for large, multi-dimensional arrays and matrices
8
+ import gym # Provides a collection of test problems — environments — that you can use to work out your reinforcement learning algorithms
9
+ import torch # A machine learning framework that provides tensor computation (like NumPy) with strong acceleration on GPUs
10
+ import copy # Provides a module for shallow and deep copying operations
11
+ import matplotlib.pyplot as plt # A plotting library for the Python programming language and its numerical mathematics extension NumPy
12
+ import matplotlib.patches as mpatches # Provides a way of adding a colored patch to the plot, for example to create a legend
13
+ BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
+ AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
+ sys.path.append(AGENTS_DIR)
16
+ from PDPPO import PDPPO
17
+ from envs import *
18
+ import copy
19
+
20
+
21
class SimplePlantSB(SimplePlant):
    """``SimplePlant`` wrapper exposing gym spaces and a flat observation.

    The action space is ``MultiDiscrete`` with ``n_items + 1`` choices per
    machine. Observations are either a dict (when ``settings['dict_obs']`` is
    true) or the concatenation of inventory levels and machine setups.
    """

    def __init__(self, settings, stoch_model):
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional in settings; flat observations are the default.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items + 1] * self.n_machines
        )

        if self.dict_obs:
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items),
                    high=np.ones(self.n_items) * (settings['max_inventory_level'][0] + 1) * self.n_items,
                ),
                'machine_setup': gym.spaces.MultiDiscrete([self.n_items + 1] * self.n_machines),
            })
        else:
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items + self.n_machines),
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level),  # high for the inventory level
                        np.ones(self.n_machines) * (self.n_items + 1),  # high for the machine setups
                    ]),
                dtype=np.int32
            )

    def step(self, action):
        """
        Step method: Execute one time step within the environment

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : observation produced by the method _next_observation
        reward : negative sum of all entries of the cost dict returned by
            _take_action
        done : True once current_step equals the horizon T
        dict : the cost dict itself, for monitoring

        """
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(action, self.machine_setup, self.inventory_level, self.demand)

        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """
        Return the parent's observation, flattened when dict_obs is False.

        A dict observation is concatenated into one array (inventory levels
        first, then machine setups) unless dict observations were requested.

        Raises
        ------
        ValueError : the parent returned a non-dict observation although
            dict_obs is True.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items size
                        obs['machine_setup'],  # n_machine size
                    )
                )
        else:
            if self.dict_obs:
                # A bare string cannot be raised in Python 3; use a real exception.
                raise ValueError('Change dict_obst to False')
        return obs
102
+
103
+
104
class PDPPOAgent():
    """Training and inference wrapper around a ``PDPPO`` agent for ``SimplePlant``.

    Builds a ``SimplePlantSB`` environment from the given plant, fixes the
    hyperparameters, and exposes ``learn`` (training loop with CSV logging and
    checkpointing), ``get_action`` and ``load_agent``.
    """

    def __init__(self, env: SimplePlant, settings: dict):
        """Create the wrapped environment and the PDPPO agent.

        ``settings`` must contain 'model_name', 'experiment_name' and
        'parallelization'; 'dict_obs' is optional and defaults to False.
        """
        self.env = SimplePlantSB(env.settings, env.stoch_model)
        self.last_inventory = env.inventory_level
        self.model_name = settings['model_name']
        self.experiment_name = settings['experiment_name']
        self.parallelization = settings['parallelization']
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False

        self.POSSIBLE_STATES = self.env.n_items + 1
        self.env.cost_to_reward = True
        self.epsilon = 0

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # Use the logs file in the root path of the main.
        self.LOG_DIR = os.path.join(BASE_DIR, 'logs')

        print("============================================================================================")

        ####### initialize environment hyperparameters ######

        self.has_continuous_action_space = False  # continuous action space; else discrete

        self.max_ep_len = 1000  # max timesteps in one episode
        self.tau = 1
        self.tau_start = 1.0  # initial value of tau
        self.tau_end = 2.0  # final value of tau

        self.print_freq = self.max_ep_len * 4  # print avg reward in the interval (in num timesteps)
        self.log_freq = self.max_ep_len * 4  # log avg reward in the interval (in num timesteps)
        self.save_model_freq = int(4999)  # save model frequency (in num timesteps)

        self.action_std = 0.6  # starting std for action distribution (Multivariate Normal)
        self.action_std_decay_rate = 0.05  # linear decay step subtracted from action_std
        self.min_action_std = 0.1  # decay stops once action_std <= min_action_std
        self.action_std_decay_freq = int(2.5e5)  # action_std decay frequency (in num timesteps)
        #####################################################

        ## Note : print/log frequencies should be > than self.max_ep_len

        ################ PDPPO hyperparameters ################
        self.update_timestep = self.max_ep_len * 4  # update policy every n timesteps
        self.K_epochs = 60  # update policy for K epochs in one PDPPO update

        self.eps_clip = 0.2  # clip parameter for PDPPO
        self.gamma = 0.99  # discount factor

        self.lr_actor = 0.00055  # learning rate for actor network
        self.lr_critic = 0.001  # learning rate for critic network

        self.random_seed = 0  # set random seed if required (0 = no random seed)
        #####################################################
        self.run_num_pretrained = 0  #### change this to prevent overwriting weights in same self.experiment_name folder

        print("training environment name : " + self.experiment_name + '_PDPPO')

        # state space dimension
        self.state_dim = self.env.observation_space.shape[0]

        # action space dimension
        if self.has_continuous_action_space:
            self.action_dim = self.env.action_space.shape[0]
        else:
            # The gym space object itself is passed on as the action dimension.
            self.action_dim = self.env.action_space

        self.pdppo_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space, self.tau, self.action_std)

    ################################### Training ###################################
    def learn(self, n_episodes=100000):
        """Train ``self.pdppo_agent`` and log average episode rewards to CSV.

        Parameters
        ----------
        n_episodes : despite its name this is the training budget in
            *timesteps*; the loop stops once that many steps were taken.

        Side effects
        ------------
        Creates ``<LOG_DIR>/<experiment_name>_PDPPO/``, writes a new
        ``PDPPO_<experiment_name>_log_<run>.csv`` there and periodically
        saves the model checkpoint into the same directory.
        """
        ###################### logging ######################

        self.max_training_timesteps = n_episodes  # break training loop if timeteps > self.max_training_timesteps

        env = self.env

        #### log files for multiple runs are NOT overwritten
        log_dir = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO/'
        os.makedirs(log_dir, exist_ok=True)

        #### get number of files in log directory
        run_num = len(next(os.walk(log_dir))[2])

        #### create new log file for each run
        log_f_name = log_dir + '/PDPPO_' + self.experiment_name + "_log_" + str(run_num) + ".csv"

        print("current logging run number for " + self.experiment_name + " : ", run_num)
        print("logging at : " + log_f_name)
        #####################################################

        ################### checkpointing ###################
        directory = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO' + '/'
        os.makedirs(directory, exist_ok=True)

        checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
        print("save checkpoint path : " + checkpoint_path)
        #####################################################

        ############# print all hyperparameters #############
        print("--------------------------------------------------------------------------------------------")
        print("max training timesteps : ", self.max_training_timesteps)
        print("max timesteps per episode : ", self.max_ep_len)
        print("model saving frequency : " + str(self.save_model_freq) + " timesteps")
        print("log frequency : " + str(self.log_freq) + " timesteps")
        print("printing average reward over episodes in last : " + str(self.print_freq) + " timesteps")
        print("--------------------------------------------------------------------------------------------")
        print("state space dimension : ", self.state_dim)
        print("action space dimension : ", self.action_dim)
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            print("Initializing a continuous action space policy")
            print("--------------------------------------------------------------------------------------------")
            print("starting std of action distribution : ", self.action_std)
            print("decay rate of std of action distribution : ", self.action_std_decay_rate)
            print("minimum std of action distribution : ", self.min_action_std)
            print("decay frequency of std of action distribution : " + str(self.action_std_decay_freq) + " timesteps")
        else:
            print("Initializing a discrete action space policy")
        print("--------------------------------------------------------------------------------------------")
        print("PDPPO update frequency : " + str(self.update_timestep) + " timesteps")
        print("PDPPO K epochs : ", self.K_epochs)
        print("PDPPO epsilon clip : ", self.eps_clip)
        print("discount factor (self.gamma) : ", self.gamma)
        print("--------------------------------------------------------------------------------------------")
        print("optimizer learning rate actor : ", self.lr_actor)
        print("optimizer learning rate critic : ", self.lr_critic)
        if self.random_seed:
            print("--------------------------------------------------------------------------------------------")
            print("setting random seed to ", self.random_seed)

        #####################################################

        print("============================================================================================")

        ################# training procedure ################

        # Secondary agent instance kept for backward compatibility; the loop
        # below trains self.pdppo_agent. `tau` is passed explicitly so that
        # action_std no longer lands in the tau parameter slot.
        self.PDPPO_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space, self.tau, self.action_std)

        # track total training time
        start_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)

        print("============================================================================================")

        # printing and logging variables
        print_running_reward = 0
        print_running_episodes = 0

        log_running_reward = 0
        log_running_episodes = 0

        time_step = 0
        i_episode = 0

        # tau moves linearly from tau_start to tau_end over the whole budget
        annealing_steps = max(self.max_training_timesteps, 1)
        anneal_rate = (self.tau_end - self.tau_start) / annealing_steps

        # The context manager closes the log file even if training raises.
        with open(log_f_name, "w+") as log_f:
            log_f.write('episode,timestep,reward\n')

            # training loop
            while time_step <= self.max_training_timesteps:

                # Cap at tau_end; max() here would pin tau at tau_end from the
                # very first step and no annealing would ever take place.
                self.tau = min(self.tau_end, self.tau_start + anneal_rate * time_step)

                state = env.reset()
                current_ep_reward = 0

                for t in range(1, self.max_ep_len + 1):

                    # select action with policy
                    action = self.pdppo_agent.select_action(state, self.tau)
                    state, reward, done, _ = env.step(action)

                    # saving reward and is_terminals
                    self.pdppo_agent.buffer.rewards.append(reward)
                    self.pdppo_agent.buffer.is_terminals.append(done)

                    time_step += 1
                    current_ep_reward += reward

                    # update PDPPO agent
                    if time_step % self.update_timestep == 0:
                        self.pdppo_agent.update()

                    # if continuous action space; then decay action std of ouput action distribution
                    if self.has_continuous_action_space and time_step % self.action_std_decay_freq == 0:
                        self.pdppo_agent.decay_action_std(self.action_std_decay_rate, self.min_action_std)

                    # log in logging file (skipped when no episode finished in the interval)
                    if time_step % self.log_freq == 0 and log_running_episodes > 0:

                        # log average reward till last episode
                        log_avg_reward = log_running_reward / log_running_episodes
                        log_avg_reward = round(log_avg_reward, 4)

                        log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
                        log_f.flush()

                        log_running_reward = 0
                        log_running_episodes = 0

                    # printing average reward (skipped when no episode finished in the interval)
                    if time_step % self.print_freq == 0 and print_running_episodes > 0:

                        # print average reward till last episode
                        print_avg_reward = print_running_reward / print_running_episodes
                        print_avg_reward = round(print_avg_reward, 2)

                        print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))

                        print_running_reward = 0
                        print_running_episodes = 0

                    # save model weights
                    if time_step % self.save_model_freq == 0:
                        print("--------------------------------------------------------------------------------------------")
                        self.pdppo_agent.save(checkpoint_path)
                        print("Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time)
                        print("--------------------------------------------------------------------------------------------")

                    # break; if the episode is over
                    if done:
                        break

                print_running_reward += current_ep_reward
                print_running_episodes += 1

                log_running_reward += current_ep_reward
                log_running_episodes += 1

                i_episode += 1

        # print total training time
        print("============================================================================================")
        end_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)
        print("Finished training at (GMT) : ", end_time)
        print("Total training time : ", end_time - start_time)
        print("============================================================================================")

    def get_action(self, state):
        """Select an action for ``state`` with the current policy.

        A dict observation is flattened (inventory levels, then machine
        setups) unless dict observations are enabled. Note that this goes
        through ``select_action`` and therefore also appends to the agent's
        rollout buffer.

        Raises
        ------
        ValueError : ``state`` is not a dict although dict_obs is True.
        """
        if isinstance(state, dict):
            if not self.dict_obs:
                state = np.concatenate(
                    (
                        state['inventory_level'],  # n_items size
                        state['machine_setup'],  # n_machine size
                    )
                )
        else:
            if self.dict_obs:
                # A bare string cannot be raised in Python 3; use a real exception.
                raise ValueError('Change dict_obst to False')
        return self.pdppo_agent.select_action(state, self.tau)

    def load_agent(self, path):
        """Load the checkpoint written by ``learn`` into the agent.

        The checkpoint location is rebuilt from ``LOG_DIR``, the experiment
        name, the random seed and ``run_num_pretrained``; the ``path``
        argument is accepted but not used.
        """
        directory = self.LOG_DIR
        directory = directory + '/' + self.experiment_name + '_PDPPO' + '/'
        checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
        print("loading network from : " + checkpoint_path)
        self.pdppo_agent.load(checkpoint_path)
code/Lake application/agents/PDPPOAgent.py ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os # Provides a way of interacting with the file system
2
+ import sys
3
+ import glob # Helps find all the pathnames matching a specified pattern according to the rules used by the Unix shell
4
+ import time # Provides various time-related functions
5
+ from datetime import datetime # Module that supplies classes for working with dates and times
6
+
7
+ import numpy as np # A library for the Python programming language, adding support for large, multi-dimensional arrays and matrices
8
+ import gym # Provides a collection of test problems — environments — that you can use to work out your reinforcement learning algorithms
9
+ import torch # A machine learning framework that provides tensor computation (like NumPy) with strong acceleration on GPUs
10
+ import copy # Provides a module for shallow and deep copying operations
11
+ import matplotlib.pyplot as plt # A plotting library for the Python programming language and its numerical mathematics extension NumPy
12
+ import matplotlib.patches as mpatches # Provides a way of adding a colored patch to the plot, for example to create a legend
13
+ BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
+ AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
+ sys.path.append(AGENTS_DIR)
16
+ from agents.PDPPO import PDPPO
17
+ from envs import *
18
+ import copy
19
+
20
+
21
class SimplePlantSB(SimplePlant):
    """
    SimplePlant variant exposing gym action/observation spaces.

    Observations are either the dict produced by ``SimplePlant`` (when
    ``settings['dict_obs']`` is truthy) or a flat array made of the
    inventory levels followed by the machine setups.
    """

    def __init__(self, settings, stoch_model):
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional; a missing key means flat observations.
        try:
            self.dict_obs = settings['dict_obs']
        except (KeyError, TypeError):
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        # one choice per machine: 0 (idle) or one of the n_items items
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items + 1] * self.n_machines
        )

        if self.dict_obs:
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items),
                    high=np.ones(self.n_items) * (settings['max_inventory_level'][0] + 1) * self.n_items,
                ),
                'machine_setup': gym.spaces.MultiDiscrete([self.n_items + 1] * self.n_machines),
            })
        else:
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items + self.n_machines),  # lower bound for every entry
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level),  # high for the inventory level
                        np.ones(self.n_machines) * (self.n_items + 1),  # high for the machine setups
                    ]),
                dtype=np.int32
            )

    def step(self, action):
        """
        Step method: Execute one time step within the environment

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : Observation of the state give the method _next_observation
        reward : negated sum of all entries of the cost dict returned by _take_action
        done : True once current_step reaches the horizon T
        dict : the cost dict itself, for monitoring

        """
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(action, self.machine_setup, self.inventory_level, self.demand)

        # costs are turned into a reward by negating their total
        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """
        Return the observation of SimplePlant in the configured format.

        A dict observation is flattened (inventory levels, then machine
        setups) unless ``self.dict_obs`` is True.

        Raises
        ------
        ValueError : the parent returned a non-dict observation while
            ``self.dict_obs`` is True.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items size
                        obs['machine_setup'],  # n_machine size
                    )
                )
        elif self.dict_obs:
            # raising a bare string is a TypeError in Python 3
            raise ValueError('Change dict_obst to False')
        return obs
102
+
103
+
104
class PDPPOAgent():
    """
    Training and inference wrapper around a PDPPO policy.

    The constructor reads ``observation_space.n`` and ``action_space.n``
    from the environment, so it expects discrete spaces; ``learn`` feeds the
    policy a one-hot encoding of the integer state returned by the
    environment.
    """

    def __init__(self, env: SimplePlant, settings: dict):
        """
        Parameters
        ----------
        env : environment to train on
        settings : dict with 'model_name', 'experiment_name' and
            'parallelization'; 'dict_obs' is optional (default False)
        """
        self.env = env

        self.model_name = settings['model_name']
        self.experiment_name = settings['experiment_name']
        self.parallelization = settings['parallelization']
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False

        self.POSSIBLE_STATES = self.env.observation_space.n
        self.env.cost_to_reward = True
        self.epsilon = 0

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # Use the logs file in the root path of the main.
        self.LOG_DIR = os.path.join(BASE_DIR,'logs')

        print("============================================================================================")

        ####### initialize environment hyperparameters ######

        self.has_continuous_action_space = False  # continuous action space; else discrete

        self.max_ep_len = 100                     # max timesteps in one episode
        self.tau = 1
        self.tau_start = 1.0                      # initial value of tau
        self.tau_end = 2.0                        # final value of tau

        self.print_freq = self.max_ep_len * 4     # print avg reward in the interval (in num timesteps)
        self.log_freq = self.max_ep_len * 4       # log avg reward in the interval (in num timesteps)
        self.save_model_freq = int(4999)          # save model frequency (in num timesteps)

        self.action_std = 0.6                     # starting std for action distribution (Multivariate Normal)
        self.action_std_decay_rate = 0.05         # linearly decay self.action_std (self.action_std = self.action_std - self.action_std_decay_rate)
        self.min_action_std = 0.1                 # minimum self.action_std (stop decay after self.action_std <= self.min_action_std)
        self.action_std_decay_freq = int(2.5e5)   # self.action_std decay frequency (in num timesteps)
        #####################################################

        ## Note : print/log frequencies should be > than self.max_ep_len

        ################ PDPPO hyperparameters ################
        self.update_timestep = self.max_ep_len * 6  # update policy every n timesteps
        self.K_epochs = 40                          # update policy for K epochs in one PDPPO update

        self.eps_clip = 0.21                        # clip parameter for PDPPO
        self.gamma = 0.991                          # discount factor

        self.lr_actor = 0.0004                      # learning rate for actor network
        self.lr_critic = 0.0012                     # learning rate for critic network

        self.random_seed = 0                        # set random seed if required (0 = no random seed)
        #####################################################
        self.run_num_pretrained = 0  #### change this to prevent overwriting weights in same self.experiment_name folder

        print("training environment name : " + self.experiment_name + '_PDPPO')

        # state space dimension
        self.state_dim = self.env.observation_space.n

        # action space dimension
        if self.has_continuous_action_space:
            # a continuous (Box) space has no `.n`; its size is its first shape entry
            self.action_dim = self.env.action_space.shape[0]
        else:
            self.action_dim = self.env.action_space.n

        self.pdppo_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space,self.tau, self.action_std)

    def _one_hot(self, index):
        """Return a length-``state_dim`` integer vector with a 1 at ``index``."""
        encoded = np.zeros(self.state_dim, dtype=int)
        encoded[index] = 1
        return encoded

    ################################### Training ###################################
    def learn(self,n_episodes = 100000):
        """
        Train the policy and write a CSV log plus periodic checkpoints.

        Parameters
        ----------
        n_episodes : despite its name, this is used as the maximum number of
            environment timesteps; training stops at the first episode
            boundary after it is exceeded.
        """

        ###################### logging ######################

        self.max_training_timesteps = n_episodes  # break training loop if timeteps > self.max_training_timesteps

        env = self.env

        #### log files for multiple runs are NOT overwritten
        log_dir = self.LOG_DIR
        os.makedirs(log_dir, exist_ok=True)

        log_dir = log_dir + '/' + self.experiment_name + '_PDPPO/'
        os.makedirs(log_dir, exist_ok=True)

        #### get number of log files in log directory
        run_num = len(next(os.walk(log_dir))[2])

        #### create new log file for each run
        log_f_name = log_dir + '/PDPPO_' + self.experiment_name + "_log_" + str(run_num) + ".csv"

        print("current logging run number for " + self.experiment_name + " : ", run_num)
        print("logging at : " + log_f_name)
        #####################################################

        ################### checkpointing ###################
        directory = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO' + '/'
        os.makedirs(directory, exist_ok=True)

        checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
        print("save checkpoint path : " + checkpoint_path)
        #####################################################

        ############# print all hyperparameters #############
        print("--------------------------------------------------------------------------------------------")
        print("max training timesteps : ", self.max_training_timesteps)
        print("max timesteps per episode : ", self.max_ep_len)
        print("model saving frequency : " + str(self.save_model_freq) + " timesteps")
        print("log frequency : " + str(self.log_freq) + " timesteps")
        print("printing average reward over episodes in last : " + str(self.print_freq) + " timesteps")
        print("--------------------------------------------------------------------------------------------")
        print("state space dimension : ", self.state_dim)
        print("action space dimension : ", self.action_dim)
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            print("Initializing a continuous action space policy")
            print("--------------------------------------------------------------------------------------------")
            print("starting std of action distribution : ", self.action_std)
            print("decay rate of std of action distribution : ", self.action_std_decay_rate)
            print("minimum std of action distribution : ", self.min_action_std)
            print("decay frequency of std of action distribution : " + str(self.action_std_decay_freq) + " timesteps")
        else:
            print("Initializing a discrete action space policy")
        print("--------------------------------------------------------------------------------------------")
        print("PDPPO update frequency : " + str(self.update_timestep) + " timesteps")
        print("PDPPO K epochs : ", self.K_epochs)
        print("PDPPO epsilon clip : ", self.eps_clip)
        print("discount factor (self.gamma) : ", self.gamma)
        print("--------------------------------------------------------------------------------------------")
        print("optimizer learning rate actor : ", self.lr_actor)
        print("optimizer learning rate critic : ", self.lr_critic)
        if self.random_seed:
            # NOTE(review): the seed is only reported here, nothing is seeded.
            print("--------------------------------------------------------------------------------------------")
            print("setting random seed to ", self.random_seed)

        #####################################################

        print("============================================================================================")

        ################# training procedure ################

        # NOTE(review): this second agent is never used below (training runs
        # on self.pdppo_agent). It is kept in case external code reads the
        # attribute; its arguments now match the constructor call in __init__.
        self.PDPPO_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space, self.tau, self.action_std)

        # track total training time
        start_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)

        print("============================================================================================")

        # printing and logging variables
        print_running_reward = 0
        print_running_episodes = 0

        log_running_reward = 0
        log_running_episodes = 0

        time_step = 0
        i_episode = 0

        # total number of steps over which tau moves from tau_start to tau_end
        annealing_steps = max(self.max_training_timesteps, 1)

        # the context manager closes the log even if training raises
        with open(log_f_name, "w+") as log_f:
            log_f.write('episode,timestep,reward\n')

            # training loop
            while time_step <= self.max_training_timesteps:

                # Linear schedule clamped at tau_end. The previous
                # max(tau_end, ...) pinned tau at tau_end from the first step.
                progress = min(time_step / annealing_steps, 1.0)
                self.tau = self.tau_start + (self.tau_end - self.tau_start) * progress

                state = self._one_hot(env.reset())
                current_ep_reward = 0

                for t in range(1, self.max_ep_len+1):

                    # select action with policy
                    action = self.pdppo_agent.select_action(state,self.tau)
                    state, reward, done, _ = env.step(action.item())
                    state = self._one_hot(state)

                    # saving reward and is_terminals
                    self.pdppo_agent.buffer.rewards.append(reward)
                    self.pdppo_agent.buffer.is_terminals.append(done)

                    time_step += 1
                    current_ep_reward += reward

                    # update PDPPO agent
                    if time_step % self.update_timestep == 0:
                        self.pdppo_agent.update()

                    # if continuous action space; then decay action std of ouput action distribution
                    if self.has_continuous_action_space and time_step % self.action_std_decay_freq == 0:
                        # NOTE(review): method name restored from the mangled
                        # `decay_self.action_std`; confirm it exists on PDPPO.
                        self.pdppo_agent.decay_action_std(self.action_std_decay_rate, self.min_action_std)

                    # log in logging file
                    if time_step % self.log_freq == 0:

                        # log average reward till last episode (guarded: no
                        # episode may have finished yet)
                        log_avg_reward = log_running_reward / max(log_running_episodes, 1)
                        log_avg_reward = round(log_avg_reward, 4)

                        log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
                        log_f.flush()

                        log_running_reward = 0
                        log_running_episodes = 0

                    # printing average reward
                    if time_step % self.print_freq == 0:

                        # print average reward till last episode
                        print_avg_reward = print_running_reward / max(print_running_episodes, 1)
                        print_avg_reward = round(print_avg_reward, 2)

                        print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))

                        print_running_reward = 0
                        print_running_episodes = 0

                    # save model weights
                    if time_step % self.save_model_freq == 0:
                        print("--------------------------------------------------------------------------------------------")
                        self.pdppo_agent.save(checkpoint_path)
                        print("PDPPO Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time)
                        print("--------------------------------------------------------------------------------------------")

                    # break; if the episode is over
                    if done:
                        break

                print_running_reward += current_ep_reward
                print_running_episodes += 1

                log_running_reward += current_ep_reward
                log_running_episodes += 1

                i_episode += 1

        # print total training time
        print("============================================================================================")
        end_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)
        print("Finished training at (GMT) : ", end_time)
        print("Total training time : ", end_time - start_time)
        print("============================================================================================")

    def get_action(self,state):
        """
        Select an action for ``state`` with the trained policy.

        A dict observation is flattened ('inventory_level' then
        'machine_setup') when ``self.dict_obs`` is False; anything else is
        forwarded unchanged.

        Raises
        ------
        ValueError : a non-dict observation was given while ``self.dict_obs``
            is True.
        """
        # NOTE(review): learn() one-hot encodes integer states before calling
        # select_action, this method does not - confirm callers encode first.
        if isinstance(state, dict):
            if not self.dict_obs:
                state = np.concatenate(
                    (
                        state['inventory_level'],  # n_items size
                        state['machine_setup'],  # n_machine size
                    )
                )
        elif self.dict_obs:
            # raising a bare string is a TypeError in Python 3
            raise ValueError('Change dict_obst to False')
        return self.pdppo_agent.select_action(state,self.tau)

    def load_agent(self,path):
        """
        Load the policy weights from this experiment's checkpoint file.

        The location is rebuilt from LOG_DIR, experiment name, random seed
        and pretrained-run number; ``path`` is accepted but not used.
        """
        directory = self.LOG_DIR
        directory = directory + '/' + self.experiment_name + '_PDPPO' + '/'
        checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
        print("loading network from : " + checkpoint_path)
        self.pdppo_agent.load(checkpoint_path)
code/Lake application/agents/PDPPO_two_actors.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 1 00:43:49 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import torch
9
+ import numpy as np
10
+ import torch.nn as nn
11
+ from torch.distributions import MultivariateNormal
12
+ from torch.distributions import Categorical
13
+
14
+ ################################## set device ##################################
15
print("============================================================================================")
# Pick the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
25
+
26
+
27
+ ################################## PDPPO Policy ##################################
28
class RolloutBuffer:
    """Plain container of per-timestep rollout lists, one list per field."""

    # Names of every list kept by the buffer, in creation order.
    _FIELDS = (
        'actions', 'actions_pre', 'actions_post',
        'states', 'pre_states', 'post_states',
        'logprobs', 'logprobs_pre', 'logprobs_post',
        'rewards', 'rewards_pre', 'rewards_post',
        'state_values', 'state_values_post',
        'is_terminals',
    )

    def __init__(self):
        for field in self._FIELDS:
            setattr(self, field, [])

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for field in self._FIELDS:
            del getattr(self, field)[:]
62
+
63
class ActorCritic(nn.Module):
    """
    Three multi-discrete actors (full, pre-decision and post-decision state)
    plus one critic for the pre-decision and one for the post-decision state.

    ``action_dim`` must expose ``nvec`` (sizes of the discrete heads); every
    actor emits ``nvec.sum()`` logits that are reshaped into one row per
    head, so all heads are expected to have the same size.
    """

    def __init__(self, state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()
        # set_action_std reads this flag; it was previously never stored
        self.has_continuous_action_space = has_continuous_action_space
        # actor - multidiscrete
        self.action_dim = action_dim
        n_logits = int(self.action_dim.nvec.sum())

        self.actor = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Linear(128, 128),
            nn.Linear(128, n_logits)
        )

        self.actor_pre = nn.Sequential(
            nn.Linear(state_dim_pre, 128),
            nn.Linear(128, 128),
            nn.Linear(128, n_logits)
        )
        self.actor_post = nn.Sequential(
            nn.Linear(state_dim_post, 128),
            nn.Linear(128, 128),
            nn.Linear(128, n_logits)
        )

        # critic
        self.critic_pre = nn.Sequential(
            nn.Linear(state_dim_pre, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

        self.critic_post = nn.Sequential(
            nn.Linear(state_dim_post, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        if self.has_continuous_action_space:
            # NOTE(review): relies on the module-level `device` and on
            # action_dim being an int, which does not hold for the
            # multi-discrete spaces used by the actors above - confirm.
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def _head_distribution(self, actor, state):
        """Build one Categorical per action head from ``actor(state)``."""
        logits = actor(state)
        action_probs = nn.functional.softmax(logits, dim=-1)
        n_heads = len(self.action_dim.nvec)
        # (..., n_logits) -> (..., n_heads, head_size); Categorical
        # re-normalises each row
        return Categorical(action_probs.view(*logits.shape[:-1], n_heads, -1))

    def _sample(self, actor, state):
        """Sample one action per head and return (action, log-prob), detached."""
        dist = self._head_distribution(actor, state)
        action = dist.sample()
        return action.detach(), dist.log_prob(action).detach()

    def act(self, state):
        """Sample an action from the full-state actor.

        The state is fed straight into ``self.actor``; the previous code went
        through ``self.fc1``/``self.fc2``, which this class never defines.
        """
        return self._sample(self.actor, state)

    def act_pre(self, pre_state):
        """Sample an action from the pre-decision actor."""
        return self._sample(self.actor_pre, pre_state)

    def act_post(self, post_state):
        """Sample an action from the post-decision actor."""
        return self._sample(self.actor_post, post_state)

    def evaluate(self, state, pre_state, post_state, action, action_pre=None, action_post=None):
        """
        Score stored transitions under the current parameters.

        Returns
        -------
        Without ``action_pre``/``action_post``:
            action_logprobs, state_values_pre, state_values_post, dist_entropy
        With both given (the form PDPPO.update unpacks):
            logprobs, logprobs_pre, logprobs_post, state_values_pre,
            state_values_post, dist_entropy, dist_entropy_pre,
            dist_entropy_post
        """
        dist = self._head_distribution(self.actor, state)
        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values_pre = self.critic_pre(pre_state)
        state_values_post = self.critic_post(post_state)

        if action_pre is None or action_post is None:
            return action_logprobs, state_values_pre,state_values_post, dist_entropy

        dist_pre = self._head_distribution(self.actor_pre, pre_state)
        dist_post = self._head_distribution(self.actor_post, post_state)
        return (
            action_logprobs,
            dist_pre.log_prob(action_pre),
            dist_post.log_prob(action_post),
            state_values_pre,
            state_values_post,
            dist_entropy,
            dist_pre.entropy(),
            dist_post.entropy(),
        )
149
+
150
+
151
class PDPPO:
    """
    PPO variant that keeps separate pre-decision and post-decision critics.

    The pre-decision state is the machine-setup slice of the observation and
    the post-decision state is the inventory obtained after applying the
    action deterministically (see get_post_state).
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, env, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.env = env

        self.reward_old_pre = -np.inf
        self.reward_old_post = -np.inf

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        state_dim_pre = self.env.n_machines
        state_dim_post = self.env.n_items

        self.policy = ActorCritic(state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)
        # ActorCritic defines critic_pre/critic_post; there is no `critic`.
        # NOTE(review): actor_pre/actor_post are not optimised (they were not
        # in the original parameter groups either) - confirm intent.
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
            {'params': self.policy.critic_pre.parameters(), 'lr': lr_critic*10},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic*1}
        ])

        self.policy_old = ActorCritic(state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def get_post_state(self, action, machine_setup, inventory_level):
        """
        Apply ``action`` to the setup and inventory, before demand is seen.

        ``machine_setup`` and ``inventory_level`` are modified IN PLACE and
        also returned, together with the per-machine setup costs.
        """
        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        for m in range(self.env.n_machines):
            item = int(action[m])  # 0 means the machine stays idle
            if item != 0:
                # 1. IF NEEDED CHANGE SETUP
                if machine_setup[m] != item:
                    setup_costs[m] = self.env.setup_costs[m][item - 1]
                    setup_loss[m] = self.env.setup_loss[m][item - 1]
                machine_setup[m] = item
                # 2. PRODUCTION
                production = self.env.machine_production_matrix[m][item - 1] - setup_loss[m]
                inventory_level[item - 1] += production
            else:
                machine_setup[m] = 0
        # return the new machine_setup_inventory_level and the setup_cost
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state):
        """
        Sample an action for ``state`` and record the transition in the buffer.

        Returns the action as a numpy array.
        """
        n_items = self.env.n_items
        n_machines = self.env.n_machines

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state)

        pre_state = state[n_items:n_items + n_machines].clone()

        # get_post_state writes into its arguments; work on copies so the
        # state stored in the buffer is the one the action was sampled from
        _, post_state, _ = self.get_post_state(action, pre_state.clone(), state[0:n_items].clone())

        with torch.no_grad():
            action_pre, action_logprob_pre = self.policy_old.act_pre(pre_state)
            action_post, action_logprob_post = self.policy_old.act_post(post_state)

        self.buffer.states.append(state)
        self.buffer.pre_states.append(pre_state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.actions_pre.append(action_pre)
        self.buffer.actions_post.append(action_post)
        # each log-prob goes to its own list (all three used to be appended
        # to `logprobs`, tripling its length)
        self.buffer.logprobs.append(action_logprob)
        self.buffer.logprobs_pre.append(action_logprob_pre)
        self.buffer.logprobs_post.append(action_logprob_post)

        with torch.no_grad():
            state_val = self.policy_old.critic_pre(pre_state).detach()
            state_val_post = self.policy_old.critic_post(post_state).detach()

        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        if self.has_continuous_action_space:
            return action.detach().cpu().numpy().flatten()

        # .cpu() is required before .numpy() when running on CUDA
        return action.cpu().numpy()

    def _discounted_returns(self, rewards):
        """Monte Carlo returns for ``rewards``, restarting at terminal steps."""
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()
        return torch.tensor(returns, dtype=torch.float32).to(device)

    @staticmethod
    def _standardize(values):
        """Shift to zero mean and scale to (almost) unit standard deviation."""
        return (values - values.mean()) / (values.std() + 1e-7)

    def _stacked(self, items):
        """Stack a list of buffered tensors into one detached tensor."""
        return torch.squeeze(torch.stack(items, dim=0)).detach().to(device)

    def update(self):
        """
        Run K_epochs of clipped-surrogate updates on the buffered rollout,
        sync policy_old and clear the buffer.

        NOTE(review): rewards_pre / rewards_post are not filled inside this
        class; presumably the training loop appends them - confirm.
        """
        # Monte Carlo estimate of returns
        rewards = self._discounted_returns(self.buffer.rewards)
        # Scale by the largest magnitude. Same as the former
        # `rewards / (-rewards).max()` for cost-style (non-positive) rewards,
        # without flipping the sign or dividing by zero otherwise.
        scale = rewards.abs().max()
        if scale > 0:
            rewards = rewards / scale
        rewards = self._standardize(rewards)

        # Monte Carlo estimate of returns (pre decision)
        rewards_pre = self._standardize(self._discounted_returns(self.buffer.rewards_pre))

        # Monte Carlo estimate of returns (post decision)
        rewards_post = self._standardize(self._discounted_returns(self.buffer.rewards_post))

        # convert list to tensor
        old_states = self._stacked(self.buffer.states)
        old_pre_states = self._stacked(self.buffer.pre_states)
        old_post_states = self._stacked(self.buffer.post_states)
        old_actions = self._stacked(self.buffer.actions)
        old_actions_pre = self._stacked(self.buffer.actions_pre)
        old_actions_post = self._stacked(self.buffer.actions_post)
        old_logprobs = self._stacked(self.buffer.logprobs)
        old_logprobs_pre = self._stacked(self.buffer.logprobs_pre)
        old_logprobs_post = self._stacked(self.buffer.logprobs_post)
        old_state_values = self._stacked(self.buffer.state_values)
        old_state_values_post = self._stacked(self.buffer.state_values_post)

        # calculate advantages
        advantages_post = rewards_post.detach() - old_state_values_post.detach()
        advantages_pre = rewards_pre.detach() - old_state_values.detach()
        advantages = rewards.detach() - old_state_values_post.detach() - old_state_values.detach()

        # Optimize policy for K epochs
        for i in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, logprobs_pre, logprobs_post, state_values, state_values_post, dist_entropy, dist_entropy_pre, dist_entropy_post = self.policy.evaluate(old_states, old_pre_states, old_post_states, old_actions, old_actions_pre, old_actions_post)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs)

            # Finding Surrogate Loss
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages.unsqueeze(1)

            surr = -torch.min(surr1, surr2)

            # NOTE(review): the MSE term compares two detached buffers, so it
            # gives no gradient and the critics are never trained; the
            # pre/post log-probs and advantages are unused as well. Left
            # unchanged because the intended objective is not visible here.
            loss = surr + 0.5 * self.MseLoss(old_state_values_post, old_state_values) - 0.01*dist_entropy

            # Optmization - gradient backpropagation
            self.optimizer.zero_grad()
            loss.mean().backward(retain_graph=True)
            self.optimizer.step()

        print('Last Loss {}'.format(loss.sum().item()))
        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the weights of policy_old to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load ``checkpoint_path`` into both policy_old and policy."""
        # read the file once and reuse the state dict for both networks
        weights = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(weights)
        self.policy.load_state_dict(weights)
code/Lake application/agents/PDPPO_two_critics_two_actors.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 1 00:43:49 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import torch
9
+ import numpy as np
10
+ import torch.nn as nn
11
+ from torch.distributions import MultivariateNormal
12
+ from torch.distributions import Categorical
13
+
14
################################## set device ##################################
print("============================================================================================")
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
25
+
26
+
27
+ ################################## PDPPO Policy ##################################
28
class RolloutBuffer:
    """Plain container of per-step lists collected during a rollout."""

    # Names of the list attributes, in the order they are created.
    _FIELDS = (
        'actions',
        'states',
        'pre_states',
        'post_states',
        'logprobs',
        'rewards',
        'rewards_pre',
        'rewards_post',
        'state_values',
        'state_values_post',
        'is_terminals',
    )

    def __init__(self):
        for field_name in self._FIELDS:
            setattr(self, field_name, [])

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for field_name in self._FIELDS:
            getattr(self, field_name).clear()
54
+
55
class ActorCritic(nn.Module):
    """Actor network with two critics (``critic`` and ``critic_post``).

    In continuous mode the actor is a Tanh MLP producing the mean of a
    MultivariateNormal with a fixed diagonal covariance.  Otherwise the actor
    is ``fc1 -> fc2 -> actor`` and its output is split into
    ``len(action_dim.nvec)`` categorical distributions, so ``action_dim`` must
    expose an ``nvec`` array in that mode.

    ``state_dim_pre`` and ``state_dim_post`` are accepted but not used here:
    both critics are built with ``state_dim`` inputs.
    """

    def __init__(self, state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init):
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        if has_continuous_action_space:
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh()
            )
        else:
            # multidiscrete actor: one flat logit vector covering every branch
            self.fc1 = nn.Linear(state_dim, 128)
            self.fc2 = nn.Linear(128, 128)
            self.actor = nn.Linear(128, self.action_dim.nvec.sum())

        # critic is applied to pre-decision states, critic_post to post-decision states
        self.critic = self._build_critic(state_dim)
        self.critic_post = self._build_critic(state_dim)

    @staticmethod
    def _build_critic(in_features):
        """Return a 128-128-1 Tanh MLP used as a state-value head."""
        return nn.Sequential(
            nn.Linear(in_features, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def _discrete_probs(self, state):
        """Run the discrete actor and return the softmax over its flat logits."""
        hidden = nn.functional.relu(self.fc2(nn.functional.relu(self.fc1(state))))
        return nn.functional.softmax(self.actor(hidden), dim=-1)

    def forward(self, state):
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the continuous-action variance; only warns for discrete policies."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def act(self, state):
        """Sample an action for a single state.

        Returns the detached action and its detached log-probability.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            covariance = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(mean, covariance)
        else:
            branch_probs = self._discrete_probs(state).view(len(self.action_dim.nvec), -1)
            dist = Categorical(branch_probs)

        sampled = dist.sample()
        sampled_logprob = dist.log_prob(sampled)
        return sampled.detach(), sampled_logprob.detach()

    def evaluate(self, state, pre_state, post_state, action):
        """Score stored actions under the current parameters.

        Returns ``(action_logprobs, state_values, state_values_post,
        dist_entropy)`` where ``state_values`` comes from ``critic(pre_state)``
        and ``state_values_post`` from ``critic_post(post_state)``.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            variance = self.action_var.expand_as(mean)
            covariance = torch.diag_embed(variance).to(device)
            dist = MultivariateNormal(mean, covariance)

            # For Single Action Environments.
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            branch_probs = self._discrete_probs(state).view(state.shape[0], len(self.action_dim.nvec), -1)
            dist = Categorical(branch_probs)

        pre_values = self.critic(pre_state)
        post_values = self.critic_post(post_state)
        return dist.log_prob(action), pre_values, post_values, dist.entropy()
158
+
159
+
160
class PDPPO:
    """PPO agent that keeps separate critics for pre- and post-decision states.

    The agent relies on ``env`` exposing ``n_machines``, ``n_items``,
    ``setup_costs``, ``setup_loss`` and ``machine_production_matrix``.  The
    first ``n_items`` entries of an observation are treated as inventory
    levels and the next ``n_machines`` entries as machine setups.

    The caller is expected to append to ``buffer.rewards`` and
    ``buffer.is_terminals`` after every ``select_action`` call; ``update``
    consumes only those two reward-side lists (``buffer.rewards_pre`` and
    ``buffer.rewards_post`` are not read by this variant).
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, env, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.env = env

        self.reward_old_pre = -np.inf
        self.reward_old_post = -np.inf

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        state_dim_pre = self.env.n_machines
        state_dim_post = self.env.n_items

        self.policy = ActorCritic(state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)

        # The discrete actor is fc1 -> fc2 -> actor.  All three layers must be
        # handed to the optimizer, otherwise the two hidden layers keep their
        # random initial weights for the whole training run.
        actor_params = list(self.policy.actor.parameters())
        for trunk_name in ('fc1', 'fc2'):
            trunk_layer = getattr(self.policy, trunk_name, None)
            if trunk_layer is not None:
                actor_params.extend(trunk_layer.parameters())

        self.optimizer = torch.optim.Adam([
            {'params': actor_params, 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic*10},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic*1}
        ])

        self.policy_old = ActorCritic(state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action standard deviation on both policies (continuous only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action std by ``action_std_decay_rate``, floored at ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply the deterministic effect of ``action`` (setup change + production).

        ``machine_setup`` and ``inventory_level`` are modified IN PLACE and
        also returned; pass copies when the originals must be preserved.

        Returns
        -------
        machine_setup, inventory_level, setup_costs
            ``setup_costs`` is a new array with one entry per machine.
        """
        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        for m in range(self.env.n_machines):
            if action[m] != 0: # the machine is not idle
                # 1. change the setup if the machine was set up for another item
                if machine_setup[m] != action[m]:
                    setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
                    setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
                machine_setup[m] = action[m]
                # 2. production, reduced by the setup loss of this step
                production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
                inventory_level[action[m] - 1] += production
            else:
                machine_setup[m] = 0
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state):
        """Sample an action with the old policy and record the step in the buffer.

        Stores the state, its pre-/post-decision versions, the action, its
        log-probability and both critic values.  Returns the action as a
        NumPy array.
        """
        n_items = self.env.n_items
        setup_slice = slice(n_items, n_items + self.env.n_machines)

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state)

        # get_post_state writes into its arguments.  Slices of `state` are
        # views, so clones are passed; otherwise `state` (and therefore the
        # stored state / pre_state) would silently become the post-decision
        # state and no longer match the observation the action was sampled from.
        machine_setup, inventory_level, setup_cost = self.get_post_state(
            action,
            state[setup_slice].clone(),
            state[0:n_items].clone(),
        )

        post_state = state.clone()
        post_state[setup_slice] = machine_setup.clone()
        post_state[0:n_items] = inventory_level.clone()

        pre_state = state.clone()

        self.buffer.states.append(state)
        self.buffer.pre_states.append(pre_state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(pre_state).detach()
            state_val_post = self.policy_old.critic_post(post_state).detach()

        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        if self.has_continuous_action_space:
            return action.detach().cpu().numpy().flatten()

        # .cpu() is required before .numpy() when the policy runs on CUDA.
        return action.detach().cpu().numpy()

    def _discounted_returns(self, rewards, is_terminals):
        """Return Monte Carlo discounted returns, restarting at terminal steps."""
        returns = []
        running = 0
        for reward, is_terminal in zip(reversed(rewards), reversed(is_terminals)):
            if is_terminal:
                running = 0
            running = reward + (self.gamma * running)
            returns.append(running)
        # Built back-to-front with O(1) appends, then flipped once.
        returns.reverse()
        return returns

    def update(self):
        """Run ``K_epochs`` clipped-PPO updates on the buffered rollout, then clear it."""
        # Monte Carlo estimate of returns (left un-normalized)
        rewards = torch.tensor(
            self._discounted_returns(self.buffer.rewards, self.buffer.is_terminals),
            dtype=torch.float32,
        ).to(device)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_pre_states = torch.squeeze(torch.stack(self.buffer.pre_states, dim=0)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
        old_state_values_post = torch.squeeze(torch.stack(self.buffer.state_values_post, dim=0)).detach().to(device)

        # The sum of both critics is used as the baseline for the return.
        advantages = rewards - (old_state_values.detach()+old_state_values_post.detach())

        loss = None

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, state_values_post, dist_entropy = self.policy.evaluate(old_states, old_pre_states, old_post_states, old_actions)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs)

            # Finding Surrogate Loss
            # NOTE(review): unsqueeze(1) broadcasts the advantage over the
            # per-branch log-probs of the discrete actor; in continuous mode
            # logprobs appear to be 1-D, so this may need revisiting there.
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages.unsqueeze(1)

            surr = -torch.min(surr1, surr2)

            # The critics return (N, 1) while the returns are (N,).  Drop the
            # trailing axis so MSELoss compares element-wise instead of
            # broadcasting to an (N, N) matrix.
            value_estimate = (state_values_post + state_values).squeeze(-1)

            loss = surr + 0.5 * self.MseLoss(value_estimate, rewards) - 0.01*dist_entropy

            # Optimization - gradient backpropagation.  The graph is rebuilt
            # by evaluate() every epoch, so it does not need to be retained.
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        if loss is not None:
            print('Last Loss {}'.format(loss.sum().item()))

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the weights of ``policy_old`` to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Restore both policies from a checkpoint written by ``save``."""
        # Deserialize once and reuse; load_state_dict copies into each module.
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
code/Lake application/agents/PDPPO_v0.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import copy
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.optim as optim
7
+ import torch.nn.functional as F
8
+ from torch.distributions import Categorical
9
+ from envs import *
10
+ import gym
11
+
12
+
13
+
14
class SimplePlantSB(SimplePlant):
    """``SimplePlant`` variant with gym action/observation spaces.

    Actions are ``MultiDiscrete`` with ``n_items + 1`` choices per machine.
    Observations are either a dict (``settings['dict_obs']`` truthy) or a
    flat array made of the inventory levels followed by the machine setups.
    """

    def __init__(self, settings, stoch_model):
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional; fall back to flat observations when absent.
        try:
            self.dict_obs = settings['dict_obs']
        except (KeyError, TypeError):
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items+1] * self.n_machines
        )

        if self.dict_obs:
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(low = np.zeros(self.n_items),high = np.ones(self.n_items)*(settings['max_inventory_level'][0]+1)*self.n_items),
                'machine_setup': gym.spaces.MultiDiscrete([self.n_items+1] * self.n_machines)
            })
        else:
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items+self.n_machines),
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level), # high for the inventory level
                        np.ones(self.n_machines) * (self.n_items+1), # high for the machine setups
                    ]),
                dtype=np.int32
            )

    def step(self, action):
        """
        Step method: Execute one time step within the environment

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : observation produced by _next_observation
        reward : negative sum of every entry of the cost dict returned by _take_action
        done : True once current_step equals T
        dict : the cost dict itself, for monitoring

        """
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(action, self.machine_setup, self.inventory_level, self.demand)

        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """
        Return the parent observation, flattened when dict observations are off.

        Raises
        ------
        ValueError : if the parent returns a non-dict observation while
            ``dict_obs`` is enabled.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'], # n_items size
                        obs['machine_setup'], # n_machine size
                    )
                )
        else:
            if self.dict_obs:
                # Raising a bare string is itself a TypeError in Python 3;
                # raise a real exception carrying the same message.
                raise ValueError('Change dict_obst to False')
        return obs
93
+
94
+ # Define the policy network
95
class Policy(nn.Module):
    """Shared hidden layer followed by one softmax head per output column.

    Parameters
    ----------
    input_size : number of input features.
    output_shape : pair ``(n_choices, n_heads)``; ``n_heads`` linear heads
        are created, each producing ``n_choices`` probabilities.
    """

    def __init__(self, input_size, output_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        n_choices, n_heads = output_shape[0], output_shape[1]
        # One independent head per entry; the loop variable itself is unused.
        self.fc_list = nn.ModuleList([nn.Linear(128, n_choices) for _ in range(n_heads)])

    def forward(self, x):
        """Return a list with one ``(batch, n_choices)`` probability tensor per head."""
        x = F.relu(self.fc1(x)).requires_grad_()
        # softmax over dim=1, so x must be 2-D (batch, features)
        outputs = [F.softmax(fc(x), dim=1) for fc in self.fc_list]
        return outputs
105
+
106
+ # Define the value network for deterministic components
107
class Value(nn.Module):
    """Two-layer value network: Linear(128) + ReLU, then a linear output layer."""

    def __init__(self,input_size,output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        """Return the unbounded value estimates for ``x``."""
        hidden = F.relu(self.fc1(x)).requires_grad_()
        return self.fc2(hidden)
117
+
118
+ # Define the value network for stochastic components
119
class ValueStochastic(nn.Module):
    """Two-layer network whose output is passed through a softmax over dim 1."""

    def __init__(self,input_size,output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        """Return softmax-normalized outputs; ``x`` must be 2-D (batch, features)."""
        hidden = F.relu(self.fc1(x)).requires_grad_()
        scores = self.fc2(hidden)
        return F.softmax(scores, dim=1)
129
+
130
+ # Define the PPO agent
131
class PDPPO:
    """Early (v0) post-decision PPO agent built on ``SimplePlantSB``.

    Holds a policy network plus two value networks (``value`` and
    ``value_post``), each with its own Adam optimizer.  The networks are only
    created when ``dict_obs`` is False.
    """

    def __init__(self, env: SimplePlant, settings: dict):

        self.env = SimplePlantSB(env.settings, env.stoch_model)
        self.last_inventory = env.inventory_level
        self.experiment_name = settings['experiment_name']
        # 'dict_obs' is optional; default to flat observations.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False

        self.POSSIBLE_STATES = self.env.n_items + 1
        self.env.cost_to_reward = True
        self.epsilon = 0

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # Use the logs directory two levels above this file.
        self.LOG_DIR = os.path.join(BASE_DIR,'logs')

        if self.dict_obs == False:
            input_size = self.env.observation_space.shape[0]
            output_size_policy = (self.env.n_items+1, self.env.action_space.shape[0]) # we add 1 for the idle state
            output_size_value = self.env.action_space.shape[0]
            self.policy = Policy(input_size,output_size_policy)
            self.value = Value(input_size,output_size_value)
            self.value_post = ValueStochastic(input_size,output_size_value)
            self.optimizer_policy = optim.Adam(self.policy.parameters(), lr=1e-3)
            self.optimizer_value = optim.Adam(self.value.parameters(), lr=1e-3)
            self.optimizer_value_post = optim.Adam(self.value_post.parameters(), lr=1e-3)
            self.eps_clip = 0.2
            self.gamma = 0.99
            self.lmbda = 0.95

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply the deterministic effect of ``action`` (setup change + production).

        ``machine_setup`` and ``inventory_level`` are modified IN PLACE and
        also returned, together with a new per-machine ``setup_costs`` array.
        """
        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        for m in range(self.env.n_machines):
            if action[m] != 0: # the machine is not idle
                # 1. change the setup if the machine was set up for another item
                if machine_setup[m] != action[m]:
                    setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
                    setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
                machine_setup[m] = action[m]
                # 2. production, reduced by the setup loss of this step
                production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
                inventory_level[action[m] - 1] += production
            else:
                machine_setup[m] = 0
        return machine_setup, inventory_level, setup_costs

    def get_action(self, state):
        """Sample an action for one observation.

        Returns ``(action, log_prob, probs, value, value_post)``.
        """
        state = torch.from_numpy(state).float().unsqueeze(0)
        probs = self.policy(state)
        probs_concat = torch.stack(probs, dim=1)
        m = Categorical(probs_concat)
        action = m.sample()
        value = self.value(state)
        # NOTE(review): the arrays passed here come from `.numpy()` on slices
        # of the `state` tensor and get_post_state writes into them, so the
        # tensor evaluated by value_post below is presumably the post-decision
        # state; the returned values themselves are unused. Confirm intended.
        machine_setup, inventory_level, setup_cost = self.get_post_state(action.numpy()[0], state[0][self.env.n_items:self.env.n_items+self.env.n_machines].numpy(), state[0][0:self.env.n_items].numpy())
        value_post = self.value_post(state)

        return action, m.log_prob(action), probs_concat, value, value_post

    def update(self, rewards, rewards_pre_state, rewards_post_state, states, post_states, actions, probs, next_states):
        """Fit both value networks and then the policy on one episode of data.

        ``rewards`` is accepted but not read; the pre-/post-state rewards are
        used instead.
        """
        # Update deterministic value function
        for epoch in range(10):
            for i in range(len(actions)):
                state = torch.from_numpy(states[i]).float().unsqueeze(0)
                value = self.value(state)
                next_state = torch.from_numpy(next_states[i]).float().unsqueeze(0)
                next_value = self.value(next_state)
                # NOTE(review): the target is not detached, so gradients also
                # flow through next_value - confirm this is intended.
                target = rewards_pre_state[i] + self.gamma * next_value
                advantage = target - value
                loss = advantage.pow(2).mean()
                self.optimizer_value.zero_grad()
                loss.backward()
                self.optimizer_value.step()

        # Update stochastic value function
        for epoch in range(10):
            for i in range(len(actions)):
                state = torch.from_numpy(states[i]).float().unsqueeze(0)
                value = self.value_post(state)
                post_state = torch.from_numpy(post_states[i]).float().unsqueeze(0)
                value_post = self.value_post(post_state)
                target = rewards_post_state[i] + self.gamma * value_post
                advantage = target - value
                loss = advantage.pow(2).mean()
                self.optimizer_value_post.zero_grad()
                loss.backward()
                self.optimizer_value_post.step()

        # Update policy network
        states = torch.from_numpy(np.vstack(states)).float()
        # NOTE(review): the index is unsqueezed on dim 1 while gather runs
        # along dim 2 - verify the index layout selects each machine's own
        # probability row.
        actions = torch.cat(actions).unsqueeze(1)
        old_probs = torch.cat(probs)
        old_probs = torch.gather(old_probs.clone(),2, actions)

        policy_epochs = 10
        for epoch in range(policy_epochs):
            probs = self.policy(states)
            probs = torch.stack(probs, dim=1).clone()
            m = Categorical(probs)
            action = m.sample()
            probs = torch.gather(probs, 2, actions)
            kl_div = (old_probs * (torch.log(old_probs) - torch.log(probs))).sum()

            for state,post_state, action, old_prob, prob, next_state, reward_pre_state, reward_post_state in zip(states,post_states, actions, old_probs, probs, next_states,rewards_pre_state,rewards_post_state):
                state = state.unsqueeze(0)
                next_state = torch.from_numpy(next_state).unsqueeze(0).float()
                post_state = torch.from_numpy(post_state).unsqueeze(0).float()
                action = action.unsqueeze(0)
                old_prob = old_prob.unsqueeze(0)
                prob = prob.unsqueeze(0)
                value = self.value(state)
                value_post = self.value_post(post_state)
                advantage = reward_pre_state + self.gamma * self.value(next_state) - self.value(state)
                advantage_post = reward_post_state + self.gamma * self.value_post(post_state) - self.value_post(state)

                ratio = (prob / old_prob)
                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1 - self.eps_clip, 1 + self.eps_clip) * advantage
                policy_loss = -torch.min(surr1, surr2) - 0.01 * m.entropy()

                ratio_post = ratio
                surr1_post = ratio_post * advantage_post
                surr2_post = torch.clamp(ratio_post, 1 - self.eps_clip, 1 + self.eps_clip) * advantage_post
                policy_loss_post = -torch.min(surr1_post, surr2_post) - 0.01 * m.entropy()

                # The policy graph is built once per epoch and reused for every
                # sample, hence retain_graph=True.
                # NOTE(review): the optimizer steps between these backward
                # passes; check this does not invalidate the retained graph.
                self.optimizer_policy.zero_grad()
                (policy_loss.pow(2).mean() + policy_loss_post.pow(2).mean() + 0.5 * value.pow(2).mean() + 0.5 * value_post.pow(2).mean()).backward(retain_graph=True)
                self.optimizer_policy.step()

    def learn(self, n_episodes=1000, save_interval=100):
        """Train for ``n_episodes`` episodes, checkpointing every ``save_interval``.

        A final checkpoint named after the experiment is written inside
        ``LOG_DIR`` when training finishes.
        """
        for episode in range(n_episodes):
            state = self.env.reset()
            rewards = []
            rewards_pre_state = []
            rewards_post_state = []
            states = []
            next_states = []
            actions = []
            probs = []
            post_states = []
            done = False
            while not done:
                action, log_prob, prob, value, value_post = self.get_action(state)
                next_state, reward, done, info = self.env.step(action[0].detach().numpy())
                # NOTE(review): get_post_state writes into the slices it is
                # given; if `state` is an ndarray these are views, so `state`
                # is already modified when it is appended below. Confirm intended.
                machine_setup, inventory_level, setup_cost = self.get_post_state(action[0].detach().numpy(), state[self.env.n_items:self.env.n_items+self.env.n_machines], state[0:self.env.n_items])
                post_state = state.copy()
                post_state[self.env.n_items:self.env.n_items+self.env.n_machines] = machine_setup
                post_state[0:self.env.n_items] = inventory_level
                post_states.append(post_state)
                rewards.append(reward)
                reward_pre_state = -(self.env.total_cost['holding_costs'] + self.env.total_cost['lost_sales'])
                reward_post_state = -setup_cost.sum()
                rewards_pre_state.append(reward_pre_state)
                rewards_post_state.append(reward_post_state)
                states.append(state)
                next_states.append(next_state)
                actions.append(action)
                probs.append(prob)

                state = next_state
                if done:
                    self.update(rewards, rewards_pre_state, rewards_post_state, states, post_states, actions, probs, next_states)
                    print('Episode:', episode, 'Reward:', sum(rewards))
                    if episode % save_interval == 0:
                        self.save(f'policy_{episode}.pt')

        # LOG_DIR is a directory, so torch.save cannot write to it directly;
        # store the final checkpoint as a file inside it.
        os.makedirs(self.LOG_DIR, exist_ok=True)
        self.save(os.path.join(self.LOG_DIR, f'{self.experiment_name}.pt'))

    def save(self, filepath):
        """Write all network and optimizer state dicts to ``filepath``."""
        torch.save({
            'policy_state_dict': self.policy.state_dict(),
            'value_state_dict': self.value.state_dict(),
            'value_post_state_dict': self.value_post.state_dict(),
            'optimizer_policy_state_dict': self.optimizer_policy.state_dict(),
            'optimizer_value_state_dict': self.optimizer_value.state_dict(),
            'optimizer_value_post_state_dict': self.optimizer_value_post.state_dict()
        }, filepath)

    def load(self, filepath):
        """Restore all network and optimizer state dicts written by ``save``."""
        checkpoint = torch.load(filepath)
        self.policy.load_state_dict(checkpoint['policy_state_dict'])
        self.value.load_state_dict(checkpoint['value_state_dict'])
        self.value_post.load_state_dict(checkpoint['value_post_state_dict'])
        self.optimizer_policy.load_state_dict(checkpoint['optimizer_policy_state_dict'])
        self.optimizer_value.load_state_dict(checkpoint['optimizer_value_state_dict'])
        self.optimizer_value_post.load_state_dict(checkpoint['optimizer_value_post_state_dict'])
327
+
328
+
code/Lake application/agents/PPO.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 1 00:43:49 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ from torch.distributions import MultivariateNormal
11
+ from torch.distributions import Categorical
12
+
13
################################## set device ##################################
print("============================================================================================")
# Use the first CUDA device when one is visible, otherwise stay on the CPU.
if not torch.cuda.is_available():
    device = torch.device('cpu')
    print("Device set to : cpu")
else:
    device = torch.device('cuda:0')
    # Hand cached, currently unused GPU memory back before training starts.
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
print("============================================================================================")
24
+
25
+
26
+ ################################## PPO Policy ##################################
27
class RolloutBuffer:
    """Parallel lists holding one rollout's worth of transitions.

    Each list is appended to by the agent while it interacts with the
    environment and emptied in place by ``clear`` after a policy update.
    """

    def __init__(self):
        self.actions, self.states, self.logprobs = [], [], []
        self.rewards, self.state_values, self.is_terminals = [], [], []

    def clear(self):
        """Empty every list in place (existing references stay valid)."""
        for storage in (
            self.actions,
            self.states,
            self.logprobs,
            self.rewards,
            self.state_values,
            self.is_terminals,
        ):
            storage.clear()
43
+
44
+
45
class ActorCritic(nn.Module):
    """Separate actor and critic MLPs used by the PPO agent.

    The actor maps a state to ``action_dim`` outputs and the critic maps a
    state to a single value. ``act`` and ``evaluate`` always interpret the
    actor output as logits of a categorical distribution, regardless of
    ``has_continuous_action_space``; in continuous mode only the layer sizes,
    the trailing ``Tanh`` and the stored ``action_var`` differ.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        # actor
        if has_continuous_action_space:
            variance = action_std_init * action_std_init
            self.action_var = torch.full((action_dim,), variance).to(device)
            actor_layers = [
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh(),
            ]
        else:
            actor_layers = [
                nn.Linear(state_dim, 128),
                nn.Tanh(),
                nn.Linear(128, 128),
                nn.Tanh(),
                nn.Linear(128, action_dim),
            ]
        self.actor = nn.Sequential(*actor_layers)

        # critic
        critic_layers = [
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1),
        ]
        self.critic = nn.Sequential(*critic_layers)

    def forward(self, state):
        """Not supported; use ``act`` or ``evaluate`` instead."""
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Replace the stored action variance (continuous mode only)."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def act(self, state):
        """Sample an action for ``state``.

        Returns:
            A tuple ``(action, log_prob, state_value)``, all detached from
            the autograd graph.
        """
        policy = Categorical(nn.functional.softmax(self.actor(state), dim=-1))
        action = policy.sample()
        log_likelihood = policy.log_prob(action)
        value = self.critic(state)
        return action.detach(), log_likelihood.detach(), value.detach()

    def evaluate(self, state, action):
        """Score previously taken actions under the current parameters.

        Returns:
            A tuple ``(log_probs, state_values, entropy)`` that keeps its
            autograd graph so it can be used in the PPO loss.
        """
        policy = Categorical(nn.functional.softmax(self.actor(state), dim=-1))
        # ``action`` is transposed before scoring and the result transposed
        # back, so the log-probabilities come out with ``action``'s layout.
        log_likelihood = policy.log_prob(action.T).T
        entropy = policy.entropy()
        return log_likelihood, self.critic(state), entropy
120
+
121
+
122
class PPO:
    """Clipped-surrogate PPO trainer built around an ``ActorCritic`` pair.

    ``policy_old`` collects experience through ``select_action``; ``policy``
    is optimized in ``update`` and then copied back into ``policy_old``.
    The caller is expected to append one reward and one terminal flag to
    ``self.buffer`` for every action returned by ``select_action``.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std_init=0.6):
        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        # Actor and critic get separate learning rates through parameter groups.
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
        ])

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action standard deviation on both policies (continuous mode only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Subtract ``action_std_decay_rate`` from the action std, floored at ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def select_action(self, state):
        """Sample an action from ``policy_old`` and record the step in the buffer.

        Args:
            state: 1-D observation accepted by ``torch.tensor``.

        Returns:
            The sampled action as a NumPy array.
        """
        with torch.no_grad():
            state = torch.tensor(state).to(device)
            state = state.float()
            # (state_dim,) -> (1, state_dim): a batch holding one state.
            state = torch.unsqueeze(state, 1).T
            action, action_logprob, state_val = self.policy_old.act(state)

        self.buffer.states.append(state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)
        self.buffer.state_values.append(state_val)

        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` PPO steps on the buffered rollout, then clear the buffer."""
        # Monte Carlo estimate of returns, accumulated backwards in time and
        # restarted at every episode boundary. Appending and reversing once
        # avoids the quadratic cost of inserting at the front of a list.
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()

        # Normalizing the rewards
        rewards = torch.tensor(returns, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # Convert the buffered lists to batch tensors. ``select_action`` stores
        # (1, state_dim) states, so concatenating on dim 0 gives
        # (N, state_dim) without a blanket ``squeeze()`` that would also drop
        # a batch or feature axis of size one.
        old_states = torch.cat(self.buffer.states, dim=0).detach().to(device)
        old_actions = torch.stack(self.buffer.actions, dim=0).detach().to(device)
        old_logprobs = torch.stack(self.buffer.logprobs, dim=0).detach().to(device)
        old_state_values = torch.stack(self.buffer.state_values, dim=0).reshape(-1).detach().to(device)

        # calculate advantages
        advantages = rewards.detach() - old_state_values.detach()
        advantages = advantages.unsqueeze(1)

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # match state_values tensor dimensions with rewards tensor
            state_values = state_values.reshape(-1)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages

            # Final clipped-objective loss. Each term is averaged on its own:
            # subtracting the per-sample entropy vector directly from the
            # column-shaped surrogate would broadcast to an N x N matrix, whose
            # mean equals the sum of the separate means computed here.
            policy_loss = -torch.min(surr1, surr2).mean()
            value_loss = 0.5 * self.MseLoss(state_values, rewards)
            entropy_bonus = 0.012 * dist_entropy.mean()
            loss = policy_loss + value_loss - entropy_bonus

            # take gradient step
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the weights of ``policy_old`` to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load the same checkpoint into both ``policy_old`` and ``policy``."""
        # Read the file once; storages are kept on the CPU while deserializing.
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
code/Lake application/agents/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from .PPOAgent import PPOAgent
2
+ from .PDPPOAgent import PDPPOAgent
3
+
4
+
5
+ __all__ = [
6
+ "PPOAgent",
7
+ "PDPPOAgent"
8
+ ]
code/Lake application/envs/frozen_lake.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from contextlib import closing
3
+
4
+ import numpy as np
5
+ from io import StringIO
6
+
7
+ from gym import utils
8
+ from gym.envs.toy_text import discrete
9
+
10
+ LEFT = 0
11
+ DOWN = 1
12
+ RIGHT = 2
13
+ UP = 3
14
+
15
+ MAPS = {
16
+ "4x4": ["SFFF", "FHFH", "FFFH", "HFFG"],
17
+ "8x8": [
18
+ "SFFFFFFF",
19
+ "FFFFFFFF",
20
+ "FFFHFFFF",
21
+ "FFFFFHFF",
22
+ "FFFHFFFF",
23
+ "FHHFFFHF",
24
+ "FHFFHFHF",
25
+ "FFFHFFFG",
26
+ ],
27
+ }
28
+
29
+
30
def generate_random_map(size=8, p=0.8):
    """Generates a random valid map (one that has a path from start to goal)

    Tiles are drawn with NumPy's global random generator; candidate grids are
    redrawn until the goal can be reached from the start.

    :param size: size of each side of the grid
    :param p: probability that a tile is frozen (values above 1 are clamped)
    :return: list of ``size`` strings made of the letters S, F, H and G
    """
    steps = ((1, 0), (0, 1), (-1, 0), (0, -1))

    def reaches_goal(grid):
        # Depth-first search from the top-left corner over non-hole tiles.
        stack, seen = [(0, 0)], set()
        while stack:
            cell = stack.pop()
            if cell in seen:
                continue
            seen.add(cell)
            r, c = cell
            for dr, dc in steps:
                nr, nc = r + dr, c + dc
                if not (0 <= nr < size and 0 <= nc < size):
                    continue
                tile = grid[nr][nc]
                if tile == "G":
                    return True
                if tile != "H":
                    stack.append((nr, nc))
        return False

    while True:
        p = min(1, p)
        grid = np.random.choice(["F", "H"], (size, size), p=[p, 1 - p])
        # Start and goal always sit in opposite corners.
        grid[0][0] = "S"
        grid[-1][-1] = "G"
        if reaches_goal(grid):
            return ["".join(row) for row in grid]
64
+
65
+
66
class FrozenLakeEnv(discrete.DiscreteEnv):
    """
    Winter is here. You and your friends were tossing around a frisbee at the
    park when you made a wild throw that left the frisbee out in the middle of
    the lake. The water is mostly frozen, but there are a few holes where the
    ice has melted. If you step into one of those holes, you'll fall into the
    freezing water. At this time, there's an international frisbee shortage, so
    it's absolutely imperative that you navigate across the lake and retrieve
    the disc. However, the ice is slippery, so you won't always move in the
    direction you intend.
    The surface is described using a grid like the following

        SFFF
        FHFH
        FFFH
        HFFG

    S : starting point, safe
    F : frozen surface, safe
    H : hole, fall to your doom
    G : goal, where the frisbee is located

    The episode ends when you reach the goal or fall in a hole.

    Rewards (this variant uses a shaped reward):
      * landing on G gives 1.0;
      * landing on H gives 0.0;
      * landing on any other tile gives ``1 / (1 + d)``, where ``d`` is the
        Manhattan distance from that tile to the goal.

    Slippery dynamics: the agent first moves one tile in the chosen
    direction. If that tile is not terminal, then with total probability
    ``base_slip_prob`` (0.3) it slides one further tile in a direction drawn
    from a fixed per-tile distribution, and otherwise it stays put.
    """

    metadata = {"render.modes": ["human", "ansi"]}

    def __init__(self, desc=None, map_name="4x4", is_slippery=True):
        if desc is None and map_name is None:
            desc = generate_random_map()
        elif desc is None:
            desc = MAPS[map_name]
        self.desc = desc = np.asarray(desc, dtype="c")
        self.nrow, self.ncol = nrow, ncol = desc.shape
        self.reward_range = (0, 1)

        nA = 4
        nS = nrow * ncol

        # Initial state distribution: uniform over the tiles marked S.
        isd = np.array(desc == b"S").astype("float64").ravel()
        isd /= isd.sum()

        # P[s][a] is a list of (probability, next_state, reward, done) tuples.
        P = {s: {a: [] for a in range(nA)} for s in range(nS)}

        def to_s(row, col):
            return row * ncol + col

        def to_row_col(s):
            return divmod(s, ncol)

        def inc(row, col, a):
            # One step in direction ``a``, clipped at the grid border.
            if a == LEFT:
                col = max(col - 1, 0)
            elif a == DOWN:
                row = min(row + 1, nrow - 1)
            elif a == RIGHT:
                col = min(col + 1, ncol - 1)
            elif a == UP:
                row = max(row - 1, 0)
            return (row, col)

        # First goal tile in row-major order, or None when the map has none.
        goal_cells = np.argwhere(desc == b"G")
        goal_position = None
        if len(goal_cells) > 0:
            goal_position = (int(goal_cells[0][0]), int(goal_cells[0][1]))

        def proximity_reward(current_row, current_col):
            if goal_position is None:
                raise ValueError("the map needs a goal tile 'G' to compute the proximity reward")
            goal_row, goal_col = goal_position
            distance = abs(goal_row - current_row) + abs(goal_col - current_col)
            return 1.0 / (1.0 + distance)

        def update_probability_matrix(row, col, a):
            newrow, newcol = inc(row, col, a)
            newstate = to_s(newrow, newcol)
            newletter = desc[newrow, newcol]
            terminated = bytes(newletter) in b"GH"
            reward = float(newletter == b"G")
            if not terminated:
                reward = proximity_reward(newrow, newcol)
            return newstate, reward, terminated

        # Fixed per-tile distribution over the four slide directions. A
        # private generator seeded with 42 yields the same numbers as seeding
        # the global one, without resetting NumPy's global random state every
        # time an environment is constructed.
        tile_probabilities = np.random.RandomState(42).dirichlet(np.ones(4), size=(nrow, ncol))

        base_slip_prob = 0.3

        for row in range(nrow):
            for col in range(ncol):
                s = to_s(row, col)
                for a in range(4):
                    li = P[s][a]
                    letter = desc[row, col]
                    if letter in b"GH":
                        # Terminal tiles are absorbing and give no reward.
                        li.append((1.0, s, 0, True))
                    else:
                        if is_slippery:
                            # First, the agent moves in the desired direction
                            newstate, reward, terminated = update_probability_matrix(row, col, a)
                            if terminated:
                                li.append((1.0, newstate, reward, terminated))
                            else:
                                # After the first move, slippery condition causes an additional movement
                                row2, col2 = to_row_col(newstate)
                                for b, prob in enumerate(tile_probabilities[row2, col2]):
                                    li.append(
                                        (base_slip_prob * prob, *update_probability_matrix(row2, col2, b))
                                    )
                                # Add the remaining probability for staying at the newstate
                                li.append((1.0 - base_slip_prob, newstate, reward, False))
                        else:
                            li.append((1.0, *update_probability_matrix(row, col, a)))

        super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)

    def get_post_decision_state(self, s, a):
        """Return the state reached by moving from ``s`` in direction ``a``.

        Only the clipped one-tile move is applied; the additional random
        slide modelled in the transition table is not.
        """
        row, col = divmod(s, self.ncol)
        if a == LEFT:
            col = max(col - 1, 0)
        elif a == DOWN:
            row = min(row + 1, self.nrow - 1)
        elif a == RIGHT:
            col = min(col + 1, self.ncol - 1)
        elif a == UP:
            row = max(row - 1, 0)
        return row * self.ncol + col

    def render(self, mode="human"):
        """Write the grid with the current tile highlighted.

        Output goes to stdout unless ``mode`` is ``"ansi"``; for every mode
        other than ``"human"`` the text written to the buffer is returned.
        """
        outfile = StringIO() if mode == "ansi" else sys.stdout

        row, col = self.s // self.ncol, self.s % self.ncol
        desc = self.desc.tolist()
        desc = [[c.decode("utf-8") for c in line] for line in desc]
        desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True)
        if self.lastaction is not None:
            outfile.write(
                "  ({})\n".format(["Left", "Down", "Right", "Up"][self.lastaction])
            )
        else:
            outfile.write("\n")
        outfile.write("\n".join("".join(line) for line in desc) + "\n")

        if mode != "human":
            with closing(outfile):
                return outfile.getvalue()
code/Lake application/experiments.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # -*- coding: utf-8 -*-
3
+ import os
4
+ import gc
5
+ import sys
6
+ import json
7
+ import random
8
+ import gym
9
+
10
+ BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
11
+ AGENTS_DIR = os.path.join(BASE_DIR,'agents')
12
+ sys.path.append(AGENTS_DIR)
13
+
14
+ from agents.PPO import PPO
15
+ from agents.PDPPO import PDPPO
16
+
17
+ import numpy as np
18
+ from agents import *
19
+ from agents import StochasticProgrammingAgent, AdpAgentHD3
20
+ from agents import StableBaselineAgent, MultiAgentRL, EnsembleAgent, PerfectInfoAgent,PSOagent,AdpAgentHD, PPOAgent
21
+ from test_functions import *
22
+ from scenarioManager.stochasticDemandModel import StochasticDemandModel
23
+
24
+
25
+ #'15items_5machines_i100','25items_10machines'
26
+
27
if __name__ == '__main__':
    from gym.envs.toy_text.frozen_lake import generate_random_map

    def _best_model_dir(model_name, exp_name):
        """Return the ``logs/best_<model>_<experiment>/best_model`` path."""
        # '__file__' is a string literal (not the module attribute), so the
        # path is resolved relative to the current working directory.
        root = os.path.dirname(os.path.abspath('__file__'))
        return os.path.join(root, 'logs', f'best_{model_name}_{exp_name}', 'best_model')

    experiment_name = 'frozen_lake'

    # Number of training episodes passed to each agent's learn().
    training_epochs_RL = 200000

    # Number of test executions (number of complete environment iterations)
    nreps = 100

    for _run in range(5):
        # Setting the seeds (the same values on every repetition)
        np.random.seed(1)
        random.seed(10)

        # Environment shared by both agents of this repetition.
        env = gym.make('FrozenLake-v1', desc=generate_random_map(size=8), is_slippery=True)

        setting_sol_method = {
            'discount_rate': 0.99,
            'experiment_name': experiment_name,
            'parallelization': False,
            'model_name': 'PPO',
            'branching_factors': [4, 2, 2],
            'dict_obs': False  # To be employed if dictionary observations are necessary
        }

        #######################################################################
        # PPO
        #######################################################################
        ppo_agent = PPOAgent(env, setting_sol_method)
        ppo_agent.learn(n_episodes=training_epochs_RL)
        # Reload the best stored model after training.
        ppo_agent.load_agent(_best_model_dir('PPO', experiment_name))

        #######################################################################
        # Post-decision PPO
        #######################################################################
        pdppo_agent = PDPPOAgent(env, setting_sol_method)
        pdppo_agent.learn(n_episodes=training_epochs_RL)
        # Reload the best stored model after training.
        pdppo_agent.load_agent(_best_model_dir('PDPPO', experiment_name))

        # No evaluation step is run here; ``nreps`` is kept for a test harness.

        del env
        gc.collect()
code/Lake application/generate_tables.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+
7
+ main_folder = 'logs/results_2'
8
+
9
def get_max_rewards(folder='logs/results_2'):
    """Summarise the best logged reward of every method over runs 1 to 5.

    For each method the maximum of the ``reward`` column is taken per run
    log, and the mean and (population) standard deviation of those maxima
    are reported.

    Args:
        folder: directory holding ``<method>_<env>_log_<run>.csv`` files.

    Returns:
        A DataFrame with one row per (environment, method) pair.
    """
    experiment_names = ['frozen_lake']
    methods = ['PDPPO', 'PPO']
    columns = ['Environment', 'Method', 'Max Reward', 'Max Reward Standard Deviation']

    # Rows are collected in a list and turned into a frame once;
    # ``DataFrame.append`` no longer exists in pandas 2.x.
    rows = []
    for env_name in experiment_names:
        for method in methods:
            max_rewards = [
                pd.read_csv(f'{folder}/{method}_{env_name}_log_{run_num}.csv')['reward'].max()
                for run_num in range(1, 6)
            ]
            rows.append({
                'Environment': env_name,
                'Method': method,
                'Max Reward': np.mean(max_rewards),
                'Max Reward Standard Deviation': np.std(max_rewards),
            })

    return pd.DataFrame(rows, columns=columns)
28
+
29
def get_first_rewards(folder=None, timestep=50000):
    """Summarise the reward logged at a fixed timestep over runs 1 to 5.

    Args:
        folder: directory holding the run logs; defaults to the module-level
            ``main_folder``.
        timestep: value of the ``timestep`` column whose reward is collected.

    Returns:
        A DataFrame with the mean and (population) standard deviation of the
        collected rewards per (environment, method) pair. Both are NaN when
        no run contains the requested timestep.
    """
    if folder is None:
        folder = main_folder

    experiment_names = ['frozen_lake']
    methods = ['PDPPO', 'PPO']
    columns = ['Environment', 'Method', 'First Reward', 'First Reward Standard Deviation']

    rows = []
    for env_name in experiment_names:
        for method in methods:
            # Keep a flat list of scalars: a run without the requested
            # timestep then contributes nothing instead of producing a ragged
            # list of arrays that NumPy cannot average.
            first_rewards = []
            for run_num in range(1, 6):
                data = pd.read_csv(f'{folder}/{method}_{env_name}_log_{run_num}.csv')
                first_rewards.extend(data[data['timestep'] == timestep]['reward'].values)
            rows.append({
                'Environment': env_name,
                'Method': method,
                'First Reward': np.mean(first_rewards) if first_rewards else np.nan,
                'First Reward Standard Deviation': np.std(first_rewards) if first_rewards else np.nan,
            })

    # Built in one go; ``DataFrame.append`` no longer exists in pandas 2.x.
    return pd.DataFrame(rows, columns=columns)
48
+
49
def get_steps_reward_threshold(folder='logs/results_1'):
    """Summarise how many timesteps each method needs to reach a reward threshold.

    For every run log the first row whose ``reward`` is at least the
    environment's threshold contributes its ``timestep``. Runs that never
    reach the threshold are left out; if no run reaches it, the mean and
    standard deviation are NaN.

    Args:
        folder: directory holding ``<method>_<env>_log_<run>.csv`` files.
            NOTE(review): the default differs from the ``logs/results_2``
            folder used by the other summaries in this file; confirm that
            this is intended.

    Returns:
        A DataFrame with one row per (environment, method) pair.
    """
    experiment_names = ['frozen_lake']
    methods = ['PDPPO', 'PPO']
    # One threshold per entry of ``experiment_names``, matched by position.
    reward_thresholds = [10]
    columns = ['Environment', 'Method', 'Steps', 'Steps Standard Deviation']

    rows = []
    for env_name, threshold in zip(experiment_names, reward_thresholds):
        for method in methods:
            reward_steps = []
            for run_num in range(1, 6):
                data = pd.read_csv(f'{folder}/{method}_{env_name}_log_{run_num}.csv')
                reached = data[data['reward'] >= threshold]['timestep']
                # A run that never reaches the threshold has no first row;
                # skip it rather than fail on ``iloc[0]``.
                if not reached.empty:
                    reward_steps.append(reached.iloc[0])
            rows.append({
                'Environment': env_name,
                'Method': method,
                'Steps': np.mean(reward_steps) if reward_steps else np.nan,
                'Steps Standard Deviation': np.std(reward_steps) if reward_steps else np.nan,
            })

    # Built in one go; ``DataFrame.append`` no longer exists in pandas 2.x.
    return pd.DataFrame(rows, columns=columns)
69
+
70
+
71
if __name__ == '__main__':
    # Build the three summaries, then join them on their shared key columns.
    summaries = [
        get_max_rewards(),
        get_first_rewards(),
        get_steps_reward_threshold(),
    ]

    final_results = summaries[0]
    for extra in summaries[1:]:
        final_results = pd.merge(final_results, extra, on=['Environment', 'Method'])
    print(final_results)
code/Lake application/logs/.gitkeep ADDED
File without changes
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_0_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c9e2a71c5d15566400c1716b2450770bb0b1e02e290ba4d6a0e6d866e97a1a
3
+ size 307271
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_1.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 22,400,1.2526
3
+ 48,800,1.1762
4
+ 66,1200,1.698
5
+ 86,1600,1.5699
6
+ 100,2000,2.2196
7
+ 114,2400,2.2243
8
+ 130,2800,1.9494
9
+ 144,3200,2.1269
10
+ 161,3600,1.8416
11
+ 173,4000,2.4108
12
+ 186,4400,2.3831
13
+ 195,4800,3.7798
14
+ 204,5200,3.3905
15
+ 212,5600,3.7893
16
+ 221,6000,2.6417
17
+ 230,6400,3.9088
18
+ 245,6800,2.3282
19
+ 256,7200,2.7405
20
+ 263,7600,4.6528
21
+ 273,8000,3.0146
22
+ 281,8400,3.2375
23
+ 291,8800,3.553
24
+ 297,9200,4.7911
25
+ 307,9600,3.4636
26
+ 314,10000,4.4748
27
+ 320,10400,4.6664
28
+ 329,10800,3.4853
29
+ 336,11200,4.7085
30
+ 341,11600,6.6859
31
+ 349,12000,3.6321
32
+ 355,12400,5.3428
33
+ 362,12800,4.1236
34
+ 367,13200,6.1173
35
+ 373,13600,5.3324
36
+ 377,14000,7.2656
37
+ 381,14400,7.7223
38
+ 386,14800,5.9847
39
+ 390,15200,7.6658
40
+ 395,15600,5.8334
41
+ 400,16000,7.1939
42
+ 404,16400,7.1478
43
+ 408,16800,7.5988
44
+ 414,17200,6.1732
45
+ 419,17600,6.1007
46
+ 423,18000,7.2673
47
+ 427,18400,7.41
48
+ 432,18800,7.0193
49
+ 438,19200,5.3807
50
+ 443,19600,5.9541
51
+ 447,20000,7.6451
52
+ 452,20400,7.0583
53
+ 457,20800,7.3468
54
+ 461,21200,7.6243
55
+ 466,21600,6.6661
56
+ 473,22000,4.9625
57
+ 479,22400,4.5857
58
+ 484,22800,6.9212
59
+ 488,23200,6.1686
60
+ 493,23600,7.897
61
+ 497,24000,7.9243
62
+ 501,24400,8.608
63
+ 506,24800,6.6952
64
+ 511,25200,6.2059
65
+ 516,25600,7.1384
66
+ 522,26000,5.7323
67
+ 527,26400,5.9145
68
+ 534,26800,5.6198
69
+ 538,27200,7.6158
70
+ 542,27600,7.7017
71
+ 546,28000,7.8191
72
+ 551,28400,8.2935
73
+ 556,28800,7.3137
74
+ 562,29200,5.8557
75
+ 570,29600,4.3915
76
+ 576,30000,4.9182
77
+ 581,30400,6.3954
78
+ 585,30800,7.2091
79
+ 590,31200,9.0975
80
+ 594,31600,5.9126
81
+ 600,32000,5.1805
82
+ 606,32400,6.9075
83
+ 611,32800,6.2059
84
+ 616,33200,4.6794
85
+ 621,33600,7.0498
86
+ 626,34000,7.4649
87
+ 631,34400,6.6183
88
+ 635,34800,8.8603
89
+ 639,35200,6.275
90
+ 643,35600,9.1126
91
+ 648,36000,6.9308
92
+ 653,36400,6.7373
93
+ 657,36800,7.6857
94
+ 662,37200,7.836
95
+ 666,37600,6.3725
96
+ 671,38000,6.2922
97
+ 676,38400,6.6979
98
+ 680,38800,7.6388
99
+ 684,39200,7.0079
100
+ 688,39600,7.5892
101
+ 692,40000,7.7355
102
+ 697,40400,6.5238
103
+ 702,40800,6.181
104
+ 706,41200,6.5753
105
+ 711,41600,6.499
106
+ 716,42000,6.1642
107
+ 721,42400,6.3709
108
+ 725,42800,6.5148
109
+ 730,43200,6.1071
110
+ 734,43600,7.2231
111
+ 739,44000,6.9884
112
+ 743,44400,7.6752
113
+ 747,44800,7.645
114
+ 751,45200,7.6253
115
+ 756,45600,6.9888
116
+ 760,46000,6.551
117
+ 764,46400,8.8765
118
+ 768,46800,7.644
119
+ 772,47200,7.7078
120
+ 776,47600,7.7402
121
+ 780,48000,7.7096
122
+ 786,48400,6.4542
123
+ 790,48800,6.8511
124
+ 795,49200,6.7727
125
+ 799,49600,7.6417
126
+ 803,50000,7.6993
127
+ 807,50400,7.7043
128
+ 812,50800,6.8261
129
+ 816,51200,6.92
130
+ 820,51600,7.9919
131
+ 825,52000,6.4103
132
+ 830,52400,5.7618
133
+ 834,52800,7.0424
134
+ 838,53200,7.6928
135
+ 842,53600,7.7354
136
+ 847,54000,7.3411
137
+ 852,54400,6.3078
138
+ 856,54800,7.4612
139
+ 860,55200,6.6696
140
+ 865,55600,6.7569
141
+ 869,56000,6.9279
142
+ 874,56400,6.5996
143
+ 878,56800,7.7271
144
+ 882,57200,7.6932
145
+ 886,57600,7.5902
146
+ 890,58000,7.1683
147
+ 897,58400,4.5812
148
+ 903,58800,6.1682
149
+ 908,59200,6.1325
150
+ 912,59600,7.8386
151
+ 916,60000,6.3943
152
+ 921,60400,6.8943
153
+ 925,60800,7.8821
154
+ 932,61200,4.9096
155
+ 937,61600,6.3275
156
+ 941,62000,8.5893
157
+ 945,62400,7.9071
158
+ 949,62800,7.8808
159
+ 953,63200,7.8324
160
+ 957,63600,8.5609
161
+ 961,64000,8.4952
162
+ 965,64400,7.8793
163
+ 971,64800,5.9811
164
+ 975,65200,6.3148
165
+ 979,65600,8.3047
166
+ 985,66000,6.6049
167
+ 991,66400,5.5465
168
+ 995,66800,6.7472
169
+ 1000,67200,6.688
170
+ 1004,67600,8.5059
171
+ 1008,68000,8.0416
172
+ 1012,68400,9.3594
173
+ 1017,68800,7.135
174
+ 1022,69200,7.7882
175
+ 1028,69600,5.1304
176
+ 1032,70000,7.9267
177
+ 1036,70400,7.9253
178
+ 1040,70800,7.522
179
+ 1045,71200,6.7979
180
+ 1050,71600,7.0769
181
+ 1055,72000,7.1814
182
+ 1059,72400,7.9156
183
+ 1063,72800,6.9876
184
+ 1069,73200,5.6939
185
+ 1073,73600,7.603
186
+ 1078,74000,6.9544
187
+ 1085,74400,5.1872
188
+ 1089,74800,8.0712
189
+ 1094,75200,5.4866
190
+ 1099,75600,6.856
191
+ 1104,76000,6.6695
192
+ 1111,76400,5.2366
193
+ 1115,76800,8.231
194
+ 1120,77200,6.3017
195
+ 1125,77600,6.1984
196
+ 1129,78000,8.0981
197
+ 1134,78400,5.6874
198
+ 1139,78800,6.7436
199
+ 1144,79200,6.7781
200
+ 1148,79600,8.035
201
+ 1153,80000,7.0241
202
+ 1157,80400,8.1216
203
+ 1161,80800,7.7847
204
+ 1165,81200,7.0458
205
+ 1172,81600,4.9159
206
+ 1176,82000,8.8252
207
+ 1180,82400,8.1435
208
+ 1184,82800,8.0794
209
+ 1188,83200,8.4439
210
+ 1194,83600,6.5179
211
+ 1198,84000,9.5129
212
+ 1202,84400,9.5982
213
+ 1206,84800,8.1605
214
+ 1213,85200,5.1735
215
+ 1218,85600,6.3955
216
+ 1222,86000,6.5836
217
+ 1227,86400,7.8827
218
+ 1231,86800,9.0583
219
+ 1236,87200,7.4464
220
+ 1242,87600,6.9449
221
+ 1247,88000,7.6785
222
+ 1251,88400,7.7551
223
+ 1255,88800,9.1958
224
+ 1260,89200,7.8329
225
+ 1265,89600,9.6998
226
+ 1269,90000,12.2941
227
+ 1274,90400,7.8743
228
+ 1281,90800,6.7261
229
+ 1285,91200,10.3406
230
+ 1289,91600,10.3629
231
+ 1294,92000,12.8416
232
+ 1298,92400,11.8637
233
+ 1302,92800,10.95
234
+ 1307,93200,12.6815
235
+ 1312,93600,10.2359
236
+ 1316,94000,14.5616
237
+ 1320,94400,12.2057
238
+ 1325,94800,10.3496
239
+ 1331,95200,10.0749
240
+ 1336,95600,12.5332
241
+ 1341,96000,7.9488
242
+ 1347,96400,11.6734
243
+ 1353,96800,8.6258
244
+ 1359,97200,8.6982
245
+ 1363,97600,16.8782
246
+ 1369,98000,13.076
247
+ 1374,98400,12.6862
248
+ 1380,98800,13.0069
249
+ 1385,99200,13.2064
250
+ 1394,99600,7.5732
251
+ 1399,100000,13.571
252
+ 1406,100400,11.8885
253
+ 1411,100800,14.0567
254
+ 1416,101200,14.4459
255
+ 1422,101600,14.7572
256
+ 1430,102000,9.0038
257
+ 1436,102400,11.7543
258
+ 1443,102800,12.3206
259
+ 1448,103200,13.6686
260
+ 1454,103600,14.6563
261
+ 1461,104000,10.5416
262
+ 1467,104400,11.8187
263
+ 1473,104800,12.6766
264
+ 1478,105200,13.234
265
+ 1487,105600,10.4377
266
+ 1494,106000,10.4682
267
+ 1499,106400,14.2136
268
+ 1505,106800,13.3214
269
+ 1512,107200,9.9131
270
+ 1518,107600,11.3795
271
+ 1524,108000,13.6525
272
+ 1531,108400,10.5166
273
+ 1536,108800,15.6816
274
+ 1541,109200,13.5686
275
+ 1546,109600,15.8877
276
+ 1552,110000,10.798
277
+ 1556,110400,15.505
278
+ 1564,110800,8.7457
279
+ 1571,111200,9.8442
280
+ 1576,111600,16.783
281
+ 1581,112000,16.6507
282
+ 1587,112400,12.5058
283
+ 1597,112800,7.0879
284
+ 1604,113200,9.4795
285
+ 1612,113600,8.8247
286
+ 1619,114000,9.8475
287
+ 1626,114400,9.1637
288
+ 1631,114800,14.0166
289
+ 1638,115200,8.0027
290
+ 1642,115600,17.5209
291
+ 1650,116000,7.7073
292
+ 1654,116400,20.1086
293
+ 1661,116800,7.9623
294
+ 1668,117200,12.2066
295
+ 1675,117600,8.7988
296
+ 1681,118000,14.56
297
+ 1686,118400,16.3382
298
+ 1691,118800,12.1992
299
+ 1699,119200,10.4266
300
+ 1703,119600,19.3562
301
+ 1708,120000,11.3536
302
+ 1712,120400,20.2356
303
+ 1716,120800,19.5323
304
+ 1721,121200,17.4551
305
+ 1728,121600,10.1801
306
+ 1732,122000,16.947
307
+ 1739,122400,9.827
308
+ 1745,122800,13.3383
309
+ 1749,123200,19.4348
310
+ 1755,123600,12.4996
311
+ 1761,124000,13.8883
312
+ 1765,124400,20.16
313
+ 1769,124800,19.6822
314
+ 1776,125200,10.0647
315
+ 1780,125600,18.9827
316
+ 1785,126000,10.8816
317
+ 1791,126400,12.6382
318
+ 1797,126800,11.2938
319
+ 1806,127200,8.8997
320
+ 1811,127600,15.7514
321
+ 1817,128000,9.8611
322
+ 1825,128400,10.0803
323
+ 1833,128800,9.5121
324
+ 1841,129200,7.1428
325
+ 1850,129600,8.5931
326
+ 1858,130000,9.1178
327
+ 1866,130400,10.6061
328
+ 1871,130800,15.7645
329
+ 1875,131200,19.0111
330
+ 1881,131600,10.6172
331
+ 1885,132000,20.6998
332
+ 1889,132400,20.2484
333
+ 1894,132800,16.4684
334
+ 1900,133200,13.5329
335
+ 1904,133600,20.9186
336
+ 1910,134000,15.0977
337
+ 1915,134400,14.7358
338
+ 1920,134800,15.5285
339
+ 1928,135200,8.3647
340
+ 1934,135600,11.9479
341
+ 1940,136000,14.4666
342
+ 1947,136400,9.6302
343
+ 1953,136800,13.6634
344
+ 1958,137200,14.0396
345
+ 1966,137600,9.1948
346
+ 1973,138000,9.3788
347
+ 1982,138400,9.0964
348
+ 1991,138800,7.105
349
+ 1998,139200,11.6642
350
+ 2005,139600,8.2024
351
+ 2014,140000,7.7126
352
+ 2021,140400,11.0422
353
+ 2027,140800,11.2108
354
+ 2032,141200,15.9589
355
+ 2039,141600,8.9692
356
+ 2046,142000,12.0016
357
+ 2052,142400,13.3516
358
+ 2056,142800,19.3885
359
+ 2061,143200,14.0276
360
+ 2066,143600,14.164
361
+ 2073,144000,11.3607
362
+ 2082,144400,6.5908
363
+ 2089,144800,10.36
364
+ 2094,145200,14.2824
365
+ 2105,145600,4.9849
366
+ 2111,146000,11.1523
367
+ 2116,146400,12.4602
368
+ 2123,146800,10.201
369
+ 2129,147200,11.5534
370
+ 2138,147600,5.9679
371
+ 2142,148000,16.4718
372
+ 2147,148400,17.5833
373
+ 2152,148800,16.768
374
+ 2156,149200,19.1331
375
+ 2162,149600,12.3424
376
+ 2166,150000,18.0845
377
+ 2170,150400,18.9557
378
+ 2175,150800,14.0846
379
+ 2179,151200,20.1474
380
+ 2184,151600,15.3493
381
+ 2188,152000,18.1315
382
+ 2193,152400,15.8044
383
+ 2197,152800,17.7751
384
+ 2203,153200,11.6984
385
+ 2209,153600,13.0837
386
+ 2213,154000,15.855
387
+ 2218,154400,16.8814
388
+ 2223,154800,16.0311
389
+ 2227,155200,15.3192
390
+ 2234,155600,11.457
391
+ 2239,156000,16.1934
392
+ 2246,156400,11.9881
393
+ 2252,156800,9.2926
394
+ 2257,157200,16.73
395
+ 2261,157600,14.6868
396
+ 2268,158000,7.9747
397
+ 2273,158400,10.7901
398
+ 2277,158800,18.3051
399
+ 2283,159200,14.0301
400
+ 2288,159600,15.5108
401
+ 2294,160000,10.0966
402
+ 2300,160400,8.0898
403
+ 2306,160800,8.0323
404
+ 2315,161200,5.836
405
+ 2321,161600,10.9128
406
+ 2329,162000,5.4973
407
+ 2334,162400,11.0091
408
+ 2340,162800,8.3576
409
+ 2349,163200,6.2928
410
+ 2354,163600,13.008
411
+ 2358,164000,15.7582
412
+ 2364,164400,10.6657
413
+ 2372,164800,7.3352
414
+ 2378,165200,7.9416
415
+ 2385,165600,9.7033
416
+ 2390,166000,14.8003
417
+ 2395,166400,13.5949
418
+ 2399,166800,16.5582
419
+ 2405,167200,13.1281
420
+ 2410,167600,13.6583
421
+ 2415,168000,13.3859
422
+ 2422,168400,12.7336
423
+ 2427,168800,13.4928
424
+ 2432,169200,14.222
425
+ 2438,169600,12.2052
426
+ 2442,170000,17.538
427
+ 2449,170400,13.4975
428
+ 2454,170800,13.4316
429
+ 2460,171200,14.3904
430
+ 2466,171600,14.1914
431
+ 2472,172000,10.7212
432
+ 2478,172400,13.2043
433
+ 2484,172800,12.3505
434
+ 2488,173200,18.4707
435
+ 2493,173600,13.7594
436
+ 2498,174000,15.2434
437
+ 2503,174400,17.6234
438
+ 2509,174800,11.5482
439
+ 2513,175200,18.5919
440
+ 2518,175600,18.2289
441
+ 2525,176000,10.1992
442
+ 2529,176400,19.2187
443
+ 2533,176800,20.1763
444
+ 2538,177200,15.8706
445
+ 2542,177600,20.1437
446
+ 2547,178000,14.6606
447
+ 2552,178400,16.5006
448
+ 2557,178800,17.2947
449
+ 2561,179200,20.4127
450
+ 2566,179600,16.8026
451
+ 2571,180000,16.4537
452
+ 2575,180400,19.0096
453
+ 2580,180800,12.4961
454
+ 2586,181200,13.702
455
+ 2590,181600,19.4121
456
+ 2595,182000,19.1734
457
+ 2599,182400,15.6252
458
+ 2604,182800,17.4543
459
+ 2608,183200,18.4527
460
+ 2613,183600,17.0079
461
+ 2617,184000,15.8574
462
+ 2623,184400,14.6486
463
+ 2627,184800,16.7431
464
+ 2632,185200,16.3204
465
+ 2636,185600,18.1816
466
+ 2640,186000,19.3138
467
+ 2646,186400,13.6693
468
+ 2652,186800,12.1288
469
+ 2656,187200,19.5385
470
+ 2662,187600,12.7194
471
+ 2666,188000,18.8978
472
+ 2671,188400,17.7266
473
+ 2675,188800,20.3572
474
+ 2680,189200,16.1169
475
+ 2685,189600,14.712
476
+ 2692,190000,11.1334
477
+ 2696,190400,17.4639
478
+ 2701,190800,16.8716
479
+ 2705,191200,19.8286
480
+ 2710,191600,18.7337
481
+ 2714,192000,19.8553
482
+ 2719,192400,14.2238
483
+ 2725,192800,12.8373
484
+ 2735,193200,7.0151
485
+ 2739,193600,13.3644
486
+ 2745,194000,13.3385
487
+ 2753,194400,8.739
488
+ 2758,194800,13.8917
489
+ 2764,195200,10.3893
490
+ 2773,195600,4.8033
491
+ 2784,196000,5.5534
492
+ 2792,196400,7.2659
493
+ 2796,196800,13.9818
494
+ 2802,197200,13.2211
495
+ 2807,197600,16.0152
496
+ 2814,198000,9.9562
497
+ 2823,198400,6.7525
498
+ 2828,198800,14.4238
499
+ 2833,199200,10.7252
500
+ 2838,199600,13.7153
501
+ 2842,200000,19.2825
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_2.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 25,400,1.2001
3
+ 50,800,1.1974
4
+ 67,1200,1.7766
5
+ 87,1600,1.4873
6
+ 104,2000,1.7077
7
+ 114,2400,2.5908
8
+ 134,2800,1.8992
9
+ 145,3200,2.8642
10
+ 154,3600,2.9615
11
+ 163,4000,3.7629
12
+ 170,4400,4.1044
13
+ 180,4800,3.4985
14
+ 187,5200,4.3872
15
+ 192,5600,5.6852
16
+ 198,6000,5.7852
17
+ 205,6400,4.5497
18
+ 210,6800,5.1203
19
+ 215,7200,6.3622
20
+ 222,7600,4.9101
21
+ 230,8000,3.7216
22
+ 237,8400,3.9412
23
+ 243,8800,5.1797
24
+ 250,9200,4.9764
25
+ 255,9600,5.0597
26
+ 260,10000,6.6622
27
+ 266,10400,5.4034
28
+ 274,10800,3.7803
29
+ 279,11200,4.8601
30
+ 284,11600,6.8704
31
+ 289,12000,5.884
32
+ 298,12400,3.1085
33
+ 306,12800,3.7402
34
+ 317,13200,2.4474
35
+ 325,13600,4.697
36
+ 333,14000,3.7155
37
+ 340,14400,4.1133
38
+ 345,14800,6.3342
39
+ 354,15200,3.4713
40
+ 363,15600,2.973
41
+ 369,16000,5.3844
42
+ 376,16400,3.9942
43
+ 384,16800,4.3917
44
+ 390,17200,3.9032
45
+ 396,17600,5.5522
46
+ 402,18000,5.0959
47
+ 410,18400,4.0555
48
+ 419,18800,3.8592
49
+ 424,19200,5.5717
50
+ 430,19600,5.4533
51
+ 436,20000,4.4112
52
+ 442,20400,5.2485
53
+ 453,20800,3.6337
54
+ 458,21200,5.7439
55
+ 464,21600,5.8554
56
+ 470,22000,6.1356
57
+ 478,22400,4.1016
58
+ 483,22800,6.9287
59
+ 487,23200,7.9029
60
+ 492,23600,7.2121
61
+ 496,24000,6.8316
62
+ 501,24400,7.3779
63
+ 506,24800,7.0025
64
+ 511,25200,7.3224
65
+ 516,25600,6.0262
66
+ 521,26000,7.3493
67
+ 525,26400,8.5734
68
+ 530,26800,7.505
69
+ 536,27200,5.6146
70
+ 541,27600,7.32
71
+ 547,28000,5.8791
72
+ 551,28400,8.3691
73
+ 555,28800,8.5242
74
+ 560,29200,6.8016
75
+ 564,29600,6.4966
76
+ 570,30000,6.5884
77
+ 574,30400,7.9429
78
+ 579,30800,8.2591
79
+ 586,31200,5.1781
80
+ 590,31600,6.725
81
+ 595,32000,7.3339
82
+ 602,32400,5.5153
83
+ 606,32800,7.294
84
+ 611,33200,7.861
85
+ 617,33600,5.8614
86
+ 622,34000,6.2895
87
+ 626,34400,7.641
88
+ 631,34800,7.1765
89
+ 635,35200,8.3742
90
+ 639,35600,8.5644
91
+ 644,36000,7.5061
92
+ 648,36400,7.137
93
+ 654,36800,6.317
94
+ 659,37200,7.5171
95
+ 663,37600,8.4671
96
+ 667,38000,8.8055
97
+ 672,38400,7.2286
98
+ 677,38800,7.0709
99
+ 682,39200,6.0232
100
+ 687,39600,7.7556
101
+ 691,40000,7.7135
102
+ 695,40400,8.3451
103
+ 701,40800,6.0775
104
+ 705,41200,8.3363
105
+ 709,41600,8.6071
106
+ 715,42000,5.3694
107
+ 720,42400,7.6011
108
+ 724,42800,8.7117
109
+ 729,43200,7.0684
110
+ 735,43600,5.433
111
+ 739,44000,8.6411
112
+ 743,44400,8.5924
113
+ 747,44800,8.9168
114
+ 752,45200,7.2905
115
+ 756,45600,8.9831
116
+ 761,46000,7.4007
117
+ 766,46400,6.664
118
+ 770,46800,7.3928
119
+ 777,47200,5.7024
120
+ 783,47600,5.151
121
+ 788,48000,7.2108
122
+ 793,48400,7.2763
123
+ 799,48800,7.0493
124
+ 803,49200,8.5485
125
+ 808,49600,7.5421
126
+ 812,50000,9.5257
127
+ 816,50400,9.1144
128
+ 822,50800,5.6572
129
+ 828,51200,7.8776
130
+ 832,51600,10.0686
131
+ 838,52000,6.2858
132
+ 842,52400,8.7905
133
+ 847,52800,6.9064
134
+ 852,53200,8.591
135
+ 857,53600,7.9326
136
+ 861,54000,10.7555
137
+ 865,54400,11.0084
138
+ 870,54800,8.5343
139
+ 874,55200,10.2597
140
+ 878,55600,11.0706
141
+ 883,56000,9.7567
142
+ 887,56400,11.0066
143
+ 891,56800,11.0205
144
+ 897,57200,8.6028
145
+ 902,57600,7.8706
146
+ 906,58000,11.6827
147
+ 911,58400,11.5678
148
+ 916,58800,7.3302
149
+ 920,59200,11.8686
150
+ 925,59600,9.6659
151
+ 929,60000,11.8218
152
+ 933,60400,11.753
153
+ 938,60800,10.8838
154
+ 942,61200,11.9606
155
+ 946,61600,11.9891
156
+ 950,62000,10.7408
157
+ 955,62400,9.9649
158
+ 959,62800,10.1192
159
+ 963,63200,11.9243
160
+ 967,63600,12.0996
161
+ 971,64000,12.2641
162
+ 975,64400,12.2119
163
+ 980,64800,9.6896
164
+ 984,65200,11.985
165
+ 988,65600,12.0389
166
+ 993,66000,9.7261
167
+ 998,66400,10.0918
168
+ 1002,66800,11.9409
169
+ 1007,67200,9.7976
170
+ 1011,67600,11.9562
171
+ 1015,68000,11.9393
172
+ 1019,68400,11.9489
173
+ 1023,68800,10.6665
174
+ 1029,69200,8.6481
175
+ 1035,69600,8.2744
176
+ 1040,70000,9.8761
177
+ 1044,70400,12.7648
178
+ 1048,70800,12.0824
179
+ 1053,71200,9.9615
180
+ 1059,71600,7.5596
181
+ 1063,72000,12.066
182
+ 1068,72400,10.3717
183
+ 1072,72800,11.9771
184
+ 1076,73200,11.9456
185
+ 1080,73600,9.1042
186
+ 1084,74000,12.076
187
+ 1090,74400,9.4681
188
+ 1094,74800,11.529
189
+ 1098,75200,12.0008
190
+ 1103,75600,10.1566
191
+ 1108,76000,9.751
192
+ 1112,76400,12.3104
193
+ 1116,76800,9.2405
194
+ 1120,77200,12.0348
195
+ 1125,77600,10.0069
196
+ 1130,78000,10.0386
197
+ 1136,78400,10.4839
198
+ 1142,78800,7.6819
199
+ 1149,79200,5.9398
200
+ 1156,79600,7.5437
201
+ 1163,80000,7.4114
202
+ 1167,80400,9.315
203
+ 1172,80800,11.2425
204
+ 1176,81200,10.7758
205
+ 1180,81600,12.0153
206
+ 1185,82000,10.7721
207
+ 1190,82400,9.8986
208
+ 1194,82800,11.9053
209
+ 1198,83200,11.9558
210
+ 1203,83600,9.3269
211
+ 1207,84000,12.2099
212
+ 1213,84400,8.4628
213
+ 1217,84800,10.7038
214
+ 1221,85200,12.2837
215
+ 1226,85600,9.9915
216
+ 1231,86000,9.9481
217
+ 1236,86400,10.1545
218
+ 1240,86800,12.2327
219
+ 1244,87200,12.075
220
+ 1248,87600,12.01
221
+ 1252,88000,10.5855
222
+ 1256,88400,12.134
223
+ 1260,88800,12.3757
224
+ 1264,89200,11.9803
225
+ 1269,89600,9.3804
226
+ 1274,90000,9.8415
227
+ 1278,90400,11.9713
228
+ 1284,90800,8.3152
229
+ 1289,91200,8.8216
230
+ 1293,91600,12.3776
231
+ 1299,92000,8.471
232
+ 1303,92400,12.5027
233
+ 1307,92800,12.517
234
+ 1313,93200,8.4122
235
+ 1317,93600,13.1839
236
+ 1322,94000,10.4515
237
+ 1326,94400,11.1728
238
+ 1330,94800,13.0548
239
+ 1334,95200,12.0536
240
+ 1338,95600,12.4526
241
+ 1344,96000,9.0872
242
+ 1351,96400,7.9055
243
+ 1355,96800,11.2378
244
+ 1360,97200,11.1121
245
+ 1364,97600,12.7524
246
+ 1368,98000,12.6111
247
+ 1373,98400,11.2153
248
+ 1378,98800,8.2927
249
+ 1382,99200,12.5971
250
+ 1388,99600,9.538
251
+ 1392,100000,12.5941
252
+ 1397,100400,10.2156
253
+ 1401,100800,12.5003
254
+ 1406,101200,9.1204
255
+ 1411,101600,11.3426
256
+ 1418,102000,6.7031
257
+ 1422,102400,13.0065
258
+ 1427,102800,10.4644
259
+ 1431,103200,12.6849
260
+ 1435,103600,12.3948
261
+ 1441,104000,9.7853
262
+ 1446,104400,9.4532
263
+ 1450,104800,10.5596
264
+ 1456,105200,8.6362
265
+ 1461,105600,11.1625
266
+ 1465,106000,9.7383
267
+ 1471,106400,9.5542
268
+ 1476,106800,10.1016
269
+ 1480,107200,11.3806
270
+ 1484,107600,12.8012
271
+ 1488,108000,12.754
272
+ 1493,108400,9.2045
273
+ 1497,108800,12.9866
274
+ 1504,109200,7.3446
275
+ 1509,109600,11.8225
276
+ 1514,110000,8.9346
277
+ 1518,110400,12.7821
278
+ 1523,110800,11.2293
279
+ 1527,111200,12.976
280
+ 1532,111600,8.6775
281
+ 1536,112000,12.7469
282
+ 1540,112400,12.7591
283
+ 1545,112800,10.3778
284
+ 1550,113200,10.3744
285
+ 1554,113600,12.7611
286
+ 1558,114000,11.8172
287
+ 1562,114400,12.8452
288
+ 1566,114800,12.6974
289
+ 1570,115200,12.7894
290
+ 1575,115600,10.9595
291
+ 1579,116000,13.0989
292
+ 1583,116400,12.9278
293
+ 1589,116800,9.0682
294
+ 1593,117200,12.9978
295
+ 1597,117600,10.7087
296
+ 1601,118000,13.5158
297
+ 1605,118400,13.435
298
+ 1610,118800,11.5973
299
+ 1614,119200,13.0533
300
+ 1618,119600,13.0017
301
+ 1623,120000,10.8166
302
+ 1629,120400,9.0293
303
+ 1634,120800,9.2539
304
+ 1639,121200,10.6134
305
+ 1644,121600,11.5216
306
+ 1648,122000,13.3206
307
+ 1652,122400,12.207
308
+ 1657,122800,11.3809
309
+ 1661,123200,10.1333
310
+ 1665,123600,13.4154
311
+ 1669,124000,13.7245
312
+ 1673,124400,13.2766
313
+ 1677,124800,13.6751
314
+ 1681,125200,13.3002
315
+ 1686,125600,11.3154
316
+ 1690,126000,14.442
317
+ 1697,126400,9.1752
318
+ 1701,126800,12.6002
319
+ 1710,127200,8.7327
320
+ 1715,127600,11.4596
321
+ 1722,128000,9.0605
322
+ 1730,128400,6.6071
323
+ 1738,128800,7.8556
324
+ 1743,129200,8.6705
325
+ 1748,129600,13.2619
326
+ 1757,130000,8.0688
327
+ 1762,130400,11.255
328
+ 1769,130800,10.1434
329
+ 1776,131200,7.9637
330
+ 1780,131600,16.234
331
+ 1784,132000,16.009
332
+ 1790,132400,11.5696
333
+ 1795,132800,13.03
334
+ 1801,133200,11.5952
335
+ 1807,133600,10.1351
336
+ 1811,134000,14.54
337
+ 1823,134400,4.607
338
+ 1829,134800,12.6576
339
+ 1833,135200,16.8332
340
+ 1838,135600,11.8201
341
+ 1842,136000,17.1005
342
+ 1846,136400,16.8673
343
+ 1851,136800,15.3565
344
+ 1857,137200,8.8723
345
+ 1862,137600,13.1234
346
+ 1868,138000,12.9135
347
+ 1876,138400,6.6299
348
+ 1883,138800,8.6404
349
+ 1892,139200,8.0842
350
+ 1896,139600,16.6064
351
+ 1900,140000,12.9285
352
+ 1912,140400,5.249
353
+ 1918,140800,9.7863
354
+ 1926,141200,7.8766
355
+ 1932,141600,7.9322
356
+ 1942,142000,5.3181
357
+ 1947,142400,12.7024
358
+ 1956,142800,8.2081
359
+ 1968,143200,2.9574
360
+ 1975,143600,6.7944
361
+ 1981,144000,11.6649
362
+ 1994,144400,5.4107
363
+ 2003,144800,6.3419
364
+ 2012,145200,6.9728
365
+ 2023,145600,5.0183
366
+ 2031,146000,7.8319
367
+ 2041,146400,4.6116
368
+ 2054,146800,4.337
369
+ 2063,147200,5.3691
370
+ 2069,147600,12.9849
371
+ 2078,148000,8.5405
372
+ 2091,148400,4.9827
373
+ 2108,148800,3.026
374
+ 2123,149200,3.3827
375
+ 2132,149600,6.8501
376
+ 2141,150000,7.5638
377
+ 2152,150400,5.2852
378
+ 2162,150800,7.5752
379
+ 2170,151200,7.9508
380
+ 2177,151600,10.2038
381
+ 2191,152000,3.0956
382
+ 2200,152400,7.7211
383
+ 2208,152800,6.9913
384
+ 2215,153200,10.9929
385
+ 2219,153600,15.1937
386
+ 2226,154000,12.1173
387
+ 2235,154400,6.3827
388
+ 2244,154800,9.904
389
+ 2251,155200,10.4535
390
+ 2257,155600,11.1928
391
+ 2262,156000,11.8628
392
+ 2269,156400,11.1129
393
+ 2275,156800,10.8419
394
+ 2282,157200,10.6974
395
+ 2288,157600,12.6491
396
+ 2293,158000,17.1227
397
+ 2297,158400,15.1925
398
+ 2302,158800,15.8225
399
+ 2310,159200,10.5351
400
+ 2314,159600,20.5124
401
+ 2318,160000,20.7472
402
+ 2323,160400,16.1619
403
+ 2328,160800,17.0157
404
+ 2332,161200,19.5865
405
+ 2336,161600,20.3359
406
+ 2340,162000,15.7826
407
+ 2344,162400,20.7786
408
+ 2349,162800,19.027
409
+ 2353,163200,16.8306
410
+ 2358,163600,15.0345
411
+ 2364,164000,13.7065
412
+ 2370,164400,15.8193
413
+ 2375,164800,15.9792
414
+ 2379,165200,16.8467
415
+ 2384,165600,17.832
416
+ 2388,166000,20.4626
417
+ 2393,166400,14.8119
418
+ 2399,166800,12.4114
419
+ 2403,167200,20.9186
420
+ 2408,167600,13.3934
421
+ 2412,168000,20.5788
422
+ 2418,168400,16.2933
423
+ 2422,168800,18.8223
424
+ 2427,169200,19.3578
425
+ 2432,169600,13.3396
426
+ 2437,170000,17.3548
427
+ 2443,170400,16.2848
428
+ 2448,170800,18.1538
429
+ 2453,171200,16.7561
430
+ 2457,171600,17.7607
431
+ 2463,172000,15.1953
432
+ 2469,172400,13.134
433
+ 2476,172800,12.5457
434
+ 2482,173200,14.6165
435
+ 2487,173600,18.5189
436
+ 2492,174000,19.1314
437
+ 2496,174400,17.353
438
+ 2500,174800,22.18
439
+ 2506,175200,15.5115
440
+ 2511,175600,19.8744
441
+ 2515,176000,18.1736
442
+ 2519,176400,21.4187
443
+ 2525,176800,17.5231
444
+ 2531,177200,12.1789
445
+ 2536,177600,17.784
446
+ 2540,178000,20.6193
447
+ 2545,178400,17.4618
448
+ 2549,178800,21.499
449
+ 2553,179200,17.4586
450
+ 2557,179600,20.6227
451
+ 2561,180000,20.1806
452
+ 2567,180400,13.3524
453
+ 2573,180800,14.6734
454
+ 2577,181200,21.1076
455
+ 2582,181600,17.1151
456
+ 2587,182000,12.7979
457
+ 2592,182400,15.9493
458
+ 2599,182800,14.3687
459
+ 2604,183200,15.514
460
+ 2610,183600,12.1476
461
+ 2616,184000,13.7643
462
+ 2622,184400,14.6858
463
+ 2627,184800,13.8197
464
+ 2631,185200,20.2988
465
+ 2637,185600,13.8708
466
+ 2641,186000,21.6494
467
+ 2646,186400,15.6542
468
+ 2653,186800,11.9435
469
+ 2659,187200,13.8883
470
+ 2666,187600,12.5814
471
+ 2671,188000,17.3771
472
+ 2678,188400,11.9801
473
+ 2683,188800,13.1353
474
+ 2692,189200,10.337
475
+ 2698,189600,11.3343
476
+ 2705,190000,12.8742
477
+ 2711,190400,14.6771
478
+ 2717,190800,11.9189
479
+ 2726,191200,8.7059
480
+ 2732,191600,13.1508
481
+ 2739,192000,9.7473
482
+ 2743,192400,19.009
483
+ 2748,192800,16.8758
484
+ 2754,193200,15.7392
485
+ 2758,193600,20.0703
486
+ 2764,194000,12.4249
487
+ 2768,194400,21.7073
488
+ 2773,194800,18.1804
489
+ 2779,195200,15.5854
490
+ 2785,195600,12.3091
491
+ 2790,196000,15.0442
492
+ 2794,196400,20.9159
493
+ 2802,196800,11.6643
494
+ 2811,197200,7.4276
495
+ 2819,197600,8.6076
496
+ 2826,198000,10.6653
497
+ 2831,198400,16.6902
498
+ 2837,198800,13.4796
499
+ 2843,199200,14.7899
500
+ 2848,199600,14.6657
501
+ 2855,200000,11.2761
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_3.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 20,400,1.4511
3
+ 42,800,1.4159
4
+ 60,1200,1.7077
5
+ 73,1600,2.1135
6
+ 84,2000,2.7468
7
+ 105,2400,1.7674
8
+ 113,2800,3.7814
9
+ 122,3200,3.7896
10
+ 134,3600,2.7067
11
+ 146,4000,2.5524
12
+ 152,4400,6.5864
13
+ 157,4800,6.5195
14
+ 162,5200,6.1349
15
+ 168,5600,5.317
16
+ 177,6000,3.6724
17
+ 183,6400,6.1446
18
+ 189,6800,5.5608
19
+ 195,7200,6.1726
20
+ 199,7600,6.344
21
+ 205,8000,5.8716
22
+ 213,8400,4.0247
23
+ 217,8800,6.9902
24
+ 224,9200,6.2118
25
+ 229,9600,6.7983
26
+ 236,10000,6.4537
27
+ 241,10400,9.274
28
+ 246,10800,7.4492
29
+ 253,11200,6.7536
30
+ 257,11600,10.0378
31
+ 264,12000,5.3493
32
+ 270,12400,6.8639
33
+ 274,12800,9.7271
34
+ 278,13200,10.6744
35
+ 284,13600,7.8632
36
+ 290,14000,8.3459
37
+ 296,14400,6.5538
38
+ 301,14800,9.4489
39
+ 307,15200,8.4332
40
+ 313,15600,7.6024
41
+ 318,16000,9.6646
42
+ 323,16400,7.5682
43
+ 327,16800,12.2827
44
+ 332,17200,11.2367
45
+ 336,17600,10.6158
46
+ 341,18000,8.7318
47
+ 347,18400,8.9077
48
+ 352,18800,10.0014
49
+ 357,19200,9.9727
50
+ 362,19600,8.4138
51
+ 368,20000,8.7518
52
+ 372,20400,12.3645
53
+ 376,20800,10.8224
54
+ 381,21200,11.2952
55
+ 387,21600,7.5035
56
+ 392,22000,10.1362
57
+ 398,22400,8.0769
58
+ 402,22800,11.4559
59
+ 408,23200,10.9315
60
+ 412,23600,12.5869
61
+ 416,24000,9.7607
62
+ 421,24400,10.3972
63
+ 427,24800,9.1654
64
+ 434,25200,6.5726
65
+ 440,25600,7.9518
66
+ 445,26000,12.163
67
+ 449,26400,12.5389
68
+ 453,26800,12.4047
69
+ 457,27200,12.8434
70
+ 461,27600,12.508
71
+ 465,28000,12.7977
72
+ 469,28400,12.7722
73
+ 475,28800,7.5211
74
+ 481,29200,8.8252
75
+ 485,29600,11.5555
76
+ 489,30000,11.477
77
+ 493,30400,12.9982
78
+ 497,30800,13.1206
79
+ 504,31200,9.1732
80
+ 509,31600,9.6877
81
+ 515,32000,8.65
82
+ 522,32400,6.6807
83
+ 529,32800,7.7346
84
+ 534,33200,7.8476
85
+ 541,33600,7.9549
86
+ 548,34000,6.9946
87
+ 555,34400,5.7617
88
+ 562,34800,7.4759
89
+ 567,35200,9.4369
90
+ 574,35600,8.9111
91
+ 581,36000,6.5248
92
+ 586,36400,10.6508
93
+ 594,36800,4.8766
94
+ 606,37200,5.0255
95
+ 611,37600,8.7737
96
+ 619,38000,7.297
97
+ 625,38400,8.7064
98
+ 631,38800,7.6392
99
+ 637,39200,8.1329
100
+ 643,39600,8.6996
101
+ 648,40000,12.1593
102
+ 654,40400,8.0097
103
+ 658,40800,13.7355
104
+ 663,41200,10.8767
105
+ 667,41600,13.4623
106
+ 671,42000,13.7128
107
+ 677,42400,7.9521
108
+ 685,42800,7.0389
109
+ 693,43200,7.1228
110
+ 699,43600,7.1906
111
+ 704,44000,10.2499
112
+ 711,44400,8.8439
113
+ 715,44800,10.723
114
+ 722,45200,8.0561
115
+ 726,45600,12.8837
116
+ 730,46000,11.6005
117
+ 735,46400,10.5382
118
+ 740,46800,10.9579
119
+ 744,47200,12.8439
120
+ 749,47600,8.5832
121
+ 755,48000,10.2625
122
+ 759,48400,11.0394
123
+ 764,48800,10.5021
124
+ 771,49200,7.5662
125
+ 775,49600,11.8596
126
+ 779,50000,10.9197
127
+ 785,50400,9.6613
128
+ 791,50800,7.1842
129
+ 797,51200,8.9205
130
+ 810,51600,3.7799
131
+ 814,52000,11.7011
132
+ 818,52400,13.1067
133
+ 825,52800,6.8794
134
+ 837,53200,4.1435
135
+ 844,53600,7.8758
136
+ 850,54000,6.8512
137
+ 857,54400,7.279
138
+ 863,54800,8.8461
139
+ 870,55200,5.1664
140
+ 876,55600,8.291
141
+ 882,56000,7.4186
142
+ 886,56400,12.7099
143
+ 895,56800,5.8991
144
+ 900,57200,10.3267
145
+ 905,57600,10.4081
146
+ 910,58000,8.2201
147
+ 917,58400,7.2555
148
+ 923,58800,8.6902
149
+ 929,59200,8.4311
150
+ 935,59600,9.3899
151
+ 940,60000,9.48
152
+ 947,60400,8.8701
153
+ 952,60800,10.6712
154
+ 957,61200,12.1303
155
+ 963,61600,10.0041
156
+ 967,62000,11.8506
157
+ 975,62400,7.9768
158
+ 981,62800,10.121
159
+ 985,63200,12.7484
160
+ 992,63600,8.6506
161
+ 996,64000,12.0166
162
+ 1003,64400,8.6989
163
+ 1007,64800,13.5841
164
+ 1012,65200,11.8948
165
+ 1020,65600,7.5362
166
+ 1024,66000,14.5183
167
+ 1031,66400,8.4667
168
+ 1039,66800,6.099
169
+ 1044,67200,9.9433
170
+ 1051,67600,10.2187
171
+ 1056,68000,12.4386
172
+ 1065,68400,4.846
173
+ 1071,68800,12.9973
174
+ 1076,69200,11.6068
175
+ 1084,69600,9.6249
176
+ 1090,70000,8.821
177
+ 1098,70400,8.9721
178
+ 1104,70800,10.8627
179
+ 1110,71200,12.9207
180
+ 1117,71600,9.6223
181
+ 1124,72000,9.5217
182
+ 1129,72400,12.7009
183
+ 1137,72800,7.1291
184
+ 1142,73200,15.8915
185
+ 1147,73600,12.773
186
+ 1152,74000,14.0152
187
+ 1158,74400,12.8197
188
+ 1165,74800,7.6715
189
+ 1171,75200,11.6273
190
+ 1176,75600,14.7481
191
+ 1182,76000,10.9798
192
+ 1186,76400,17.6803
193
+ 1193,76800,10.2361
194
+ 1198,77200,11.135
195
+ 1203,77600,13.5392
196
+ 1207,78000,16.3684
197
+ 1213,78400,12.9063
198
+ 1218,78800,11.9918
199
+ 1226,79200,8.0133
200
+ 1231,79600,13.8717
201
+ 1236,80000,13.592
202
+ 1240,80400,16.6494
203
+ 1247,80800,9.0394
204
+ 1253,81200,11.6721
205
+ 1257,81600,12.4117
206
+ 1263,82000,13.1805
207
+ 1268,82400,12.8848
208
+ 1273,82800,13.0017
209
+ 1279,83200,11.932
210
+ 1286,83600,10.3167
211
+ 1292,84000,11.0747
212
+ 1298,84400,12.7847
213
+ 1302,84800,16.4195
214
+ 1307,85200,14.5389
215
+ 1313,85600,11.3515
216
+ 1317,86000,19.0045
217
+ 1324,86400,12.204
218
+ 1328,86800,18.172
219
+ 1334,87200,11.4799
220
+ 1339,87600,15.2817
221
+ 1346,88000,12.8543
222
+ 1351,88400,15.2124
223
+ 1355,88800,20.012
224
+ 1360,89200,15.6753
225
+ 1364,89600,18.9953
226
+ 1369,90000,14.7316
227
+ 1373,90400,18.3781
228
+ 1379,90800,12.0495
229
+ 1383,91200,19.1038
230
+ 1388,91600,15.5228
231
+ 1394,92000,13.1508
232
+ 1399,92400,13.739
233
+ 1404,92800,15.4669
234
+ 1409,93200,12.2052
235
+ 1413,93600,18.4534
236
+ 1417,94000,18.4736
237
+ 1421,94400,19.451
238
+ 1426,94800,17.0253
239
+ 1431,95200,13.6257
240
+ 1436,95600,15.849
241
+ 1447,96000,6.1302
242
+ 1452,96400,13.5995
243
+ 1460,96800,10.4263
244
+ 1466,97200,12.8175
245
+ 1470,97600,19.8985
246
+ 1474,98000,19.4499
247
+ 1480,98400,12.2035
248
+ 1485,98800,15.5921
249
+ 1493,99200,9.945
250
+ 1500,99600,8.3822
251
+ 1504,100000,20.1108
252
+ 1511,100400,13.2678
253
+ 1517,100800,13.3653
254
+ 1521,101200,20.5694
255
+ 1526,101600,12.9576
256
+ 1531,102000,18.6283
257
+ 1537,102400,13.6185
258
+ 1542,102800,13.5109
259
+ 1547,103200,14.7459
260
+ 1555,103600,11.7803
261
+ 1562,104000,8.6873
262
+ 1572,104400,8.425
263
+ 1577,104800,12.4127
264
+ 1583,105200,11.9189
265
+ 1590,105600,12.8694
266
+ 1597,106000,10.6233
267
+ 1604,106400,8.3289
268
+ 1613,106800,9.3008
269
+ 1622,107200,7.9757
270
+ 1627,107600,15.1024
271
+ 1633,108000,12.5267
272
+ 1641,108400,10.7986
273
+ 1646,108800,10.9844
274
+ 1650,109200,20.3921
275
+ 1657,109600,12.9882
276
+ 1664,110000,8.8107
277
+ 1671,110400,13.2145
278
+ 1677,110800,13.7578
279
+ 1683,111200,11.6441
280
+ 1691,111600,10.6352
281
+ 1698,112000,9.8465
282
+ 1704,112400,13.4316
283
+ 1708,112800,19.3443
284
+ 1715,113200,11.1754
285
+ 1722,113600,10.6371
286
+ 1728,114000,9.1292
287
+ 1734,114400,14.4382
288
+ 1741,114800,13.7379
289
+ 1745,115200,20.8923
290
+ 1749,115600,16.3165
291
+ 1755,116000,15.3684
292
+ 1762,116400,9.6267
293
+ 1766,116800,17.2582
294
+ 1772,117200,14.4907
295
+ 1777,117600,16.4743
296
+ 1783,118000,13.0177
297
+ 1792,118400,7.393
298
+ 1799,118800,10.4686
299
+ 1804,119200,17.1223
300
+ 1814,119600,7.6813
301
+ 1820,120000,10.9129
302
+ 1826,120400,15.8259
303
+ 1830,120800,20.2207
304
+ 1837,121200,9.1836
305
+ 1843,121600,14.5043
306
+ 1848,122000,12.8295
307
+ 1853,122400,18.3265
308
+ 1858,122800,16.1205
309
+ 1863,123200,19.6436
310
+ 1869,123600,11.276
311
+ 1873,124000,20.749
312
+ 1882,124400,9.1372
313
+ 1888,124800,13.6346
314
+ 1893,125200,17.135
315
+ 1899,125600,10.655
316
+ 1904,126000,20.1275
317
+ 1908,126400,19.847
318
+ 1913,126800,15.7564
319
+ 1918,127200,14.3012
320
+ 1923,127600,15.9916
321
+ 1928,128000,15.8541
322
+ 1932,128400,20.358
323
+ 1937,128800,16.4342
324
+ 1941,129200,21.0778
325
+ 1946,129600,15.8157
326
+ 1952,130000,14.1684
327
+ 1956,130400,17.5727
328
+ 1962,130800,13.636
329
+ 1967,131200,16.7871
330
+ 1971,131600,19.9122
331
+ 1976,132000,17.2468
332
+ 1984,132400,10.4792
333
+ 1989,132800,15.3806
334
+ 1994,133200,15.9361
335
+ 1998,133600,20.595
336
+ 2003,134000,14.5683
337
+ 2010,134400,12.9443
338
+ 2017,134800,11.1499
339
+ 2021,135200,16.4114
340
+ 2028,135600,11.15
341
+ 2033,136000,16.1641
342
+ 2039,136400,11.8905
343
+ 2045,136800,12.094
344
+ 2051,137200,13.1055
345
+ 2057,137600,12.3794
346
+ 2062,138000,16.7795
347
+ 2066,138400,15.2186
348
+ 2071,138800,17.5386
349
+ 2075,139200,19.7387
350
+ 2080,139600,17.7345
351
+ 2087,140000,9.286
352
+ 2091,140400,20.1843
353
+ 2095,140800,20.6579
354
+ 2099,141200,20.4451
355
+ 2105,141600,12.451
356
+ 2111,142000,13.8034
357
+ 2116,142400,16.789
358
+ 2121,142800,17.3337
359
+ 2127,143200,12.512
360
+ 2132,143600,18.0873
361
+ 2137,144000,16.5148
362
+ 2143,144400,14.2245
363
+ 2150,144800,11.4969
364
+ 2158,145200,6.4741
365
+ 2167,145600,10.4108
366
+ 2173,146000,9.8208
367
+ 2180,146400,14.0351
368
+ 2186,146800,11.3705
369
+ 2191,147200,17.1942
370
+ 2196,147600,16.5483
371
+ 2201,148000,17.2101
372
+ 2206,148400,15.8254
373
+ 2211,148800,16.944
374
+ 2216,149200,13.2711
375
+ 2222,149600,16.0584
376
+ 2227,150000,15.4412
377
+ 2231,150400,20.0904
378
+ 2235,150800,18.9388
379
+ 2241,151200,10.2277
380
+ 2248,151600,9.7938
381
+ 2257,152000,6.8651
382
+ 2265,152400,10.0151
383
+ 2269,152800,16.5873
384
+ 2275,153200,13.0192
385
+ 2280,153600,16.4787
386
+ 2284,154000,19.9945
387
+ 2288,154400,21.1409
388
+ 2293,154800,17.4874
389
+ 2298,155200,16.9422
390
+ 2302,155600,16.5356
391
+ 2307,156000,17.7096
392
+ 2314,156400,11.4627
393
+ 2318,156800,20.033
394
+ 2323,157200,14.9603
395
+ 2327,157600,19.5649
396
+ 2334,158000,10.3648
397
+ 2339,158400,17.1039
398
+ 2343,158800,21.143
399
+ 2348,159200,17.2194
400
+ 2352,159600,21.0071
401
+ 2356,160000,21.8045
402
+ 2361,160400,17.6473
403
+ 2367,160800,15.5177
404
+ 2371,161200,20.4818
405
+ 2376,161600,17.1564
406
+ 2381,162000,14.4102
407
+ 2386,162400,18.4027
408
+ 2390,162800,22.3808
409
+ 2395,163200,14.4186
410
+ 2400,163600,13.4343
411
+ 2404,164000,19.605
412
+ 2409,164400,14.7853
413
+ 2415,164800,11.0992
414
+ 2421,165200,12.6908
415
+ 2426,165600,17.289
416
+ 2431,166000,17.1976
417
+ 2435,166400,21.6014
418
+ 2441,166800,12.793
419
+ 2445,167200,14.232
420
+ 2450,167600,15.6217
421
+ 2457,168000,14.2949
422
+ 2461,168400,16.1677
423
+ 2465,168800,21.0584
424
+ 2470,169200,19.4555
425
+ 2474,169600,16.2403
426
+ 2481,170000,10.8878
427
+ 2491,170400,4.966
428
+ 2497,170800,11.4854
429
+ 2509,171200,4.8386
430
+ 2517,171600,9.0965
431
+ 2532,172000,4.4732
432
+ 2539,172400,10.3322
433
+ 2548,172800,6.617
434
+ 2559,173200,6.6052
435
+ 2566,173600,8.1639
436
+ 2571,174000,14.7295
437
+ 2575,174400,19.1853
438
+ 2581,174800,11.4953
439
+ 2586,175200,15.2959
440
+ 2591,175600,17.3706
441
+ 2597,176000,16.6239
442
+ 2602,176400,16.1638
443
+ 2608,176800,13.7811
444
+ 2614,177200,11.7461
445
+ 2624,177600,8.2969
446
+ 2629,178000,16.7226
447
+ 2636,178400,10.9358
448
+ 2642,178800,16.5572
449
+ 2649,179200,10.4538
450
+ 2655,179600,12.1145
451
+ 2660,180000,17.3922
452
+ 2669,180400,10.0921
453
+ 2675,180800,14.2412
454
+ 2680,181200,16.8869
455
+ 2684,181600,17.4712
456
+ 2689,182000,19.3122
457
+ 2693,182400,22.4468
458
+ 2699,182800,15.3699
459
+ 2704,183200,18.0721
460
+ 2710,183600,13.5026
461
+ 2717,184000,12.2414
462
+ 2721,184400,17.901
463
+ 2726,184800,16.2374
464
+ 2731,185200,17.4196
465
+ 2736,185600,18.1275
466
+ 2740,186000,20.9471
467
+ 2745,186400,20.6423
468
+ 2750,186800,18.2358
469
+ 2755,187200,17.1191
470
+ 2760,187600,17.9405
471
+ 2766,188000,12.8474
472
+ 2770,188400,19.6427
473
+ 2776,188800,15.7317
474
+ 2780,189200,19.7856
475
+ 2787,189600,10.9979
476
+ 2792,190000,18.7782
477
+ 2798,190400,13.124
478
+ 2803,190800,15.1497
479
+ 2808,191200,15.8551
480
+ 2818,191600,6.6157
481
+ 2824,192000,15.3035
482
+ 2831,192400,11.4718
483
+ 2835,192800,22.8164
484
+ 2841,193200,16.0213
485
+ 2846,193600,17.6302
486
+ 2851,194000,14.8716
487
+ 2855,194400,20.6271
488
+ 2862,194800,14.8134
489
+ 2868,195200,10.8108
490
+ 2872,195600,22.52
491
+ 2879,196000,13.9515
492
+ 2885,196400,12.6253
493
+ 2893,196800,8.5209
494
+ 2900,197200,11.9898
495
+ 2906,197600,13.4315
496
+ 2911,198000,18.833
497
+ 2918,198400,11.5714
498
+ 2923,198800,15.1536
499
+ 2931,199200,9.8851
500
+ 2937,199600,12.3961
501
+ 2943,200000,10.9256
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_4.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 27,400,1.0785
3
+ 45,800,1.6504
4
+ 65,1200,1.4004
5
+ 75,1600,2.9704
6
+ 92,2000,1.7748
7
+ 101,2400,3.44
8
+ 112,2800,2.7486
9
+ 118,3200,4.5942
10
+ 124,3600,5.5212
11
+ 132,4000,3.7269
12
+ 139,4400,4.3077
13
+ 146,4800,4.8111
14
+ 151,5200,5.5228
15
+ 157,5600,6.1189
16
+ 163,6000,5.2543
17
+ 168,6400,5.2306
18
+ 175,6800,5.3845
19
+ 182,7200,4.3935
20
+ 187,7600,5.7774
21
+ 192,8000,6.1537
22
+ 197,8400,5.7906
23
+ 202,8800,7.0905
24
+ 209,9200,4.5122
25
+ 213,9600,6.5891
26
+ 218,10000,7.7331
27
+ 223,10400,6.6175
28
+ 227,10800,10.1737
29
+ 231,11200,8.0792
30
+ 238,11600,5.0695
31
+ 243,12000,6.8807
32
+ 247,12400,6.8492
33
+ 252,12800,7.4324
34
+ 256,13200,7.9897
35
+ 263,13600,4.5812
36
+ 270,14000,4.8277
37
+ 275,14400,6.4227
38
+ 281,14800,7.0743
39
+ 287,15200,5.8331
40
+ 292,15600,5.6509
41
+ 297,16000,7.8764
42
+ 303,16400,4.8805
43
+ 308,16800,6.1394
44
+ 312,17200,7.3848
45
+ 319,17600,5.4064
46
+ 326,18000,4.2903
47
+ 333,18400,4.4865
48
+ 338,18800,6.5078
49
+ 344,19200,5.3164
50
+ 348,19600,6.5957
51
+ 352,20000,8.4949
52
+ 357,20400,8.0414
53
+ 361,20800,8.259
54
+ 366,21200,8.7431
55
+ 371,21600,8.1425
56
+ 375,22000,7.6031
57
+ 379,22400,10.6469
58
+ 384,22800,7.4268
59
+ 389,23200,7.8948
60
+ 393,23600,9.7095
61
+ 399,24000,7.9959
62
+ 403,24400,12.6126
63
+ 407,24800,10.4783
64
+ 414,25200,8.8362
65
+ 418,25600,12.1951
66
+ 422,26000,12.6242
67
+ 426,26400,13.1493
68
+ 430,26800,11.1338
69
+ 434,27200,11.6175
70
+ 440,27600,7.7618
71
+ 445,28000,10.8874
72
+ 451,28400,9.7606
73
+ 456,28800,11.5253
74
+ 462,29200,6.7856
75
+ 467,29600,11.7964
76
+ 472,30000,8.8168
77
+ 476,30400,13.8556
78
+ 481,30800,11.2407
79
+ 485,31200,13.1211
80
+ 491,31600,10.3695
81
+ 496,32000,8.6598
82
+ 500,32400,13.3756
83
+ 506,32800,9.0459
84
+ 511,33200,10.4668
85
+ 516,33600,9.0482
86
+ 522,34000,9.8797
87
+ 526,34400,13.0444
88
+ 532,34800,7.3031
89
+ 536,35200,12.3843
90
+ 541,35600,11.6548
91
+ 545,36000,12.1808
92
+ 550,36400,8.937
93
+ 554,36800,13.32
94
+ 559,37200,10.2187
95
+ 563,37600,11.1425
96
+ 569,38000,10.4837
97
+ 573,38400,9.4084
98
+ 579,38800,9.5927
99
+ 585,39200,8.1512
100
+ 591,39600,7.4306
101
+ 598,40000,7.4929
102
+ 603,40400,12.0014
103
+ 609,40800,6.8877
104
+ 615,41200,8.7736
105
+ 622,41600,6.575
106
+ 632,42000,3.9331
107
+ 638,42400,9.7754
108
+ 644,42800,8.4288
109
+ 649,43200,9.7733
110
+ 656,43600,6.2225
111
+ 666,44000,4.1808
112
+ 672,44400,6.0964
113
+ 685,44800,3.3983
114
+ 694,45200,5.6596
115
+ 702,45600,6.6803
116
+ 708,46000,7.2517
117
+ 716,46400,5.9717
118
+ 729,46800,3.6724
119
+ 740,47200,4.6841
120
+ 744,47600,12.0424
121
+ 751,48000,8.4528
122
+ 757,48400,7.1028
123
+ 761,48800,12.6503
124
+ 770,49200,4.9012
125
+ 775,49600,9.6465
126
+ 782,50000,7.875
127
+ 787,50400,12.0186
128
+ 793,50800,9.2714
129
+ 798,51200,9.0638
130
+ 804,51600,9.1859
131
+ 808,52000,13.3962
132
+ 814,52400,9.1845
133
+ 819,52800,8.6704
134
+ 823,53200,12.922
135
+ 827,53600,13.1999
136
+ 832,54000,10.8399
137
+ 837,54400,12.6242
138
+ 842,54800,11.2661
139
+ 847,55200,10.5994
140
+ 853,55600,8.1275
141
+ 858,56000,12.1206
142
+ 866,56400,6.3821
143
+ 871,56800,8.825
144
+ 875,57200,13.9679
145
+ 880,57600,13.0994
146
+ 884,58000,14.0452
147
+ 888,58400,10.9111
148
+ 892,58800,13.7891
149
+ 897,59200,12.1729
150
+ 902,59600,12.7809
151
+ 907,60000,10.9993
152
+ 914,60400,6.609
153
+ 922,60800,6.1909
154
+ 927,61200,10.7844
155
+ 934,61600,7.0903
156
+ 945,62000,4.2601
157
+ 953,62400,5.6534
158
+ 958,62800,10.075
159
+ 963,63200,10.4977
160
+ 967,63600,11.9044
161
+ 971,64000,13.6212
162
+ 975,64400,13.086
163
+ 980,64800,12.0809
164
+ 987,65200,8.3579
165
+ 993,65600,8.2868
166
+ 997,66000,14.3792
167
+ 1001,66400,11.228
168
+ 1008,66800,8.2525
169
+ 1017,67200,6.1501
170
+ 1022,67600,9.1965
171
+ 1027,68000,11.0591
172
+ 1032,68400,11.1621
173
+ 1038,68800,10.1783
174
+ 1042,69200,10.0313
175
+ 1052,69600,5.234
176
+ 1060,70000,7.2449
177
+ 1064,70400,11.1294
178
+ 1070,70800,7.2192
179
+ 1075,71200,10.5247
180
+ 1082,71600,8.6408
181
+ 1087,72000,12.3906
182
+ 1091,72400,10.4689
183
+ 1095,72800,14.3503
184
+ 1100,73200,13.3414
185
+ 1104,73600,14.6343
186
+ 1108,74000,12.0234
187
+ 1113,74400,10.4043
188
+ 1118,74800,13.1958
189
+ 1122,75200,14.1113
190
+ 1128,75600,8.9121
191
+ 1133,76000,9.4241
192
+ 1138,76400,12.1937
193
+ 1142,76800,10.8917
194
+ 1148,77200,9.6075
195
+ 1152,77600,12.5465
196
+ 1157,78000,12.2911
197
+ 1161,78400,14.3991
198
+ 1166,78800,11.7178
199
+ 1170,79200,14.2131
200
+ 1176,79600,9.4352
201
+ 1182,80000,9.8783
202
+ 1187,80400,8.9671
203
+ 1192,80800,11.4625
204
+ 1197,81200,13.3021
205
+ 1201,81600,14.9308
206
+ 1205,82000,11.3369
207
+ 1212,82400,9.6094
208
+ 1217,82800,11.86
209
+ 1221,83200,12.7279
210
+ 1227,83600,10.7913
211
+ 1232,84000,11.0473
212
+ 1236,84400,14.2463
213
+ 1242,84800,9.8461
214
+ 1246,85200,14.7624
215
+ 1252,85600,10.4951
216
+ 1259,86000,8.9123
217
+ 1265,86400,10.1258
218
+ 1271,86800,9.0527
219
+ 1276,87200,12.5009
220
+ 1280,87600,14.9112
221
+ 1286,88000,9.7683
222
+ 1291,88400,12.2766
223
+ 1295,88800,14.6523
224
+ 1303,89200,8.0044
225
+ 1307,89600,14.4251
226
+ 1311,90000,14.7435
227
+ 1316,90400,13.5846
228
+ 1322,90800,11.1363
229
+ 1326,91200,13.1161
230
+ 1330,91600,14.8782
231
+ 1335,92000,12.9898
232
+ 1341,92400,10.5745
233
+ 1349,92800,7.7201
234
+ 1355,93200,9.7362
235
+ 1362,93600,9.0283
236
+ 1371,94000,5.595
237
+ 1376,94400,9.584
238
+ 1384,94800,7.7162
239
+ 1389,95200,9.5471
240
+ 1395,95600,11.8725
241
+ 1400,96000,11.4859
242
+ 1406,96400,10.0133
243
+ 1411,96800,10.0898
244
+ 1419,97200,7.1929
245
+ 1425,97600,10.2308
246
+ 1431,98000,10.8156
247
+ 1436,98400,11.1904
248
+ 1441,98800,13.0126
249
+ 1448,99200,9.5278
250
+ 1456,99600,7.7219
251
+ 1460,100000,11.838
252
+ 1467,100400,10.3963
253
+ 1471,100800,15.1954
254
+ 1477,101200,9.7102
255
+ 1483,101600,10.8302
256
+ 1487,102000,13.6986
257
+ 1491,102400,17.6513
258
+ 1497,102800,11.7696
259
+ 1502,103200,14.6363
260
+ 1506,103600,12.3268
261
+ 1510,104000,15.2753
262
+ 1515,104400,13.8737
263
+ 1520,104800,9.0522
264
+ 1524,105200,15.7886
265
+ 1529,105600,14.429
266
+ 1534,106000,12.0745
267
+ 1539,106400,13.0085
268
+ 1543,106800,16.5044
269
+ 1547,107200,12.8213
270
+ 1552,107600,10.883
271
+ 1557,108000,12.4299
272
+ 1561,108400,12.7274
273
+ 1565,108800,15.701
274
+ 1571,109200,11.4077
275
+ 1576,109600,13.4852
276
+ 1580,110000,15.1326
277
+ 1585,110400,12.6496
278
+ 1592,110800,11.1331
279
+ 1598,111200,10.001
280
+ 1604,111600,9.8463
281
+ 1610,112000,9.5269
282
+ 1615,112400,12.4523
283
+ 1623,112800,8.4184
284
+ 1627,113200,14.1469
285
+ 1634,113600,7.9029
286
+ 1640,114000,11.989
287
+ 1646,114400,10.0607
288
+ 1654,114800,7.935
289
+ 1658,115200,15.8146
290
+ 1662,115600,16.4529
291
+ 1666,116000,16.031
292
+ 1670,116400,16.961
293
+ 1674,116800,16.9752
294
+ 1680,117200,11.4042
295
+ 1687,117600,10.9484
296
+ 1693,118000,11.3393
297
+ 1697,118400,19.1626
298
+ 1702,118800,16.0651
299
+ 1707,119200,12.5378
300
+ 1713,119600,14.3033
301
+ 1720,120000,11.5588
302
+ 1724,120400,15.8731
303
+ 1729,120800,16.0878
304
+ 1734,121200,16.3894
305
+ 1739,121600,14.4359
306
+ 1745,122000,14.7653
307
+ 1752,122400,10.6896
308
+ 1756,122800,17.8908
309
+ 1762,123200,14.3452
310
+ 1768,123600,13.2548
311
+ 1772,124000,19.6647
312
+ 1776,124400,19.1847
313
+ 1780,124800,16.8379
314
+ 1786,125200,14.3642
315
+ 1790,125600,16.4554
316
+ 1794,126000,19.7687
317
+ 1801,126400,10.1183
318
+ 1806,126800,18.1265
319
+ 1812,127200,11.8105
320
+ 1818,127600,11.0402
321
+ 1823,128000,16.0082
322
+ 1830,128400,9.7205
323
+ 1836,128800,10.7887
324
+ 1841,129200,12.7927
325
+ 1845,129600,17.2088
326
+ 1852,130000,11.4728
327
+ 1857,130400,13.019
328
+ 1861,130800,18.7369
329
+ 1868,131200,11.284
330
+ 1873,131600,13.2389
331
+ 1878,132000,15.4627
332
+ 1882,132400,19.8252
333
+ 1889,132800,10.8438
334
+ 1894,133200,12.0874
335
+ 1901,133600,11.009
336
+ 1906,134000,13.7517
337
+ 1915,134400,8.6863
338
+ 1921,134800,10.9831
339
+ 1928,135200,11.1524
340
+ 1934,135600,11.4495
341
+ 1941,136000,9.4189
342
+ 1945,136400,18.9608
343
+ 1952,136800,9.5868
344
+ 1958,137200,10.5218
345
+ 1964,137600,13.0402
346
+ 1972,138000,7.7796
347
+ 1980,138400,7.3364
348
+ 1986,138800,13.7623
349
+ 1991,139200,16.2419
350
+ 1996,139600,11.4699
351
+ 2003,140000,10.983
352
+ 2012,140400,7.5045
353
+ 2016,140800,20.0741
354
+ 2022,141200,12.6223
355
+ 2027,141600,15.3017
356
+ 2033,142000,12.7772
357
+ 2037,142400,20.7311
358
+ 2041,142800,18.8427
359
+ 2046,143200,19.43
360
+ 2051,143600,15.0906
361
+ 2056,144000,13.0377
362
+ 2065,144400,7.2051
363
+ 2071,144800,13.2424
364
+ 2078,145200,11.0955
365
+ 2088,145600,5.3895
366
+ 2093,146000,12.318
367
+ 2100,146400,12.8436
368
+ 2105,146800,14.4873
369
+ 2109,147200,19.8853
370
+ 2117,147600,10.7438
371
+ 2123,148000,13.8923
372
+ 2128,148400,15.6226
373
+ 2132,148800,18.0464
374
+ 2137,149200,16.3704
375
+ 2144,149600,12.576
376
+ 2148,150000,20.3183
377
+ 2156,150400,9.807
378
+ 2161,150800,12.9068
379
+ 2165,151200,21.0426
380
+ 2171,151600,15.371
381
+ 2175,152000,17.7154
382
+ 2180,152400,17.2565
383
+ 2185,152800,18.8765
384
+ 2190,153200,17.2862
385
+ 2194,153600,19.4167
386
+ 2200,154000,13.2546
387
+ 2205,154400,15.271
388
+ 2211,154800,14.3329
389
+ 2215,155200,21.1205
390
+ 2221,155600,12.1206
391
+ 2227,156000,14.4072
392
+ 2232,156400,16.5247
393
+ 2236,156800,21.231
394
+ 2240,157200,20.8302
395
+ 2244,157600,19.4892
396
+ 2248,158000,20.936
397
+ 2252,158400,21.2178
398
+ 2256,158800,21.1056
399
+ 2261,159200,16.8762
400
+ 2267,159600,14.0483
401
+ 2272,160000,13.3591
402
+ 2279,160400,12.6142
403
+ 2285,160800,9.0069
404
+ 2291,161200,13.7843
405
+ 2300,161600,7.8473
406
+ 2307,162000,9.9078
407
+ 2312,162400,15.3595
408
+ 2317,162800,14.6767
409
+ 2324,163200,10.7115
410
+ 2331,163600,9.728
411
+ 2336,164000,14.8431
412
+ 2341,164400,15.2026
413
+ 2346,164800,12.322
414
+ 2351,165200,11.1342
415
+ 2356,165600,14.9371
416
+ 2361,166000,13.2367
417
+ 2365,166400,20.3463
418
+ 2371,166800,10.1723
419
+ 2377,167200,10.4504
420
+ 2381,167600,18.0134
421
+ 2388,168000,10.336
422
+ 2395,168400,11.1223
423
+ 2403,168800,6.6491
424
+ 2410,169200,9.6499
425
+ 2418,169600,7.0802
426
+ 2423,170000,10.1417
427
+ 2431,170400,6.2054
428
+ 2440,170800,7.8601
429
+ 2449,171200,7.7198
430
+ 2454,171600,14.5012
431
+ 2459,172000,15.7605
432
+ 2467,172400,7.7054
433
+ 2473,172800,12.5347
434
+ 2478,173200,15.5908
435
+ 2486,173600,5.7416
436
+ 2494,174000,9.3572
437
+ 2501,174400,9.0669
438
+ 2505,174800,11.2773
439
+ 2512,175200,6.6277
440
+ 2518,175600,4.9397
441
+ 2524,176000,6.4387
442
+ 2529,176400,9.8694
443
+ 2534,176800,8.5331
444
+ 2538,177200,8.5411
445
+ 2545,177600,6.2453
446
+ 2549,178000,11.7878
447
+ 2554,178400,10.175
448
+ 2558,178800,12.4336
449
+ 2565,179200,6.5908
450
+ 2570,179600,6.219
451
+ 2577,180000,6.0009
452
+ 2582,180400,5.8638
453
+ 2588,180800,6.9441
454
+ 2592,181200,6.8218
455
+ 2598,181600,9.0203
456
+ 2604,182000,7.8072
457
+ 2609,182400,6.4259
458
+ 2616,182800,6.3331
459
+ 2622,183200,9.3925
460
+ 2631,183600,6.0891
461
+ 2639,184000,5.3467
462
+ 2645,184400,5.1621
463
+ 2652,184800,7.0998
464
+ 2656,185200,8.041
465
+ 2663,185600,4.7481
466
+ 2670,186000,5.0002
467
+ 2677,186400,4.4118
468
+ 2682,186800,5.4232
469
+ 2689,187200,7.2963
470
+ 2693,187600,6.6073
471
+ 2699,188000,8.326
472
+ 2707,188400,5.5025
473
+ 2713,188800,5.5795
474
+ 2721,189200,3.796
475
+ 2728,189600,5.6303
476
+ 2734,190000,6.2013
477
+ 2740,190400,6.0202
478
+ 2746,190800,4.2373
479
+ 2751,191200,8.5135
480
+ 2758,191600,3.9532
481
+ 2763,192000,7.6742
482
+ 2770,192400,4.637
483
+ 2776,192800,4.4964
484
+ 2781,193200,8.2152
485
+ 2789,193600,4.0498
486
+ 2793,194000,7.944
487
+ 2798,194400,6.7629
488
+ 2802,194800,6.5532
489
+ 2807,195200,7.4252
490
+ 2811,195600,8.0144
491
+ 2815,196000,7.9792
492
+ 2819,196400,7.9971
493
+ 2824,196800,5.5573
494
+ 2828,197200,7.3163
495
+ 2833,197600,8.0791
496
+ 2838,198000,5.7746
497
+ 2843,198400,7.1306
498
+ 2848,198800,7.198
499
+ 2852,199200,8.5707
500
+ 2858,199600,5.5788
501
+ 2862,200000,8.7377
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_5.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 22,400,1.2943
3
+ 41,800,1.6015
4
+ 55,1200,1.9737
5
+ 64,1600,3.5487
6
+ 73,2000,3.146
7
+ 82,2400,3.2991
8
+ 92,2800,2.7673
9
+ 100,3200,3.8523
10
+ 107,3600,3.7979
11
+ 115,4000,4.256
12
+ 119,4400,6.4793
13
+ 124,4800,5.5414
14
+ 128,5200,7.3476
15
+ 133,5600,5.8006
16
+ 138,6000,6.1344
17
+ 144,6400,5.8326
18
+ 150,6800,4.8366
19
+ 154,7200,6.7648
20
+ 159,7600,6.5947
21
+ 163,8000,7.3957
22
+ 167,8400,7.3241
23
+ 172,8800,5.9961
24
+ 177,9200,6.0296
25
+ 182,9600,7.2685
26
+ 187,10000,6.637
27
+ 192,10400,5.999
28
+ 196,10800,6.4386
29
+ 201,11200,6.6646
30
+ 207,11600,5.4069
31
+ 212,12000,6.6252
32
+ 216,12400,6.4882
33
+ 223,12800,5.5298
34
+ 228,13200,6.2035
35
+ 233,13600,5.9536
36
+ 237,14000,6.9952
37
+ 244,14400,4.9771
38
+ 249,14800,6.1157
39
+ 253,15200,7.8858
40
+ 257,15600,7.8068
41
+ 261,16000,7.9837
42
+ 266,16400,6.6484
43
+ 270,16800,8.0041
44
+ 274,17200,6.6656
45
+ 278,17600,8.0284
46
+ 283,18000,7.3533
47
+ 290,18400,4.5723
48
+ 295,18800,6.8589
49
+ 299,19200,8.1765
50
+ 303,19600,8.1
51
+ 307,20000,8.1549
52
+ 312,20400,5.818
53
+ 318,20800,6.0381
54
+ 323,21200,5.8448
55
+ 327,21600,8.4165
56
+ 332,22000,6.3011
57
+ 336,22400,8.2937
58
+ 343,22800,5.503
59
+ 347,23200,8.5968
60
+ 353,23600,5.1292
61
+ 358,24000,7.4338
62
+ 363,24400,6.6991
63
+ 368,24800,6.5642
64
+ 373,25200,6.8225
65
+ 377,25600,8.3731
66
+ 382,26000,7.4055
67
+ 387,26400,7.1132
68
+ 393,26800,5.6948
69
+ 399,27200,5.2845
70
+ 404,27600,5.9669
71
+ 408,28000,8.3363
72
+ 415,28400,5.6672
73
+ 420,28800,5.4611
74
+ 425,29200,7.714
75
+ 429,29600,7.5512
76
+ 434,30000,7.2347
77
+ 439,30400,6.5507
78
+ 443,30800,8.4242
79
+ 449,31200,6.0604
80
+ 453,31600,8.2476
81
+ 458,32000,8.0065
82
+ 462,32400,8.2966
83
+ 467,32800,6.7805
84
+ 471,33200,9.484
85
+ 475,33600,8.7649
86
+ 481,34000,8.2474
87
+ 485,34400,9.5795
88
+ 490,34800,8.6045
89
+ 496,35200,7.4124
90
+ 501,35600,7.6157
91
+ 505,36000,10.9111
92
+ 510,36400,8.8556
93
+ 514,36800,9.9951
94
+ 519,37200,11.3025
95
+ 523,37600,11.5043
96
+ 527,38000,11.7302
97
+ 532,38400,10.3442
98
+ 536,38800,12.0948
99
+ 541,39200,10.2846
100
+ 545,39600,12.2089
101
+ 549,40000,11.8314
102
+ 554,40400,9.8942
103
+ 559,40800,10.0485
104
+ 563,41200,12.4629
105
+ 567,41600,12.2332
106
+ 571,42000,11.1847
107
+ 575,42400,12.4524
108
+ 580,42800,10.2353
109
+ 587,43200,6.303
110
+ 591,43600,12.3469
111
+ 596,44000,11.1024
112
+ 600,44400,12.4106
113
+ 604,44800,11.5728
114
+ 608,45200,10.9579
115
+ 614,45600,9.6399
116
+ 618,46000,11.3217
117
+ 622,46400,12.529
118
+ 628,46800,9.0714
119
+ 633,47200,9.9013
120
+ 638,47600,10.0751
121
+ 643,48000,10.2499
122
+ 647,48400,12.5708
123
+ 652,48800,8.7569
124
+ 656,49200,12.3604
125
+ 660,49600,12.5165
126
+ 664,50000,12.3671
127
+ 668,50400,12.618
128
+ 672,50800,12.6543
129
+ 677,51200,10.6911
130
+ 681,51600,11.8317
131
+ 685,52000,12.6955
132
+ 690,52400,10.1157
133
+ 695,52800,10.0116
134
+ 700,53200,10.2901
135
+ 705,53600,8.1824
136
+ 710,54000,9.8285
137
+ 716,54400,8.8717
138
+ 721,54800,9.9854
139
+ 725,55200,12.9736
140
+ 731,55600,8.6973
141
+ 737,56000,8.4719
142
+ 742,56400,10.3744
143
+ 747,56800,8.9466
144
+ 752,57200,10.6086
145
+ 758,57600,10.0045
146
+ 763,58000,9.8052
147
+ 768,58400,11.2697
148
+ 774,58800,7.2144
149
+ 780,59200,9.6594
150
+ 784,59600,12.3888
151
+ 789,60000,10.4215
152
+ 795,60400,8.8573
153
+ 800,60800,10.8871
154
+ 807,61200,5.8992
155
+ 812,61600,10.12
156
+ 817,62000,10.8824
157
+ 821,62400,13.1221
158
+ 826,62800,10.7535
159
+ 830,63200,13.1014
160
+ 835,63600,10.7345
161
+ 839,64000,13.082
162
+ 843,64400,13.2581
163
+ 847,64800,13.4413
164
+ 851,65200,13.5097
165
+ 855,65600,13.3641
166
+ 859,66000,13.3583
167
+ 863,66400,13.1701
168
+ 868,66800,11.1134
169
+ 872,67200,13.6245
170
+ 880,67600,5.9886
171
+ 886,68000,10.2475
172
+ 890,68400,14.6202
173
+ 894,68800,10.0685
174
+ 900,69200,11.0053
175
+ 904,69600,13.6849
176
+ 909,70000,9.6346
177
+ 913,70400,13.5021
178
+ 917,70800,13.1414
179
+ 922,71200,10.8646
180
+ 926,71600,13.4908
181
+ 932,72000,9.1396
182
+ 936,72400,11.6489
183
+ 940,72800,13.2407
184
+ 944,73200,13.0342
185
+ 948,73600,13.5329
186
+ 952,74000,13.4403
187
+ 956,74400,13.7446
188
+ 960,74800,13.1605
189
+ 964,75200,13.3433
190
+ 969,75600,10.9952
191
+ 974,76000,11.5741
192
+ 980,76400,9.3918
193
+ 985,76800,11.7496
194
+ 992,77200,8.3268
195
+ 997,77600,10.0094
196
+ 1002,78000,10.6563
197
+ 1007,78400,11.5348
198
+ 1011,78800,15.119
199
+ 1017,79200,12.0912
200
+ 1023,79600,8.9712
201
+ 1027,80000,11.6409
202
+ 1031,80400,15.629
203
+ 1036,80800,13.3766
204
+ 1040,81200,15.2536
205
+ 1045,81600,12.2289
206
+ 1049,82000,15.4839
207
+ 1055,82400,9.1355
208
+ 1060,82800,12.202
209
+ 1064,83200,14.748
210
+ 1068,83600,14.5001
211
+ 1072,84000,15.0432
212
+ 1080,84400,7.0194
213
+ 1085,84800,10.1806
214
+ 1090,85200,11.795
215
+ 1095,85600,12.7322
216
+ 1100,86000,10.2449
217
+ 1109,86400,6.4535
218
+ 1117,86800,6.893
219
+ 1123,87200,8.8297
220
+ 1129,87600,9.9537
221
+ 1134,88000,9.4762
222
+ 1138,88400,15.7652
223
+ 1144,88800,12.4334
224
+ 1153,89200,6.025
225
+ 1157,89600,14.0408
226
+ 1162,90000,12.8277
227
+ 1167,90400,12.4492
228
+ 1173,90800,8.5448
229
+ 1180,91200,9.9864
230
+ 1184,91600,15.1804
231
+ 1190,92000,8.7061
232
+ 1196,92400,11.9629
233
+ 1202,92800,9.8801
234
+ 1207,93200,10.6953
235
+ 1211,93600,16.5854
236
+ 1221,94000,6.0867
237
+ 1229,94400,9.2746
238
+ 1234,94800,12.5134
239
+ 1240,95200,8.38
240
+ 1245,95600,14.4995
241
+ 1249,96000,16.0927
242
+ 1255,96400,8.763
243
+ 1261,96800,11.1095
244
+ 1265,97200,15.5608
245
+ 1271,97600,12.1043
246
+ 1278,98000,9.2772
247
+ 1283,98400,10.4808
248
+ 1291,98800,9.4265
249
+ 1296,99200,13.4881
250
+ 1303,99600,8.7073
251
+ 1311,100000,7.7693
252
+ 1323,100400,5.2588
253
+ 1328,100800,12.0091
254
+ 1338,101200,6.3879
255
+ 1346,101600,9.2148
256
+ 1354,102000,6.5712
257
+ 1359,102400,15.4022
258
+ 1363,102800,17.8751
259
+ 1372,103200,5.6517
260
+ 1379,103600,11.7892
261
+ 1383,104000,13.7202
262
+ 1390,104400,12.0935
263
+ 1398,104800,8.3031
264
+ 1403,105200,11.4273
265
+ 1409,105600,13.6581
266
+ 1414,106000,12.8389
267
+ 1421,106400,10.5737
268
+ 1426,106800,14.0672
269
+ 1431,107200,15.1905
270
+ 1435,107600,16.499
271
+ 1441,108000,14.9027
272
+ 1447,108400,12.2743
273
+ 1453,108800,12.5351
274
+ 1458,109200,13.3324
275
+ 1464,109600,10.1987
276
+ 1470,110000,14.6681
277
+ 1480,110400,6.3981
278
+ 1484,110800,17.443
279
+ 1489,111200,11.7544
280
+ 1494,111600,12.3627
281
+ 1500,112000,14.2433
282
+ 1507,112400,9.9183
283
+ 1514,112800,10.1194
284
+ 1520,113200,11.6173
285
+ 1526,113600,12.6407
286
+ 1530,114000,14.9395
287
+ 1537,114400,10.6247
288
+ 1546,114800,8.2285
289
+ 1555,115200,7.4419
290
+ 1565,115600,6.9423
291
+ 1571,116000,11.8016
292
+ 1576,116400,13.4006
293
+ 1582,116800,12.1714
294
+ 1589,117200,9.5481
295
+ 1594,117600,17.1002
296
+ 1599,118000,13.3729
297
+ 1605,118400,14.4748
298
+ 1610,118800,15.9086
299
+ 1616,119200,13.515
300
+ 1621,119600,13.6216
301
+ 1627,120000,13.3638
302
+ 1631,120400,15.8724
303
+ 1637,120800,13.1813
304
+ 1642,121200,16.7492
305
+ 1647,121600,16.2201
306
+ 1652,122000,14.2471
307
+ 1658,122400,11.9562
308
+ 1663,122800,14.3429
309
+ 1670,123200,8.2829
310
+ 1675,123600,12.7689
311
+ 1679,124000,17.9555
312
+ 1686,124400,10.3992
313
+ 1693,124800,9.3774
314
+ 1700,125200,10.5229
315
+ 1705,125600,13.095
316
+ 1709,126000,16.827
317
+ 1716,126400,11.5151
318
+ 1720,126800,15.0954
319
+ 1725,127200,17.1223
320
+ 1729,127600,19.2025
321
+ 1734,128000,16.3688
322
+ 1740,128400,13.0476
323
+ 1744,128800,17.5761
324
+ 1751,129200,9.9195
325
+ 1758,129600,12.3492
326
+ 1763,130000,12.9946
327
+ 1768,130400,15.0853
328
+ 1774,130800,12.7493
329
+ 1780,131200,13.7049
330
+ 1784,131600,16.4027
331
+ 1791,132000,10.02
332
+ 1796,132400,13.2953
333
+ 1802,132800,13.2571
334
+ 1807,133200,16.9227
335
+ 1816,133600,7.5362
336
+ 1823,134000,9.0337
337
+ 1831,134400,8.2112
338
+ 1839,134800,9.5222
339
+ 1845,135200,10.5068
340
+ 1851,135600,15.2168
341
+ 1860,136000,7.9646
342
+ 1866,136400,9.9186
343
+ 1872,136800,11.7983
344
+ 1878,137200,15.5265
345
+ 1884,137600,11.4403
346
+ 1889,138000,14.1125
347
+ 1893,138400,18.7814
348
+ 1898,138800,15.9716
349
+ 1907,139200,6.6898
350
+ 1916,139600,8.6765
351
+ 1926,140000,6.0023
352
+ 1930,140400,18.1008
353
+ 1938,140800,8.4123
354
+ 1946,141200,8.4666
355
+ 1953,141600,9.8084
356
+ 1958,142000,12.6225
357
+ 1962,142400,16.6253
358
+ 1969,142800,11.879
359
+ 1979,143200,5.891
360
+ 1983,143600,14.6341
361
+ 1991,144000,10.6154
362
+ 1995,144400,19.3371
363
+ 2000,144800,14.6999
364
+ 2005,145200,17.3212
365
+ 2010,145600,13.511
366
+ 2019,146000,8.9176
367
+ 2025,146400,11.9234
368
+ 2029,146800,18.7508
369
+ 2035,147200,11.3652
370
+ 2042,147600,10.7875
371
+ 2047,148000,17.2345
372
+ 2052,148400,13.469
373
+ 2057,148800,15.0187
374
+ 2062,149200,18.2483
375
+ 2067,149600,15.1589
376
+ 2072,150000,13.8506
377
+ 2078,150400,13.5154
378
+ 2083,150800,17.4931
379
+ 2088,151200,13.0434
380
+ 2094,151600,12.4448
381
+ 2098,152000,16.2641
382
+ 2103,152400,18.5631
383
+ 2109,152800,13.4721
384
+ 2114,153200,17.5024
385
+ 2118,153600,17.2629
386
+ 2123,154000,16.0443
387
+ 2130,154400,12.4068
388
+ 2134,154800,19.7433
389
+ 2141,155200,12.1725
390
+ 2145,155600,17.4482
391
+ 2151,156000,14.9725
392
+ 2158,156400,8.3286
393
+ 2163,156800,15.3569
394
+ 2168,157200,16.8249
395
+ 2176,157600,10.7159
396
+ 2181,158000,13.5756
397
+ 2186,158400,16.8722
398
+ 2191,158800,16.1566
399
+ 2196,159200,16.4402
400
+ 2200,159600,19.2901
401
+ 2206,160000,13.408
402
+ 2216,160400,6.5277
403
+ 2221,160800,16.5398
404
+ 2225,161200,17.9313
405
+ 2229,161600,16.0893
406
+ 2234,162000,13.5297
407
+ 2239,162400,18.5255
408
+ 2244,162800,15.412
409
+ 2249,163200,16.7656
410
+ 2254,163600,12.2883
411
+ 2260,164000,12.8931
412
+ 2268,164400,9.994
413
+ 2273,164800,14.903
414
+ 2278,165200,14.9247
415
+ 2284,165600,13.8873
416
+ 2288,166000,15.893
417
+ 2294,166400,16.0733
418
+ 2298,166800,20.3023
419
+ 2306,167200,8.823
420
+ 2311,167600,14.7959
421
+ 2316,168000,15.5506
422
+ 2320,168400,18.9412
423
+ 2328,168800,9.9269
424
+ 2337,169200,7.8662
425
+ 2342,169600,13.8098
426
+ 2347,170000,16.0699
427
+ 2352,170400,14.2326
428
+ 2357,170800,14.3457
429
+ 2365,171200,7.755
430
+ 2372,171600,11.1969
431
+ 2377,172000,17.7461
432
+ 2382,172400,14.5072
433
+ 2387,172800,15.6355
434
+ 2392,173200,16.9889
435
+ 2397,173600,16.7953
436
+ 2402,174000,14.2381
437
+ 2407,174400,15.2855
438
+ 2414,174800,9.9614
439
+ 2422,175200,6.716
440
+ 2429,175600,9.1914
441
+ 2433,176000,15.9262
442
+ 2439,176400,13.2029
443
+ 2444,176800,12.0649
444
+ 2448,177200,11.2086
445
+ 2453,177600,17.596
446
+ 2458,178000,10.4265
447
+ 2464,178400,12.7507
448
+ 2470,178800,10.5018
449
+ 2477,179200,9.6174
450
+ 2484,179600,12.3463
451
+ 2493,180000,7.463
452
+ 2502,180400,7.0977
453
+ 2511,180800,6.566
454
+ 2516,181200,12.4002
455
+ 2522,181600,12.8125
456
+ 2526,182000,16.8705
457
+ 2532,182400,15.6621
458
+ 2537,182800,15.5005
459
+ 2545,183200,7.1256
460
+ 2549,183600,19.091
461
+ 2553,184000,20.7269
462
+ 2558,184400,17.8736
463
+ 2562,184800,16.5145
464
+ 2568,185200,12.29
465
+ 2574,185600,11.165
466
+ 2580,186000,14.862
467
+ 2585,186400,12.9498
468
+ 2590,186800,16.6379
469
+ 2597,187200,12.0999
470
+ 2601,187600,19.986
471
+ 2605,188000,21.4165
472
+ 2610,188400,12.3688
473
+ 2614,188800,17.9821
474
+ 2618,189200,20.5196
475
+ 2623,189600,19.5269
476
+ 2629,190000,11.2376
477
+ 2633,190400,20.1738
478
+ 2637,190800,20.5599
479
+ 2642,191200,15.8952
480
+ 2649,191600,12.2397
481
+ 2654,192000,16.6287
482
+ 2659,192400,14.966
483
+ 2665,192800,13.1073
484
+ 2671,193200,11.1274
485
+ 2676,193600,15.6753
486
+ 2681,194000,15.8626
487
+ 2688,194400,9.1965
488
+ 2693,194800,15.0967
489
+ 2697,195200,19.8348
490
+ 2701,195600,14.855
491
+ 2706,196000,13.0618
492
+ 2715,196400,9.1275
493
+ 2721,196800,7.1574
494
+ 2726,197200,16.5246
495
+ 2732,197600,10.026
496
+ 2739,198000,11.8092
497
+ 2745,198400,10.2368
498
+ 2751,198800,11.7544
499
+ 2757,199200,11.6034
500
+ 2764,199600,9.3743
501
+ 2769,200000,15.3278
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_0_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8370e4fdc218fd2e709261132b6cb059f988bd8753b44228b5191c345b8cc2ed
3
+ size 205481
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_1.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 15,400,1.9345
3
+ 32,800,1.8784
4
+ 42,1200,3.0732
5
+ 52,1600,2.9835
6
+ 65,2000,2.3838
7
+ 69,2400,7.2159
8
+ 75,2800,4.758
9
+ 80,3200,5.6732
10
+ 85,3600,7.076
11
+ 89,4000,5.77
12
+ 95,4400,5.4889
13
+ 101,4800,4.8132
14
+ 106,5200,7.4812
15
+ 115,5600,2.816
16
+ 123,6000,4.0168
17
+ 129,6400,5.4723
18
+ 137,6800,3.675
19
+ 142,7200,5.8091
20
+ 147,7600,5.6523
21
+ 151,8000,7.3976
22
+ 158,8400,4.279
23
+ 164,8800,4.7596
24
+ 168,9200,7.4893
25
+ 173,9600,6.204
26
+ 177,10000,7.3342
27
+ 183,10400,4.9965
28
+ 188,10800,6.2773
29
+ 193,11200,6.1197
30
+ 197,11600,7.2967
31
+ 201,12000,7.4756
32
+ 205,12400,7.1791
33
+ 209,12800,7.3157
34
+ 213,13200,7.0592
35
+ 218,13600,5.8954
36
+ 222,14000,7.1992
37
+ 227,14400,6.5493
38
+ 232,14800,5.9315
39
+ 237,15200,7.0655
40
+ 243,15600,5.2935
41
+ 251,16000,3.6833
42
+ 258,16400,4.511
43
+ 267,16800,4.1031
44
+ 271,17200,7.4993
45
+ 275,17600,9.0943
46
+ 279,18000,7.6642
47
+ 285,18400,6.6167
48
+ 291,18800,5.4259
49
+ 295,19200,7.9226
50
+ 299,19600,8.2753
51
+ 304,20000,6.8015
52
+ 308,20400,7.8775
53
+ 315,20800,4.5316
54
+ 321,21200,5.4028
55
+ 326,21600,6.3314
56
+ 330,22000,8.1528
57
+ 334,22400,7.4973
58
+ 339,22800,6.6922
59
+ 343,23200,7.7072
60
+ 347,23600,7.2187
61
+ 351,24000,7.7859
62
+ 356,24400,7.0575
63
+ 360,24800,7.2281
64
+ 366,25200,5.8797
65
+ 372,25600,4.8101
66
+ 378,26000,5.0378
67
+ 384,26400,5.6277
68
+ 389,26800,5.2954
69
+ 400,27200,2.9769
70
+ 406,27600,4.6418
71
+ 416,28000,3.4303
72
+ 423,28400,4.0932
73
+ 430,28800,4.505
74
+ 435,29200,5.5546
75
+ 439,29600,10.5906
76
+ 445,30000,6.1133
77
+ 451,30400,5.9823
78
+ 455,30800,8.4017
79
+ 462,31200,5.0732
80
+ 468,31600,5.7932
81
+ 474,32000,6.708
82
+ 479,32400,6.8853
83
+ 485,32800,9.0958
84
+ 490,33200,6.4707
85
+ 495,33600,7.4192
86
+ 501,34000,6.0651
87
+ 506,34400,6.7035
88
+ 511,34800,6.8316
89
+ 518,35200,5.9218
90
+ 524,35600,7.2932
91
+ 528,36000,8.8676
92
+ 534,36400,8.8016
93
+ 538,36800,8.7465
94
+ 544,37200,8.41
95
+ 549,37600,8.3048
96
+ 555,38000,8.0676
97
+ 561,38400,6.0523
98
+ 566,38800,8.8973
99
+ 571,39200,8.7871
100
+ 577,39600,6.6108
101
+ 582,40000,10.0771
102
+ 587,40400,7.6992
103
+ 594,40800,5.8859
104
+ 602,41200,5.2757
105
+ 607,41600,7.4604
106
+ 614,42000,6.3297
107
+ 624,42400,4.3
108
+ 630,42800,7.3462
109
+ 636,43200,7.7832
110
+ 642,43600,7.2278
111
+ 649,44000,7.0903
112
+ 656,44400,7.2864
113
+ 660,44800,11.4337
114
+ 664,45200,9.4219
115
+ 669,45600,10.1337
116
+ 674,46000,10.8808
117
+ 679,46400,9.5832
118
+ 686,46800,6.8856
119
+ 692,47200,7.9853
120
+ 699,47600,5.4185
121
+ 705,48000,8.51
122
+ 711,48400,8.7795
123
+ 717,48800,8.1386
124
+ 721,49200,12.1496
125
+ 727,49600,9.671
126
+ 734,50000,4.7004
127
+ 742,50400,5.4595
128
+ 751,50800,7.1566
129
+ 756,51200,11.0405
130
+ 763,51600,7.8217
131
+ 769,52000,6.9623
132
+ 774,52400,9.7564
133
+ 780,52800,8.2737
134
+ 786,53200,7.7905
135
+ 792,53600,8.2016
136
+ 798,54000,11.1113
137
+ 803,54400,9.71
138
+ 811,54800,6.3473
139
+ 816,55200,10.3967
140
+ 822,55600,8.5624
141
+ 829,56000,7.9157
142
+ 836,56400,7.3681
143
+ 843,56800,7.9999
144
+ 848,57200,12.6498
145
+ 855,57600,6.7851
146
+ 864,58000,5.926
147
+ 871,58400,8.3211
148
+ 877,58800,8.4122
149
+ 885,59200,5.5055
150
+ 892,59600,4.8152
151
+ 896,60000,10.2298
152
+ 903,60400,8.8526
153
+ 908,60800,10.4385
154
+ 914,61200,6.8265
155
+ 920,61600,7.4231
156
+ 926,62000,7.7046
157
+ 932,62400,8.744
158
+ 939,62800,6.815
159
+ 944,63200,8.9794
160
+ 950,63600,9.0955
161
+ 960,64000,3.9011
162
+ 966,64400,9.1169
163
+ 970,64800,9.8831
164
+ 975,65200,10.7317
165
+ 983,65600,5.231
166
+ 988,66000,9.8312
167
+ 993,66400,8.1545
168
+ 999,66800,8.3283
169
+ 1008,67200,3.3866
170
+ 1014,67600,6.2135
171
+ 1018,68000,8.4834
172
+ 1025,68400,4.2607
173
+ 1031,68800,7.755
174
+ 1035,69200,6.7764
175
+ 1043,69600,4.8484
176
+ 1049,70000,6.8767
177
+ 1055,70400,7.2876
178
+ 1061,70800,6.5337
179
+ 1066,71200,8.7095
180
+ 1071,71600,8.5315
181
+ 1076,72000,8.1106
182
+ 1082,72400,6.5303
183
+ 1088,72800,5.1858
184
+ 1092,73200,8.2166
185
+ 1097,73600,7.7642
186
+ 1103,74000,5.7689
187
+ 1109,74400,6.5443
188
+ 1113,74800,6.0629
189
+ 1119,75200,6.9554
190
+ 1124,75600,6.5107
191
+ 1131,76000,5.8779
192
+ 1136,76400,7.0196
193
+ 1141,76800,8.221
194
+ 1146,77200,7.0804
195
+ 1150,77600,7.0824
196
+ 1158,78000,4.9257
197
+ 1165,78400,4.3861
198
+ 1171,78800,5.412
199
+ 1178,79200,5.8524
200
+ 1186,79600,4.6331
201
+ 1193,80000,5.5782
202
+ 1199,80400,7.2403
203
+ 1204,80800,8.5925
204
+ 1208,81200,9.0404
205
+ 1212,81600,10.5095
206
+ 1220,82000,7.0118
207
+ 1225,82400,8.1882
208
+ 1229,82800,10.863
209
+ 1233,83200,10.7497
210
+ 1239,83600,6.4536
211
+ 1243,84000,11.2907
212
+ 1247,84400,11.783
213
+ 1252,84800,11.0893
214
+ 1257,85200,9.8707
215
+ 1261,85600,13.0076
216
+ 1265,86000,12.8858
217
+ 1269,86400,13.1408
218
+ 1273,86800,11.8421
219
+ 1277,87200,9.9549
220
+ 1284,87600,6.7063
221
+ 1288,88000,12.4549
222
+ 1292,88400,13.2602
223
+ 1299,88800,7.2956
224
+ 1303,89200,15.6022
225
+ 1308,89600,12.0928
226
+ 1317,90000,3.9969
227
+ 1323,90400,6.6326
228
+ 1331,90800,6.7157
229
+ 1337,91200,6.7272
230
+ 1346,91600,4.2204
231
+ 1351,92000,10.5966
232
+ 1357,92400,6.7511
233
+ 1364,92800,6.8195
234
+ 1371,93200,6.92
235
+ 1381,93600,4.6135
236
+ 1387,94000,9.0048
237
+ 1395,94400,4.8047
238
+ 1401,94800,9.5174
239
+ 1407,95200,11.6457
240
+ 1414,95600,6.9109
241
+ 1421,96000,5.7336
242
+ 1430,96400,5.3074
243
+ 1434,96800,9.2165
244
+ 1439,97200,9.5083
245
+ 1447,97600,5.2046
246
+ 1456,98000,4.3764
247
+ 1462,98400,5.923
248
+ 1467,98800,8.1644
249
+ 1473,99200,6.7967
250
+ 1480,99600,7.7647
251
+ 1484,100000,12.2281
252
+ 1488,100400,9.1569
253
+ 1494,100800,8.54
254
+ 1499,101200,9.4563
255
+ 1503,101600,9.5965
256
+ 1509,102000,8.6554
257
+ 1514,102400,11.0492
258
+ 1518,102800,10.4916
259
+ 1525,103200,5.7317
260
+ 1534,103600,5.4161
261
+ 1539,104000,9.7729
262
+ 1546,104400,6.9356
263
+ 1552,104800,7.0423
264
+ 1556,105200,12.5092
265
+ 1563,105600,8.7948
266
+ 1567,106000,12.3567
267
+ 1573,106400,7.8177
268
+ 1577,106800,12.8039
269
+ 1584,107200,6.8714
270
+ 1589,107600,8.3869
271
+ 1593,108000,12.6683
272
+ 1601,108400,6.5902
273
+ 1607,108800,8.628
274
+ 1615,109200,5.4762
275
+ 1621,109600,9.3018
276
+ 1627,110000,7.4751
277
+ 1640,110400,4.139
278
+ 1647,110800,7.7327
279
+ 1652,111200,10.3567
280
+ 1662,111600,4.9881
281
+ 1668,112000,8.8983
282
+ 1673,112400,9.1326
283
+ 1683,112800,3.9693
284
+ 1693,113200,4.1639
285
+ 1699,113600,8.137
286
+ 1705,114000,8.6208
287
+ 1710,114400,9.1387
288
+ 1717,114800,6.4969
289
+ 1724,115200,5.8328
290
+ 1729,115600,9.6428
291
+ 1736,116000,7.7885
292
+ 1742,116400,6.1858
293
+ 1751,116800,5.3759
294
+ 1760,117200,4.9088
295
+ 1764,117600,12.8121
296
+ 1773,118000,5.528
297
+ 1781,118400,4.8728
298
+ 1786,118800,9.4107
299
+ 1794,119200,5.6412
300
+ 1800,119600,8.3779
301
+ 1804,120000,14.4553
302
+ 1814,120400,4.32
303
+ 1819,120800,12.7509
304
+ 1828,121200,6.4023
305
+ 1833,121600,15.743
306
+ 1841,122000,6.9661
307
+ 1847,122400,11.3894
308
+ 1852,122800,12.2086
309
+ 1857,123200,16.0304
310
+ 1862,123600,16.1933
311
+ 1870,124000,8.6231
312
+ 1875,124400,13.6078
313
+ 1880,124800,14.3245
314
+ 1889,125200,5.8726
315
+ 1896,125600,9.655
316
+ 1904,126000,6.397
317
+ 1912,126400,5.4703
318
+ 1917,126800,13.7367
319
+ 1923,127200,10.1322
320
+ 1928,127600,12.9031
321
+ 1932,128000,11.6047
322
+ 1941,128400,8.1528
323
+ 1948,128800,10.0696
324
+ 1953,129200,12.8557
325
+ 1964,129600,6.1629
326
+ 1974,130000,4.7264
327
+ 1983,130400,5.0263
328
+ 1989,130800,13.0645
329
+ 2000,131200,4.9296
330
+ 2006,131600,9.139
331
+ 2016,132000,6.4189
332
+ 2025,132400,4.758
333
+ 2033,132800,7.2135
334
+ 2038,133200,8.544
335
+ 2045,133600,9.7969
336
+ 2051,134000,7.3074
337
+ 2063,134400,4.5675
338
+ 2072,134800,4.937
339
+ 2080,135200,6.392
340
+ 2088,135600,4.6451
341
+ 2093,136000,9.6252
342
+ 2100,136400,5.984
343
+ 2106,136800,7.3098
344
+ 2114,137200,6.8205
345
+ 2122,137600,5.1513
346
+ 2130,138000,6.8115
347
+ 2140,138400,4.8287
348
+ 2149,138800,3.6011
349
+ 2157,139200,5.5075
350
+ 2163,139600,9.1481
351
+ 2171,140000,6.788
352
+ 2178,140400,7.9736
353
+ 2186,140800,8.5882
354
+ 2191,141200,9.2546
355
+ 2196,141600,15.4158
356
+ 2206,142000,7.1935
357
+ 2211,142400,13.2408
358
+ 2223,142800,4.9066
359
+ 2235,143200,4.5763
360
+ 2241,143600,10.3808
361
+ 2246,144000,10.8564
362
+ 2254,144400,7.7899
363
+ 2261,144800,8.8404
364
+ 2268,145200,8.8255
365
+ 2276,145600,7.4552
366
+ 2282,146000,11.216
367
+ 2289,146400,10.299
368
+ 2295,146800,9.2089
369
+ 2302,147200,8.5813
370
+ 2311,147600,5.5125
371
+ 2318,148000,7.7001
372
+ 2323,148400,11.1793
373
+ 2333,148800,3.7442
374
+ 2340,149200,10.413
375
+ 2345,149600,11.7132
376
+ 2350,150000,9.5357
377
+ 2357,150400,8.7914
378
+ 2363,150800,8.3319
379
+ 2370,151200,7.1484
380
+ 2376,151600,5.6768
381
+ 2385,152000,4.1424
382
+ 2390,152400,6.528
383
+ 2394,152800,7.7268
384
+ 2399,153200,8.4871
385
+ 2406,153600,6.8809
386
+ 2411,154000,8.3506
387
+ 2415,154400,11.0307
388
+ 2419,154800,10.397
389
+ 2427,155200,5.1522
390
+ 2434,155600,5.8036
391
+ 2439,156000,10.0986
392
+ 2445,156400,7.7494
393
+ 2452,156800,8.7844
394
+ 2458,157200,9.1953
395
+ 2464,157600,9.7125
396
+ 2469,158000,14.3923
397
+ 2475,158400,10.5696
398
+ 2479,158800,19.5742
399
+ 2488,159200,6.9904
400
+ 2494,159600,12.7756
401
+ 2499,160000,12.9402
402
+ 2506,160400,11.8509
403
+ 2510,160800,14.1441
404
+ 2522,161200,5.8916
405
+ 2530,161600,8.0032
406
+ 2536,162000,11.8227
407
+ 2540,162400,14.6518
408
+ 2548,162800,9.0002
409
+ 2555,163200,5.4754
410
+ 2560,163600,15.7747
411
+ 2568,164000,9.1886
412
+ 2572,164400,18.4005
413
+ 2580,164800,6.8589
414
+ 2586,165200,13.1998
415
+ 2591,165600,12.0732
416
+ 2599,166000,7.623
417
+ 2608,166400,6.2975
418
+ 2614,166800,10.325
419
+ 2622,167200,7.3189
420
+ 2628,167600,11.0537
421
+ 2636,168000,8.6657
422
+ 2642,168400,13.2533
423
+ 2650,168800,5.9973
424
+ 2660,169200,7.9659
425
+ 2665,169600,14.8961
426
+ 2673,170000,9.1592
427
+ 2680,170400,12.4329
428
+ 2687,170800,12.6719
429
+ 2694,171200,11.0606
430
+ 2702,171600,10.5086
431
+ 2709,172000,10.1179
432
+ 2716,172400,9.1879
433
+ 2723,172800,11.2291
434
+ 2728,173200,12.3325
435
+ 2734,173600,12.1718
436
+ 2741,174000,10.1937
437
+ 2748,174400,9.3497
438
+ 2753,174800,10.5969
439
+ 2761,175200,10.1798
440
+ 2767,175600,10.9461
441
+ 2772,176000,13.9387
442
+ 2777,176400,14.2562
443
+ 2783,176800,8.5326
444
+ 2789,177200,9.8383
445
+ 2794,177600,12.2766
446
+ 2800,178000,14.2094
447
+ 2804,178400,15.156
448
+ 2811,178800,6.4663
449
+ 2816,179200,15.0295
450
+ 2823,179600,10.1727
451
+ 2828,180000,11.4667
452
+ 2834,180400,11.0373
453
+ 2840,180800,11.0221
454
+ 2844,181200,18.4976
455
+ 2850,181600,9.7416
456
+ 2857,182000,8.77
457
+ 2863,182400,7.9617
458
+ 2867,182800,21.1088
459
+ 2872,183200,15.0862
460
+ 2880,183600,9.2334
461
+ 2885,184000,15.0575
462
+ 2891,184400,8.7998
463
+ 2898,184800,9.4749
464
+ 2903,185200,15.2583
465
+ 2908,185600,15.9073
466
+ 2913,186000,18.1103
467
+ 2920,186400,10.0962
468
+ 2925,186800,14.1606
469
+ 2931,187200,13.2483
470
+ 2936,187600,9.6115
471
+ 2942,188000,10.5395
472
+ 2948,188400,13.0603
473
+ 2952,188800,15.9296
474
+ 2958,189200,11.0247
475
+ 2964,189600,13.2235
476
+ 2973,190000,6.3575
477
+ 2978,190400,11.5839
478
+ 2983,190800,13.9557
479
+ 2989,191200,9.2667
480
+ 2995,191600,10.0113
481
+ 3000,192000,10.4833
482
+ 3006,192400,11.3706
483
+ 3012,192800,10.4763
484
+ 3018,193200,10.3391
485
+ 3022,193600,16.8413
486
+ 3030,194000,9.8189
487
+ 3042,194400,4.0255
488
+ 3049,194800,8.3469
489
+ 3058,195200,6.7786
490
+ 3066,195600,9.455
491
+ 3072,196000,9.361
492
+ 3078,196400,11.942
493
+ 3086,196800,8.1976
494
+ 3093,197200,8.8319
495
+ 3098,197600,10.2572
496
+ 3105,198000,10.4746
497
+ 3111,198400,9.7314
498
+ 3117,198800,13.439
499
+ 3121,199200,15.2623
500
+ 3127,199600,15.3384
501
+ 3137,200000,8.1123
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_2.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 19,400,1.6097
3
+ 38,800,1.6416
4
+ 61,1200,1.4003
5
+ 73,1600,2.5901
6
+ 81,2000,3.7322
7
+ 90,2400,3.1764
8
+ 95,2800,6.9871
9
+ 102,3200,4.2802
10
+ 109,3600,4.7436
11
+ 116,4000,4.2818
12
+ 120,4400,7.5307
13
+ 124,4800,6.3049
14
+ 132,5200,4.3085
15
+ 137,5600,6.2239
16
+ 141,6000,7.0954
17
+ 145,6400,6.7039
18
+ 150,6800,6.2469
19
+ 155,7200,6.36
20
+ 160,7600,6.1549
21
+ 164,8000,6.9545
22
+ 169,8400,7.0148
23
+ 175,8800,4.7251
24
+ 179,9200,7.9361
25
+ 184,9600,6.5351
26
+ 189,10000,6.2774
27
+ 195,10400,5.7017
28
+ 199,10800,7.825
29
+ 203,11200,6.8724
30
+ 207,11600,7.7336
31
+ 211,12000,7.5364
32
+ 215,12400,7.8172
33
+ 219,12800,7.9367
34
+ 223,13200,7.7311
35
+ 228,13600,7.2277
36
+ 232,14000,7.0049
37
+ 238,14400,5.1928
38
+ 242,14800,7.6765
39
+ 246,15200,7.4842
40
+ 250,15600,7.5239
41
+ 254,16000,7.1122
42
+ 260,16400,5.5361
43
+ 265,16800,7.1787
44
+ 272,17200,4.4245
45
+ 276,17600,6.2406
46
+ 280,18000,7.7256
47
+ 286,18400,5.6842
48
+ 291,18800,6.646
49
+ 297,19200,5.5573
50
+ 302,19600,4.9828
51
+ 308,20000,5.9812
52
+ 313,20400,6.4346
53
+ 320,20800,4.4117
54
+ 324,21200,6.6082
55
+ 330,21600,6.2485
56
+ 334,22000,7.8137
57
+ 339,22400,5.5175
58
+ 343,22800,7.8811
59
+ 347,23200,7.1971
60
+ 353,23600,5.196
61
+ 358,24000,6.0231
62
+ 364,24400,6.6306
63
+ 369,24800,5.7511
64
+ 374,25200,6.1309
65
+ 378,25600,7.6626
66
+ 382,26000,6.8129
67
+ 387,26400,6.4101
68
+ 391,26800,8.1621
69
+ 395,27200,7.6024
70
+ 399,27600,7.6139
71
+ 404,28000,7.434
72
+ 409,28400,7.5108
73
+ 413,28800,10.0048
74
+ 417,29200,8.9736
75
+ 423,29600,5.5721
76
+ 428,30000,5.6607
77
+ 433,30400,7.3198
78
+ 437,30800,7.5887
79
+ 441,31200,7.1714
80
+ 446,31600,5.5443
81
+ 452,32000,6.0095
82
+ 456,32400,6.5919
83
+ 461,32800,6.9332
84
+ 465,33200,5.9742
85
+ 472,33600,4.6549
86
+ 476,34000,6.7065
87
+ 481,34400,6.6351
88
+ 485,34800,7.3781
89
+ 491,35200,5.3901
90
+ 495,35600,7.481
91
+ 499,36000,7.3904
92
+ 504,36400,5.5203
93
+ 508,36800,6.6256
94
+ 513,37200,6.2715
95
+ 519,37600,5.4467
96
+ 523,38000,7.5914
97
+ 528,38400,5.8271
98
+ 535,38800,4.7497
99
+ 539,39200,6.705
100
+ 543,39600,7.2193
101
+ 547,40000,7.6591
102
+ 553,40400,5.4713
103
+ 559,40800,5.6278
104
+ 563,41200,7.0377
105
+ 567,41600,6.5975
106
+ 572,42000,6.4589
107
+ 577,42400,6.4962
108
+ 581,42800,7.5891
109
+ 585,43200,7.5598
110
+ 593,43600,3.9839
111
+ 597,44000,6.7188
112
+ 601,44400,6.9495
113
+ 606,44800,7.4883
114
+ 613,45200,3.7567
115
+ 617,45600,7.7218
116
+ 622,46000,6.69
117
+ 626,46400,7.9262
118
+ 630,46800,6.3034
119
+ 635,47200,6.0545
120
+ 639,47600,7.4195
121
+ 644,48000,7.13
122
+ 648,48400,7.0398
123
+ 652,48800,7.6392
124
+ 660,49200,3.7038
125
+ 667,49600,4.3578
126
+ 672,50000,5.9194
127
+ 678,50400,4.8665
128
+ 683,50800,6.3008
129
+ 691,51200,4.5975
130
+ 697,51600,5.1888
131
+ 702,52000,5.7734
132
+ 707,52400,7.4679
133
+ 715,52800,3.3284
134
+ 721,53200,6.0641
135
+ 727,53600,6.0441
136
+ 731,54000,7.6702
137
+ 736,54400,7.4219
138
+ 740,54800,6.222
139
+ 746,55200,6.4839
140
+ 751,55600,5.6146
141
+ 755,56000,7.5972
142
+ 761,56400,5.612
143
+ 765,56800,7.8148
144
+ 770,57200,5.7253
145
+ 774,57600,7.9334
146
+ 778,58000,8.3276
147
+ 783,58400,6.8913
148
+ 787,58800,7.2828
149
+ 793,59200,6.9596
150
+ 797,59600,8.2329
151
+ 803,60000,6.4227
152
+ 808,60400,6.7123
153
+ 812,60800,7.9976
154
+ 819,61200,5.5859
155
+ 826,61600,4.4553
156
+ 832,62000,6.7451
157
+ 837,62400,6.241
158
+ 843,62800,6.4673
159
+ 848,63200,6.9543
160
+ 855,63600,5.0913
161
+ 861,64000,7.2159
162
+ 865,64400,7.9432
163
+ 870,64800,7.8044
164
+ 875,65200,7.2243
165
+ 880,65600,7.7488
166
+ 886,66000,6.3843
167
+ 890,66400,9.9507
168
+ 894,66800,9.063
169
+ 899,67200,7.5112
170
+ 903,67600,7.6428
171
+ 908,68000,8.2787
172
+ 912,68400,8.5889
173
+ 918,68800,5.723
174
+ 922,69200,8.2975
175
+ 926,69600,7.3824
176
+ 931,70000,8.2068
177
+ 937,70400,7.7218
178
+ 941,70800,8.7229
179
+ 945,71200,10.2961
180
+ 950,71600,7.6121
181
+ 955,72000,7.8974
182
+ 960,72400,7.8978
183
+ 967,72800,5.6801
184
+ 979,73200,2.1174
185
+ 990,73600,3.7935
186
+ 995,74000,10.1194
187
+ 1000,74400,6.8977
188
+ 1006,74800,7.6098
189
+ 1011,75200,7.2811
190
+ 1017,75600,7.2474
191
+ 1023,76000,8.5673
192
+ 1029,76400,6.6197
193
+ 1035,76800,8.3262
194
+ 1041,77200,7.9771
195
+ 1047,77600,7.1542
196
+ 1051,78000,11.2335
197
+ 1056,78400,8.8322
198
+ 1062,78800,7.5779
199
+ 1066,79200,13.4042
200
+ 1071,79600,10.4175
201
+ 1076,80000,13.3333
202
+ 1080,80400,12.734
203
+ 1086,80800,11.1026
204
+ 1090,81200,14.7759
205
+ 1096,81600,8.6412
206
+ 1104,82000,6.3892
207
+ 1110,82400,9.8357
208
+ 1118,82800,7.1741
209
+ 1125,83200,7.8912
210
+ 1131,83600,8.455
211
+ 1135,84000,14.1645
212
+ 1140,84400,9.4645
213
+ 1146,84800,10.1664
214
+ 1153,85200,5.8022
215
+ 1160,85600,6.215
216
+ 1168,86000,4.1062
217
+ 1175,86400,6.1067
218
+ 1180,86800,6.3874
219
+ 1187,87200,6.3452
220
+ 1192,87600,6.9666
221
+ 1198,88000,5.1382
222
+ 1203,88400,6.1001
223
+ 1212,88800,4.8099
224
+ 1218,89200,7.4769
225
+ 1226,89600,7.5724
226
+ 1231,90000,10.0412
227
+ 1238,90400,8.6996
228
+ 1242,90800,13.2195
229
+ 1247,91200,11.4526
230
+ 1252,91600,12.0908
231
+ 1260,92000,6.7688
232
+ 1265,92400,10.449
233
+ 1270,92800,10.2558
234
+ 1277,93200,5.5992
235
+ 1284,93600,9.3672
236
+ 1292,94000,7.979
237
+ 1302,94400,6.4251
238
+ 1308,94800,8.6478
239
+ 1314,95200,12.1168
240
+ 1322,95600,6.8864
241
+ 1333,96000,5.5147
242
+ 1339,96400,9.0327
243
+ 1347,96800,6.0256
244
+ 1356,97200,4.7894
245
+ 1365,97600,5.5282
246
+ 1371,98000,7.7578
247
+ 1380,98400,8.3785
248
+ 1389,98800,5.239
249
+ 1395,99200,8.3681
250
+ 1400,99600,9.7949
251
+ 1406,100000,9.5216
252
+ 1414,100400,5.3299
253
+ 1420,100800,7.8871
254
+ 1428,101200,6.0441
255
+ 1434,101600,6.6909
256
+ 1444,102000,5.0341
257
+ 1450,102400,7.7649
258
+ 1461,102800,4.1021
259
+ 1472,103200,2.9832
260
+ 1482,103600,4.4576
261
+ 1489,104000,7.3442
262
+ 1497,104400,5.6498
263
+ 1505,104800,6.4131
264
+ 1511,105200,10.5803
265
+ 1516,105600,11.3073
266
+ 1520,106000,13.2675
267
+ 1527,106400,10.6688
268
+ 1535,106800,5.8265
269
+ 1540,107200,10.7056
270
+ 1545,107600,7.6742
271
+ 1553,108000,5.7264
272
+ 1560,108400,6.6821
273
+ 1567,108800,5.4216
274
+ 1573,109200,8.4766
275
+ 1578,109600,8.6739
276
+ 1582,110000,11.4666
277
+ 1586,110400,9.0832
278
+ 1594,110800,6.2276
279
+ 1599,111200,8.6866
280
+ 1606,111600,6.2615
281
+ 1614,112000,5.6982
282
+ 1621,112400,5.8051
283
+ 1626,112800,7.93
284
+ 1631,113200,10.595
285
+ 1636,113600,7.6407
286
+ 1640,114000,11.1847
287
+ 1647,114400,5.6078
288
+ 1651,114800,9.1446
289
+ 1658,115200,6.8432
290
+ 1662,115600,12.9911
291
+ 1671,116000,4.613
292
+ 1676,116400,11.0305
293
+ 1681,116800,8.694
294
+ 1688,117200,6.11
295
+ 1698,117600,6.4954
296
+ 1703,118000,9.7062
297
+ 1712,118400,5.8668
298
+ 1717,118800,7.5547
299
+ 1724,119200,8.3224
300
+ 1729,119600,8.329
301
+ 1737,120000,7.1094
302
+ 1742,120400,9.2663
303
+ 1747,120800,9.6127
304
+ 1757,121200,5.6769
305
+ 1762,121600,8.5658
306
+ 1770,122000,7.4468
307
+ 1776,122400,8.0238
308
+ 1782,122800,8.0927
309
+ 1790,123200,5.6296
310
+ 1798,123600,6.9246
311
+ 1806,124000,6.5561
312
+ 1813,124400,7.4058
313
+ 1821,124800,6.9491
314
+ 1833,125200,4.4584
315
+ 1839,125600,8.2732
316
+ 1848,126000,6.3892
317
+ 1853,126400,10.4589
318
+ 1858,126800,12.1972
319
+ 1867,127200,5.6719
320
+ 1874,127600,7.9428
321
+ 1880,128000,7.5333
322
+ 1885,128400,12.1847
323
+ 1889,128800,11.591
324
+ 1895,129200,9.8666
325
+ 1902,129600,9.6165
326
+ 1910,130000,8.469
327
+ 1917,130400,5.6391
328
+ 1924,130800,8.5008
329
+ 1931,131200,9.7077
330
+ 1935,131600,13.5229
331
+ 1939,132000,16.4664
332
+ 1944,132400,13.0046
333
+ 1949,132800,8.4371
334
+ 1955,133200,8.8647
335
+ 1959,133600,14.3521
336
+ 1963,134000,11.7871
337
+ 1968,134400,10.4688
338
+ 1974,134800,9.3431
339
+ 1979,135200,8.226
340
+ 1984,135600,10.8513
341
+ 1988,136000,10.6682
342
+ 1998,136400,4.4273
343
+ 2003,136800,12.778
344
+ 2011,137200,8.0067
345
+ 2017,137600,9.4886
346
+ 2027,138000,5.6532
347
+ 2033,138400,7.9827
348
+ 2039,138800,5.9282
349
+ 2045,139200,9.6567
350
+ 2053,139600,7.7935
351
+ 2059,140000,5.9489
352
+ 2064,140400,6.9939
353
+ 2070,140800,7.3466
354
+ 2075,141200,6.8939
355
+ 2079,141600,9.1796
356
+ 2085,142000,9.0719
357
+ 2089,142400,13.9325
358
+ 2094,142800,11.0623
359
+ 2099,143200,6.7284
360
+ 2107,143600,5.6728
361
+ 2115,144000,6.321
362
+ 2119,144400,10.3033
363
+ 2124,144800,8.3466
364
+ 2130,145200,6.2657
365
+ 2136,145600,5.7428
366
+ 2142,146000,7.4074
367
+ 2147,146400,7.8209
368
+ 2153,146800,7.3415
369
+ 2161,147200,5.2917
370
+ 2166,147600,12.5109
371
+ 2174,148000,5.0395
372
+ 2179,148400,10.3555
373
+ 2187,148800,5.349
374
+ 2192,149200,10.0253
375
+ 2198,149600,7.0379
376
+ 2205,150000,7.5125
377
+ 2211,150400,7.8038
378
+ 2217,150800,7.8167
379
+ 2221,151200,11.7039
380
+ 2229,151600,5.489
381
+ 2236,152000,6.0868
382
+ 2243,152400,6.2941
383
+ 2249,152800,6.6384
384
+ 2254,153200,8.3705
385
+ 2259,153600,8.5642
386
+ 2265,154000,6.8584
387
+ 2272,154400,7.3834
388
+ 2278,154800,8.2766
389
+ 2286,155200,5.1656
390
+ 2290,155600,12.9118
391
+ 2294,156000,11.8071
392
+ 2299,156400,8.9169
393
+ 2303,156800,8.9791
394
+ 2308,157200,9.3741
395
+ 2312,157600,11.1361
396
+ 2316,158000,11.7926
397
+ 2320,158400,11.7151
398
+ 2326,158800,8.0207
399
+ 2333,159200,6.0691
400
+ 2338,159600,9.577
401
+ 2344,160000,6.4232
402
+ 2350,160400,8.8049
403
+ 2354,160800,9.1235
404
+ 2359,161200,8.3861
405
+ 2365,161600,7.0742
406
+ 2369,162000,7.6221
407
+ 2373,162400,7.9897
408
+ 2378,162800,5.8857
409
+ 2384,163200,7.0723
410
+ 2389,163600,6.654
411
+ 2395,164000,7.1041
412
+ 2399,164400,8.4728
413
+ 2407,164800,4.6772
414
+ 2411,165200,7.5167
415
+ 2417,165600,7.8597
416
+ 2422,166000,9.3692
417
+ 2427,166400,8.0704
418
+ 2431,166800,10.7773
419
+ 2435,167200,9.905
420
+ 2440,167600,8.5513
421
+ 2444,168000,8.9629
422
+ 2450,168400,7.6352
423
+ 2457,168800,6.9678
424
+ 2463,169200,7.899
425
+ 2469,169600,7.6206
426
+ 2474,170000,8.0358
427
+ 2481,170400,7.7953
428
+ 2486,170800,10.2047
429
+ 2492,171200,12.0048
430
+ 2500,171600,5.3765
431
+ 2509,172000,5.1863
432
+ 2513,172400,12.2984
433
+ 2517,172800,11.7797
434
+ 2524,173200,7.875
435
+ 2534,173600,3.9569
436
+ 2539,174000,11.8781
437
+ 2544,174400,10.3608
438
+ 2550,174800,8.7275
439
+ 2556,175200,8.4987
440
+ 2561,175600,10.7181
441
+ 2566,176000,8.2619
442
+ 2571,176400,8.8454
443
+ 2575,176800,8.7559
444
+ 2580,177200,8.4039
445
+ 2584,177600,11.9876
446
+ 2592,178000,6.4634
447
+ 2597,178400,7.8467
448
+ 2602,178800,6.8881
449
+ 2607,179200,7.1522
450
+ 2612,179600,7.6053
451
+ 2620,180000,5.2739
452
+ 2628,180400,4.7562
453
+ 2636,180800,3.7301
454
+ 2641,181200,10.6831
455
+ 2645,181600,8.9943
456
+ 2652,182000,5.277
457
+ 2661,182400,4.275
458
+ 2671,182800,3.453
459
+ 2676,183200,8.5581
460
+ 2681,183600,7.2929
461
+ 2686,184000,12.0382
462
+ 2690,184400,12.133
463
+ 2694,184800,12.2167
464
+ 2701,185200,5.3062
465
+ 2706,185600,10.3605
466
+ 2714,186000,5.9351
467
+ 2718,186400,9.3307
468
+ 2723,186800,9.5625
469
+ 2730,187200,7.1449
470
+ 2735,187600,7.7182
471
+ 2741,188000,6.4756
472
+ 2745,188400,8.736
473
+ 2751,188800,8.123
474
+ 2757,189200,8.8964
475
+ 2761,189600,10.7888
476
+ 2765,190000,12.0779
477
+ 2769,190400,10.5991
478
+ 2775,190800,8.0868
479
+ 2780,191200,8.917
480
+ 2785,191600,9.9389
481
+ 2789,192000,11.5622
482
+ 2795,192400,8.386
483
+ 2800,192800,8.3721
484
+ 2805,193200,8.8166
485
+ 2811,193600,8.1763
486
+ 2817,194000,8.3255
487
+ 2823,194400,8.8563
488
+ 2827,194800,12.6773
489
+ 2833,195200,7.9132
490
+ 2837,195600,10.4681
491
+ 2844,196000,8.2131
492
+ 2849,196400,9.2795
493
+ 2855,196800,7.094
494
+ 2860,197200,10.5194
495
+ 2864,197600,12.9269
496
+ 2870,198000,7.9327
497
+ 2876,198400,7.223
498
+ 2881,198800,10.8269
499
+ 2885,199200,12.3889
500
+ 2892,199600,6.5769
501
+ 2897,200000,11.076
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_3.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 15,400,1.8261
3
+ 32,800,1.7685
4
+ 45,1200,2.4142
5
+ 51,1600,4.6704
6
+ 61,2000,3.1899
7
+ 67,2400,5.3735
8
+ 75,2800,4.1154
9
+ 82,3200,3.7725
10
+ 92,3600,3.7704
11
+ 103,4000,2.6905
12
+ 110,4400,4.0276
13
+ 117,4800,5.8064
14
+ 124,5200,4.4713
15
+ 132,5600,4.0093
16
+ 137,6000,5.9495
17
+ 142,6400,5.8014
18
+ 148,6800,5.8551
19
+ 155,7200,4.6169
20
+ 162,7600,4.2494
21
+ 170,8000,3.9722
22
+ 177,8400,3.9192
23
+ 186,8800,3.8894
24
+ 192,9200,5.2858
25
+ 196,9600,6.1068
26
+ 201,10000,7.4978
27
+ 206,10400,6.5813
28
+ 210,10800,7.0333
29
+ 216,11200,5.7853
30
+ 220,11600,6.9548
31
+ 226,12000,6.9295
32
+ 231,12400,8.6606
33
+ 235,12800,8.1602
34
+ 241,13200,5.5479
35
+ 248,13600,5.2991
36
+ 253,14000,5.5222
37
+ 259,14400,6.6097
38
+ 266,14800,5.7681
39
+ 272,15200,5.1424
40
+ 281,15600,3.9703
41
+ 289,16000,5.446
42
+ 296,16400,5.6469
43
+ 301,16800,8.1023
44
+ 309,17200,5.4118
45
+ 313,17600,10.0884
46
+ 318,18000,7.85
47
+ 324,18400,7.1867
48
+ 334,18800,4.1973
49
+ 341,19200,6.5775
50
+ 346,19600,8.8721
51
+ 353,20000,8.3356
52
+ 358,20400,8.4092
53
+ 364,20800,7.0598
54
+ 369,21200,9.7722
55
+ 375,21600,8.7407
56
+ 381,22000,7.1701
57
+ 387,22400,6.8198
58
+ 391,22800,11.2731
59
+ 399,23200,6.9213
60
+ 404,23600,7.6675
61
+ 408,24000,8.5927
62
+ 412,24400,9.5454
63
+ 416,24800,11.0606
64
+ 421,25200,11.0094
65
+ 426,25600,9.2321
66
+ 431,26000,7.2778
67
+ 436,26400,10.7602
68
+ 440,26800,10.2041
69
+ 444,27200,10.7586
70
+ 448,27600,8.5868
71
+ 452,28000,9.4227
72
+ 458,28400,8.901
73
+ 462,28800,9.059
74
+ 467,29200,9.2612
75
+ 472,29600,10.802
76
+ 478,30000,7.4041
77
+ 483,30400,8.8905
78
+ 489,30800,6.3011
79
+ 493,31200,12.0364
80
+ 497,31600,11.5981
81
+ 501,32000,11.5024
82
+ 505,32400,11.9104
83
+ 509,32800,11.9212
84
+ 513,33200,12.0185
85
+ 518,33600,9.4035
86
+ 522,34000,11.5176
87
+ 526,34400,11.0239
88
+ 534,34800,6.1402
89
+ 538,35200,9.7348
90
+ 546,35600,6.35
91
+ 551,36000,7.8883
92
+ 558,36400,7.7455
93
+ 563,36800,7.1019
94
+ 571,37200,6.6505
95
+ 576,37600,7.5348
96
+ 581,38000,12.1861
97
+ 586,38400,9.1601
98
+ 591,38800,8.1292
99
+ 596,39200,7.3226
100
+ 602,39600,9.3
101
+ 608,40000,10.1455
102
+ 614,40400,7.404
103
+ 620,40800,9.5543
104
+ 627,41200,8.0328
105
+ 636,41600,5.051
106
+ 648,42000,4.3144
107
+ 654,42400,8.7103
108
+ 661,42800,8.5619
109
+ 666,43200,9.0912
110
+ 671,43600,12.1562
111
+ 679,44000,6.8929
112
+ 683,44400,12.4673
113
+ 690,44800,7.4547
114
+ 700,45200,6.1627
115
+ 708,45600,5.2344
116
+ 712,46000,14.522
117
+ 718,46400,9.7264
118
+ 724,46800,9.4083
119
+ 731,47200,7.3673
120
+ 735,47600,10.918
121
+ 741,48000,9.7135
122
+ 746,48400,11.6226
123
+ 753,48800,6.5335
124
+ 760,49200,6.1922
125
+ 765,49600,11.59
126
+ 772,50000,7.6406
127
+ 779,50400,7.3931
128
+ 785,50800,8.8649
129
+ 790,51200,13.0236
130
+ 796,51600,9.1355
131
+ 802,52000,9.2798
132
+ 812,52400,4.6073
133
+ 818,52800,8.5625
134
+ 823,53200,8.0732
135
+ 829,53600,8.2494
136
+ 837,54000,5.0721
137
+ 849,54400,3.926
138
+ 857,54800,5.9843
139
+ 866,55200,5.4496
140
+ 872,55600,9.6436
141
+ 877,56000,9.8259
142
+ 882,56400,12.0831
143
+ 886,56800,11.8707
144
+ 892,57200,9.3723
145
+ 897,57600,8.75
146
+ 902,58000,9.1673
147
+ 908,58400,8.2213
148
+ 919,58800,3.6353
149
+ 929,59200,3.9628
150
+ 935,59600,6.8984
151
+ 942,60000,6.928
152
+ 948,60400,8.007
153
+ 954,60800,7.1696
154
+ 962,61200,6.8068
155
+ 970,61600,5.7813
156
+ 979,62000,6.7075
157
+ 990,62400,4.5979
158
+ 995,62800,11.0131
159
+ 1001,63200,9.9881
160
+ 1007,63600,8.771
161
+ 1013,64000,6.4708
162
+ 1020,64400,8.4602
163
+ 1024,64800,12.5658
164
+ 1029,65200,12.6734
165
+ 1033,65600,13.9195
166
+ 1037,66000,10.7454
167
+ 1043,66400,10.9443
168
+ 1048,66800,10.9429
169
+ 1053,67200,9.4126
170
+ 1059,67600,8.414
171
+ 1066,68000,6.8977
172
+ 1071,68400,8.4342
173
+ 1077,68800,6.9781
174
+ 1081,69200,9.3134
175
+ 1087,69600,8.1705
176
+ 1091,70000,8.8618
177
+ 1096,70400,10.7669
178
+ 1100,70800,10.662
179
+ 1104,71200,9.404
180
+ 1108,71600,10.9212
181
+ 1114,72000,7.7906
182
+ 1120,72400,6.9977
183
+ 1124,72800,10.0251
184
+ 1130,73200,8.4128
185
+ 1134,73600,9.9691
186
+ 1138,74000,11.4181
187
+ 1143,74400,8.6228
188
+ 1152,74800,5.3614
189
+ 1157,75200,8.1655
190
+ 1164,75600,7.1774
191
+ 1171,76000,5.9159
192
+ 1180,76400,4.0023
193
+ 1189,76800,4.7476
194
+ 1197,77200,5.5766
195
+ 1202,77600,8.0878
196
+ 1209,78000,6.6897
197
+ 1213,78400,13.7633
198
+ 1221,78800,6.482
199
+ 1230,79200,6.0141
200
+ 1234,79600,12.2026
201
+ 1242,80000,5.6537
202
+ 1251,80400,4.3695
203
+ 1259,80800,7.4921
204
+ 1264,81200,9.8077
205
+ 1269,81600,10.9606
206
+ 1275,82000,9.6273
207
+ 1280,82400,12.195
208
+ 1287,82800,7.4125
209
+ 1292,83200,12.1273
210
+ 1296,83600,13.0822
211
+ 1303,84000,7.0237
212
+ 1308,84400,11.6651
213
+ 1313,84800,9.4606
214
+ 1318,85200,12.5532
215
+ 1324,85600,9.9701
216
+ 1329,86000,11.7337
217
+ 1333,86400,14.7119
218
+ 1339,86800,9.0294
219
+ 1345,87200,7.5184
220
+ 1353,87600,7.3597
221
+ 1359,88000,10.1495
222
+ 1365,88400,8.7107
223
+ 1375,88800,4.0411
224
+ 1383,89200,5.6476
225
+ 1389,89600,6.5996
226
+ 1397,90000,8.0321
227
+ 1404,90400,8.9973
228
+ 1410,90800,8.9065
229
+ 1417,91200,5.712
230
+ 1427,91600,4.6027
231
+ 1436,92000,5.296
232
+ 1441,92400,7.6209
233
+ 1449,92800,7.4688
234
+ 1456,93200,5.0893
235
+ 1464,93600,6.9208
236
+ 1472,94000,7.4646
237
+ 1479,94400,8.2095
238
+ 1484,94800,9.7461
239
+ 1490,95200,8.781
240
+ 1494,95600,8.3202
241
+ 1499,96000,9.0412
242
+ 1506,96400,6.9789
243
+ 1511,96800,7.9199
244
+ 1517,97200,10.6694
245
+ 1524,97600,6.481
246
+ 1532,98000,6.7254
247
+ 1541,98400,6.2726
248
+ 1547,98800,7.5851
249
+ 1555,99200,5.3696
250
+ 1560,99600,10.4877
251
+ 1567,100000,8.8012
252
+ 1577,100400,5.5168
253
+ 1584,100800,10.155
254
+ 1588,101200,8.5114
255
+ 1595,101600,6.3359
256
+ 1602,102000,6.6452
257
+ 1608,102400,6.8052
258
+ 1612,102800,8.0109
259
+ 1617,103200,5.8693
260
+ 1621,103600,8.5857
261
+ 1626,104000,9.6799
262
+ 1634,104400,5.6426
263
+ 1639,104800,6.5151
264
+ 1644,105200,7.8849
265
+ 1648,105600,8.9338
266
+ 1654,106000,5.6678
267
+ 1659,106400,6.3244
268
+ 1665,106800,5.6218
269
+ 1669,107200,9.5578
270
+ 1676,107600,5.3338
271
+ 1686,108000,4.6969
272
+ 1693,108400,5.4701
273
+ 1698,108800,10.4455
274
+ 1704,109200,8.6891
275
+ 1710,109600,10.0138
276
+ 1717,110000,7.5448
277
+ 1726,110400,7.8171
278
+ 1731,110800,10.4446
279
+ 1739,111200,8.1385
280
+ 1748,111600,6.0488
281
+ 1756,112000,7.1346
282
+ 1761,112400,9.6157
283
+ 1767,112800,9.1395
284
+ 1774,113200,8.8882
285
+ 1782,113600,8.0369
286
+ 1789,114000,9.7018
287
+ 1795,114400,8.5519
288
+ 1802,114800,12.4735
289
+ 1810,115200,6.0752
290
+ 1815,115600,11.7469
291
+ 1824,116000,5.4505
292
+ 1829,116400,9.3351
293
+ 1835,116800,10.7987
294
+ 1840,117200,15.1342
295
+ 1846,117600,14.5398
296
+ 1853,118000,10.6334
297
+ 1861,118400,11.3101
298
+ 1866,118800,14.8907
299
+ 1873,119200,9.6076
300
+ 1882,119600,7.7126
301
+ 1893,120000,4.7907
302
+ 1901,120400,6.0066
303
+ 1906,120800,15.4955
304
+ 1911,121200,13.3978
305
+ 1919,121600,9.6642
306
+ 1924,122000,15.7393
307
+ 1928,122400,18.9361
308
+ 1932,122800,19.4331
309
+ 1940,123200,8.6073
310
+ 1947,123600,10.161
311
+ 1954,124000,8.8061
312
+ 1959,124400,13.5384
313
+ 1966,124800,9.9289
314
+ 1981,125200,4.1506
315
+ 1991,125600,8.5539
316
+ 1995,126000,20.6911
317
+ 2002,126400,11.9305
318
+ 2007,126800,16.2045
319
+ 2016,127200,6.5353
320
+ 2021,127600,17.3839
321
+ 2027,128000,13.1571
322
+ 2035,128400,11.3393
323
+ 2039,128800,22.3028
324
+ 2047,129200,12.8433
325
+ 2055,129600,12.7583
326
+ 2066,130000,6.9837
327
+ 2071,130400,18.0303
328
+ 2076,130800,18.2896
329
+ 2084,131200,8.7988
330
+ 2093,131600,8.7713
331
+ 2098,132000,11.0877
332
+ 2109,132400,7.8183
333
+ 2115,132800,12.4679
334
+ 2124,133200,10.5047
335
+ 2137,133600,6.17
336
+ 2142,134000,20.3565
337
+ 2148,134400,10.4538
338
+ 2154,134800,12.7331
339
+ 2161,135200,12.5367
340
+ 2171,135600,5.7754
341
+ 2178,136000,13.3435
342
+ 2182,136400,13.3376
343
+ 2195,136800,5.0278
344
+ 2203,137200,8.039
345
+ 2215,137600,5.0622
346
+ 2225,138000,7.6281
347
+ 2232,138400,12.4199
348
+ 2243,138800,5.7324
349
+ 2249,139200,14.5818
350
+ 2255,139600,14.0929
351
+ 2262,140000,13.6329
352
+ 2267,140400,18.3515
353
+ 2272,140800,18.0695
354
+ 2280,141200,12.0349
355
+ 2287,141600,13.6652
356
+ 2296,142000,9.2929
357
+ 2305,142400,10.1985
358
+ 2312,142800,12.7522
359
+ 2323,143200,7.2459
360
+ 2331,143600,8.9751
361
+ 2338,144000,11.4881
362
+ 2344,144400,15.2227
363
+ 2351,144800,12.8927
364
+ 2358,145200,10.6543
365
+ 2362,145600,22.496
366
+ 2368,146000,13.9616
367
+ 2373,146400,18.1932
368
+ 2378,146800,16.1787
369
+ 2382,147200,21.2142
370
+ 2386,147600,22.1002
371
+ 2396,148000,8.9528
372
+ 2401,148400,15.8869
373
+ 2408,148800,13.7149
374
+ 2413,149200,13.7033
375
+ 2419,149600,17.2193
376
+ 2425,150000,11.3894
377
+ 2432,150400,13.8544
378
+ 2437,150800,17.5939
379
+ 2444,151200,12.8075
380
+ 2449,151600,12.1515
381
+ 2457,152000,10.4033
382
+ 2465,152400,11.4859
383
+ 2470,152800,14.4762
384
+ 2477,153200,12.3627
385
+ 2483,153600,14.8347
386
+ 2488,154000,18.2382
387
+ 2497,154400,9.2311
388
+ 2501,154800,19.7235
389
+ 2509,155200,13.3697
390
+ 2515,155600,11.9598
391
+ 2525,156000,7.2526
392
+ 2534,156400,9.3025
393
+ 2545,156800,8.9835
394
+ 2551,157200,12.1765
395
+ 2558,157600,14.0303
396
+ 2564,158000,13.4739
397
+ 2573,158400,9.8322
398
+ 2578,158800,19.6338
399
+ 2584,159200,15.7125
400
+ 2588,159600,17.0086
401
+ 2594,160000,14.7127
402
+ 2598,160400,23.1588
403
+ 2607,160800,11.0373
404
+ 2615,161200,9.348
405
+ 2619,161600,21.6514
406
+ 2624,162000,12.9316
407
+ 2631,162400,12.1088
408
+ 2636,162800,20.0918
409
+ 2640,163200,18.6887
410
+ 2644,163600,19.3577
411
+ 2653,164000,8.5057
412
+ 2662,164400,8.0083
413
+ 2668,164800,15.0007
414
+ 2676,165200,8.8861
415
+ 2682,165600,15.3621
416
+ 2689,166000,13.6995
417
+ 2696,166400,10.5381
418
+ 2701,166800,19.4263
419
+ 2708,167200,12.0695
420
+ 2713,167600,11.9025
421
+ 2719,168000,10.0897
422
+ 2725,168400,15.0383
423
+ 2731,168800,14.8992
424
+ 2735,169200,14.9242
425
+ 2739,169600,20.4302
426
+ 2745,170000,15.2987
427
+ 2750,170400,16.7812
428
+ 2754,170800,16.0345
429
+ 2759,171200,16.4285
430
+ 2768,171600,9.1208
431
+ 2780,172000,5.583
432
+ 2787,172400,8.2014
433
+ 2793,172800,10.1961
434
+ 2798,173200,17.1725
435
+ 2806,173600,9.115
436
+ 2814,174000,9.2754
437
+ 2821,174400,10.8946
438
+ 2827,174800,11.5879
439
+ 2834,175200,10.3869
440
+ 2840,175600,13.8918
441
+ 2845,176000,12.5769
442
+ 2854,176400,10.322
443
+ 2863,176800,6.8967
444
+ 2869,177200,17.4846
445
+ 2874,177600,19.6151
446
+ 2881,178000,12.9361
447
+ 2886,178400,18.2368
448
+ 2892,178800,12.8876
449
+ 2898,179200,12.3181
450
+ 2903,179600,17.6907
451
+ 2908,180000,15.7174
452
+ 2915,180400,11.7662
453
+ 2920,180800,17.438
454
+ 2925,181200,14.2649
455
+ 2931,181600,12.5882
456
+ 2936,182000,17.2888
457
+ 2942,182400,15.7864
458
+ 2946,182800,19.7236
459
+ 2952,183200,14.7757
460
+ 2957,183600,13.2554
461
+ 2962,184000,16.9161
462
+ 2966,184400,19.4477
463
+ 2971,184800,14.806
464
+ 2976,185200,14.7174
465
+ 2981,185600,14.6584
466
+ 2985,186000,13.1555
467
+ 2993,186400,8.2998
468
+ 2999,186800,10.4079
469
+ 3004,187200,14.5865
470
+ 3011,187600,9.0036
471
+ 3015,188000,13.7298
472
+ 3022,188400,8.8899
473
+ 3026,188800,15.7034
474
+ 3032,189200,11.7676
475
+ 3036,189600,17.0897
476
+ 3044,190000,10.1182
477
+ 3049,190400,13.9028
478
+ 3054,190800,16.7113
479
+ 3059,191200,16.4022
480
+ 3064,191600,18.3592
481
+ 3069,192000,17.6439
482
+ 3074,192400,15.5535
483
+ 3079,192800,15.6137
484
+ 3085,193200,14.7975
485
+ 3091,193600,11.9988
486
+ 3099,194000,10.8644
487
+ 3106,194400,14.6474
488
+ 3113,194800,12.833
489
+ 3117,195200,22.6677
490
+ 3121,195600,21.1913
491
+ 3126,196000,17.2308
492
+ 3131,196400,14.7737
493
+ 3140,196800,6.5658
494
+ 3147,197200,8.1251
495
+ 3155,197600,8.3248
496
+ 3162,198000,10.0096
497
+ 3168,198400,9.34
498
+ 3175,198800,9.7054
499
+ 3181,199200,9.9748
500
+ 3186,199600,11.6185
501
+ 3191,200000,10.6864
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_4.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 30,400,0.8897
3
+ 47,800,1.6985
4
+ 70,1200,1.5344
5
+ 81,1600,2.8837
6
+ 93,2000,2.7853
7
+ 99,2400,4.9228
8
+ 105,2800,4.8879
9
+ 109,3200,6.7205
10
+ 116,3600,5.1037
11
+ 121,4000,6.8188
12
+ 126,4400,5.7358
13
+ 130,4800,7.5711
14
+ 136,5200,5.2419
15
+ 141,5600,5.8543
16
+ 147,6000,5.2331
17
+ 152,6400,4.9293
18
+ 161,6800,4.0985
19
+ 166,7200,5.2082
20
+ 172,7600,5.3103
21
+ 177,8000,6.0557
22
+ 181,8400,6.9039
23
+ 188,8800,4.5342
24
+ 192,9200,9.1478
25
+ 199,9600,5.3605
26
+ 206,10000,5.1273
27
+ 211,10400,7.8887
28
+ 216,10800,6.2693
29
+ 220,11200,7.3964
30
+ 224,11600,7.1054
31
+ 230,12000,5.4118
32
+ 234,12400,7.4287
33
+ 238,12800,7.4478
34
+ 245,13200,4.4117
35
+ 249,13600,7.2105
36
+ 253,14000,6.7015
37
+ 257,14400,7.4679
38
+ 262,14800,6.5292
39
+ 267,15200,6.025
40
+ 271,15600,7.6356
41
+ 275,16000,7.6501
42
+ 280,16400,7.0316
43
+ 284,16800,8.6843
44
+ 289,17200,6.7495
45
+ 294,17600,6.023
46
+ 299,18000,6.415
47
+ 305,18400,5.4163
48
+ 311,18800,5.4882
49
+ 316,19200,6.2097
50
+ 320,19600,8.0329
51
+ 324,20000,7.5882
52
+ 329,20400,7.3095
53
+ 333,20800,7.5687
54
+ 339,21200,6.6201
55
+ 343,21600,9.5695
56
+ 349,22000,6.358
57
+ 354,22400,7.729
58
+ 362,22800,3.8919
59
+ 367,23200,6.2263
60
+ 372,23600,8.9296
61
+ 377,24000,8.9107
62
+ 383,24400,6.8109
63
+ 391,24800,4.3733
64
+ 397,25200,7.8942
65
+ 402,25600,8.1473
66
+ 408,26000,6.342
67
+ 414,26400,6.6643
68
+ 419,26800,8.5145
69
+ 423,27200,9.8487
70
+ 427,27600,8.3884
71
+ 432,28000,8.1417
72
+ 438,28400,6.5363
73
+ 442,28800,8.3084
74
+ 447,29200,7.5203
75
+ 451,29600,8.0109
76
+ 455,30000,7.1375
77
+ 459,30400,6.972
78
+ 464,30800,7.2792
79
+ 468,31200,8.3772
80
+ 472,31600,8.6912
81
+ 476,32000,7.6424
82
+ 480,32400,8.1047
83
+ 484,32800,6.6454
84
+ 489,33200,7.4736
85
+ 493,33600,8.1904
86
+ 497,34000,7.1456
87
+ 503,34400,6.2541
88
+ 507,34800,7.7885
89
+ 511,35200,7.3507
90
+ 515,35600,8.0471
91
+ 520,36000,8.5436
92
+ 524,36400,6.8725
93
+ 529,36800,8.4028
94
+ 535,37200,5.2433
95
+ 542,37600,4.7139
96
+ 546,38000,7.3213
97
+ 555,38400,3.8831
98
+ 561,38800,5.6601
99
+ 568,39200,4.7948
100
+ 576,39600,4.6981
101
+ 584,40000,4.3181
102
+ 589,40400,7.5472
103
+ 593,40800,9.3392
104
+ 602,41200,3.9924
105
+ 609,41600,7.1339
106
+ 615,42000,6.7132
107
+ 620,42400,6.7015
108
+ 628,42800,5.4925
109
+ 636,43200,3.7468
110
+ 644,43600,4.3569
111
+ 651,44000,5.8671
112
+ 655,44400,8.3115
113
+ 660,44800,9.1009
114
+ 665,45200,7.2625
115
+ 672,45600,5.378
116
+ 678,46000,5.686
117
+ 684,46400,5.8378
118
+ 688,46800,7.674
119
+ 693,47200,7.7574
120
+ 697,47600,9.4904
121
+ 706,48000,4.0155
122
+ 712,48400,9.193
123
+ 718,48800,6.0672
124
+ 723,49200,8.703
125
+ 729,49600,6.4219
126
+ 737,50000,5.2146
127
+ 742,50400,7.6968
128
+ 747,50800,9.559
129
+ 753,51200,6.7111
130
+ 758,51600,9.7201
131
+ 764,52000,7.5954
132
+ 770,52400,8.0675
133
+ 775,52800,7.1163
134
+ 782,53200,5.3886
135
+ 786,53600,10.9581
136
+ 791,54000,9.5825
137
+ 800,54400,4.9313
138
+ 808,54800,3.2748
139
+ 813,55200,9.4975
140
+ 819,55600,8.5919
141
+ 828,56000,4.0659
142
+ 834,56400,6.4677
143
+ 839,56800,8.6157
144
+ 847,57200,7.6231
145
+ 854,57600,6.1867
146
+ 864,58000,5.138
147
+ 875,58400,4.1107
148
+ 884,58800,4.6541
149
+ 890,59200,8.6775
150
+ 898,59600,4.5193
151
+ 903,60000,10.8015
152
+ 909,60400,7.2792
153
+ 916,60800,6.9898
154
+ 920,61200,9.2429
155
+ 926,61600,7.8279
156
+ 930,62000,9.559
157
+ 938,62400,6.2201
158
+ 942,62800,12.4695
159
+ 949,63200,6.0011
160
+ 955,63600,7.5678
161
+ 960,64000,8.5841
162
+ 965,64400,8.8059
163
+ 969,64800,9.559
164
+ 974,65200,8.137
165
+ 979,65600,6.2258
166
+ 985,66000,6.0418
167
+ 990,66400,7.6972
168
+ 994,66800,10.6031
169
+ 999,67200,6.4527
170
+ 1004,67600,7.6003
171
+ 1009,68000,8.036
172
+ 1014,68400,11.559
173
+ 1018,68800,9.9028
174
+ 1024,69200,8.8209
175
+ 1030,69600,6.8682
176
+ 1034,70000,9.1513
177
+ 1039,70400,8.8808
178
+ 1045,70800,6.2892
179
+ 1052,71200,6.6137
180
+ 1056,71600,9.1258
181
+ 1061,72000,8.2712
182
+ 1069,72400,5.1346
183
+ 1073,72800,9.3301
184
+ 1079,73200,8.6006
185
+ 1083,73600,9.7199
186
+ 1088,74000,8.5393
187
+ 1093,74400,9.4136
188
+ 1098,74800,9.2309
189
+ 1104,75200,9.9483
190
+ 1111,75600,6.868
191
+ 1117,76000,7.3642
192
+ 1123,76400,8.7512
193
+ 1128,76800,7.7363
194
+ 1133,77200,11.2048
195
+ 1138,77600,8.7672
196
+ 1142,78000,11.2022
197
+ 1148,78400,8.6267
198
+ 1156,78800,5.7085
199
+ 1165,79200,5.2502
200
+ 1171,79600,9.1847
201
+ 1181,80000,3.8257
202
+ 1187,80400,9.0944
203
+ 1193,80800,7.8396
204
+ 1204,81200,4.6099
205
+ 1214,81600,4.2423
206
+ 1223,82000,4.2463
207
+ 1231,82400,6.0866
208
+ 1236,82800,9.3631
209
+ 1244,83200,6.0139
210
+ 1250,83600,9.3664
211
+ 1258,84000,5.5445
212
+ 1263,84400,9.2419
213
+ 1270,84800,6.5126
214
+ 1277,85200,5.3284
215
+ 1284,85600,4.7442
216
+ 1289,86000,6.6238
217
+ 1295,86400,5.9787
218
+ 1300,86800,6.273
219
+ 1304,87200,7.7552
220
+ 1310,87600,6.0625
221
+ 1315,88000,6.2442
222
+ 1320,88400,7.2193
223
+ 1326,88800,5.8607
224
+ 1333,89200,5.8177
225
+ 1338,89600,5.6992
226
+ 1344,90000,6.309
227
+ 1350,90400,7.4904
228
+ 1357,90800,5.3341
229
+ 1362,91200,9.1276
230
+ 1370,91600,5.6335
231
+ 1377,92000,5.5404
232
+ 1382,92400,10.4014
233
+ 1387,92800,8.972
234
+ 1393,93200,7.6199
235
+ 1400,93600,7.0028
236
+ 1408,94000,6.7953
237
+ 1417,94400,4.607
238
+ 1425,94800,6.7686
239
+ 1431,95200,6.3672
240
+ 1437,95600,7.3133
241
+ 1442,96000,5.5286
242
+ 1449,96400,8.1326
243
+ 1454,96800,6.6459
244
+ 1459,97200,9.6138
245
+ 1465,97600,8.3167
246
+ 1470,98000,11.7781
247
+ 1474,98400,17.2643
248
+ 1481,98800,9.2478
249
+ 1490,99200,8.6222
250
+ 1497,99600,10.0905
251
+ 1504,100000,7.0319
252
+ 1510,100400,11.7434
253
+ 1515,100800,12.7016
254
+ 1519,101200,14.5775
255
+ 1525,101600,7.4347
256
+ 1531,102000,17.0148
257
+ 1535,102400,21.5497
258
+ 1541,102800,13.3595
259
+ 1546,103200,20.2214
260
+ 1553,103600,12.2627
261
+ 1562,104000,10.1528
262
+ 1572,104400,7.0324
263
+ 1577,104800,13.988
264
+ 1587,105200,9.7205
265
+ 1591,105600,22.2021
266
+ 1599,106000,12.8697
267
+ 1611,106400,6.7423
268
+ 1618,106800,9.1311
269
+ 1624,107200,13.5325
270
+ 1630,107600,15.3574
271
+ 1637,108000,11.4605
272
+ 1647,108400,8.1593
273
+ 1657,108800,7.3692
274
+ 1664,109200,12.4204
275
+ 1671,109600,16.0635
276
+ 1677,110000,11.0747
277
+ 1687,110400,10.0776
278
+ 1695,110800,9.713
279
+ 1704,111200,6.6402
280
+ 1709,111600,16.4947
281
+ 1714,112000,12.0573
282
+ 1720,112400,12.4928
283
+ 1726,112800,16.9818
284
+ 1731,113200,16.4082
285
+ 1735,113600,19.4684
286
+ 1741,114000,17.2942
287
+ 1746,114400,17.3803
288
+ 1752,114800,14.3429
289
+ 1759,115200,15.4686
290
+ 1764,115600,18.3797
291
+ 1771,116000,10.6607
292
+ 1778,116400,12.9278
293
+ 1783,116800,21.2477
294
+ 1789,117200,12.1737
295
+ 1795,117600,15.1248
296
+ 1801,118000,11.4594
297
+ 1808,118400,11.8572
298
+ 1816,118800,8.6953
299
+ 1822,119200,12.9991
300
+ 1830,119600,8.346
301
+ 1835,120000,16.5775
302
+ 1840,120400,18.7012
303
+ 1845,120800,18.5211
304
+ 1852,121200,12.3151
305
+ 1860,121600,11.191
306
+ 1868,122000,10.6305
307
+ 1875,122400,12.1361
308
+ 1883,122800,12.0561
309
+ 1887,123200,21.1206
310
+ 1895,123600,10.3102
311
+ 1901,124000,15.5468
312
+ 1905,124400,21.1214
313
+ 1913,124800,12.598
314
+ 1919,125200,8.6702
315
+ 1923,125600,19.5976
316
+ 1928,126000,17.347
317
+ 1936,126400,12.0519
318
+ 1944,126800,6.2953
319
+ 1949,127200,13.6435
320
+ 1956,127600,9.3424
321
+ 1960,128000,22.6692
322
+ 1966,128400,12.2863
323
+ 1973,128800,15.4013
324
+ 1978,129200,17.9858
325
+ 1988,129600,7.2154
326
+ 1996,130000,10.964
327
+ 2004,130400,10.9658
328
+ 2009,130800,16.1921
329
+ 2015,131200,19.8994
330
+ 2020,131600,12.5598
331
+ 2026,132000,18.5603
332
+ 2034,132400,8.9442
333
+ 2039,132800,15.7247
334
+ 2044,133200,19.6043
335
+ 2048,133600,22.708
336
+ 2055,134000,12.1769
337
+ 2059,134400,30.2886
338
+ 2064,134800,19.3976
339
+ 2069,135200,24.011
340
+ 2075,135600,22.3232
341
+ 2079,136000,22.4054
342
+ 2087,136400,14.8207
343
+ 2095,136800,14.1154
344
+ 2102,137200,13.3378
345
+ 2106,137600,22.9892
346
+ 2112,138000,19.1975
347
+ 2119,138400,16.2562
348
+ 2125,138800,16.5325
349
+ 2134,139200,9.7804
350
+ 2143,139600,12.9261
351
+ 2149,140000,15.1729
352
+ 2157,140400,11.4505
353
+ 2163,140800,16.225
354
+ 2168,141200,15.0464
355
+ 2175,141600,12.2286
356
+ 2181,142000,14.5324
357
+ 2187,142400,17.9193
358
+ 2192,142800,21.9792
359
+ 2202,143200,7.5693
360
+ 2214,143600,7.0395
361
+ 2219,144000,20.2988
362
+ 2230,144400,8.1503
363
+ 2237,144800,12.8959
364
+ 2246,145200,11.8272
365
+ 2254,145600,15.8534
366
+ 2259,146000,20.5079
367
+ 2266,146400,10.7379
368
+ 2271,146800,21.599
369
+ 2279,147200,12.679
370
+ 2284,147600,14.8514
371
+ 2291,148000,8.6118
372
+ 2297,148400,7.5502
373
+ 2306,148800,5.1645
374
+ 2313,149200,10.6152
375
+ 2319,149600,15.1497
376
+ 2324,150000,13.6594
377
+ 2331,150400,10.1251
378
+ 2337,150800,10.9294
379
+ 2345,151200,5.0712
380
+ 2350,151600,13.3293
381
+ 2359,152000,6.394
382
+ 2370,152400,4.9969
383
+ 2379,152800,7.9595
384
+ 2391,153200,4.0272
385
+ 2398,153600,7.3762
386
+ 2407,154000,7.3333
387
+ 2413,154400,10.8586
388
+ 2423,154800,9.7345
389
+ 2432,155200,7.9822
390
+ 2439,155600,10.3486
391
+ 2450,156000,6.3284
392
+ 2458,156400,9.4372
393
+ 2473,156800,3.132
394
+ 2481,157200,10.3754
395
+ 2487,157600,10.447
396
+ 2493,158000,15.9101
397
+ 2503,158400,8.3842
398
+ 2510,158800,15.4866
399
+ 2518,159200,11.682
400
+ 2526,159600,11.0361
401
+ 2537,160000,8.7871
402
+ 2545,160400,11.1971
403
+ 2555,160800,4.4022
404
+ 2562,161200,13.1779
405
+ 2568,161600,12.9045
406
+ 2579,162000,8.0949
407
+ 2587,162400,11.4998
408
+ 2597,162800,7.2315
409
+ 2604,163200,14.4484
410
+ 2609,163600,15.2563
411
+ 2622,164000,7.1052
412
+ 2631,164400,8.0708
413
+ 2636,164800,15.3412
414
+ 2646,165200,7.0698
415
+ 2654,165600,10.9479
416
+ 2661,166000,12.6783
417
+ 2666,166400,16.1794
418
+ 2671,166800,18.6718
419
+ 2676,167200,13.5239
420
+ 2681,167600,12.3507
421
+ 2691,168000,7.2502
422
+ 2700,168400,6.2099
423
+ 2707,168800,13.9091
424
+ 2712,169200,16.7988
425
+ 2717,169600,22.2866
426
+ 2728,170000,8.0224
427
+ 2738,170400,11.7132
428
+ 2743,170800,22.9338
429
+ 2750,171200,15.1354
430
+ 2762,171600,8.9432
431
+ 2768,172000,14.02
432
+ 2773,172400,19.0923
433
+ 2780,172800,14.6205
434
+ 2787,173200,12.9528
435
+ 2796,173600,11.7497
436
+ 2803,174000,14.0874
437
+ 2808,174400,21.8773
438
+ 2813,174800,16.507
439
+ 2819,175200,19.8032
440
+ 2827,175600,13.8242
441
+ 2833,176000,19.0166
442
+ 2838,176400,27.172
443
+ 2846,176800,11.3364
444
+ 2851,177200,19.2692
445
+ 2857,177600,21.4003
446
+ 2865,178000,13.1762
447
+ 2871,178400,20.7159
448
+ 2878,178800,12.615
449
+ 2882,179200,17.292
450
+ 2886,179600,22.37
451
+ 2893,180000,16.9824
452
+ 2903,180400,7.4276
453
+ 2912,180800,12.7024
454
+ 2918,181200,14.4444
455
+ 2926,181600,14.2302
456
+ 2932,182000,18.124
457
+ 2938,182400,13.5542
458
+ 2943,182800,33.0073
459
+ 2950,183200,12.0173
460
+ 2954,183600,22.766
461
+ 2959,184000,16.2093
462
+ 2965,184400,15.9019
463
+ 2972,184800,13.7782
464
+ 2976,185200,26.2554
465
+ 2985,185600,10.2687
466
+ 2991,186000,14.767
467
+ 2998,186400,12.8508
468
+ 3002,186800,19.9929
469
+ 3008,187200,16.2693
470
+ 3016,187600,14.6283
471
+ 3026,188000,9.5896
472
+ 3034,188400,11.7475
473
+ 3041,188800,12.2039
474
+ 3045,189200,23.8742
475
+ 3056,189600,8.3229
476
+ 3063,190000,11.436
477
+ 3070,190400,10.1774
478
+ 3077,190800,10.9202
479
+ 3082,191200,16.139
480
+ 3087,191600,17.0197
481
+ 3096,192000,6.3408
482
+ 3103,192400,10.6383
483
+ 3108,192800,14.7467
484
+ 3113,193200,16.4579
485
+ 3121,193600,9.5428
486
+ 3125,194000,22.9954
487
+ 3129,194400,11.4853
488
+ 3136,194800,8.377
489
+ 3141,195200,13.0133
490
+ 3145,195600,14.8357
491
+ 3150,196000,20.832
492
+ 3159,196400,8.4116
493
+ 3166,196800,16.5597
494
+ 3178,197200,6.8153
495
+ 3186,197600,11.2551
496
+ 3197,198000,9.8681
497
+ 3202,198400,16.0912
498
+ 3210,198800,13.4439
499
+ 3216,199200,11.4534
500
+ 3222,199600,16.3593
501
+ 3229,200000,11.4777
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_5.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 25,400,1.1394
3
+ 48,800,1.3002
4
+ 55,1200,4.2174
5
+ 65,1600,3.1185
6
+ 72,2000,4.6995
7
+ 77,2400,5.4912
8
+ 83,2800,5.3572
9
+ 87,3200,6.4659
10
+ 93,3600,5.7893
11
+ 97,4000,7.5009
12
+ 101,4400,7.0671
13
+ 105,4800,6.9336
14
+ 109,5200,7.0397
15
+ 114,5600,6.2202
16
+ 120,6000,4.966
17
+ 126,6400,5.4569
18
+ 131,6800,5.7776
19
+ 135,7200,7.3573
20
+ 139,7600,6.3645
21
+ 144,8000,6.1568
22
+ 148,8400,7.3496
23
+ 152,8800,6.4834
24
+ 157,9200,6.0306
25
+ 162,9600,6.5141
26
+ 167,10000,6.2576
27
+ 172,10400,5.8399
28
+ 176,10800,5.7246
29
+ 182,11200,5.6336
30
+ 186,11600,6.3594
31
+ 192,12000,5.9243
32
+ 196,12400,7.3959
33
+ 200,12800,7.3087
34
+ 204,13200,7.4288
35
+ 208,13600,6.0092
36
+ 212,14000,7.4051
37
+ 219,14400,5.0751
38
+ 223,14800,6.1337
39
+ 228,15200,6.0328
40
+ 233,15600,6.2323
41
+ 237,16000,7.2947
42
+ 242,16400,6.21
43
+ 247,16800,5.2467
44
+ 251,17200,7.301
45
+ 256,17600,6.7575
46
+ 260,18000,7.3051
47
+ 264,18400,7.3659
48
+ 270,18800,4.9364
49
+ 275,19200,4.7764
50
+ 280,19600,6.6525
51
+ 286,20000,5.1666
52
+ 290,20400,7.305
53
+ 295,20800,5.3332
54
+ 301,21200,5.8009
55
+ 306,21600,6.3444
56
+ 311,22000,5.38
57
+ 316,22400,5.8886
58
+ 321,22800,5.9808
59
+ 326,23200,6.1518
60
+ 331,23600,6.2596
61
+ 335,24000,7.4699
62
+ 340,24400,5.0459
63
+ 344,24800,7.1979
64
+ 348,25200,7.1906
65
+ 354,25600,5.5289
66
+ 358,26000,7.4249
67
+ 362,26400,7.3209
68
+ 366,26800,6.8339
69
+ 371,27200,6.2896
70
+ 375,27600,5.8839
71
+ 382,28000,4.5678
72
+ 387,28400,6.4948
73
+ 391,28800,7.3334
74
+ 395,29200,6.8561
75
+ 400,29600,5.7921
76
+ 405,30000,5.775
77
+ 411,30400,5.2429
78
+ 416,30800,6.3733
79
+ 420,31200,6.9394
80
+ 424,31600,7.4063
81
+ 430,32000,5.6989
82
+ 434,32400,6.6094
83
+ 439,32800,6.4591
84
+ 443,33200,7.5158
85
+ 448,33600,5.1683
86
+ 457,34000,3.2012
87
+ 462,34400,6.4385
88
+ 466,34800,7.6264
89
+ 471,35200,6.8552
90
+ 475,35600,6.6808
91
+ 479,36000,7.5093
92
+ 485,36400,6.0369
93
+ 491,36800,5.9012
94
+ 497,37200,4.7016
95
+ 507,37600,3.1521
96
+ 513,38000,5.874
97
+ 517,38400,7.713
98
+ 522,38800,6.4352
99
+ 526,39200,7.6985
100
+ 530,39600,7.8557
101
+ 540,40000,3.9398
102
+ 546,40400,5.5794
103
+ 551,40800,6.7952
104
+ 555,41200,7.4607
105
+ 561,41600,6.2385
106
+ 565,42000,6.9421
107
+ 571,42400,5.5255
108
+ 577,42800,7.0349
109
+ 583,43200,7.038
110
+ 587,43600,8.1308
111
+ 592,44000,6.715
112
+ 598,44400,6.6102
113
+ 603,44800,5.7423
114
+ 609,45200,5.7222
115
+ 616,45600,4.6904
116
+ 621,46000,6.6802
117
+ 627,46400,5.1794
118
+ 631,46800,7.8428
119
+ 636,47200,6.6569
120
+ 640,47600,6.0637
121
+ 645,48000,6.6306
122
+ 649,48400,7.8827
123
+ 654,48800,7.2829
124
+ 660,49200,5.3484
125
+ 666,49600,4.965
126
+ 671,50000,6.2847
127
+ 675,50400,7.2723
128
+ 681,50800,5.6506
129
+ 689,51200,4.2316
130
+ 693,51600,6.588
131
+ 699,52000,5.8609
132
+ 704,52400,6.7229
133
+ 710,52800,5.2104
134
+ 716,53200,5.5105
135
+ 720,53600,6.7428
136
+ 728,54000,4.2067
137
+ 733,54400,6.1901
138
+ 737,54800,6.1446
139
+ 743,55200,6.4328
140
+ 747,55600,7.5352
141
+ 753,56000,4.6297
142
+ 758,56400,6.7488
143
+ 765,56800,4.4836
144
+ 769,57200,7.332
145
+ 773,57600,7.3174
146
+ 778,58000,7.5865
147
+ 784,58400,5.1354
148
+ 788,58800,7.5499
149
+ 792,59200,7.6182
150
+ 802,59600,2.9436
151
+ 807,60000,6.7174
152
+ 812,60400,6.5712
153
+ 820,60800,3.8267
154
+ 827,61200,5.0311
155
+ 831,61600,7.6775
156
+ 835,62000,6.2429
157
+ 842,62400,5.1519
158
+ 847,62800,5.1346
159
+ 858,63200,3.2845
160
+ 862,63600,6.7612
161
+ 868,64000,5.4555
162
+ 874,64400,5.6836
163
+ 879,64800,5.4058
164
+ 883,65200,7.5245
165
+ 887,65600,7.5208
166
+ 892,66000,7.063
167
+ 897,66400,6.6028
168
+ 903,66800,6.9216
169
+ 908,67200,7.874
170
+ 912,67600,6.5846
171
+ 918,68000,6.0015
172
+ 923,68400,6.5042
173
+ 927,68800,6.4518
174
+ 932,69200,7.5216
175
+ 937,69600,7.0083
176
+ 942,70000,6.8853
177
+ 948,70400,5.5392
178
+ 954,70800,8.848
179
+ 960,71200,11.4058
180
+ 967,71600,7.5975
181
+ 972,72000,8.9093
182
+ 979,72400,9.266
183
+ 985,72800,9.023
184
+ 991,73200,11.5379
185
+ 1000,73600,7.4839
186
+ 1006,74000,10.8982
187
+ 1011,74400,9.6794
188
+ 1016,74800,11.5398
189
+ 1022,75200,12.7577
190
+ 1032,75600,7.5257
191
+ 1040,76000,10.313
192
+ 1052,76400,4.9592
193
+ 1057,76800,9.929
194
+ 1065,77200,9.0269
195
+ 1074,77600,10.0283
196
+ 1080,78000,9.1994
197
+ 1084,78400,17.318
198
+ 1090,78800,8.6919
199
+ 1099,79200,8.3069
200
+ 1104,79600,14.1304
201
+ 1110,80000,13.6171
202
+ 1114,80400,16.2281
203
+ 1119,80800,13.7672
204
+ 1127,81200,9.2344
205
+ 1133,81600,12.9287
206
+ 1139,82000,9.0991
207
+ 1146,82400,10.4085
208
+ 1154,82800,6.4297
209
+ 1161,83200,11.2657
210
+ 1170,83600,8.6181
211
+ 1176,84000,10.1173
212
+ 1182,84400,10.5116
213
+ 1189,84800,7.5418
214
+ 1197,85200,7.8979
215
+ 1204,85600,10.4355
216
+ 1214,86000,5.9039
217
+ 1228,86400,4.1987
218
+ 1238,86800,6.374
219
+ 1246,87200,5.9424
220
+ 1251,87600,15.9749
221
+ 1257,88000,14.0111
222
+ 1261,88400,19.8135
223
+ 1270,88800,7.7016
224
+ 1276,89200,10.2966
225
+ 1281,89600,12.6069
226
+ 1288,90000,10.6588
227
+ 1293,90400,17.1633
228
+ 1300,90800,9.8388
229
+ 1308,91200,9.1061
230
+ 1314,91600,10.2858
231
+ 1319,92000,15.5991
232
+ 1323,92400,19.9744
233
+ 1329,92800,13.7349
234
+ 1333,93200,16.2973
235
+ 1340,93600,12.1433
236
+ 1346,94000,15.6216
237
+ 1350,94400,16.604
238
+ 1356,94800,17.9473
239
+ 1360,95200,22.7261
240
+ 1367,95600,9.7798
241
+ 1372,96000,18.8177
242
+ 1376,96400,23.3835
243
+ 1380,96800,22.7676
244
+ 1385,97200,17.9196
245
+ 1391,97600,11.0045
246
+ 1398,98000,12.9032
247
+ 1403,98400,19.9428
248
+ 1411,98800,10.0649
249
+ 1415,99200,23.4105
250
+ 1421,99600,14.4659
251
+ 1427,100000,12.6086
252
+ 1434,100400,11.2016
253
+ 1440,100800,16.4195
254
+ 1445,101200,14.8528
255
+ 1451,101600,13.3423
256
+ 1457,102000,15.6651
257
+ 1462,102400,19.3589
258
+ 1467,102800,19.2814
259
+ 1472,103200,17.4048
260
+ 1479,103600,10.9156
261
+ 1484,104000,19.1347
262
+ 1489,104400,16.7974
263
+ 1500,104800,7.7077
264
+ 1508,105200,8.3873
265
+ 1515,105600,11.8502
266
+ 1522,106000,12.4617
267
+ 1529,106400,10.997
268
+ 1534,106800,18.1086
269
+ 1538,107200,21.5753
270
+ 1542,107600,18.1229
271
+ 1548,108000,19.0807
272
+ 1553,108400,19.9151
273
+ 1557,108800,24.3347
274
+ 1565,109200,11.5838
275
+ 1571,109600,10.4892
276
+ 1576,110000,18.4124
277
+ 1583,110400,9.6659
278
+ 1589,110800,15.3845
279
+ 1594,111200,19.4332
280
+ 1603,111600,9.1848
281
+ 1608,112000,19.8579
282
+ 1614,112400,14.6327
283
+ 1620,112800,15.4716
284
+ 1628,113200,7.6968
285
+ 1633,113600,14.4689
286
+ 1637,114000,19.6793
287
+ 1642,114400,20.0721
288
+ 1647,114800,15.0668
289
+ 1652,115200,17.4454
290
+ 1657,115600,19.6026
291
+ 1663,116000,14.572
292
+ 1669,116400,12.3857
293
+ 1675,116800,15.0434
294
+ 1679,117200,23.0521
295
+ 1685,117600,15.9115
296
+ 1691,118000,16.4641
297
+ 1695,118400,18.5005
298
+ 1701,118800,13.3055
299
+ 1705,119200,20.5855
300
+ 1711,119600,15.2568
301
+ 1716,120000,17.1653
302
+ 1721,120400,16.2964
303
+ 1726,120800,17.3911
304
+ 1731,121200,18.9176
305
+ 1735,121600,20.2643
306
+ 1741,122000,21.9711
307
+ 1748,122400,15.1474
308
+ 1752,122800,21.0002
309
+ 1756,123200,27.017
310
+ 1761,123600,21.0847
311
+ 1768,124000,16.9835
312
+ 1774,124400,16.988
313
+ 1780,124800,17.102
314
+ 1784,125200,27.0598
315
+ 1790,125600,18.8929
316
+ 1795,126000,18.4346
317
+ 1799,126400,27.4704
318
+ 1803,126800,20.451
319
+ 1812,127200,12.5548
320
+ 1816,127600,24.9355
321
+ 1821,128000,14.002
322
+ 1826,128400,16.9177
323
+ 1834,128800,9.9734
324
+ 1847,129200,4.5403
325
+ 1853,129600,10.6147
326
+ 1865,130000,6.26
327
+ 1872,130400,12.1423
328
+ 1877,130800,16.8818
329
+ 1882,131200,14.1034
330
+ 1887,131600,19.5902
331
+ 1894,132000,12.8515
332
+ 1899,132400,16.2843
333
+ 1904,132800,15.5745
334
+ 1914,133200,8.3905
335
+ 1922,133600,13.0687
336
+ 1929,134000,13.9548
337
+ 1935,134400,11.7435
338
+ 1944,134800,12.2644
339
+ 1949,135200,18.9015
340
+ 1957,135600,10.5449
341
+ 1968,136000,6.614
342
+ 1979,136400,7.9006
343
+ 1988,136800,8.6919
344
+ 1993,137200,19.6558
345
+ 1999,137600,13.7705
346
+ 2004,138000,19.7431
347
+ 2010,138400,16.1015
348
+ 2018,138800,7.969
349
+ 2024,139200,10.7627
350
+ 2033,139600,9.8075
351
+ 2038,140000,15.1353
352
+ 2044,140400,14.33
353
+ 2051,140800,13.0915
354
+ 2059,141200,11.0496
355
+ 2067,141600,8.5425
356
+ 2074,142000,12.6574
357
+ 2079,142400,18.6865
358
+ 2083,142800,18.9614
359
+ 2091,143200,7.6956
360
+ 2097,143600,19.3319
361
+ 2106,144000,6.1586
362
+ 2112,144400,11.7879
363
+ 2117,144800,14.6574
364
+ 2124,145200,9.78
365
+ 2131,145600,8.172
366
+ 2138,146000,9.3161
367
+ 2145,146400,10.1464
368
+ 2151,146800,13.3546
369
+ 2158,147200,10.2643
370
+ 2162,147600,17.7297
371
+ 2167,148000,12.2066
372
+ 2174,148400,11.723
373
+ 2181,148800,12.61
374
+ 2185,149200,20.9512
375
+ 2192,149600,9.257
376
+ 2200,150000,13.0471
377
+ 2206,150400,10.6689
378
+ 2212,150800,16.0447
379
+ 2219,151200,13.6559
380
+ 2225,151600,13.2487
381
+ 2235,152000,7.2764
382
+ 2242,152400,11.6686
383
+ 2248,152800,12.3615
384
+ 2255,153200,13.5621
385
+ 2263,153600,9.6251
386
+ 2269,154000,9.0672
387
+ 2276,154400,13.0372
388
+ 2281,154800,16.5969
389
+ 2286,155200,18.0225
390
+ 2292,155600,14.2052
391
+ 2298,156000,11.6988
392
+ 2304,156400,9.5336
393
+ 2312,156800,8.0191
394
+ 2324,157200,5.3825
395
+ 2330,157600,8.1571
396
+ 2337,158000,10.3493
397
+ 2344,158400,10.4621
398
+ 2350,158800,10.5959
399
+ 2356,159200,7.2691
400
+ 2364,159600,6.3992
401
+ 2372,160000,7.9295
402
+ 2377,160400,7.4555
403
+ 2384,160800,7.9996
404
+ 2389,161200,13.726
405
+ 2395,161600,7.7046
406
+ 2399,162000,16.8889
407
+ 2410,162400,5.528
408
+ 2422,162800,4.9575
409
+ 2429,163200,8.7608
410
+ 2438,163600,7.2575
411
+ 2446,164000,7.0835
412
+ 2452,164400,10.8246
413
+ 2459,164800,7.831
414
+ 2467,165200,6.133
415
+ 2476,165600,7.8923
416
+ 2483,166000,8.0733
417
+ 2489,166400,11.0754
418
+ 2493,166800,20.1624
419
+ 2500,167200,12.4293
420
+ 2504,167600,15.0355
421
+ 2510,168000,13.0286
422
+ 2518,168400,7.4877
423
+ 2523,168800,12.2261
424
+ 2531,169200,7.3993
425
+ 2536,169600,8.9622
426
+ 2542,170000,10.8549
427
+ 2547,170400,11.6566
428
+ 2555,170800,8.8997
429
+ 2562,171200,8.861
430
+ 2568,171600,13.2091
431
+ 2574,172000,10.3659
432
+ 2580,172400,11.7853
433
+ 2585,172800,14.792
434
+ 2592,173200,10.6782
435
+ 2602,173600,6.9546
436
+ 2609,174000,9.9301
437
+ 2614,174400,17.7772
438
+ 2623,174800,6.2142
439
+ 2630,175200,12.9292
440
+ 2637,175600,10.1204
441
+ 2645,176000,9.0597
442
+ 2651,176400,15.3755
443
+ 2657,176800,13.128
444
+ 2662,177200,19.3868
445
+ 2670,177600,10.7437
446
+ 2679,178000,6.904
447
+ 2686,178400,9.2907
448
+ 2696,178800,6.4837
449
+ 2705,179200,8.2248
450
+ 2711,179600,12.1069
451
+ 2720,180000,8.3973
452
+ 2724,180400,22.0167
453
+ 2729,180800,15.3768
454
+ 2734,181200,15.6707
455
+ 2738,181600,21.4503
456
+ 2744,182000,14.5199
457
+ 2750,182400,16.9138
458
+ 2756,182800,12.2078
459
+ 2762,183200,15.9948
460
+ 2769,183600,12.4933
461
+ 2775,184000,14.7625
462
+ 2781,184400,16.4597
463
+ 2786,184800,12.6036
464
+ 2793,185200,11.1748
465
+ 2799,185600,13.5976
466
+ 2805,186000,13.175
467
+ 2811,186400,14.25
468
+ 2816,186800,22.0337
469
+ 2822,187200,17.4297
470
+ 2827,187600,17.1395
471
+ 2832,188000,18.1786
472
+ 2837,188400,16.0257
473
+ 2844,188800,11.8928
474
+ 2850,189200,16.6968
475
+ 2855,189600,19.1383
476
+ 2860,190000,21.8792
477
+ 2864,190400,27.2875
478
+ 2868,190800,25.2937
479
+ 2873,191200,20.9754
480
+ 2882,191600,12.5236
481
+ 2886,192000,26.9158
482
+ 2896,192400,9.8619
483
+ 2912,192800,5.0885
484
+ 2923,193200,4.6341
485
+ 2930,193600,13.8767
486
+ 2937,194000,11.1766
487
+ 2944,194400,14.2145
488
+ 2952,194800,7.6092
489
+ 2961,195200,9.0705
490
+ 2968,195600,10.5332
491
+ 2973,196000,13.6747
492
+ 2979,196400,17.6262
493
+ 2986,196800,12.0028
494
+ 2997,197200,5.1463
495
+ 3008,197600,9.2429
496
+ 3015,198000,11.3805
497
+ 3022,198400,7.9289
498
+ 3029,198800,12.3534
499
+ 3033,199200,26.1072
500
+ 3041,199600,13.3515
501
+ 3048,200000,11.5205
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_1.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 23,400,1.2374
3
+ 44,800,1.475
4
+ 62,1200,1.6597
5
+ 75,1600,2.2131
6
+ 86,2000,2.8599
7
+ 98,2400,2.246
8
+ 109,2800,3.0708
9
+ 120,3200,3.0907
10
+ 134,3600,2.2625
11
+ 148,4000,2.1622
12
+ 162,4400,2.2727
13
+ 177,4800,1.635
14
+ 192,5200,2.4163
15
+ 203,5600,2.6421
16
+ 211,6000,4.1822
17
+ 219,6400,3.2539
18
+ 229,6800,3.5347
19
+ 238,7200,3.3297
20
+ 246,7600,3.7071
21
+ 254,8000,3.3388
22
+ 263,8400,3.4999
23
+ 272,8800,3.3479
24
+ 279,9200,3.8583
25
+ 284,9600,6.0999
26
+ 292,10000,3.6754
27
+ 301,10400,3.3741
28
+ 307,10800,5.4199
29
+ 315,11200,3.3395
30
+ 320,11600,6.3554
31
+ 329,12000,3.8476
32
+ 333,12400,7.4853
33
+ 340,12800,4.5367
34
+ 345,13200,5.1413
35
+ 350,13600,5.9491
36
+ 355,14000,6.9454
37
+ 362,14400,4.5899
38
+ 366,14800,7.3912
39
+ 371,15200,5.3159
40
+ 378,15600,4.6648
41
+ 382,16000,6.0774
42
+ 387,16400,6.5447
43
+ 393,16800,4.8197
44
+ 401,17200,4.653
45
+ 405,17600,7.3019
46
+ 410,18000,6.4426
47
+ 415,18400,6.2152
48
+ 420,18800,5.3748
49
+ 424,19200,7.3437
50
+ 428,19600,6.2297
51
+ 432,20000,7.2185
52
+ 438,20400,5.2351
53
+ 442,20800,6.6825
54
+ 448,21200,4.8845
55
+ 455,21600,4.6814
56
+ 459,22000,7.2773
57
+ 463,22400,7.2303
58
+ 467,22800,7.3327
59
+ 474,23200,4.6069
60
+ 479,23600,5.9596
61
+ 483,24000,7.4753
62
+ 487,24400,5.694
63
+ 492,24800,6.0146
64
+ 497,25200,6.2868
65
+ 501,25600,7.4234
66
+ 505,26000,7.4502
67
+ 509,26400,7.501
68
+ 514,26800,6.3456
69
+ 518,27200,6.6853
70
+ 523,27600,6.0334
71
+ 527,28000,7.6037
72
+ 531,28400,7.5199
73
+ 535,28800,7.0901
74
+ 540,29200,6.0599
75
+ 545,29600,7.4143
76
+ 549,30000,7.4906
77
+ 553,30400,7.4618
78
+ 558,30800,5.9038
79
+ 562,31200,7.5903
80
+ 567,31600,5.8108
81
+ 572,32000,6.3685
82
+ 577,32400,6.0121
83
+ 581,32800,7.5516
84
+ 585,33200,7.5793
85
+ 589,33600,6.2426
86
+ 593,34000,7.6518
87
+ 597,34400,7.5643
88
+ 601,34800,7.5213
89
+ 606,35200,7.1744
90
+ 610,35600,7.5771
91
+ 614,36000,7.4704
92
+ 618,36400,8.3538
93
+ 622,36800,6.0225
94
+ 626,37200,7.6223
95
+ 631,37600,6.5412
96
+ 636,38000,6.5792
97
+ 641,38400,6.4666
98
+ 645,38800,6.1159
99
+ 649,39200,7.8856
100
+ 653,39600,7.7125
101
+ 658,40000,7.68
102
+ 662,40400,6.3203
103
+ 666,40800,7.7558
104
+ 672,41200,5.3449
105
+ 678,41600,5.3048
106
+ 683,42000,6.0098
107
+ 687,42400,7.7948
108
+ 693,42800,5.2185
109
+ 698,43200,6.8124
110
+ 703,43600,7.1864
111
+ 707,44000,6.9499
112
+ 712,44400,7.3481
113
+ 716,44800,6.9955
114
+ 722,45200,5.2644
115
+ 727,45600,7.1592
116
+ 731,46000,7.8775
117
+ 736,46400,6.8166
118
+ 740,46800,7.7517
119
+ 745,47200,6.382
120
+ 749,47600,9.0147
121
+ 755,48000,5.9239
122
+ 760,48400,6.0407
123
+ 766,48800,5.7661
124
+ 770,49200,7.6221
125
+ 777,49600,4.4481
126
+ 782,50000,6.4733
127
+ 786,50400,7.9791
128
+ 790,50800,6.8551
129
+ 797,51200,5.3038
130
+ 801,51600,6.3203
131
+ 808,52000,4.4792
132
+ 813,52400,7.2173
133
+ 817,52800,7.9572
134
+ 821,53200,7.8356
135
+ 826,53600,6.6151
136
+ 830,54000,6.719
137
+ 835,54400,6.5328
138
+ 839,54800,7.9386
139
+ 843,55200,7.8856
140
+ 847,55600,7.8485
141
+ 852,56000,6.087
142
+ 858,56400,5.8267
143
+ 862,56800,6.6001
144
+ 869,57200,5.4358
145
+ 874,57600,5.1707
146
+ 879,58000,6.6192
147
+ 883,58400,7.9342
148
+ 887,58800,7.1165
149
+ 891,59200,7.814
150
+ 896,59600,6.5746
151
+ 900,60000,7.7367
152
+ 905,60400,7.0371
153
+ 911,60800,6.1385
154
+ 915,61200,6.8106
155
+ 920,61600,6.758
156
+ 924,62000,8.4806
157
+ 928,62400,7.9427
158
+ 933,62800,7.618
159
+ 937,63200,7.3321
160
+ 941,63600,7.6414
161
+ 945,64000,7.5861
162
+ 952,64400,5.5493
163
+ 956,64800,7.9559
164
+ 960,65200,7.2965
165
+ 966,65600,6.0452
166
+ 971,66000,6.785
167
+ 975,66400,7.7964
168
+ 979,66800,7.9213
169
+ 983,67200,6.4723
170
+ 987,67600,8.0026
171
+ 992,68000,7.5901
172
+ 996,68400,7.9942
173
+ 1001,68800,5.7542
174
+ 1007,69200,5.245
175
+ 1011,69600,8.1308
176
+ 1016,70000,6.861
177
+ 1026,70400,4.9051
178
+ 1033,70800,4.1145
179
+ 1039,71200,8.9367
180
+ 1045,71600,6.8587
181
+ 1051,72000,6.208
182
+ 1056,72400,9.627
183
+ 1062,72800,6.7777
184
+ 1068,73200,8.1144
185
+ 1072,73600,10.0016
186
+ 1078,74000,8.3931
187
+ 1083,74400,8.5617
188
+ 1090,74800,6.4565
189
+ 1096,75200,7.7086
190
+ 1102,75600,8.0011
191
+ 1108,76000,7.8719
192
+ 1117,76400,5.4102
193
+ 1125,76800,5.3871
194
+ 1132,77200,6.1708
195
+ 1138,77600,9.6164
196
+ 1143,78000,9.3242
197
+ 1151,78400,6.2785
198
+ 1156,78800,9.268
199
+ 1163,79200,7.2284
200
+ 1169,79600,7.7116
201
+ 1174,80000,10.1517
202
+ 1179,80400,10.7016
203
+ 1184,80800,9.2447
204
+ 1189,81200,9.5792
205
+ 1194,81600,10.5578
206
+ 1202,82000,6.7523
207
+ 1206,82400,10.3011
208
+ 1214,82800,8.1824
209
+ 1219,83200,9.1268
210
+ 1224,83600,9.169
211
+ 1229,84000,11.3677
212
+ 1234,84400,10.2567
213
+ 1238,84800,12.7622
214
+ 1242,85200,13.0512
215
+ 1247,85600,11.1427
216
+ 1253,86000,7.929
217
+ 1258,86400,10.1415
218
+ 1262,86800,13.3642
219
+ 1267,87200,10.9775
220
+ 1271,87600,12.6409
221
+ 1276,88000,10.5992
222
+ 1282,88400,9.3828
223
+ 1289,88800,7.4124
224
+ 1293,89200,11.4734
225
+ 1298,89600,12.2623
226
+ 1302,90000,10.1244
227
+ 1307,90400,9.9046
228
+ 1313,90800,9.779
229
+ 1319,91200,8.0214
230
+ 1325,91600,8.5971
231
+ 1331,92000,9.6304
232
+ 1336,92400,8.3368
233
+ 1341,92800,9.7023
234
+ 1345,93200,13.0409
235
+ 1350,93600,11.4912
236
+ 1356,94000,8.8157
237
+ 1360,94400,11.3592
238
+ 1366,94800,8.2193
239
+ 1372,95200,10.369
240
+ 1377,95600,9.2107
241
+ 1381,96000,11.1205
242
+ 1387,96400,9.8026
243
+ 1392,96800,10.0977
244
+ 1397,97200,11.0417
245
+ 1401,97600,10.8291
246
+ 1406,98000,10.6965
247
+ 1413,98400,8.3182
248
+ 1417,98800,12.0391
249
+ 1422,99200,10.573
250
+ 1427,99600,11.3273
251
+ 1431,100000,13.3514
252
+ 1436,100400,8.3171
253
+ 1442,100800,9.6702
254
+ 1446,101200,12.2739
255
+ 1450,101600,13.6371
256
+ 1454,102000,13.3332
257
+ 1461,102400,8.6744
258
+ 1468,102800,7.1006
259
+ 1473,103200,9.9262
260
+ 1479,103600,9.9459
261
+ 1483,104000,12.8872
262
+ 1487,104400,13.5411
263
+ 1493,104800,8.3614
264
+ 1501,105200,6.8721
265
+ 1505,105600,12.9734
266
+ 1510,106000,11.0086
267
+ 1514,106400,13.0415
268
+ 1519,106800,10.641
269
+ 1525,107200,9.6682
270
+ 1529,107600,11.3172
271
+ 1537,108000,7.6408
272
+ 1545,108400,7.5169
273
+ 1554,108800,5.0547
274
+ 1562,109200,8.1673
275
+ 1566,109600,11.351
276
+ 1572,110000,11.8269
277
+ 1576,110400,14.5999
278
+ 1581,110800,11.9343
279
+ 1587,111200,9.0708
280
+ 1599,111600,4.4076
281
+ 1604,112000,9.6173
282
+ 1610,112400,10.2097
283
+ 1615,112800,11.4213
284
+ 1619,113200,15.4919
285
+ 1627,113600,8.0107
286
+ 1633,114000,8.2533
287
+ 1637,114400,15.4628
288
+ 1643,114800,11.739
289
+ 1648,115200,10.4027
290
+ 1652,115600,14.405
291
+ 1658,116000,10.397
292
+ 1662,116400,15.0393
293
+ 1667,116800,12.0286
294
+ 1673,117200,10.1889
295
+ 1678,117600,13.0175
296
+ 1682,118000,13.202
297
+ 1689,118400,8.8392
298
+ 1694,118800,12.0206
299
+ 1699,119200,15.2346
300
+ 1706,119600,7.4044
301
+ 1710,120000,15.8376
302
+ 1716,120400,9.405
303
+ 1721,120800,12.1716
304
+ 1727,121200,10.639
305
+ 1731,121600,12.4195
306
+ 1735,122000,15.801
307
+ 1740,122400,14.7642
308
+ 1744,122800,14.9512
309
+ 1749,123200,12.8902
310
+ 1754,123600,12.6179
311
+ 1758,124000,15.3085
312
+ 1763,124400,15.2636
313
+ 1767,124800,15.9938
314
+ 1771,125200,13.2417
315
+ 1776,125600,12.6163
316
+ 1782,126000,10.864
317
+ 1787,126400,12.165
318
+ 1792,126800,12.2211
319
+ 1796,127200,16.1173
320
+ 1802,127600,9.7973
321
+ 1811,128000,6.8105
322
+ 1818,128400,7.4772
323
+ 1824,128800,11.3689
324
+ 1830,129200,9.7769
325
+ 1837,129600,7.5446
326
+ 1842,130000,12.6235
327
+ 1848,130400,11.2806
328
+ 1852,130800,13.906
329
+ 1858,131200,12.9947
330
+ 1865,131600,7.8085
331
+ 1870,132000,12.0638
332
+ 1876,132400,12.8332
333
+ 1881,132800,12.0972
334
+ 1888,133200,10.0171
335
+ 1894,133600,8.1607
336
+ 1906,134000,5.2001
337
+ 1912,134400,10.769
338
+ 1918,134800,10.4093
339
+ 1922,135200,14.9802
340
+ 1928,135600,12.3873
341
+ 1932,136000,16.5533
342
+ 1938,136400,10.1507
343
+ 1944,136800,11.7987
344
+ 1948,137200,16.9859
345
+ 1953,137600,12.8739
346
+ 1957,138000,15.2543
347
+ 1962,138400,13.5113
348
+ 1970,138800,9.3558
349
+ 1974,139200,17.044
350
+ 1980,139600,10.9737
351
+ 1985,140000,12.4654
352
+ 1991,140400,12.8888
353
+ 1996,140800,14.0378
354
+ 2003,141200,9.7178
355
+ 2008,141600,14.7386
356
+ 2013,142000,14.8715
357
+ 2020,142400,9.5865
358
+ 2025,142800,12.7104
359
+ 2029,143200,16.0303
360
+ 2034,143600,15.8244
361
+ 2038,144000,16.9077
362
+ 2042,144400,16.3177
363
+ 2046,144800,17.1757
364
+ 2050,145200,15.4536
365
+ 2054,145600,16.8005
366
+ 2063,146000,7.8445
367
+ 2067,146400,13.3113
368
+ 2074,146800,10.177
369
+ 2085,147200,5.7266
370
+ 2089,147600,13.1785
371
+ 2097,148000,9.0101
372
+ 2103,148400,11.5439
373
+ 2110,148800,8.3925
374
+ 2120,149200,7.0782
375
+ 2127,149600,6.752
376
+ 2136,150000,8.609
377
+ 2141,150400,12.6775
378
+ 2147,150800,11.4195
379
+ 2152,151200,14.956
380
+ 2159,151600,8.3099
381
+ 2163,152000,13.0747
382
+ 2169,152400,10.8855
383
+ 2173,152800,16.1138
384
+ 2179,153200,11.1642
385
+ 2183,153600,17.5901
386
+ 2189,154000,12.9443
387
+ 2193,154400,15.0154
388
+ 2199,154800,11.8207
389
+ 2205,155200,12.6347
390
+ 2209,155600,18.7259
391
+ 2215,156000,12.7894
392
+ 2224,156400,7.0928
393
+ 2229,156800,13.0815
394
+ 2237,157200,9.53
395
+ 2244,157600,8.183
396
+ 2250,158000,11.3156
397
+ 2254,158400,17.8963
398
+ 2264,158800,6.7863
399
+ 2271,159200,9.1487
400
+ 2278,159600,10.2853
401
+ 2285,160000,11.1034
402
+ 2291,160400,12.3193
403
+ 2296,160800,12.8277
404
+ 2304,161200,10.186
405
+ 2311,161600,10.0795
406
+ 2317,162000,13.2198
407
+ 2322,162400,13.6536
408
+ 2327,162800,16.2005
409
+ 2333,163200,11.8695
410
+ 2337,163600,18.9552
411
+ 2341,164000,19.3696
412
+ 2346,164400,15.9116
413
+ 2351,164800,13.0325
414
+ 2356,165200,14.5295
415
+ 2362,165600,12.7752
416
+ 2366,166000,20.2381
417
+ 2370,166400,18.5701
418
+ 2374,166800,19.7957
419
+ 2378,167200,20.6729
420
+ 2383,167600,17.1297
421
+ 2388,168000,16.415
422
+ 2392,168400,16.3995
423
+ 2398,168800,14.5399
424
+ 2403,169200,14.3127
425
+ 2407,169600,20.0631
426
+ 2412,170000,14.8381
427
+ 2417,170400,16.6159
428
+ 2422,170800,15.6721
429
+ 2429,171200,11.7749
430
+ 2435,171600,12.1638
431
+ 2439,172000,20.5949
432
+ 2443,172400,18.4239
433
+ 2447,172800,20.2025
434
+ 2453,173200,15.569
435
+ 2459,173600,12.0643
436
+ 2466,174000,11.6623
437
+ 2471,174400,12.4459
438
+ 2479,174800,8.9067
439
+ 2483,175200,19.4989
440
+ 2487,175600,19.945
441
+ 2491,176000,18.5005
442
+ 2497,176400,15.8793
443
+ 2503,176800,11.8832
444
+ 2509,177200,11.3501
445
+ 2514,177600,11.7629
446
+ 2518,178000,19.6937
447
+ 2522,178400,19.9908
448
+ 2526,178800,20.0871
449
+ 2534,179200,9.8611
450
+ 2539,179600,16.1641
451
+ 2544,180000,16.6044
452
+ 2551,180400,11.9178
453
+ 2556,180800,15.7903
454
+ 2565,181200,8.1818
455
+ 2573,181600,6.7254
456
+ 2582,182000,6.4099
457
+ 2593,182400,6.8256
458
+ 2608,182800,4.9001
459
+ 2620,183200,5.3354
460
+ 2625,183600,13.1512
461
+ 2645,184000,3.785
462
+ 2660,184400,3.8907
463
+ 2667,184800,8.5334
464
+ 2678,185200,6.3167
465
+ 2688,185600,6.4812
466
+ 2698,186000,5.3833
467
+ 2706,186400,9.5081
468
+ 2716,186800,6.8215
469
+ 2722,187200,10.4982
470
+ 2730,187600,9.5086
471
+ 2738,188000,9.3461
472
+ 2744,188400,14.2225
473
+ 2757,188800,5.7347
474
+ 2766,189200,7.2451
475
+ 2772,189600,13.8376
476
+ 2776,190000,19.2079
477
+ 2785,190400,8.2667
478
+ 2794,190800,7.8716
479
+ 2800,191200,10.8995
480
+ 2808,191600,11.1967
481
+ 2813,192000,13.3437
482
+ 2818,192400,15.5833
483
+ 2825,192800,12.1659
484
+ 2829,193200,18.1718
485
+ 2841,193600,6.6037
486
+ 2847,194000,13.7069
487
+ 2853,194400,12.6538
488
+ 2860,194800,12.0022
489
+ 2868,195200,8.8985
490
+ 2875,195600,12.2925
491
+ 2881,196000,11.201
492
+ 2889,196400,11.2774
493
+ 2897,196800,9.0422
494
+ 2903,197200,12.1459
495
+ 2908,197600,17.9243
496
+ 2914,198000,12.5648
497
+ 2923,198400,8.3362
498
+ 2931,198800,9.3883
499
+ 2935,199200,19.7271
500
+ 2941,199600,13.2621
501
+ 2945,200000,18.3102
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_2.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 13,400,2.4027
3
+ 32,800,1.617
4
+ 46,1200,2.0829
5
+ 55,1600,3.3664
6
+ 70,2000,1.9602
7
+ 77,2400,4.2734
8
+ 87,2800,3.3484
9
+ 96,3200,2.8441
10
+ 108,3600,2.8373
11
+ 113,4000,6.0449
12
+ 122,4400,3.834
13
+ 128,4800,4.6497
14
+ 134,5200,5.7626
15
+ 139,5600,5.8112
16
+ 148,6000,3.9162
17
+ 153,6400,6.4239
18
+ 161,6800,3.2877
19
+ 167,7200,6.0931
20
+ 173,7600,5.6529
21
+ 177,8000,8.1896
22
+ 183,8400,5.4595
23
+ 188,8800,5.5099
24
+ 194,9200,5.9555
25
+ 200,9600,5.4228
26
+ 207,10000,5.1593
27
+ 213,10400,5.8843
28
+ 221,10800,3.7587
29
+ 226,11200,6.9919
30
+ 233,11600,5.0004
31
+ 239,12000,5.6778
32
+ 245,12400,6.7453
33
+ 251,12800,5.6725
34
+ 257,13200,5.7192
35
+ 263,13600,6.5788
36
+ 267,14000,7.3344
37
+ 272,14400,7.4462
38
+ 278,14800,6.0194
39
+ 283,15200,7.349
40
+ 289,15600,6.475
41
+ 294,16000,8.7377
42
+ 302,16400,5.5482
43
+ 310,16800,4.4978
44
+ 314,17200,12.6707
45
+ 322,17600,6.9202
46
+ 327,18000,8.4112
47
+ 332,18400,7.8845
48
+ 337,18800,8.8895
49
+ 344,19200,7.0861
50
+ 350,19600,8.066
51
+ 357,20000,6.4104
52
+ 364,20400,6.3238
53
+ 369,20800,9.4534
54
+ 373,21200,10.6902
55
+ 378,21600,10.5201
56
+ 383,22000,9.18
57
+ 387,22400,11.8679
58
+ 393,22800,7.6084
59
+ 397,23200,11.8894
60
+ 402,23600,8.3823
61
+ 406,24000,11.8962
62
+ 410,24400,11.9462
63
+ 414,24800,12.0936
64
+ 418,25200,12.2516
65
+ 423,25600,11.6307
66
+ 427,26000,12.2494
67
+ 431,26400,10.3211
68
+ 435,26800,12.193
69
+ 441,27200,8.2828
70
+ 447,27600,8.323
71
+ 451,28000,12.0851
72
+ 455,28400,11.8691
73
+ 460,28800,10.3406
74
+ 464,29200,11.2745
75
+ 469,29600,9.7434
76
+ 475,30000,6.0876
77
+ 481,30400,9.8042
78
+ 485,30800,9.8166
79
+ 491,31200,8.5955
80
+ 496,31600,8.8879
81
+ 502,32000,9.6651
82
+ 506,32400,12.4816
83
+ 513,32800,5.3655
84
+ 518,33200,10.5211
85
+ 523,33600,8.8889
86
+ 529,34000,8.49
87
+ 533,34400,11.531
88
+ 539,34800,8.149
89
+ 543,35200,10.8391
90
+ 548,35600,10.0555
91
+ 552,36000,12.2689
92
+ 557,36400,10.1142
93
+ 562,36800,11.1685
94
+ 568,37200,7.5849
95
+ 574,37600,7.9313
96
+ 578,38000,12.7468
97
+ 582,38400,12.8371
98
+ 587,38800,10.0817
99
+ 593,39200,7.806
100
+ 599,39600,8.1719
101
+ 606,40000,7.7715
102
+ 610,40400,12.7892
103
+ 615,40800,8.8938
104
+ 622,41200,6.7361
105
+ 630,41600,6.5825
106
+ 638,42000,6.4833
107
+ 643,42400,8.2059
108
+ 649,42800,10.0644
109
+ 658,43200,5.4777
110
+ 663,43600,8.1624
111
+ 668,44000,10.306
112
+ 673,44400,13.0668
113
+ 681,44800,5.2228
114
+ 686,45200,12.7062
115
+ 691,45600,8.3513
116
+ 698,46000,7.9629
117
+ 702,46400,13.0319
118
+ 709,46800,6.41
119
+ 713,47200,13.8998
120
+ 718,47600,10.1406
121
+ 723,48000,11.4989
122
+ 732,48400,5.7759
123
+ 736,48800,13.8137
124
+ 745,49200,3.7937
125
+ 753,49600,8.4375
126
+ 757,50000,13.3919
127
+ 764,50400,7.7574
128
+ 770,50800,7.3266
129
+ 774,51200,14.4457
130
+ 782,51600,7.6632
131
+ 786,52000,14.5052
132
+ 791,52400,12.7646
133
+ 796,52800,12.3906
134
+ 801,53200,12.5327
135
+ 806,53600,9.7097
136
+ 812,54000,9.998
137
+ 821,54400,6.5391
138
+ 826,54800,10.0426
139
+ 830,55200,13.2772
140
+ 837,55600,9.5483
141
+ 841,56000,11.9928
142
+ 848,56400,9.3228
143
+ 852,56800,13.7227
144
+ 858,57200,8.4757
145
+ 862,57600,14.4407
146
+ 868,58000,9.607
147
+ 873,58400,12.6694
148
+ 883,58800,4.7655
149
+ 888,59200,11.5319
150
+ 894,59600,10.6887
151
+ 899,60000,11.8105
152
+ 904,60400,10.2226
153
+ 910,60800,8.0744
154
+ 914,61200,14.8116
155
+ 919,61600,12.2418
156
+ 928,62000,6.0928
157
+ 935,62400,8.7412
158
+ 942,62800,8.2501
159
+ 951,63200,7.3312
160
+ 958,63600,6.9865
161
+ 963,64000,13.1523
162
+ 968,64400,11.9024
163
+ 975,64800,10.2803
164
+ 979,65200,15.9866
165
+ 983,65600,13.8684
166
+ 988,66000,13.0459
167
+ 993,66400,13.1794
168
+ 997,66800,16.858
169
+ 1005,67200,9.2472
170
+ 1011,67600,10.5944
171
+ 1015,68000,18.199
172
+ 1019,68400,16.954
173
+ 1023,68800,16.9229
174
+ 1028,69200,12.5304
175
+ 1032,69600,17.6321
176
+ 1039,70000,10.8713
177
+ 1048,70400,8.1558
178
+ 1053,70800,12.2909
179
+ 1060,71200,10.5877
180
+ 1066,71600,10.6469
181
+ 1073,72000,10.9766
182
+ 1077,72400,15.5587
183
+ 1082,72800,14.5922
184
+ 1088,73200,11.1597
185
+ 1093,73600,16.6017
186
+ 1098,74000,13.7014
187
+ 1103,74400,10.6047
188
+ 1108,74800,14.9205
189
+ 1112,75200,18.965
190
+ 1119,75600,12.518
191
+ 1126,76000,7.3995
192
+ 1131,76400,15.3457
193
+ 1136,76800,13.6769
194
+ 1140,77200,17.8605
195
+ 1144,77600,18.1357
196
+ 1151,78000,11.2644
197
+ 1156,78400,11.4578
198
+ 1161,78800,16.01
199
+ 1167,79200,13.5079
200
+ 1171,79600,15.4602
201
+ 1176,80000,18.2113
202
+ 1181,80400,17.2741
203
+ 1185,80800,19.4672
204
+ 1193,81200,8.1171
205
+ 1198,81600,16.5683
206
+ 1204,82000,12.9389
207
+ 1209,82400,16.3976
208
+ 1217,82800,9.9467
209
+ 1221,83200,20.1043
210
+ 1229,83600,8.9266
211
+ 1235,84000,12.0714
212
+ 1241,84400,14.5858
213
+ 1245,84800,19.3948
214
+ 1252,85200,9.0477
215
+ 1256,85600,19.9998
216
+ 1263,86000,11.7696
217
+ 1271,86400,9.3787
218
+ 1278,86800,8.5559
219
+ 1284,87200,14.7973
220
+ 1290,87600,12.839
221
+ 1295,88000,14.3447
222
+ 1299,88400,20.0166
223
+ 1305,88800,13.8517
224
+ 1310,89200,15.5374
225
+ 1315,89600,11.2921
226
+ 1322,90000,12.7502
227
+ 1331,90400,8.0843
228
+ 1335,90800,15.8949
229
+ 1342,91200,11.7088
230
+ 1347,91600,17.2511
231
+ 1351,92000,18.5015
232
+ 1357,92400,15.5406
233
+ 1363,92800,11.4947
234
+ 1367,93200,18.5403
235
+ 1375,93600,10.876
236
+ 1380,94000,16.1795
237
+ 1388,94400,9.5337
238
+ 1392,94800,18.751
239
+ 1401,95200,9.2096
240
+ 1406,95600,13.6803
241
+ 1413,96000,11.6685
242
+ 1424,96400,4.7407
243
+ 1436,96800,6.0117
244
+ 1448,97200,6.2754
245
+ 1453,97600,14.8301
246
+ 1457,98000,19.4724
247
+ 1462,98400,12.7641
248
+ 1470,98800,10.515
249
+ 1479,99200,7.7847
250
+ 1487,99600,8.115
251
+ 1495,100000,9.6315
252
+ 1503,100400,9.6751
253
+ 1508,100800,14.0532
254
+ 1514,101200,13.3345
255
+ 1520,101600,14.4949
256
+ 1528,102000,7.3491
257
+ 1533,102400,16.749
258
+ 1537,102800,16.6947
259
+ 1544,103200,13.2561
260
+ 1549,103600,16.2186
261
+ 1555,104000,10.76
262
+ 1561,104400,13.0782
263
+ 1569,104800,9.7765
264
+ 1576,105200,9.9622
265
+ 1581,105600,16.9109
266
+ 1587,106000,12.2365
267
+ 1596,106400,8.4596
268
+ 1602,106800,13.4264
269
+ 1607,107200,13.8518
270
+ 1614,107600,11.4566
271
+ 1620,108000,14.6299
272
+ 1628,108400,9.2927
273
+ 1634,108800,12.3812
274
+ 1641,109200,9.7538
275
+ 1655,109600,4.8454
276
+ 1660,110000,14.5315
277
+ 1673,110400,3.9869
278
+ 1680,110800,8.9558
279
+ 1691,111200,5.7395
280
+ 1695,111600,19.8293
281
+ 1701,112000,13.327
282
+ 1706,112400,13.7603
283
+ 1711,112800,14.9665
284
+ 1719,113200,7.5555
285
+ 1726,113600,10.7098
286
+ 1731,114000,14.8993
287
+ 1736,114400,14.7975
288
+ 1743,114800,7.8648
289
+ 1751,115200,7.9691
290
+ 1760,115600,9.1848
291
+ 1767,116000,8.9283
292
+ 1774,116400,11.6902
293
+ 1778,116800,18.8354
294
+ 1785,117200,12.2704
295
+ 1789,117600,15.4445
296
+ 1794,118000,16.3556
297
+ 1806,118400,5.2073
298
+ 1814,118800,9.6861
299
+ 1818,119200,15.142
300
+ 1825,119600,13.3942
301
+ 1830,120000,14.1063
302
+ 1836,120400,12.9716
303
+ 1844,120800,8.7663
304
+ 1853,121200,7.4106
305
+ 1857,121600,20.3496
306
+ 1863,122000,11.6921
307
+ 1868,122400,14.598
308
+ 1873,122800,16.1342
309
+ 1878,123200,17.5161
310
+ 1886,123600,8.8583
311
+ 1894,124000,7.4233
312
+ 1903,124400,9.8375
313
+ 1908,124800,12.7669
314
+ 1912,125200,18.0466
315
+ 1919,125600,12.422
316
+ 1924,126000,13.4632
317
+ 1930,126400,14.4262
318
+ 1935,126800,16.3081
319
+ 1940,127200,17.1442
320
+ 1945,127600,13.8432
321
+ 1950,128000,17.8724
322
+ 1954,128400,15.4276
323
+ 1961,128800,11.0026
324
+ 1967,129200,13.7937
325
+ 1972,129600,13.3407
326
+ 1980,130000,11.0252
327
+ 1985,130400,13.7636
328
+ 1989,130800,20.3436
329
+ 1996,131200,9.9471
330
+ 2001,131600,14.7954
331
+ 2005,132000,19.5569
332
+ 2010,132400,15.0541
333
+ 2015,132800,16.94
334
+ 2020,133200,16.584
335
+ 2026,133600,12.1681
336
+ 2030,134000,16.941
337
+ 2035,134400,18.5808
338
+ 2040,134800,16.5108
339
+ 2048,135200,9.7247
340
+ 2054,135600,13.6772
341
+ 2060,136000,12.384
342
+ 2066,136400,14.664
343
+ 2071,136800,16.1609
344
+ 2077,137200,11.9693
345
+ 2085,137600,12.2337
346
+ 2092,138000,8.7871
347
+ 2096,138400,20.9815
348
+ 2105,138800,9.5597
349
+ 2111,139200,12.4106
350
+ 2123,139600,5.885
351
+ 2129,140000,12.46
352
+ 2137,140400,8.6104
353
+ 2141,140800,19.3967
354
+ 2146,141200,18.9291
355
+ 2153,141600,11.1995
356
+ 2157,142000,21.0472
357
+ 2166,142400,9.0968
358
+ 2173,142800,11.4345
359
+ 2177,143200,17.4054
360
+ 2182,143600,16.7436
361
+ 2187,144000,15.979
362
+ 2191,144400,20.6625
363
+ 2196,144800,17.967
364
+ 2201,145200,14.4191
365
+ 2206,145600,17.7912
366
+ 2212,146000,12.9955
367
+ 2216,146400,20.7862
368
+ 2220,146800,18.8328
369
+ 2225,147200,17.0398
370
+ 2233,147600,9.1588
371
+ 2238,148000,17.0543
372
+ 2245,148400,11.4016
373
+ 2250,148800,13.5677
374
+ 2259,149200,9.3432
375
+ 2264,149600,16.0615
376
+ 2272,150000,7.0293
377
+ 2278,150400,15.0398
378
+ 2284,150800,13.835
379
+ 2288,151200,15.2233
380
+ 2294,151600,13.696
381
+ 2302,152000,9.5545
382
+ 2307,152400,15.6187
383
+ 2313,152800,10.671
384
+ 2319,153200,13.9607
385
+ 2325,153600,10.3803
386
+ 2332,154000,11.999
387
+ 2339,154400,9.1082
388
+ 2347,154800,9.2813
389
+ 2355,155200,8.6008
390
+ 2359,155600,19.8647
391
+ 2369,156000,6.1099
392
+ 2376,156400,13.2466
393
+ 2381,156800,13.0148
394
+ 2390,157200,7.6919
395
+ 2395,157600,14.2158
396
+ 2400,158000,17.9611
397
+ 2408,158400,8.9897
398
+ 2415,158800,10.4802
399
+ 2420,159200,13.5744
400
+ 2425,159600,16.1444
401
+ 2430,160000,13.9878
402
+ 2437,160400,12.0302
403
+ 2445,160800,10.5921
404
+ 2449,161200,15.9317
405
+ 2453,161600,19.0136
406
+ 2462,162000,9.6758
407
+ 2467,162400,15.8064
408
+ 2474,162800,9.9397
409
+ 2478,163200,20.8661
410
+ 2488,163600,6.9612
411
+ 2496,164000,9.1259
412
+ 2502,164400,12.425
413
+ 2512,164800,5.2967
414
+ 2521,165200,9.5709
415
+ 2528,165600,8.9144
416
+ 2537,166000,7.9899
417
+ 2549,166400,4.6902
418
+ 2558,166800,8.4622
419
+ 2572,167200,4.6769
420
+ 2579,167600,10.6164
421
+ 2589,168000,5.4096
422
+ 2597,168400,7.1444
423
+ 2605,168800,11.1901
424
+ 2610,169200,13.3372
425
+ 2619,169600,8.1873
426
+ 2626,170000,9.9578
427
+ 2634,170400,8.5779
428
+ 2639,170800,16.6372
429
+ 2644,171200,17.5453
430
+ 2649,171600,14.0101
431
+ 2656,172000,11.4441
432
+ 2661,172400,12.8794
433
+ 2668,172800,11.5128
434
+ 2674,173200,12.0959
435
+ 2683,173600,5.4352
436
+ 2687,174000,18.7393
437
+ 2694,174400,11.3662
438
+ 2700,174800,12.8577
439
+ 2704,175200,16.8251
440
+ 2708,175600,21.1851
441
+ 2712,176000,19.6086
442
+ 2717,176400,18.0934
443
+ 2722,176800,15.998
444
+ 2726,177200,20.8385
445
+ 2731,177600,13.3021
446
+ 2737,178000,15.9943
447
+ 2743,178400,11.5759
448
+ 2749,178800,13.4046
449
+ 2756,179200,9.6437
450
+ 2761,179600,16.2845
451
+ 2766,180000,15.4722
452
+ 2770,180400,16.5676
453
+ 2777,180800,10.0904
454
+ 2783,181200,13.448
455
+ 2788,181600,14.414
456
+ 2792,182000,20.08
457
+ 2797,182400,16.0141
458
+ 2801,182800,20.2584
459
+ 2807,183200,14.7895
460
+ 2812,183600,14.5707
461
+ 2818,184000,13.1659
462
+ 2825,184400,11.4905
463
+ 2830,184800,15.3281
464
+ 2835,185200,15.8941
465
+ 2841,185600,11.6085
466
+ 2846,186000,12.8389
467
+ 2853,186400,12.1964
468
+ 2858,186800,15.4969
469
+ 2863,187200,16.2343
470
+ 2868,187600,13.6736
471
+ 2874,188000,13.7629
472
+ 2879,188400,18.2208
473
+ 2884,188800,14.7769
474
+ 2888,189200,20.1099
475
+ 2892,189600,20.665
476
+ 2897,190000,14.9009
477
+ 2902,190400,15.9363
478
+ 2908,190800,13.7088
479
+ 2914,191200,12.0374
480
+ 2919,191600,11.6974
481
+ 2923,192000,19.4542
482
+ 2929,192400,12.6135
483
+ 2935,192800,13.6599
484
+ 2941,193200,14.4815
485
+ 2945,193600,18.2334
486
+ 2950,194000,13.9492
487
+ 2956,194400,12.6527
488
+ 2960,194800,19.9709
489
+ 2964,195200,20.1638
490
+ 2970,195600,12.4923
491
+ 2976,196000,13.7684
492
+ 2983,196400,11.1614
493
+ 2988,196800,12.4877
494
+ 2996,197200,6.8995
495
+ 3005,197600,7.2801
496
+ 3013,198000,10.8011
497
+ 3018,198400,13.5368
498
+ 3023,198800,15.2503
499
+ 3032,199200,8.255
500
+ 3037,199600,15.5807
501
+ 3044,200000,9.8756
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_3.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 24,400,1.1789
3
+ 42,800,2.1233
4
+ 61,1200,1.5503
5
+ 75,1600,2.1531
6
+ 94,2000,1.8369
7
+ 103,2400,3.5188
8
+ 113,2800,3.0467
9
+ 126,3200,2.5075
10
+ 137,3600,2.784
11
+ 142,4000,5.3221
12
+ 149,4400,4.356
13
+ 158,4800,3.5811
14
+ 166,5200,4.0608
15
+ 174,5600,3.8915
16
+ 178,6000,7.7872
17
+ 184,6400,5.5597
18
+ 189,6800,6.3654
19
+ 193,7200,6.812
20
+ 198,7600,6.1092
21
+ 202,8000,7.5749
22
+ 207,8400,6.2928
23
+ 213,8800,5.3826
24
+ 217,9200,6.452
25
+ 221,9600,7.7105
26
+ 225,10000,7.7963
27
+ 231,10400,5.4617
28
+ 235,10800,7.5387
29
+ 239,11200,7.7388
30
+ 243,11600,7.2572
31
+ 247,12000,7.8198
32
+ 251,12400,7.7371
33
+ 255,12800,7.8481
34
+ 260,13200,7.7476
35
+ 264,13600,7.8618
36
+ 268,14000,6.7995
37
+ 272,14400,7.8333
38
+ 277,14800,7.2725
39
+ 282,15200,5.7045
40
+ 286,15600,7.8592
41
+ 290,16000,7.9573
42
+ 294,16400,7.996
43
+ 298,16800,8.0243
44
+ 302,17200,7.1519
45
+ 307,17600,7.6693
46
+ 312,18000,6.182
47
+ 317,18400,6.3662
48
+ 321,18800,6.6984
49
+ 326,19200,7.1547
50
+ 332,19600,5.4313
51
+ 337,20000,6.58
52
+ 341,20400,6.4143
53
+ 348,20800,4.6998
54
+ 354,21200,6.0579
55
+ 359,21600,6.3869
56
+ 363,22000,8.4537
57
+ 367,22400,7.6145
58
+ 371,22800,8.7217
59
+ 375,23200,8.0302
60
+ 379,23600,7.855
61
+ 383,24000,7.9192
62
+ 388,24400,7.1253
63
+ 392,24800,7.2569
64
+ 396,25200,8.0458
65
+ 400,25600,8.0878
66
+ 404,26000,7.6454
67
+ 411,26400,5.4292
68
+ 417,26800,6.2442
69
+ 421,27200,7.1395
70
+ 425,27600,8.138
71
+ 429,28000,7.4518
72
+ 433,28400,8.2299
73
+ 437,28800,8.0705
74
+ 443,29200,6.4598
75
+ 448,29600,6.5685
76
+ 453,30000,7.0016
77
+ 457,30400,8.3606
78
+ 461,30800,8.0059
79
+ 466,31200,6.8728
80
+ 470,31600,8.0419
81
+ 475,32000,6.5841
82
+ 479,32400,8.576
83
+ 484,32800,7.5318
84
+ 489,33200,7.0436
85
+ 493,33600,8.4315
86
+ 500,34000,5.9633
87
+ 506,34400,6.3842
88
+ 511,34800,8.348
89
+ 515,35200,11.5177
90
+ 520,35600,8.375
91
+ 525,36000,8.8358
92
+ 530,36400,8.3674
93
+ 536,36800,8.5203
94
+ 541,37200,8.0434
95
+ 548,37600,5.7622
96
+ 552,38000,12.1796
97
+ 556,38400,11.9279
98
+ 562,38800,9.0523
99
+ 566,39200,10.6577
100
+ 571,39600,9.9867
101
+ 575,40000,12.9051
102
+ 580,40400,9.6013
103
+ 584,40800,9.0039
104
+ 588,41200,11.6103
105
+ 593,41600,11.3596
106
+ 599,42000,8.0795
107
+ 604,42400,9.4068
108
+ 608,42800,10.1782
109
+ 613,43200,10.2511
110
+ 618,43600,8.7243
111
+ 627,44000,6.9675
112
+ 633,44400,6.1848
113
+ 638,44800,12.508
114
+ 643,45200,9.435
115
+ 649,45600,10.0267
116
+ 654,46000,12.8219
117
+ 658,46400,14.4656
118
+ 663,46800,12.4416
119
+ 672,47200,7.6444
120
+ 679,47600,6.7948
121
+ 686,48000,10.5707
122
+ 692,48400,11.3461
123
+ 696,48800,15.5451
124
+ 703,49200,7.6138
125
+ 709,49600,14.2849
126
+ 713,50000,17.6588
127
+ 717,50400,16.8401
128
+ 724,50800,9.6536
129
+ 732,51200,7.0814
130
+ 737,51600,15.1825
131
+ 745,52000,9.6236
132
+ 749,52400,16.5895
133
+ 757,52800,8.9825
134
+ 763,53200,13.4571
135
+ 769,53600,12.3041
136
+ 774,54000,13.3599
137
+ 779,54400,16.2315
138
+ 783,54800,18.7432
139
+ 788,55200,14.2708
140
+ 792,55600,19.0263
141
+ 796,56000,19.6853
142
+ 804,56400,8.6649
143
+ 810,56800,13.5299
144
+ 815,57200,15.4356
145
+ 822,57600,8.4714
146
+ 827,58000,14.8482
147
+ 832,58400,13.1313
148
+ 836,58800,17.7723
149
+ 841,59200,15.1604
150
+ 846,59600,16.5815
151
+ 850,60000,18.4684
152
+ 856,60400,11.723
153
+ 860,60800,15.37
154
+ 866,61200,14.931
155
+ 871,61600,11.3223
156
+ 876,62000,11.9169
157
+ 883,62400,12.8174
158
+ 888,62800,13.5601
159
+ 893,63200,15.0672
160
+ 898,63600,11.3614
161
+ 903,64000,15.3905
162
+ 908,64400,13.6269
163
+ 912,64800,17.5681
164
+ 917,65200,13.3706
165
+ 922,65600,13.8827
166
+ 927,66000,13.7525
167
+ 932,66400,15.7249
168
+ 937,66800,12.4379
169
+ 943,67200,12.0458
170
+ 947,67600,18.2474
171
+ 953,68000,11.2208
172
+ 961,68400,5.6857
173
+ 966,68800,11.2325
174
+ 977,69200,6.065
175
+ 981,69600,12.7678
176
+ 986,70000,15.0279
177
+ 993,70400,11.0013
178
+ 1001,70800,6.6314
179
+ 1008,71200,8.9485
180
+ 1015,71600,11.0937
181
+ 1020,72000,13.6421
182
+ 1027,72400,9.3296
183
+ 1033,72800,12.498
184
+ 1037,73200,13.5609
185
+ 1042,73600,15.2572
186
+ 1049,74000,9.7781
187
+ 1059,74400,6.6146
188
+ 1067,74800,8.4938
189
+ 1078,75200,5.6932
190
+ 1085,75600,9.7875
191
+ 1089,76000,14.85
192
+ 1095,76400,11.959
193
+ 1099,76800,18.4955
194
+ 1106,77200,11.1234
195
+ 1110,77600,16.6448
196
+ 1116,78000,14.7683
197
+ 1121,78400,15.4418
198
+ 1126,78800,15.6231
199
+ 1133,79200,8.6739
200
+ 1137,79600,19.0853
201
+ 1141,80000,19.3869
202
+ 1146,80400,15.0416
203
+ 1151,80800,16.4105
204
+ 1155,81200,18.5403
205
+ 1162,81600,10.4387
206
+ 1168,82000,12.8525
207
+ 1173,82400,15.0793
208
+ 1179,82800,11.1054
209
+ 1184,83200,15.8964
210
+ 1189,83600,15.9822
211
+ 1193,84000,20.0662
212
+ 1197,84400,19.8951
213
+ 1203,84800,12.0668
214
+ 1207,85200,19.6185
215
+ 1216,85600,9.3892
216
+ 1221,86000,13.0625
217
+ 1228,86400,10.6035
218
+ 1236,86800,8.5601
219
+ 1243,87200,11.3572
220
+ 1248,87600,12.3989
221
+ 1255,88000,10.0261
222
+ 1261,88400,12.0088
223
+ 1265,88800,18.8742
224
+ 1272,89200,13.0981
225
+ 1281,89600,7.8559
226
+ 1288,90000,11.3191
227
+ 1293,90400,16.281
228
+ 1299,90800,12.2251
229
+ 1306,91200,10.0396
230
+ 1318,91600,7.6743
231
+ 1322,92000,17.0425
232
+ 1328,92400,13.7841
233
+ 1335,92800,11.843
234
+ 1339,93200,18.7906
235
+ 1348,93600,7.6509
236
+ 1355,94000,11.5199
237
+ 1360,94400,15.8789
238
+ 1365,94800,16.2332
239
+ 1372,95200,8.0954
240
+ 1378,95600,12.8351
241
+ 1386,96000,10.1123
242
+ 1391,96400,13.7096
243
+ 1398,96800,9.9747
244
+ 1405,97200,9.7309
245
+ 1413,97600,10.6216
246
+ 1417,98000,19.8107
247
+ 1425,98400,8.7529
248
+ 1430,98800,12.5949
249
+ 1437,99200,10.2796
250
+ 1442,99600,14.1966
251
+ 1448,100000,12.1246
252
+ 1453,100400,14.1483
253
+ 1459,100800,14.6867
254
+ 1466,101200,11.5653
255
+ 1470,101600,19.7992
256
+ 1475,102000,16.2707
257
+ 1480,102400,12.707
258
+ 1490,102800,7.9692
259
+ 1496,103200,14.6578
260
+ 1504,103600,8.9977
261
+ 1512,104000,8.5685
262
+ 1516,104400,18.8262
263
+ 1525,104800,8.1862
264
+ 1532,105200,11.2229
265
+ 1539,105600,10.9358
266
+ 1543,106000,15.9632
267
+ 1551,106400,11.0493
268
+ 1555,106800,15.4412
269
+ 1561,107200,12.9739
270
+ 1568,107600,11.9314
271
+ 1573,108000,15.9377
272
+ 1580,108400,12.2091
273
+ 1586,108800,13.3599
274
+ 1592,109200,10.2974
275
+ 1598,109600,12.8394
276
+ 1605,110000,11.8069
277
+ 1611,110400,15.0917
278
+ 1619,110800,8.6425
279
+ 1626,111200,11.7447
280
+ 1632,111600,11.9684
281
+ 1637,112000,17.0765
282
+ 1643,112400,10.8535
283
+ 1647,112800,20.3134
284
+ 1654,113200,14.2449
285
+ 1658,113600,15.6927
286
+ 1663,114000,16.4525
287
+ 1667,114400,20.7007
288
+ 1675,114800,10.9193
289
+ 1681,115200,13.8018
290
+ 1686,115600,12.168
291
+ 1691,116000,16.1065
292
+ 1696,116400,16.9738
293
+ 1703,116800,12.1336
294
+ 1707,117200,17.4688
295
+ 1711,117600,20.7223
296
+ 1717,118000,12.8775
297
+ 1724,118400,10.7785
298
+ 1729,118800,16.0037
299
+ 1737,119200,9.3322
300
+ 1741,119600,15.5575
301
+ 1746,120000,16.7479
302
+ 1759,120400,4.1965
303
+ 1774,120800,5.0799
304
+ 1784,121200,7.0285
305
+ 1797,121600,6.1574
306
+ 1813,122000,4.1487
307
+ 1824,122400,6.7635
308
+ 1832,122800,10.5592
309
+ 1838,123200,11.4983
310
+ 1850,123600,6.0096
311
+ 1857,124000,13.2751
312
+ 1865,124400,10.6374
313
+ 1872,124800,10.4461
314
+ 1879,125200,10.8001
315
+ 1887,125600,10.5679
316
+ 1893,126000,13.6904
317
+ 1899,126400,13.3654
318
+ 1906,126800,11.1289
319
+ 1911,127200,14.4808
320
+ 1917,127600,14.7797
321
+ 1922,128000,16.3632
322
+ 1927,128400,14.8871
323
+ 1931,128800,18.4437
324
+ 1936,129200,13.8601
325
+ 1941,129600,19.323
326
+ 1948,130000,11.3219
327
+ 1957,130400,6.0778
328
+ 1963,130800,13.8097
329
+ 1968,131200,17.4356
330
+ 1972,131600,15.6235
331
+ 1978,132000,13.9724
332
+ 1983,132400,16.7818
333
+ 1990,132800,11.7503
334
+ 1995,133200,16.7384
335
+ 2000,133600,11.0031
336
+ 2007,134000,13.208
337
+ 2013,134400,12.1374
338
+ 2018,134800,10.6665
339
+ 2022,135200,20.4056
340
+ 2027,135600,13.8923
341
+ 2033,136000,14.9732
342
+ 2039,136400,11.7272
343
+ 2049,136800,5.9728
344
+ 2055,137200,13.4777
345
+ 2061,137600,12.7418
346
+ 2070,138000,7.9501
347
+ 2076,138400,9.8375
348
+ 2085,138800,8.8678
349
+ 2096,139200,6.0538
350
+ 2104,139600,7.5676
351
+ 2112,140000,10.4544
352
+ 2126,140400,3.3254
353
+ 2135,140800,8.5089
354
+ 2147,141200,5.0491
355
+ 2155,141600,8.8535
356
+ 2165,142000,4.7412
357
+ 2174,142400,6.3371
358
+ 2180,142800,13.6134
359
+ 2189,143200,7.7075
360
+ 2202,143600,4.1581
361
+ 2206,144000,15.2914
362
+ 2217,144400,7.21
363
+ 2222,144800,14.1197
364
+ 2229,145200,12.3017
365
+ 2238,145600,7.5941
366
+ 2245,146000,8.9586
367
+ 2252,146400,11.0808
368
+ 2257,146800,14.2912
369
+ 2263,147200,13.1251
370
+ 2269,147600,12.861
371
+ 2274,148000,12.1163
372
+ 2279,148400,16.2446
373
+ 2284,148800,15.9297
374
+ 2288,149200,14.7428
375
+ 2295,149600,10.4804
376
+ 2302,150000,12.5935
377
+ 2308,150400,10.2608
378
+ 2315,150800,11.9565
379
+ 2321,151200,10.1254
380
+ 2325,151600,17.2792
381
+ 2332,152000,10.5685
382
+ 2343,152400,7.1165
383
+ 2355,152800,5.5324
384
+ 2362,153200,10.2419
385
+ 2367,153600,17.1911
386
+ 2372,154000,16.5306
387
+ 2377,154400,13.0943
388
+ 2383,154800,17.1094
389
+ 2390,155200,11.8845
390
+ 2394,155600,21.3396
391
+ 2400,156000,12.3499
392
+ 2409,156400,7.6367
393
+ 2414,156800,19.1307
394
+ 2418,157200,21.5015
395
+ 2424,157600,14.4416
396
+ 2431,158000,11.1522
397
+ 2438,158400,11.1949
398
+ 2450,158800,7.2224
399
+ 2457,159200,11.6111
400
+ 2468,159600,5.0266
401
+ 2476,160000,10.68
402
+ 2487,160400,5.6073
403
+ 2493,160800,13.4424
404
+ 2503,161200,6.8739
405
+ 2512,161600,7.4788
406
+ 2520,162000,10.2704
407
+ 2531,162400,6.3896
408
+ 2537,162800,10.6474
409
+ 2542,163200,14.1442
410
+ 2555,163600,5.9226
411
+ 2566,164000,6.0377
412
+ 2575,164400,7.4593
413
+ 2588,164800,4.9726
414
+ 2594,165200,10.6298
415
+ 2599,165600,12.0844
416
+ 2616,166000,3.8811
417
+ 2626,166400,4.2739
418
+ 2634,166800,9.1184
419
+ 2641,167200,8.623
420
+ 2647,167600,12.8333
421
+ 2657,168000,5.8736
422
+ 2663,168400,7.3345
423
+ 2671,168800,6.602
424
+ 2680,169200,7.7097
425
+ 2692,169600,3.3408
426
+ 2698,170000,12.0658
427
+ 2710,170400,4.496
428
+ 2719,170800,7.4669
429
+ 2729,171200,6.8099
430
+ 2738,171600,7.104
431
+ 2744,172000,11.9163
432
+ 2751,172400,11.3711
433
+ 2757,172800,10.847
434
+ 2765,173200,7.2486
435
+ 2774,173600,6.5974
436
+ 2783,174000,9.2593
437
+ 2790,174400,10.1808
438
+ 2797,174800,7.8738
439
+ 2805,175200,8.1315
440
+ 2811,175600,12.4611
441
+ 2817,176000,11.2274
442
+ 2824,176400,9.6997
443
+ 2829,176800,16.383
444
+ 2835,177200,9.7906
445
+ 2841,177600,9.8226
446
+ 2850,178000,8.9189
447
+ 2856,178400,8.719
448
+ 2862,178800,14.3174
449
+ 2868,179200,9.976
450
+ 2873,179600,16.595
451
+ 2879,180000,11.7942
452
+ 2886,180400,8.4306
453
+ 2890,180800,19.6647
454
+ 2895,181200,16.7307
455
+ 2899,181600,16.9061
456
+ 2907,182000,8.0865
457
+ 2912,182400,16.0249
458
+ 2917,182800,12.6253
459
+ 2924,183200,10.9561
460
+ 2930,183600,9.5365
461
+ 2935,184000,11.5344
462
+ 2943,184400,7.9225
463
+ 2949,184800,11.1286
464
+ 2959,185200,7.7913
465
+ 2964,185600,14.7933
466
+ 2972,186000,7.3943
467
+ 2979,186400,10.7152
468
+ 2986,186800,8.1646
469
+ 2992,187200,11.7603
470
+ 2997,187600,13.334
471
+ 3003,188000,12.5786
472
+ 3009,188400,12.2962
473
+ 3015,188800,14.3177
474
+ 3019,189200,15.0194
475
+ 3024,189600,19.2136
476
+ 3029,190000,15.3857
477
+ 3035,190400,9.9101
478
+ 3043,190800,10.0892
479
+ 3047,191200,17.6767
480
+ 3051,191600,19.3897
481
+ 3056,192000,15.6982
482
+ 3060,192400,19.0069
483
+ 3065,192800,17.3691
484
+ 3070,193200,13.5637
485
+ 3075,193600,14.7953
486
+ 3080,194000,16.2313
487
+ 3086,194400,13.6929
488
+ 3091,194800,17.0558
489
+ 3095,195200,17.121
490
+ 3100,195600,14.9038
491
+ 3108,196000,9.5739
492
+ 3112,196400,16.1362
493
+ 3118,196800,9.8704
494
+ 3127,197200,8.0936
495
+ 3132,197600,17.4588
496
+ 3137,198000,12.1721
497
+ 3143,198400,9.6148
498
+ 3147,198800,18.6538
499
+ 3151,199200,19.5497
500
+ 3156,199600,15.3876
501
+ 3163,200000,10.7067
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_4.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 20,400,1.5088
3
+ 42,800,1.3079
4
+ 65,1200,1.3319
5
+ 88,1600,1.3245
6
+ 101,2000,2.2609
7
+ 116,2400,2.0004
8
+ 122,2800,4.3646
9
+ 130,3200,4.0763
10
+ 138,3600,3.5934
11
+ 147,4000,3.15
12
+ 158,4400,3.0842
13
+ 164,4800,4.0592
14
+ 171,5200,4.5173
15
+ 177,5600,4.8523
16
+ 183,6000,4.215
17
+ 190,6400,5.0604
18
+ 197,6800,4.5393
19
+ 201,7200,7.2405
20
+ 207,7600,4.3459
21
+ 213,8000,4.605
22
+ 220,8400,3.9493
23
+ 227,8800,4.0154
24
+ 233,9200,5.1817
25
+ 237,9600,7.2706
26
+ 241,10000,6.1275
27
+ 246,10400,6.517
28
+ 251,10800,6.4718
29
+ 256,11200,5.3332
30
+ 263,11600,4.8526
31
+ 267,12000,7.5372
32
+ 273,12400,4.4563
33
+ 278,12800,6.2474
34
+ 283,13200,5.1259
35
+ 288,13600,6.6405
36
+ 293,14000,5.4922
37
+ 297,14400,7.3697
38
+ 301,14800,7.46
39
+ 306,15200,6.8119
40
+ 310,15600,7.5148
41
+ 314,16000,7.566
42
+ 318,16400,5.6766
43
+ 324,16800,5.7109
44
+ 328,17200,6.9979
45
+ 333,17600,6.6446
46
+ 337,18000,7.1488
47
+ 342,18400,6.612
48
+ 348,18800,5.6986
49
+ 352,19200,7.4129
50
+ 357,19600,6.3231
51
+ 364,20000,4.8872
52
+ 369,20400,6.2171
53
+ 373,20800,7.6549
54
+ 379,21200,5.0764
55
+ 386,21600,3.8937
56
+ 392,22000,4.8971
57
+ 396,22400,7.7155
58
+ 401,22800,6.522
59
+ 405,23200,6.6372
60
+ 410,23600,7.358
61
+ 417,24000,4.7262
62
+ 423,24400,4.3083
63
+ 428,24800,6.6989
64
+ 432,25200,7.4748
65
+ 437,25600,6.8524
66
+ 441,26000,7.4783
67
+ 447,26400,5.58
68
+ 451,26800,7.5209
69
+ 457,27200,5.4526
70
+ 461,27600,7.2869
71
+ 466,28000,6.9935
72
+ 470,28400,8.5876
73
+ 475,28800,6.8761
74
+ 480,29200,5.9352
75
+ 486,29600,6.2676
76
+ 490,30000,7.6652
77
+ 494,30400,8.3381
78
+ 499,30800,9.2616
79
+ 504,31200,6.5226
80
+ 509,31600,6.8202
81
+ 513,32000,8.7099
82
+ 519,32400,7.2073
83
+ 523,32800,9.4631
84
+ 530,33200,5.9642
85
+ 534,33600,9.2221
86
+ 539,34000,7.8802
87
+ 545,34400,7.4494
88
+ 549,34800,10.2656
89
+ 554,35200,8.5635
90
+ 559,35600,8.5277
91
+ 564,36000,9.2301
92
+ 570,36400,7.3293
93
+ 577,36800,5.9939
94
+ 583,37200,8.0029
95
+ 590,37600,6.5089
96
+ 595,38000,9.9265
97
+ 600,38400,7.7368
98
+ 605,38800,10.0703
99
+ 610,39200,9.405
100
+ 615,39600,9.2747
101
+ 620,40000,10.6746
102
+ 625,40400,10.8504
103
+ 629,40800,12.3268
104
+ 634,41200,7.9002
105
+ 638,41600,12.2213
106
+ 643,42000,10.5923
107
+ 649,42400,8.3824
108
+ 653,42800,12.3659
109
+ 659,43200,7.8569
110
+ 664,43600,7.7947
111
+ 668,44000,11.9043
112
+ 673,44400,11.3176
113
+ 677,44800,9.5839
114
+ 683,45200,8.7337
115
+ 687,45600,11.8954
116
+ 691,46000,10.9464
117
+ 697,46400,9.0987
118
+ 701,46800,11.5694
119
+ 706,47200,10.0594
120
+ 711,47600,8.6043
121
+ 715,48000,11.3733
122
+ 721,48400,7.4597
123
+ 727,48800,8.6735
124
+ 733,49200,9.2683
125
+ 737,49600,12.2719
126
+ 743,50000,8.5308
127
+ 748,50400,7.8866
128
+ 754,50800,9.0977
129
+ 758,51200,10.5799
130
+ 763,51600,10.2186
131
+ 769,52000,9.4147
132
+ 774,52400,9.8676
133
+ 779,52800,9.8646
134
+ 783,53200,11.9396
135
+ 788,53600,8.3464
136
+ 792,54000,10.8994
137
+ 800,54400,7.1043
138
+ 804,54800,11.0494
139
+ 810,55200,7.2492
140
+ 816,55600,9.008
141
+ 820,56000,11.969
142
+ 824,56400,12.3475
143
+ 830,56800,8.1093
144
+ 836,57200,8.6649
145
+ 841,57600,10.3044
146
+ 848,58000,6.3358
147
+ 853,58400,8.6059
148
+ 858,58800,10.5647
149
+ 862,59200,11.937
150
+ 867,59600,10.6032
151
+ 871,60000,12.1926
152
+ 879,60400,6.4127
153
+ 887,60800,6.1397
154
+ 892,61200,9.5413
155
+ 897,61600,9.9852
156
+ 905,62000,6.0649
157
+ 909,62400,12.114
158
+ 913,62800,10.7127
159
+ 919,63200,7.5118
160
+ 928,63600,5.1292
161
+ 933,64000,9.7503
162
+ 938,64400,11.1308
163
+ 943,64800,9.8459
164
+ 949,65200,9.6871
165
+ 955,65600,9.5914
166
+ 962,66000,6.487
167
+ 970,66400,5.1927
168
+ 980,66800,5.9668
169
+ 984,67200,13.1207
170
+ 988,67600,13.5101
171
+ 995,68000,7.9993
172
+ 1001,68400,7.3224
173
+ 1006,68800,11.6941
174
+ 1011,69200,8.4031
175
+ 1015,69600,14.3545
176
+ 1024,70000,6.3113
177
+ 1029,70400,12.9317
178
+ 1034,70800,12.4254
179
+ 1040,71200,8.7256
180
+ 1045,71600,12.1083
181
+ 1049,72000,14.6145
182
+ 1056,72400,8.6096
183
+ 1062,72800,10.0664
184
+ 1066,73200,14.5826
185
+ 1071,73600,12.4316
186
+ 1076,74000,11.2348
187
+ 1081,74400,11.6494
188
+ 1086,74800,8.9371
189
+ 1092,75200,11.5723
190
+ 1096,75600,14.9071
191
+ 1100,76000,13.8022
192
+ 1104,76400,12.762
193
+ 1109,76800,12.5704
194
+ 1114,77200,13.2544
195
+ 1121,77600,7.8204
196
+ 1127,78000,11.0577
197
+ 1132,78400,10.2288
198
+ 1137,78800,12.0947
199
+ 1143,79200,10.4587
200
+ 1150,79600,8.3463
201
+ 1154,80000,15.744
202
+ 1159,80400,13.1713
203
+ 1166,80800,7.4983
204
+ 1173,81200,9.3079
205
+ 1177,81600,14.3302
206
+ 1183,82000,10.1948
207
+ 1187,82400,15.5844
208
+ 1193,82800,10.8247
209
+ 1198,83200,9.861
210
+ 1203,83600,13.0967
211
+ 1208,84000,14.2775
212
+ 1212,84400,15.9856
213
+ 1220,84800,7.0939
214
+ 1224,85200,13.172
215
+ 1229,85600,14.9623
216
+ 1234,86000,10.7564
217
+ 1240,86400,12.0071
218
+ 1245,86800,12.1013
219
+ 1250,87200,11.8996
220
+ 1254,87600,14.3552
221
+ 1258,88000,15.6836
222
+ 1263,88400,14.0425
223
+ 1267,88800,16.2951
224
+ 1274,89200,8.0649
225
+ 1279,89600,12.0771
226
+ 1287,90000,8.9504
227
+ 1293,90400,9.2019
228
+ 1298,90800,13.5403
229
+ 1302,91200,16.2009
230
+ 1306,91600,15.8977
231
+ 1311,92000,12.6036
232
+ 1318,92400,9.4293
233
+ 1324,92800,11.8471
234
+ 1329,93200,9.6477
235
+ 1334,93600,13.0578
236
+ 1339,94000,12.0729
237
+ 1346,94400,9.9596
238
+ 1350,94800,16.223
239
+ 1355,95200,13.1369
240
+ 1364,95600,7.2675
241
+ 1369,96000,11.9901
242
+ 1377,96400,8.69
243
+ 1382,96800,12.3589
244
+ 1389,97200,8.9372
245
+ 1400,97600,5.9625
246
+ 1405,98000,9.6908
247
+ 1412,98400,8.4351
248
+ 1418,98800,12.2907
249
+ 1426,99200,7.9709
250
+ 1432,99600,8.709
251
+ 1437,100000,12.9995
252
+ 1442,100400,12.5061
253
+ 1448,100800,11.813
254
+ 1453,101200,10.3663
255
+ 1459,101600,11.3195
256
+ 1471,102000,4.8996
257
+ 1482,102400,5.2443
258
+ 1492,102800,5.7095
259
+ 1498,103200,10.861
260
+ 1505,103600,8.0669
261
+ 1511,104000,11.5714
262
+ 1516,104400,11.8947
263
+ 1522,104800,13.1965
264
+ 1527,105200,11.7688
265
+ 1532,105600,14.6079
266
+ 1537,106000,12.5179
267
+ 1545,106400,6.0034
268
+ 1550,106800,13.8795
269
+ 1555,107200,12.5438
270
+ 1560,107600,12.3526
271
+ 1565,108000,14.0173
272
+ 1572,108400,6.5151
273
+ 1578,108800,11.821
274
+ 1584,109200,10.7337
275
+ 1589,109600,12.987
276
+ 1593,110000,15.0823
277
+ 1598,110400,13.6488
278
+ 1603,110800,15.9419
279
+ 1607,111200,17.6469
280
+ 1613,111600,11.8659
281
+ 1620,112000,8.6666
282
+ 1626,112400,8.0076
283
+ 1634,112800,8.2332
284
+ 1639,113200,11.4569
285
+ 1645,113600,7.8428
286
+ 1651,114000,9.6165
287
+ 1659,114400,7.2423
288
+ 1665,114800,10.0212
289
+ 1670,115200,15.7796
290
+ 1675,115600,13.3568
291
+ 1681,116000,11.9107
292
+ 1686,116400,16.4987
293
+ 1690,116800,17.6908
294
+ 1695,117200,13.7199
295
+ 1700,117600,13.2496
296
+ 1704,118000,18.4471
297
+ 1709,118400,12.1934
298
+ 1714,118800,16.5036
299
+ 1720,119200,13.0416
300
+ 1726,119600,9.5647
301
+ 1734,120000,9.7985
302
+ 1742,120400,7.6858
303
+ 1747,120800,11.444
304
+ 1751,121200,17.1469
305
+ 1758,121600,11.2027
306
+ 1764,122000,11.57
307
+ 1771,122400,11.0437
308
+ 1776,122800,12.1007
309
+ 1788,123200,7.5098
310
+ 1793,123600,15.3339
311
+ 1798,124000,11.8126
312
+ 1805,124400,11.7518
313
+ 1810,124800,15.431
314
+ 1817,125200,9.2215
315
+ 1822,125600,16.9219
316
+ 1827,126000,13.1771
317
+ 1832,126400,19.0296
318
+ 1838,126800,12.1479
319
+ 1842,127200,20.0777
320
+ 1847,127600,12.5869
321
+ 1857,128000,7.5145
322
+ 1862,128400,17.6462
323
+ 1871,128800,5.5968
324
+ 1880,129200,7.7816
325
+ 1890,129600,7.5515
326
+ 1897,130000,9.2213
327
+ 1904,130400,12.0633
328
+ 1913,130800,9.4187
329
+ 1923,131200,6.6914
330
+ 1933,131600,6.9577
331
+ 1938,132000,15.6559
332
+ 1943,132400,15.711
333
+ 1948,132800,15.8596
334
+ 1955,133200,10.39
335
+ 1961,133600,13.9018
336
+ 1968,134000,10.9876
337
+ 1975,134400,8.5861
338
+ 1983,134800,9.3426
339
+ 1988,135200,16.0291
340
+ 1996,135600,10.6449
341
+ 2000,136000,15.9081
342
+ 2008,136400,9.7453
343
+ 2014,136800,14.5408
344
+ 2019,137200,13.6499
345
+ 2023,137600,17.35
346
+ 2030,138000,11.9688
347
+ 2038,138400,9.7757
348
+ 2043,138800,15.0422
349
+ 2049,139200,14.1278
350
+ 2054,139600,11.3104
351
+ 2060,140000,10.5947
352
+ 2065,140400,14.9903
353
+ 2071,140800,12.8763
354
+ 2076,141200,12.2992
355
+ 2085,141600,8.4617
356
+ 2091,142000,12.2624
357
+ 2099,142400,10.5275
358
+ 2114,142800,3.2717
359
+ 2125,143200,4.7307
360
+ 2136,143600,8.0355
361
+ 2147,144000,6.7636
362
+ 2154,144400,8.4627
363
+ 2164,144800,8.6816
364
+ 2173,145200,7.4853
365
+ 2179,145600,9.5195
366
+ 2186,146000,10.8791
367
+ 2193,146400,12.8798
368
+ 2198,146800,14.4826
369
+ 2204,147200,13.2357
370
+ 2210,147600,14.1333
371
+ 2216,148000,12.9477
372
+ 2221,148400,14.1298
373
+ 2229,148800,10.4178
374
+ 2234,149200,15.747
375
+ 2239,149600,13.7127
376
+ 2245,150000,13.5562
377
+ 2252,150400,10.7229
378
+ 2258,150800,13.8351
379
+ 2263,151200,16.2056
380
+ 2268,151600,15.9527
381
+ 2275,152000,9.0862
382
+ 2279,152400,19.7776
383
+ 2285,152800,14.6447
384
+ 2293,153200,10.0589
385
+ 2298,153600,15.9818
386
+ 2308,154000,7.2806
387
+ 2315,154400,11.1871
388
+ 2320,154800,16.5332
389
+ 2326,155200,11.4502
390
+ 2332,155600,11.2749
391
+ 2337,156000,18.7918
392
+ 2345,156400,9.0297
393
+ 2351,156800,13.5346
394
+ 2355,157200,20.3685
395
+ 2359,157600,17.5759
396
+ 2363,158000,20.9134
397
+ 2372,158400,8.7563
398
+ 2377,158800,16.3223
399
+ 2383,159200,12.3571
400
+ 2388,159600,14.8996
401
+ 2393,160000,16.1612
402
+ 2400,160400,10.695
403
+ 2404,160800,15.8143
404
+ 2409,161200,14.1756
405
+ 2417,161600,10.3059
406
+ 2421,162000,15.7669
407
+ 2426,162400,16.7237
408
+ 2432,162800,13.186
409
+ 2436,163200,15.9777
410
+ 2442,163600,15.1247
411
+ 2451,164000,6.9288
412
+ 2455,164400,19.8019
413
+ 2459,164800,16.5844
414
+ 2463,165200,19.4641
415
+ 2469,165600,12.5474
416
+ 2475,166000,12.4718
417
+ 2482,166400,9.2596
418
+ 2492,166800,7.4804
419
+ 2499,167200,9.2925
420
+ 2506,167600,9.6121
421
+ 2512,168000,11.8263
422
+ 2519,168400,11.1036
423
+ 2524,168800,11.8451
424
+ 2530,169200,12.3773
425
+ 2535,169600,16.2471
426
+ 2541,170000,11.3649
427
+ 2545,170400,15.6326
428
+ 2550,170800,16.8937
429
+ 2554,171200,19.5632
430
+ 2558,171600,17.4009
431
+ 2564,172000,15.0096
432
+ 2569,172400,12.1716
433
+ 2574,172800,16.2656
434
+ 2579,173200,12.1121
435
+ 2584,173600,15.5555
436
+ 2590,174000,14.2854
437
+ 2597,174400,11.301
438
+ 2601,174800,17.2093
439
+ 2606,175200,15.6553
440
+ 2611,175600,13.2231
441
+ 2618,176000,12.4886
442
+ 2623,176400,11.6754
443
+ 2627,176800,19.8814
444
+ 2633,177200,15.667
445
+ 2637,177600,17.0859
446
+ 2642,178000,14.0034
447
+ 2650,178400,9.8961
448
+ 2654,178800,19.2615
449
+ 2663,179200,8.5405
450
+ 2670,179600,9.0917
451
+ 2684,180000,3.9164
452
+ 2690,180400,12.0612
453
+ 2697,180800,10.1366
454
+ 2707,181200,7.161
455
+ 2713,181600,10.5804
456
+ 2719,182000,11.7124
457
+ 2725,182400,12.7422
458
+ 2735,182800,5.839
459
+ 2743,183200,9.3809
460
+ 2748,183600,14.0109
461
+ 2754,184000,12.5838
462
+ 2772,184400,2.7956
463
+ 2782,184800,5.9967
464
+ 2790,185200,6.7712
465
+ 2799,185600,8.4177
466
+ 2805,186000,11.2619
467
+ 2818,186400,4.5222
468
+ 2827,186800,7.9052
469
+ 2834,187200,8.1854
470
+ 2845,187600,5.2657
471
+ 2852,188000,9.6327
472
+ 2862,188400,5.4335
473
+ 2870,188800,9.4833
474
+ 2876,189200,10.3303
475
+ 2881,189600,14.843
476
+ 2891,190000,6.7218
477
+ 2896,190400,13.4077
478
+ 2902,190800,12.8647
479
+ 2908,191200,9.9087
480
+ 2913,191600,16.5216
481
+ 2920,192000,9.6137
482
+ 2924,192400,15.5697
483
+ 2932,192800,8.3219
484
+ 2942,193200,4.7098
485
+ 2949,193600,11.8933
486
+ 2954,194000,14.2076
487
+ 2961,194400,10.3591
488
+ 2966,194800,12.7551
489
+ 2972,195200,12.0142
490
+ 2978,195600,11.9555
491
+ 2982,196000,17.2151
492
+ 2988,196400,11.6288
493
+ 2994,196800,11.5739
494
+ 2998,197200,15.9745
495
+ 3005,197600,10.5003
496
+ 3010,198000,12.1237
497
+ 3017,198400,11.9958
498
+ 3022,198800,13.6748
499
+ 3031,199200,8.644
500
+ 3036,199600,15.9198
501
+ 3043,200000,11.7405
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_5.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 30,400,1.0363
3
+ 55,800,1.1656
4
+ 74,1200,1.2812
5
+ 87,1600,2.8849
6
+ 101,2000,2.0512
7
+ 109,2400,4.4421
8
+ 120,2800,2.7307
9
+ 130,3200,2.9113
10
+ 136,3600,4.8513
11
+ 143,4000,4.6759
12
+ 147,4400,6.8206
13
+ 156,4800,3.5122
14
+ 160,5200,7.4682
15
+ 167,5600,4.2985
16
+ 173,6000,4.6158
17
+ 178,6400,6.3235
18
+ 182,6800,5.9121
19
+ 187,7200,6.0524
20
+ 192,7600,6.2336
21
+ 196,8000,6.7958
22
+ 201,8400,5.8009
23
+ 205,8800,7.3525
24
+ 209,9200,7.4518
25
+ 214,9600,6.8613
26
+ 220,10000,4.8876
27
+ 224,10400,7.4277
28
+ 228,10800,6.8252
29
+ 232,11200,7.0368
30
+ 236,11600,7.4591
31
+ 241,12000,6.5145
32
+ 245,12400,7.3888
33
+ 250,12800,5.1931
34
+ 255,13200,6.1031
35
+ 259,13600,7.5133
36
+ 264,14000,6.6363
37
+ 268,14400,7.6229
38
+ 273,14800,6.0136
39
+ 277,15200,8.2029
40
+ 282,15600,6.2847
41
+ 287,16000,6.6472
42
+ 291,16400,7.3617
43
+ 295,16800,8.1746
44
+ 299,17200,6.8719
45
+ 304,17600,6.1261
46
+ 309,18000,7.0801
47
+ 313,18400,7.6909
48
+ 317,18800,7.4924
49
+ 321,19200,7.4629
50
+ 327,19600,5.5724
51
+ 333,20000,4.8738
52
+ 337,20400,7.1132
53
+ 342,20800,6.5451
54
+ 347,21200,5.6767
55
+ 351,21600,7.6065
56
+ 355,22000,7.5147
57
+ 360,22400,7.0767
58
+ 364,22800,7.5414
59
+ 368,23200,7.7461
60
+ 373,23600,5.5045
61
+ 377,24000,6.8692
62
+ 382,24400,6.7735
63
+ 386,24800,7.1926
64
+ 391,25200,6.3948
65
+ 395,25600,6.506
66
+ 400,26000,6.8513
67
+ 404,26400,6.7321
68
+ 410,26800,5.3182
69
+ 415,27200,6.3436
70
+ 424,27600,3.9625
71
+ 433,28000,4.0086
72
+ 438,28400,7.455
73
+ 448,28800,4.1049
74
+ 455,29200,4.8118
75
+ 460,29600,8.8025
76
+ 468,30000,5.2449
77
+ 474,30400,6.9961
78
+ 479,30800,9.5919
79
+ 486,31200,4.9522
80
+ 494,31600,6.2399
81
+ 501,32000,6.2058
82
+ 507,32400,7.817
83
+ 512,32800,10.4552
84
+ 519,33200,6.8586
85
+ 528,33600,5.2227
86
+ 532,34000,11.0335
87
+ 537,34400,9.8253
88
+ 548,34800,4.943
89
+ 552,35200,12.3006
90
+ 557,35600,8.6328
91
+ 565,36000,6.8881
92
+ 573,36400,6.9154
93
+ 578,36800,9.7087
94
+ 583,37200,11.4344
95
+ 588,37600,8.4428
96
+ 594,38000,9.4412
97
+ 598,38400,13.0931
98
+ 602,38800,13.7591
99
+ 608,39200,9.2528
100
+ 616,39600,5.39
101
+ 622,40000,11.192
102
+ 628,40400,9.3021
103
+ 635,40800,6.3386
104
+ 640,41200,13.3335
105
+ 645,41600,10.7252
106
+ 650,42000,9.8878
107
+ 657,42400,7.6309
108
+ 662,42800,13.1219
109
+ 667,43200,11.3305
110
+ 674,43600,8.0326
111
+ 682,44000,7.1185
112
+ 686,44400,11.0479
113
+ 691,44800,12.8963
114
+ 696,45200,12.0313
115
+ 700,45600,11.8897
116
+ 709,46000,7.0573
117
+ 714,46400,10.6651
118
+ 720,46800,10.3476
119
+ 727,47200,10.3755
120
+ 731,47600,13.4045
121
+ 738,48000,9.1209
122
+ 742,48400,13.8504
123
+ 749,48800,9.6356
124
+ 755,49200,9.325
125
+ 761,49600,10.3474
126
+ 765,50000,14.3104
127
+ 770,50400,12.0887
128
+ 774,50800,15.2546
129
+ 779,51200,12.14
130
+ 784,51600,12.2329
131
+ 788,52000,15.0146
132
+ 793,52400,10.5181
133
+ 799,52800,10.1783
134
+ 805,53200,8.4057
135
+ 812,53600,8.7315
136
+ 818,54000,11.7951
137
+ 822,54400,15.2027
138
+ 827,54800,11.7442
139
+ 831,55200,13.5781
140
+ 836,55600,12.2977
141
+ 842,56000,10.4294
142
+ 847,56400,10.9495
143
+ 853,56800,10.9578
144
+ 859,57200,11.0167
145
+ 865,57600,9.7521
146
+ 869,58000,15.5817
147
+ 875,58400,9.9763
148
+ 880,58800,13.0464
149
+ 884,59200,13.8061
150
+ 889,59600,12.9356
151
+ 896,60000,10.3463
152
+ 901,60400,10.6919
153
+ 908,60800,8.6734
154
+ 914,61200,12.4219
155
+ 919,61600,10.0202
156
+ 924,62000,15.9255
157
+ 929,62400,11.2685
158
+ 935,62800,10.8452
159
+ 940,63200,11.7721
160
+ 946,63600,11.407
161
+ 952,64000,12.8059
162
+ 958,64400,8.4392
163
+ 964,64800,11.9307
164
+ 969,65200,15.1005
165
+ 974,65600,13.4469
166
+ 984,66000,7.6713
167
+ 988,66400,14.245
168
+ 995,66800,9.1666
169
+ 1000,67200,15.6424
170
+ 1005,67600,16.3647
171
+ 1011,68000,15.1433
172
+ 1017,68400,12.4149
173
+ 1022,68800,13.4503
174
+ 1030,69200,10.6481
175
+ 1039,69600,8.0946
176
+ 1046,70000,7.7302
177
+ 1051,70400,16.2667
178
+ 1056,70800,17.1896
179
+ 1062,71200,12.1892
180
+ 1067,71600,15.6327
181
+ 1074,72000,10.8082
182
+ 1078,72400,20.0766
183
+ 1086,72800,9.649
184
+ 1092,73200,11.0206
185
+ 1098,73600,11.8751
186
+ 1107,74000,8.5445
187
+ 1111,74400,16.1052
188
+ 1118,74800,11.2793
189
+ 1122,75200,17.9738
190
+ 1129,75600,11.9216
191
+ 1135,76000,15.1108
192
+ 1141,76400,10.5198
193
+ 1147,76800,16.3762
194
+ 1151,77200,18.503
195
+ 1160,77600,8.741
196
+ 1167,78000,9.7747
197
+ 1173,78400,13.8605
198
+ 1180,78800,9.6619
199
+ 1185,79200,15.3712
200
+ 1190,79600,16.7442
201
+ 1196,80000,11.6345
202
+ 1202,80400,14.0943
203
+ 1207,80800,14.2296
204
+ 1212,81200,16.3014
205
+ 1220,81600,9.8765
206
+ 1228,82000,8.7464
207
+ 1235,82400,10.1393
208
+ 1240,82800,16.1224
209
+ 1246,83200,15.2376
210
+ 1252,83600,10.7445
211
+ 1259,84000,10.0961
212
+ 1264,84400,15.7153
213
+ 1271,84800,13.0846
214
+ 1280,85200,7.0547
215
+ 1285,85600,15.492
216
+ 1292,86000,9.3012
217
+ 1297,86400,12.5845
218
+ 1303,86800,12.9457
219
+ 1308,87200,15.8783
220
+ 1314,87600,12.6196
221
+ 1319,88000,13.5926
222
+ 1325,88400,9.8846
223
+ 1329,88800,19.8844
224
+ 1336,89200,11.9627
225
+ 1340,89600,19.6276
226
+ 1345,90000,11.4204
227
+ 1355,90400,9.3768
228
+ 1360,90800,13.957
229
+ 1365,91200,15.7235
230
+ 1372,91600,10.9691
231
+ 1376,92000,17.8047
232
+ 1380,92400,19.6958
233
+ 1385,92800,19.2327
234
+ 1391,93200,9.4353
235
+ 1400,93600,8.9945
236
+ 1407,94000,9.5239
237
+ 1416,94400,9.7381
238
+ 1422,94800,12.1216
239
+ 1429,95200,11.5348
240
+ 1437,95600,6.6356
241
+ 1447,96000,8.2667
242
+ 1456,96400,5.5346
243
+ 1463,96800,13.0559
244
+ 1471,97200,8.4067
245
+ 1483,97600,5.107
246
+ 1494,98000,6.8124
247
+ 1501,98400,8.9277
248
+ 1515,98800,5.11
249
+ 1524,99200,8.4644
250
+ 1538,99600,4.2993
251
+ 1547,100000,6.5226
252
+ 1552,100400,16.8617
253
+ 1559,100800,10.7992
254
+ 1569,101200,7.4671
255
+ 1577,101600,8.0795
256
+ 1586,102000,8.0661
257
+ 1595,102400,7.4557
258
+ 1602,102800,9.2675
259
+ 1612,103200,7.525
260
+ 1621,103600,8.0409
261
+ 1628,104000,10.8619
262
+ 1638,104400,7.0936
263
+ 1648,104800,6.5034
264
+ 1656,105200,7.5528
265
+ 1666,105600,7.0664
266
+ 1674,106000,9.2449
267
+ 1684,106400,6.3384
268
+ 1695,106800,4.2875
269
+ 1701,107200,15.9626
270
+ 1710,107600,8.014
271
+ 1720,108000,7.4489
272
+ 1729,108400,8.3794
273
+ 1737,108800,8.7056
274
+ 1745,109200,7.9263
275
+ 1751,109600,11.078
276
+ 1758,110000,12.3928
277
+ 1763,110400,12.4602
278
+ 1769,110800,16.5946
279
+ 1775,111200,12.9342
280
+ 1780,111600,15.1111
281
+ 1785,112000,13.7242
282
+ 1791,112400,14.9992
283
+ 1797,112800,11.5528
284
+ 1802,113200,16.7522
285
+ 1806,113600,14.0396
286
+ 1813,114000,12.6857
287
+ 1817,114400,16.7741
288
+ 1822,114800,12.9523
289
+ 1827,115200,16.5158
290
+ 1832,115600,17.1408
291
+ 1839,116000,9.8454
292
+ 1843,116400,17.8182
293
+ 1849,116800,11.2671
294
+ 1855,117200,14.7182
295
+ 1860,117600,14.9587
296
+ 1867,118000,8.7827
297
+ 1874,118400,11.6531
298
+ 1879,118800,12.538
299
+ 1887,119200,9.8758
300
+ 1892,119600,14.5453
301
+ 1897,120000,14.467
302
+ 1904,120400,12.7871
303
+ 1909,120800,14.9706
304
+ 1914,121200,13.51
305
+ 1921,121600,12.2767
306
+ 1926,122000,16.5016
307
+ 1930,122400,20.9919
308
+ 1934,122800,18.5356
309
+ 1939,123200,11.7085
310
+ 1945,123600,14.2539
311
+ 1955,124000,7.2888
312
+ 1965,124400,6.2359
313
+ 1974,124800,5.9543
314
+ 1989,125200,3.2663
315
+ 2001,125600,4.2557
316
+ 2015,126000,5.6426
317
+ 2032,126400,2.605
318
+ 2046,126800,4.8766
319
+ 2059,127200,4.8677
320
+ 2070,127600,5.7594
321
+ 2081,128000,5.4336
322
+ 2092,128400,5.9113
323
+ 2103,128800,4.5887
324
+ 2114,129200,4.1911
325
+ 2122,129600,5.8893
326
+ 2134,130000,3.7628
327
+ 2144,130400,4.9745
328
+ 2152,130800,7.0797
329
+ 2158,131200,9.2503
330
+ 2168,131600,6.9398
331
+ 2176,132000,7.7078
332
+ 2185,132400,6.6676
333
+ 2192,132800,7.4282
334
+ 2201,133200,7.9222
335
+ 2208,133600,8.5393
336
+ 2214,134000,11.7517
337
+ 2219,134400,13.6645
338
+ 2225,134800,11.5794
339
+ 2232,135200,9.2883
340
+ 2237,135600,15.8834
341
+ 2242,136000,13.0821
342
+ 2249,136400,11.0747
343
+ 2255,136800,12.4104
344
+ 2263,137200,6.2022
345
+ 2268,137600,15.7658
346
+ 2272,138000,18.5306
347
+ 2283,138400,6.9062
348
+ 2289,138800,10.1887
349
+ 2296,139200,12.4334
350
+ 2300,139600,13.9879
351
+ 2304,140000,19.1339
352
+ 2312,140400,10.1769
353
+ 2317,140800,14.9144
354
+ 2322,141200,15.6578
355
+ 2327,141600,12.3308
356
+ 2333,142000,13.2801
357
+ 2340,142400,8.8488
358
+ 2345,142800,15.9321
359
+ 2351,143200,14.0744
360
+ 2357,143600,13.7016
361
+ 2361,144000,17.1141
362
+ 2366,144400,16.093
363
+ 2371,144800,15.3966
364
+ 2377,145200,11.9135
365
+ 2383,145600,12.9568
366
+ 2388,146000,16.5526
367
+ 2394,146400,13.9278
368
+ 2400,146800,13.1973
369
+ 2407,147200,10.658
370
+ 2411,147600,17.6656
371
+ 2418,148000,9.9268
372
+ 2424,148400,12.6209
373
+ 2429,148800,15.6302
374
+ 2434,149200,15.4722
375
+ 2438,149600,15.8488
376
+ 2443,150000,15.5069
377
+ 2452,150400,8.9388
378
+ 2458,150800,11.1183
379
+ 2465,151200,12.9985
380
+ 2472,151600,9.298
381
+ 2482,152000,6.9406
382
+ 2488,152400,10.9889
383
+ 2494,152800,13.8809
384
+ 2500,153200,9.4922
385
+ 2508,153600,9.8473
386
+ 2517,154000,5.4676
387
+ 2521,154400,17.306
388
+ 2529,154800,10.5859
389
+ 2535,155200,8.9943
390
+ 2539,155600,19.018
391
+ 2545,156000,10.541
392
+ 2553,156400,7.199
393
+ 2559,156800,13.1617
394
+ 2564,157200,13.507
395
+ 2568,157600,19.7962
396
+ 2574,158000,15.0976
397
+ 2581,158400,9.5981
398
+ 2587,158800,11.6193
399
+ 2593,159200,11.8875
400
+ 2599,159600,12.6915
401
+ 2608,160000,7.097
402
+ 2616,160400,10.2592
403
+ 2622,160800,11.4696
404
+ 2627,161200,14.5994
405
+ 2632,161600,14.607
406
+ 2637,162000,17.4054
407
+ 2643,162400,13.1822
408
+ 2649,162800,13.4214
409
+ 2654,163200,15.3761
410
+ 2658,163600,20.4652
411
+ 2664,164000,11.7819
412
+ 2668,164400,21.1348
413
+ 2672,164800,19.7497
414
+ 2676,165200,19.9861
415
+ 2680,165600,20.6264
416
+ 2686,166000,15.0114
417
+ 2690,166400,16.8975
418
+ 2696,166800,16.6914
419
+ 2703,167200,9.2727
420
+ 2711,167600,10.1419
421
+ 2717,168000,8.6377
422
+ 2722,168400,15.6912
423
+ 2729,168800,11.2312
424
+ 2737,169200,6.9751
425
+ 2743,169600,14.1282
426
+ 2748,170000,14.9263
427
+ 2753,170400,16.8727
428
+ 2757,170800,20.6722
429
+ 2761,171200,21.0708
430
+ 2767,171600,10.5146
431
+ 2772,172000,18.3501
432
+ 2777,172400,16.4924
433
+ 2782,172800,15.8319
434
+ 2786,173200,18.9688
435
+ 2792,173600,14.616
436
+ 2796,174000,18.8745
437
+ 2802,174400,13.8155
438
+ 2806,174800,20.1499
439
+ 2810,175200,16.8318
440
+ 2817,175600,10.7324
441
+ 2824,176000,11.1007
442
+ 2830,176400,12.4775
443
+ 2841,176800,5.9412
444
+ 2847,177200,13.0488
445
+ 2855,177600,9.6263
446
+ 2861,178000,9.2555
447
+ 2868,178400,13.6159
448
+ 2874,178800,10.7991
449
+ 2880,179200,13.2894
450
+ 2885,179600,12.8225
451
+ 2894,180000,10.0183
452
+ 2898,180400,18.6829
453
+ 2904,180800,11.2808
454
+ 2909,181200,18.8581
455
+ 2914,181600,15.8593
456
+ 2919,182000,15.171
457
+ 2923,182400,15.3617
458
+ 2929,182800,12.993
459
+ 2934,183200,19.4859
460
+ 2938,183600,16.5612
461
+ 2943,184000,18.0036
462
+ 2948,184400,14.3423
463
+ 2954,184800,14.2031
464
+ 2958,185200,17.8896
465
+ 2964,185600,14.1217
466
+ 2969,186000,15.7024
467
+ 2976,186400,10.0496
468
+ 2982,186800,15.1974
469
+ 2990,187200,8.1909
470
+ 2996,187600,11.4299
471
+ 3002,188000,9.0973
472
+ 3007,188400,18.8057
473
+ 3019,188800,5.6121
474
+ 3029,189200,6.4934
475
+ 3035,189600,10.5633
476
+ 3040,190000,14.1835
477
+ 3049,190400,6.6539
478
+ 3055,190800,11.4925
479
+ 3061,191200,12.6726
480
+ 3066,191600,13.2938
481
+ 3072,192000,12.4183
482
+ 3079,192400,9.0459
483
+ 3084,192800,15.8784
484
+ 3089,193200,14.8581
485
+ 3095,193600,11.5812
486
+ 3099,194000,19.8301
487
+ 3107,194400,4.9365
488
+ 3115,194800,8.6926
489
+ 3122,195200,10.0389
490
+ 3131,195600,8.3146
491
+ 3136,196000,12.6695
492
+ 3141,196400,16.2173
493
+ 3146,196800,15.5667
494
+ 3152,197200,13.6761
495
+ 3158,197600,9.9176
496
+ 3164,198000,12.4659
497
+ 3170,198400,10.7513
498
+ 3175,198800,10.0257
499
+ 3179,199200,17.8125
500
+ 3184,199600,15.2902
501
+ 3188,200000,13.6833
code/Lake application/logs/results_1/PPO_frozen_lake_log_1.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 27,400,1.0685
3
+ 52,800,1.1781
4
+ 66,1200,2.0866
5
+ 74,1600,3.6635
6
+ 83,2000,3.21
7
+ 89,2400,4.7845
8
+ 96,2800,4.0053
9
+ 104,3200,3.8626
10
+ 110,3600,4.2643
11
+ 117,4000,4.8568
12
+ 122,4400,5.5491
13
+ 126,4800,7.4627
14
+ 131,5200,6.4184
15
+ 136,5600,4.5238
16
+ 141,6000,6.3553
17
+ 145,6400,7.4742
18
+ 150,6800,6.7629
19
+ 154,7200,6.507
20
+ 159,7600,6.5995
21
+ 164,8000,5.5815
22
+ 168,8400,7.363
23
+ 172,8800,7.377
24
+ 176,9200,7.3108
25
+ 180,9600,7.5117
26
+ 184,10000,7.5528
27
+ 189,10400,6.4767
28
+ 194,10800,6.4779
29
+ 199,11200,6.0249
30
+ 204,11600,5.7716
31
+ 209,12000,6.2201
32
+ 213,12400,6.6677
33
+ 217,12800,7.5942
34
+ 222,13200,6.4747
35
+ 228,13600,5.2064
36
+ 234,14000,4.7457
37
+ 240,14400,4.9933
38
+ 245,14800,7.1465
39
+ 250,15200,5.3496
40
+ 254,15600,6.5622
41
+ 259,16000,6.4103
42
+ 263,16400,7.161
43
+ 268,16800,6.8279
44
+ 273,17200,6.076
45
+ 278,17600,6.2577
46
+ 282,18000,6.3496
47
+ 287,18400,6.1414
48
+ 292,18800,6.9705
49
+ 298,19200,4.5771
50
+ 302,19600,7.4473
51
+ 306,20000,7.2296
52
+ 310,20400,7.5263
53
+ 315,20800,5.9007
54
+ 319,21200,7.457
55
+ 323,21600,7.0129
56
+ 327,22000,7.5711
57
+ 331,22400,7.4269
58
+ 335,22800,7.6694
59
+ 339,23200,7.4846
60
+ 346,23600,4.3524
61
+ 350,24000,7.8691
62
+ 354,24400,7.6455
63
+ 358,24800,7.412
64
+ 363,25200,6.1338
65
+ 367,25600,7.6492
66
+ 372,26000,7.0091
67
+ 376,26400,7.6802
68
+ 380,26800,7.2539
69
+ 384,27200,7.7116
70
+ 388,27600,7.6681
71
+ 394,28000,4.7247
72
+ 400,28400,4.8051
73
+ 404,28800,7.619
74
+ 410,29200,6.0282
75
+ 414,29600,7.0344
76
+ 419,30000,6.6914
77
+ 424,30400,5.9087
78
+ 428,30800,6.2718
79
+ 434,31200,6.0214
80
+ 438,31600,7.7125
81
+ 444,32000,5.1105
82
+ 448,32400,6.5991
83
+ 452,32800,7.6193
84
+ 458,33200,5.5863
85
+ 464,33600,4.8851
86
+ 468,34000,6.7645
87
+ 474,34400,5.4311
88
+ 479,34800,6.3864
89
+ 487,35200,3.7461
90
+ 494,35600,4.4228
91
+ 500,36000,4.2904
92
+ 506,36400,5.8171
93
+ 510,36800,7.8094
94
+ 514,37200,7.1195
95
+ 519,37600,6.8759
96
+ 524,38000,5.3536
97
+ 535,38400,2.7984
98
+ 542,38800,4.5735
99
+ 549,39200,4.2022
100
+ 557,39600,3.886
101
+ 562,40000,5.98
102
+ 567,40400,5.694
103
+ 572,40800,6.5385
104
+ 577,41200,5.555
105
+ 581,41600,7.3993
106
+ 586,42000,5.9369
107
+ 591,42400,6.7453
108
+ 595,42800,7.2735
109
+ 599,43200,7.2897
110
+ 604,43600,5.4537
111
+ 609,44000,5.0761
112
+ 615,44400,5.6804
113
+ 620,44800,5.6548
114
+ 624,45200,7.1059
115
+ 628,45600,7.38
116
+ 632,46000,6.6452
117
+ 637,46400,6.9389
118
+ 641,46800,7.5441
119
+ 645,47200,7.4802
120
+ 653,47600,3.7831
121
+ 659,48000,4.9193
122
+ 665,48400,4.5841
123
+ 670,48800,5.2625
124
+ 676,49200,5.5027
125
+ 681,49600,6.0169
126
+ 689,50000,3.5328
127
+ 694,50400,6.1266
128
+ 700,50800,4.9486
129
+ 707,51200,4.3697
130
+ 713,51600,5.4997
131
+ 719,52000,4.4305
132
+ 724,52400,5.7043
133
+ 731,52800,5.1429
134
+ 739,53200,3.6578
135
+ 745,53600,5.5292
136
+ 751,54000,4.4774
137
+ 757,54400,5.5445
138
+ 761,54800,7.1383
139
+ 766,55200,6.4968
140
+ 770,55600,7.5131
141
+ 774,56000,7.5454
142
+ 779,56400,5.1354
143
+ 783,56800,6.7832
144
+ 789,57200,5.6604
145
+ 793,57600,7.4366
146
+ 797,58000,7.4868
147
+ 801,58400,7.4437
148
+ 805,58800,7.4123
149
+ 812,59200,4.6388
150
+ 816,59600,7.4665
151
+ 820,60000,7.4706
152
+ 824,60400,6.3475
153
+ 828,60800,7.5096
154
+ 832,61200,7.1812
155
+ 837,61600,6.3239
156
+ 841,62000,6.8265
157
+ 847,62400,5.2963
158
+ 853,62800,4.758
159
+ 857,63200,7.345
160
+ 862,63600,6.2916
161
+ 866,64000,7.0709
162
+ 874,64400,3.7731
163
+ 880,64800,4.6813
164
+ 888,65200,4.1506
165
+ 893,65600,5.6099
166
+ 897,66000,6.8958
167
+ 903,66400,5.7361
168
+ 910,66800,4.2763
169
+ 915,67200,5.9698
170
+ 922,67600,3.6519
171
+ 926,68000,7.6562
172
+ 932,68400,6.2379
173
+ 936,68800,7.0506
174
+ 942,69200,5.6851
175
+ 946,69600,7.5529
176
+ 950,70000,7.6569
177
+ 954,70400,7.489
178
+ 958,70800,6.3997
179
+ 962,71200,7.6004
180
+ 967,71600,6.2412
181
+ 971,72000,7.6256
182
+ 977,72400,5.6385
183
+ 981,72800,7.6622
184
+ 985,73200,6.9613
185
+ 990,73600,6.4998
186
+ 994,74000,7.4835
187
+ 998,74400,6.362
188
+ 1003,74800,6.4975
189
+ 1009,75200,6.0932
190
+ 1013,75600,7.3275
191
+ 1018,76000,6.3136
192
+ 1022,76400,7.918
193
+ 1026,76800,7.8966
194
+ 1030,77200,8.1455
195
+ 1035,77600,6.7381
196
+ 1040,78000,6.5592
197
+ 1045,78400,6.0788
198
+ 1049,78800,7.1046
199
+ 1054,79200,7.1041
200
+ 1058,79600,7.6982
201
+ 1062,80000,8.7302
202
+ 1066,80400,6.6165
203
+ 1071,80800,6.9873
204
+ 1077,81200,6.9054
205
+ 1082,81600,7.456
206
+ 1087,82000,8.4276
207
+ 1093,82400,6.369
208
+ 1097,82800,8.6679
209
+ 1101,83200,6.9598
210
+ 1107,83600,5.4595
211
+ 1112,84000,7.8471
212
+ 1116,84400,6.8466
213
+ 1121,84800,6.1909
214
+ 1127,85200,6.153
215
+ 1131,85600,7.6274
216
+ 1136,86000,6.3568
217
+ 1140,86400,7.0727
218
+ 1144,86800,8.7111
219
+ 1148,87200,7.0302
220
+ 1152,87600,7.515
221
+ 1157,88000,6.5863
222
+ 1162,88400,6.3346
223
+ 1166,88800,6.8116
224
+ 1171,89200,6.1335
225
+ 1175,89600,7.3719
226
+ 1179,90000,6.3049
227
+ 1183,90400,7.4515
228
+ 1188,90800,6.5131
229
+ 1193,91200,7.0855
230
+ 1198,91600,5.4287
231
+ 1203,92000,6.8094
232
+ 1209,92400,5.0978
233
+ 1214,92800,5.9272
234
+ 1220,93200,4.52
235
+ 1225,93600,6.2555
236
+ 1231,94000,5.4839
237
+ 1236,94400,6.4755
238
+ 1245,94800,3.9834
239
+ 1251,95200,4.4569
240
+ 1257,95600,5.508
241
+ 1261,96000,7.6455
242
+ 1265,96400,7.3739
243
+ 1269,96800,7.8249
244
+ 1275,97200,5.2307
245
+ 1279,97600,7.6121
246
+ 1284,98000,6.1267
247
+ 1288,98400,7.3416
248
+ 1292,98800,7.5182
249
+ 1296,99200,7.3272
250
+ 1301,99600,6.8465
251
+ 1306,100000,6.4113
252
+ 1311,100400,5.7506
253
+ 1316,100800,7.3543
254
+ 1320,101200,7.6077
255
+ 1326,101600,5.5944
256
+ 1331,102000,7.3631
257
+ 1337,102400,5.2
258
+ 1343,102800,4.8723
259
+ 1348,103200,6.1586
260
+ 1355,103600,4.7696
261
+ 1360,104000,6.1973
262
+ 1364,104400,6.7792
263
+ 1368,104800,7.2198
264
+ 1373,105200,7.1729
265
+ 1379,105600,5.4069
266
+ 1385,106000,4.6109
267
+ 1390,106400,7.1239
268
+ 1395,106800,5.8867
269
+ 1399,107200,7.495
270
+ 1403,107600,6.6621
271
+ 1407,108000,7.4027
272
+ 1412,108400,6.3171
273
+ 1416,108800,5.6887
274
+ 1421,109200,6.6554
275
+ 1425,109600,6.75
276
+ 1431,110000,5.4458
277
+ 1435,110400,7.2516
278
+ 1439,110800,6.3838
279
+ 1443,111200,6.9752
280
+ 1447,111600,7.2899
281
+ 1451,112000,7.285
282
+ 1455,112400,6.905
283
+ 1460,112800,6.7635
284
+ 1464,113200,7.2151
285
+ 1468,113600,7.3228
286
+ 1472,114000,7.2727
287
+ 1477,114400,5.1432
288
+ 1484,114800,4.5119
289
+ 1488,115200,7.5247
290
+ 1493,115600,6.6993
291
+ 1497,116000,6.583
292
+ 1502,116400,5.6599
293
+ 1506,116800,7.3572
294
+ 1512,117200,4.6733
295
+ 1517,117600,6.9534
296
+ 1522,118000,5.8504
297
+ 1527,118400,5.7037
298
+ 1532,118800,5.3677
299
+ 1537,119200,6.6015
300
+ 1545,119600,4.1934
301
+ 1550,120000,6.0054
302
+ 1555,120400,6.3411
303
+ 1559,120800,7.6877
304
+ 1563,121200,7.6184
305
+ 1569,121600,4.5052
306
+ 1573,122000,7.209
307
+ 1579,122400,6.2706
308
+ 1584,122800,6.129
309
+ 1588,123200,9.4211
310
+ 1595,123600,5.3557
311
+ 1601,124000,5.9072
312
+ 1608,124400,4.905
313
+ 1613,124800,8.0799
314
+ 1619,125200,5.1058
315
+ 1625,125600,5.3964
316
+ 1630,126000,6.4494
317
+ 1634,126400,6.4167
318
+ 1640,126800,5.6178
319
+ 1644,127200,6.6824
320
+ 1650,127600,5.9365
321
+ 1656,128000,4.972
322
+ 1663,128400,4.6724
323
+ 1669,128800,4.5071
324
+ 1676,129200,4.7821
325
+ 1683,129600,4.2253
326
+ 1689,130000,5.028
327
+ 1693,130400,6.5605
328
+ 1699,130800,6.1124
329
+ 1705,131200,5.982
330
+ 1711,131600,5.4071
331
+ 1716,132000,6.5883
332
+ 1720,132400,8.6701
333
+ 1725,132800,5.8316
334
+ 1730,133200,6.5105
335
+ 1734,133600,8.651
336
+ 1739,134000,7.6199
337
+ 1743,134400,7.9109
338
+ 1749,134800,7.469
339
+ 1753,135200,7.2578
340
+ 1760,135600,7.0038
341
+ 1765,136000,8.4482
342
+ 1770,136400,10.9777
343
+ 1776,136800,7.8633
344
+ 1781,137200,10.7422
345
+ 1786,137600,7.7002
346
+ 1791,138000,9.8498
347
+ 1796,138400,8.3565
348
+ 1802,138800,7.2121
349
+ 1808,139200,7.437
350
+ 1815,139600,5.7484
351
+ 1822,140000,7.3232
352
+ 1828,140400,8.0316
353
+ 1836,140800,5.3387
354
+ 1841,141200,7.3611
355
+ 1849,141600,6.0461
356
+ 1857,142000,5.8818
357
+ 1861,142400,8.7706
358
+ 1867,142800,12.6086
359
+ 1875,143200,4.9306
360
+ 1881,143600,8.4787
361
+ 1889,144000,6.6935
362
+ 1894,144400,9.2201
363
+ 1899,144800,9.3988
364
+ 1904,145200,11.3811
365
+ 1909,145600,8.435
366
+ 1916,146000,7.3488
367
+ 1922,146400,10.672
368
+ 1927,146800,11.5718
369
+ 1934,147200,7.5663
370
+ 1940,147600,8.4638
371
+ 1944,148000,10.8035
372
+ 1954,148400,5.3482
373
+ 1961,148800,7.711
374
+ 1967,149200,11.0491
375
+ 1975,149600,6.2135
376
+ 1981,150000,10.6626
377
+ 1991,150400,5.506
378
+ 1996,150800,9.1663
379
+ 2004,151200,5.6931
380
+ 2009,151600,11.9436
381
+ 2018,152000,8.0473
382
+ 2025,152400,7.8392
383
+ 2032,152800,8.6977
384
+ 2041,153200,5.9514
385
+ 2045,153600,15.2402
386
+ 2052,154000,9.4472
387
+ 2059,154400,5.8932
388
+ 2065,154800,9.2129
389
+ 2074,155200,6.222
390
+ 2078,155600,15.2026
391
+ 2090,156000,6.9467
392
+ 2099,156400,6.1649
393
+ 2108,156800,7.4966
394
+ 2117,157200,6.7418
395
+ 2126,157600,8.2676
396
+ 2133,158000,9.3655
397
+ 2141,158400,6.6173
398
+ 2145,158800,17.2168
399
+ 2154,159200,8.7089
400
+ 2160,159600,11.2436
401
+ 2165,160000,12.7216
402
+ 2169,160400,21.6313
403
+ 2174,160800,23.2383
404
+ 2179,161200,17.6359
405
+ 2185,161600,16.5861
406
+ 2191,162000,16.226
407
+ 2197,162400,17.9568
408
+ 2204,162800,12.871
409
+ 2209,163200,13.3918
410
+ 2216,163600,15.4123
411
+ 2223,164000,11.4062
412
+ 2228,164400,14.1367
413
+ 2237,164800,8.0556
414
+ 2244,165200,8.6179
415
+ 2256,165600,6.4841
416
+ 2266,166000,9.5171
417
+ 2274,166400,11.5372
418
+ 2283,166800,9.7408
419
+ 2287,167200,21.5043
420
+ 2291,167600,23.8779
421
+ 2296,168000,20.5695
422
+ 2301,168400,23.0533
423
+ 2309,168800,11.9759
424
+ 2314,169200,23.7854
425
+ 2320,169600,17.3165
426
+ 2327,170000,15.7337
427
+ 2332,170400,21.5258
428
+ 2337,170800,22.1934
429
+ 2344,171200,18.7651
430
+ 2351,171600,10.7436
431
+ 2360,172000,10.7671
432
+ 2369,172400,10.4788
433
+ 2382,172800,7.1276
434
+ 2386,173200,21.2732
435
+ 2392,173600,18.6327
436
+ 2397,174000,19.4716
437
+ 2404,174400,12.1719
438
+ 2409,174800,14.6277
439
+ 2416,175200,14.5393
440
+ 2422,175600,15.4089
441
+ 2429,176000,14.0683
442
+ 2435,176400,15.4539
443
+ 2441,176800,13.9568
444
+ 2446,177200,19.1191
445
+ 2456,177600,9.5304
446
+ 2466,178000,7.2711
447
+ 2472,178400,10.6102
448
+ 2478,178800,11.5563
449
+ 2482,179200,19.9556
450
+ 2490,179600,9.0751
451
+ 2497,180000,6.2264
452
+ 2502,180400,18.2541
453
+ 2507,180800,9.1061
454
+ 2513,181200,15.3433
455
+ 2517,181600,19.7729
456
+ 2522,182000,13.1969
457
+ 2527,182400,18.7246
458
+ 2531,182800,14.651
459
+ 2537,183200,13.6844
460
+ 2543,183600,13.292
461
+ 2550,184000,16.4504
462
+ 2559,184400,8.7936
463
+ 2563,184800,22.5295
464
+ 2569,185200,15.0557
465
+ 2577,185600,10.3489
466
+ 2585,186000,11.0047
467
+ 2595,186400,9.2111
468
+ 2604,186800,11.11
469
+ 2611,187200,10.4671
470
+ 2617,187600,12.7915
471
+ 2624,188000,13.962
472
+ 2633,188400,10.0636
473
+ 2637,188800,18.8204
474
+ 2642,189200,19.8767
475
+ 2647,189600,17.3735
476
+ 2654,190000,12.2333
477
+ 2664,190400,5.6146
478
+ 2670,190800,14.4501
479
+ 2675,191200,15.3957
480
+ 2685,191600,6.7485
481
+ 2689,192000,20.351
482
+ 2702,192400,4.2184
483
+ 2707,192800,16.1191
484
+ 2714,193200,11.0071
485
+ 2722,193600,6.7454
486
+ 2730,194000,4.1497
487
+ 2735,194400,6.5892
488
+ 2743,194800,10.7816
489
+ 2754,195200,3.9205
490
+ 2761,195600,10.0928
491
+ 2767,196000,14.8486
492
+ 2772,196400,17.4269
493
+ 2779,196800,10.2417
494
+ 2783,197200,13.4426
495
+ 2790,197600,10.6094
496
+ 2794,198000,17.6529
497
+ 2801,198400,7.4825
498
+ 2807,198800,9.2602
499
+ 2816,199200,4.0249
500
+ 2824,199600,6.6758
501
+ 2829,200000,16.2271
code/Lake application/logs/results_1/PPO_frozen_lake_log_2.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 28,400,1.0712
3
+ 50,800,1.338
4
+ 64,1200,2.2055
5
+ 75,1600,2.7995
6
+ 81,2000,5.4941
7
+ 92,2400,3.3249
8
+ 96,2800,6.413
9
+ 101,3200,6.3269
10
+ 106,3600,5.3282
11
+ 112,4000,5.6833
12
+ 117,4400,5.6105
13
+ 123,4800,5.1394
14
+ 127,5200,7.3238
15
+ 131,5600,7.129
16
+ 137,6000,7.7241
17
+ 142,6400,4.6711
18
+ 148,6800,7.9164
19
+ 152,7200,6.7923
20
+ 157,7600,7.6097
21
+ 162,8000,5.8306
22
+ 169,8400,5.6442
23
+ 173,8800,8.2231
24
+ 179,9200,8.5511
25
+ 185,9600,6.1404
26
+ 189,10000,10.3032
27
+ 194,10400,16.185
28
+ 198,10800,20.0498
29
+ 202,11200,16.4263
30
+ 212,11600,6.5354
31
+ 217,12000,13.0853
32
+ 222,12400,19.0939
33
+ 232,12800,7.205
34
+ 241,13200,8.4478
35
+ 251,13600,7.8135
36
+ 261,14000,6.7748
37
+ 271,14400,8.0458
38
+ 277,14800,15.136
39
+ 287,15200,8.4088
40
+ 296,15600,9.5744
41
+ 301,16000,17.8938
42
+ 307,16400,14.878
43
+ 317,16800,9.3055
44
+ 323,17200,13.5246
45
+ 328,17600,18.3417
46
+ 334,18000,12.9367
47
+ 341,18400,14.7582
48
+ 345,18800,19.8885
49
+ 352,19200,11.2014
50
+ 359,19600,12.73
51
+ 365,20000,11.939
52
+ 373,20400,10.1943
53
+ 380,20800,12.1353
54
+ 394,21200,5.5529
55
+ 402,21600,8.7544
56
+ 411,22000,7.7306
57
+ 422,22400,6.3769
58
+ 431,22800,8.1486
59
+ 441,23200,6.2044
60
+ 446,23600,13.7331
61
+ 454,24000,10.2596
62
+ 460,24400,10.6293
63
+ 469,24800,8.1278
64
+ 473,25200,20.7824
65
+ 480,25600,14.1365
66
+ 484,26000,18.8189
67
+ 489,26400,15.9557
68
+ 499,26800,6.2855
69
+ 504,27200,16.2915
70
+ 511,27600,7.7517
71
+ 518,28000,11.3698
72
+ 522,28400,20.7648
73
+ 527,28800,19.0116
74
+ 533,29200,16.3582
75
+ 539,29600,16.8797
76
+ 545,30000,12.4403
77
+ 552,30400,16.3851
78
+ 557,30800,21.8059
79
+ 561,31200,23.8707
80
+ 569,31600,9.6084
81
+ 577,32000,13.8038
82
+ 589,32400,5.1129
83
+ 597,32800,13.5648
84
+ 605,33200,12.1758
85
+ 612,33600,11.9826
86
+ 620,34000,8.8992
87
+ 628,34400,10.1155
88
+ 636,34800,7.6714
89
+ 643,35200,9.6841
90
+ 648,35600,18.1897
91
+ 654,36000,10.757
92
+ 664,36400,8.2339
93
+ 670,36800,14.7556
94
+ 677,37200,12.949
95
+ 683,37600,12.4321
96
+ 694,38000,7.0777
97
+ 699,38400,16.2456
98
+ 712,38800,7.5439
99
+ 720,39200,13.3273
100
+ 725,39600,20.2522
101
+ 731,40000,20.2798
102
+ 737,40400,19.7969
103
+ 742,40800,23.1972
104
+ 747,41200,21.0181
105
+ 754,41600,15.7453
106
+ 764,42000,10.3466
107
+ 775,42400,9.1961
108
+ 782,42800,13.7833
109
+ 787,43200,22.5446
110
+ 793,43600,20.7285
111
+ 797,44000,20.3011
112
+ 803,44400,18.0368
113
+ 810,44800,17.1513
114
+ 816,45200,16.1653
115
+ 822,45600,20.1987
116
+ 827,46000,17.5718
117
+ 834,46400,15.5697
118
+ 844,46800,10.5578
119
+ 849,47200,22.3577
120
+ 854,47600,22.7982
121
+ 862,48000,11.8983
122
+ 873,48400,8.1632
123
+ 880,48800,13.9313
124
+ 888,49200,11.4741
125
+ 892,49600,24.8084
126
+ 899,50000,17.164
127
+ 904,50400,19.6258
128
+ 911,50800,15.8299
129
+ 918,51200,9.6712
130
+ 926,51600,10.8926
131
+ 933,52000,12.6279
132
+ 941,52400,10.5669
133
+ 946,52800,19.1119
134
+ 955,53200,6.5965
135
+ 962,53600,14.0385
136
+ 972,54000,5.9704
137
+ 977,54400,18.2561
138
+ 984,54800,11.3252
139
+ 994,55200,7.534
140
+ 999,55600,13.1451
141
+ 1005,56000,17.4585
142
+ 1011,56400,10.8942
143
+ 1017,56800,16.6066
144
+ 1023,57200,16.5959
145
+ 1027,57600,24.5556
146
+ 1037,58000,8.4738
147
+ 1042,58400,17.3071
148
+ 1049,58800,9.78
149
+ 1055,59200,15.1944
150
+ 1067,59600,5.3205
151
+ 1075,60000,11.3803
152
+ 1092,60400,3.6245
153
+ 1102,60800,5.8269
154
+ 1111,61200,9.5032
155
+ 1121,61600,5.4864
156
+ 1128,62000,6.7626
157
+ 1137,62400,9.5821
158
+ 1142,62800,16.4103
159
+ 1148,63200,11.2262
160
+ 1156,63600,8.8738
161
+ 1164,64000,9.1704
162
+ 1169,64400,10.2279
163
+ 1176,64800,11.3308
164
+ 1183,65200,10.4271
165
+ 1191,65600,7.8356
166
+ 1196,66000,13.8851
167
+ 1202,66400,18.9843
168
+ 1206,66800,24.7673
169
+ 1211,67200,18.6684
170
+ 1218,67600,16.0951
171
+ 1222,68000,22.9115
172
+ 1226,68400,23.6065
173
+ 1233,68800,11.1388
174
+ 1238,69200,14.0507
175
+ 1244,69600,15.8132
176
+ 1249,70000,13.9042
177
+ 1253,70400,24.436
178
+ 1263,70800,9.9095
179
+ 1268,71200,15.1151
180
+ 1273,71600,18.5476
181
+ 1278,72000,18.5341
182
+ 1283,72400,16.7705
183
+ 1290,72800,12.7882
184
+ 1297,73200,14.3823
185
+ 1303,73600,11.6998
186
+ 1309,74000,17.2901
187
+ 1315,74400,15.6958
188
+ 1323,74800,10.383
189
+ 1329,75200,11.5377
190
+ 1334,75600,18.225
191
+ 1340,76000,16.318
192
+ 1347,76400,11.497
193
+ 1353,76800,13.7727
194
+ 1360,77200,9.5276
195
+ 1367,77600,12.1409
196
+ 1373,78000,7.7239
197
+ 1383,78400,9.336
198
+ 1388,78800,14.569
199
+ 1392,79200,17.7591
200
+ 1396,79600,21.1039
201
+ 1401,80000,15.0516
202
+ 1407,80400,14.6828
203
+ 1412,80800,15.2347
204
+ 1422,81200,8.8604
205
+ 1429,81600,10.5288
206
+ 1436,82000,14.4741
207
+ 1443,82400,9.9426
208
+ 1450,82800,13.5522
209
+ 1457,83200,12.1773
210
+ 1466,83600,12.6106
211
+ 1473,84000,15.5286
212
+ 1483,84400,9.095
213
+ 1489,84800,16.7606
214
+ 1493,85200,16.0469
215
+ 1499,85600,11.1895
216
+ 1504,86000,16.8995
217
+ 1509,86400,14.7574
218
+ 1514,86800,17.2417
219
+ 1521,87200,8.4962
220
+ 1533,87600,5.2503
221
+ 1539,88000,13.6948
222
+ 1546,88400,10.7947
223
+ 1553,88800,11.7415
224
+ 1563,89200,5.8864
225
+ 1572,89600,7.1169
226
+ 1577,90000,11.0446
227
+ 1585,90400,7.4355
228
+ 1591,90800,7.9803
229
+ 1598,91200,8.522
230
+ 1603,91600,10.4511
231
+ 1608,92000,12.7399
232
+ 1614,92400,8.2374
233
+ 1619,92800,12.8639
234
+ 1623,93200,11.7356
235
+ 1629,93600,12.5085
236
+ 1635,94000,8.7022
237
+ 1640,94400,13.0884
238
+ 1644,94800,18.9683
239
+ 1648,95200,17.3632
240
+ 1654,95600,13.1206
241
+ 1659,96000,16.7717
242
+ 1667,96400,7.295
243
+ 1672,96800,14.4554
244
+ 1679,97200,10.9414
245
+ 1686,97600,11.4577
246
+ 1694,98000,10.4921
247
+ 1699,98400,11.4891
248
+ 1706,98800,12.5354
249
+ 1715,99200,9.2944
250
+ 1721,99600,15.9709
251
+ 1725,100000,17.2995
252
+ 1734,100400,6.9714
253
+ 1738,100800,15.647
254
+ 1746,101200,8.3676
255
+ 1754,101600,9.6176
256
+ 1761,102000,7.512
257
+ 1768,102400,10.3183
258
+ 1774,102800,13.3747
259
+ 1780,103200,11.1231
260
+ 1785,103600,13.8813
261
+ 1791,104000,17.1172
262
+ 1796,104400,12.4113
263
+ 1801,104800,16.1103
264
+ 1809,105200,6.39
265
+ 1818,105600,8.2949
266
+ 1825,106000,11.5045
267
+ 1831,106400,10.9331
268
+ 1837,106800,11.3621
269
+ 1842,107200,18.0518
270
+ 1848,107600,12.5318
271
+ 1855,108000,13.3056
272
+ 1861,108400,12.4526
273
+ 1867,108800,12.8945
274
+ 1876,109200,8.2563
275
+ 1882,109600,11.2807
276
+ 1888,110000,12.192
277
+ 1896,110400,7.9317
278
+ 1903,110800,11.2608
279
+ 1908,111200,18.0233
280
+ 1912,111600,17.8104
281
+ 1918,112000,13.7657
282
+ 1925,112400,7.795
283
+ 1931,112800,13.0999
284
+ 1936,113200,12.6435
285
+ 1942,113600,9.7847
286
+ 1947,114000,15.6282
287
+ 1953,114400,13.0466
288
+ 1959,114800,13.1941
289
+ 1963,115200,17.225
290
+ 1970,115600,10.4272
291
+ 1976,116000,10.2674
292
+ 1984,116400,7.0663
293
+ 1991,116800,7.4921
294
+ 1996,117200,12.674
295
+ 2005,117600,9.7954
296
+ 2010,118000,15.9189
297
+ 2016,118400,13.78
298
+ 2023,118800,10.5507
299
+ 2030,119200,7.0213
300
+ 2036,119600,16.1446
301
+ 2040,120000,20.1122
302
+ 2044,120400,17.4097
303
+ 2049,120800,13.002
304
+ 2054,121200,15.4103
305
+ 2058,121600,13.7301
306
+ 2062,122000,17.3055
307
+ 2069,122400,12.1489
308
+ 2075,122800,10.2724
309
+ 2084,123200,8.3912
310
+ 2088,123600,20.9833
311
+ 2094,124000,9.3535
312
+ 2102,124400,12.5569
313
+ 2106,124800,22.0025
314
+ 2111,125200,15.3937
315
+ 2117,125600,13.1022
316
+ 2127,126000,7.0628
317
+ 2132,126400,19.1772
318
+ 2138,126800,12.9516
319
+ 2146,127200,9.1282
320
+ 2154,127600,8.3276
321
+ 2160,128000,9.1102
322
+ 2167,128400,8.3557
323
+ 2175,128800,9.8998
324
+ 2181,129200,8.9511
325
+ 2190,129600,9.2503
326
+ 2197,130000,8.1281
327
+ 2203,130400,15.1956
328
+ 2207,130800,16.8667
329
+ 2221,131200,4.3684
330
+ 2232,131600,6.8054
331
+ 2237,132000,14.4013
332
+ 2244,132400,16.4632
333
+ 2254,132800,7.111
334
+ 2261,133200,12.7119
335
+ 2268,133600,11.1943
336
+ 2277,134000,10.9562
337
+ 2284,134400,10.6057
338
+ 2291,134800,16.0464
339
+ 2298,135200,12.4611
340
+ 2302,135600,22.5867
341
+ 2308,136000,14.8831
342
+ 2315,136400,10.9013
343
+ 2319,136800,17.5025
344
+ 2327,137200,13.1141
345
+ 2333,137600,16.9285
346
+ 2343,138000,12.4663
347
+ 2350,138400,13.3258
348
+ 2357,138800,16.7013
349
+ 2361,139200,22.2587
350
+ 2366,139600,25.103
351
+ 2371,140000,25.5381
352
+ 2376,140400,16.0396
353
+ 2381,140800,12.2065
354
+ 2387,141200,22.5733
355
+ 2392,141600,25.1197
356
+ 2399,142000,13.928
357
+ 2403,142400,22.8321
358
+ 2412,142800,11.8089
359
+ 2419,143200,12.0245
360
+ 2426,143600,11.2781
361
+ 2433,144000,9.814
362
+ 2440,144400,11.4397
363
+ 2445,144800,15.5671
364
+ 2451,145200,11.2167
365
+ 2457,145600,11.7414
366
+ 2465,146000,11.176
367
+ 2470,146400,17.8477
368
+ 2474,146800,23.4131
369
+ 2479,147200,17.3667
370
+ 2484,147600,15.4173
371
+ 2490,148000,15.5236
372
+ 2496,148400,11.9975
373
+ 2500,148800,15.5211
374
+ 2506,149200,9.2949
375
+ 2512,149600,13.2005
376
+ 2522,150000,6.7119
377
+ 2526,150400,13.047
378
+ 2532,150800,10.975
379
+ 2537,151200,12.4601
380
+ 2545,151600,10.3919
381
+ 2549,152000,17.1736
382
+ 2555,152400,10.9481
383
+ 2561,152800,6.5293
384
+ 2565,153200,16.5226
385
+ 2574,153600,6.976
386
+ 2580,154000,9.178
387
+ 2586,154400,9.0912
388
+ 2593,154800,8.2036
389
+ 2599,155200,11.2736
390
+ 2604,155600,11.8792
391
+ 2612,156000,8.9914
392
+ 2619,156400,7.5987
393
+ 2624,156800,16.6534
394
+ 2631,157200,9.9542
395
+ 2637,157600,11.2406
396
+ 2646,158000,5.5051
397
+ 2651,158400,13.1209
398
+ 2659,158800,11.2929
399
+ 2666,159200,9.2889
400
+ 2673,159600,6.4488
401
+ 2679,160000,8.9596
402
+ 2685,160400,12.8978
403
+ 2691,160800,12.8375
404
+ 2696,161200,16.7479
405
+ 2701,161600,10.707
406
+ 2709,162000,7.5865
407
+ 2720,162400,5.9772
408
+ 2727,162800,14.3091
409
+ 2735,163200,10.0529
410
+ 2741,163600,16.2379
411
+ 2750,164000,9.3317
412
+ 2761,164400,9.3682
413
+ 2772,164800,7.5992
414
+ 2777,165200,13.7422
415
+ 2785,165600,11.4302
416
+ 2796,166000,5.5863
417
+ 2800,166400,20.5644
418
+ 2809,166800,9.1758
419
+ 2819,167200,10.6074
420
+ 2824,167600,19.2911
421
+ 2829,168000,14.7837
422
+ 2834,168400,20.5648
423
+ 2839,168800,21.9035
424
+ 2846,169200,12.8552
425
+ 2852,169600,14.4815
426
+ 2860,170000,10.327
427
+ 2867,170400,16.0522
428
+ 2877,170800,7.9339
429
+ 2885,171200,9.3754
430
+ 2895,171600,8.846
431
+ 2901,172000,18.2031
432
+ 2911,172400,9.8781
433
+ 2920,172800,8.4863
434
+ 2925,173200,14.941
435
+ 2933,173600,12.9186
436
+ 2941,174000,10.819
437
+ 2946,174400,17.9883
438
+ 2951,174800,16.5405
439
+ 2956,175200,18.8158
440
+ 2960,175600,23.2525
441
+ 2967,176000,13.0999
442
+ 2973,176400,13.952
443
+ 2977,176800,21.9684
444
+ 2985,177200,10.9067
445
+ 2991,177600,11.2303
446
+ 2997,178000,13.2209
447
+ 3005,178400,11.2834
448
+ 3009,178800,16.5292
449
+ 3013,179200,21.2372
450
+ 3022,179600,11.0619
451
+ 3031,180000,7.3537
452
+ 3036,180400,14.5772
453
+ 3045,180800,9.2121
454
+ 3057,181200,5.2037
455
+ 3067,181600,6.669
456
+ 3075,182000,8.7553
457
+ 3081,182400,10.2706
458
+ 3087,182800,11.9766
459
+ 3098,183200,7.7188
460
+ 3104,183600,10.9377
461
+ 3112,184000,8.698
462
+ 3118,184400,8.7785
463
+ 3126,184800,7.6664
464
+ 3131,185200,13.2695
465
+ 3138,185600,8.0803
466
+ 3145,186000,12.0663
467
+ 3152,186400,9.1547
468
+ 3158,186800,10.6509
469
+ 3165,187200,11.0623
470
+ 3169,187600,19.4497
471
+ 3173,188000,18.2049
472
+ 3178,188400,14.0501
473
+ 3185,188800,11.2683
474
+ 3194,189200,6.596
475
+ 3202,189600,11.3825
476
+ 3210,190000,7.7631
477
+ 3214,190400,12.7471
478
+ 3222,190800,12.4214
479
+ 3228,191200,11.1834
480
+ 3234,191600,12.8345
481
+ 3242,192000,10.284
482
+ 3248,192400,7.1057
483
+ 3253,192800,11.0905
484
+ 3258,193200,18.9309
485
+ 3263,193600,18.4142
486
+ 3269,194000,14.4793
487
+ 3273,194400,21.8498
488
+ 3277,194800,19.3599
489
+ 3283,195200,19.47
490
+ 3287,195600,26.6692
491
+ 3291,196000,21.4347
492
+ 3296,196400,20.3488
493
+ 3301,196800,18.7587
494
+ 3309,197200,8.8581
495
+ 3314,197600,8.1858
496
+ 3321,198000,11.0028
497
+ 3325,198400,17.812
498
+ 3329,198800,21.4082
499
+ 3333,199200,20.2986
500
+ 3337,199600,25.3884
501
+ 3342,200000,18.7209
code/Lake application/logs/results_1/PPO_frozen_lake_log_3.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 28,400,0.9697
3
+ 49,800,1.4299
4
+ 59,1200,3.1168
5
+ 67,1600,3.4994
6
+ 74,2000,4.3512
7
+ 78,2400,6.7968
8
+ 84,2800,4.9979
9
+ 90,3200,4.8983
10
+ 97,3600,5.1283
11
+ 104,4000,4.4929
12
+ 113,4400,3.0465
13
+ 121,4800,3.6254
14
+ 127,5200,5.7239
15
+ 131,5600,6.4944
16
+ 138,6000,4.5873
17
+ 143,6400,5.5102
18
+ 148,6800,6.1279
19
+ 155,7200,4.6213
20
+ 160,7600,5.1025
21
+ 164,8000,7.2864
22
+ 168,8400,7.325
23
+ 174,8800,5.1145
24
+ 179,9200,5.9985
25
+ 183,9600,7.3481
26
+ 189,10000,5.0562
27
+ 194,10400,5.74
28
+ 198,10800,6.7543
29
+ 205,11200,4.0238
30
+ 210,11600,7.0676
31
+ 215,12000,4.578
32
+ 220,12400,6.2174
33
+ 225,12800,6.4531
34
+ 231,13200,4.6471
35
+ 237,13600,5.7269
36
+ 241,14000,7.5008
37
+ 247,14400,5.3946
38
+ 251,14800,6.9517
39
+ 255,15200,7.4972
40
+ 261,15600,5.5219
41
+ 265,16000,6.6453
42
+ 271,16400,5.998
43
+ 279,16800,3.6128
44
+ 285,17200,4.8651
45
+ 290,17600,7.1438
46
+ 295,18000,6.1627
47
+ 300,18400,6.7364
48
+ 305,18800,5.0593
49
+ 311,19200,5.698
50
+ 317,19600,5.0444
51
+ 322,20000,6.426
52
+ 329,20400,5.4239
53
+ 334,20800,5.0805
54
+ 339,21200,7.5365
55
+ 343,21600,6.5818
56
+ 348,22000,6.7635
57
+ 353,22400,6.5158
58
+ 360,22800,4.8907
59
+ 365,23200,6.5301
60
+ 369,23600,7.0247
61
+ 376,24000,4.7928
62
+ 381,24400,6.8979
63
+ 390,24800,3.7186
64
+ 397,25200,4.4294
65
+ 403,25600,5.0304
66
+ 408,26000,6.3346
67
+ 412,26400,8.2039
68
+ 417,26800,6.1453
69
+ 422,27200,7.8806
70
+ 429,27600,4.8061
71
+ 434,28000,6.9867
72
+ 441,28400,4.2048
73
+ 447,28800,6.2038
74
+ 454,29200,5.0084
75
+ 462,29600,4.0663
76
+ 466,30000,6.9928
77
+ 471,30400,6.8849
78
+ 478,30800,4.8687
79
+ 482,31200,7.0303
80
+ 487,31600,6.6575
81
+ 492,32000,7.9083
82
+ 496,32400,7.7999
83
+ 502,32800,7.0862
84
+ 506,33200,7.493
85
+ 511,33600,7.9607
86
+ 515,34000,10.1515
87
+ 519,34400,10.5739
88
+ 523,34800,7.9336
89
+ 529,35200,8.113
90
+ 535,35600,7.7863
91
+ 541,36000,7.2677
92
+ 546,36400,7.9735
93
+ 552,36800,7.6501
94
+ 557,37200,7.2945
95
+ 565,37600,4.6367
96
+ 569,38000,8.4382
97
+ 578,38400,4.0414
98
+ 583,38800,7.4847
99
+ 588,39200,5.5446
100
+ 594,39600,6.5673
101
+ 599,40000,6.8408
102
+ 604,40400,6.5415
103
+ 609,40800,6.8515
104
+ 613,41200,7.0606
105
+ 617,41600,8.0479
106
+ 623,42000,5.6694
107
+ 627,42400,7.9936
108
+ 632,42800,6.7543
109
+ 637,43200,5.8305
110
+ 643,43600,6.281
111
+ 650,44000,3.5108
112
+ 656,44400,5.8348
113
+ 660,44800,7.7714
114
+ 664,45200,7.4986
115
+ 671,45600,4.8578
116
+ 679,46000,3.9887
117
+ 684,46400,6.534
118
+ 689,46800,6.7332
119
+ 693,47200,7.6821
120
+ 698,47600,6.0771
121
+ 704,48000,5.4061
122
+ 712,48400,3.1593
123
+ 717,48800,7.2291
124
+ 723,49200,5.0261
125
+ 728,49600,8.1633
126
+ 733,50000,6.6328
127
+ 738,50400,7.6968
128
+ 742,50800,9.8616
129
+ 747,51200,8.1682
130
+ 753,51600,6.0632
131
+ 757,52000,6.9269
132
+ 761,52400,8.9436
133
+ 768,52800,5.1674
134
+ 775,53200,5.4459
135
+ 779,53600,8.1192
136
+ 785,54000,6.4501
137
+ 791,54400,5.4877
138
+ 795,54800,9.5873
139
+ 800,55200,8.2846
140
+ 806,55600,6.569
141
+ 812,56000,8.7981
142
+ 816,56400,9.4428
143
+ 823,56800,5.6861
144
+ 829,57200,6.7182
145
+ 835,57600,6.808
146
+ 840,58000,9.3055
147
+ 845,58400,7.6702
148
+ 851,58800,6.8338
149
+ 855,59200,10.6899
150
+ 859,59600,9.518
151
+ 864,60000,8.8663
152
+ 869,60400,7.8428
153
+ 873,60800,8.1849
154
+ 879,61200,5.1644
155
+ 886,61600,6.1759
156
+ 893,62000,4.8597
157
+ 901,62400,4.9238
158
+ 908,62800,4.5957
159
+ 913,63200,5.2139
160
+ 920,63600,4.4207
161
+ 929,64000,3.8686
162
+ 933,64400,7.7389
163
+ 937,64800,7.038
164
+ 942,65200,6.4901
165
+ 946,65600,8.3237
166
+ 951,66000,6.4676
167
+ 956,66400,7.9483
168
+ 960,66800,7.4556
169
+ 966,67200,5.8683
170
+ 972,67600,4.9513
171
+ 977,68000,6.3514
172
+ 982,68400,7.6564
173
+ 989,68800,3.9463
174
+ 996,69200,4.8304
175
+ 1000,69600,7.4777
176
+ 1007,70000,5.3967
177
+ 1012,70400,5.9162
178
+ 1016,70800,9.1032
179
+ 1022,71200,5.6404
180
+ 1027,71600,7.1579
181
+ 1033,72000,5.5076
182
+ 1039,72400,5.1194
183
+ 1044,72800,5.7785
184
+ 1049,73200,7.2613
185
+ 1054,73600,6.0494
186
+ 1059,74000,7.0271
187
+ 1063,74400,7.5499
188
+ 1067,74800,7.0446
189
+ 1072,75200,6.0907
190
+ 1076,75600,6.7148
191
+ 1081,76000,7.3562
192
+ 1086,76400,5.7122
193
+ 1092,76800,4.6709
194
+ 1099,77200,4.9494
195
+ 1104,77600,5.152
196
+ 1109,78000,7.1534
197
+ 1115,78400,5.9638
198
+ 1119,78800,7.1621
199
+ 1123,79200,8.3579
200
+ 1128,79600,8.5829
201
+ 1137,80000,4.075
202
+ 1144,80400,3.9743
203
+ 1149,80800,7.1503
204
+ 1155,81200,5.8118
205
+ 1161,81600,5.5331
206
+ 1167,82000,5.4701
207
+ 1176,82400,4.1435
208
+ 1180,82800,6.5754
209
+ 1186,83200,5.7
210
+ 1192,83600,6.4401
211
+ 1198,84000,5.9802
212
+ 1202,84400,7.2848
213
+ 1207,84800,6.2365
214
+ 1213,85200,6.6385
215
+ 1218,85600,7.7489
216
+ 1224,86000,5.8286
217
+ 1229,86400,6.656
218
+ 1234,86800,7.9111
219
+ 1240,87200,5.2668
220
+ 1244,87600,8.2528
221
+ 1250,88000,5.2783
222
+ 1254,88400,7.6867
223
+ 1258,88800,7.9634
224
+ 1263,89200,7.2483
225
+ 1270,89600,6.3149
226
+ 1274,90000,7.6798
227
+ 1281,90400,6.6529
228
+ 1286,90800,6.4735
229
+ 1296,91200,3.4323
230
+ 1301,91600,7.3159
231
+ 1306,92000,8.4848
232
+ 1311,92400,6.9908
233
+ 1316,92800,8.4663
234
+ 1326,93200,3.5854
235
+ 1331,93600,8.0148
236
+ 1336,94000,7.4098
237
+ 1341,94400,6.6913
238
+ 1348,94800,4.7574
239
+ 1355,95200,5.4236
240
+ 1361,95600,6.3331
241
+ 1369,96000,4.811
242
+ 1373,96400,8.9366
243
+ 1377,96800,8.122
244
+ 1383,97200,6.3531
245
+ 1388,97600,5.7501
246
+ 1395,98000,5.7337
247
+ 1400,98400,6.524
248
+ 1405,98800,5.8413
249
+ 1414,99200,3.6238
250
+ 1419,99600,6.8038
251
+ 1427,100000,4.1713
252
+ 1431,100400,6.4416
253
+ 1435,100800,7.8524
254
+ 1440,101200,7.2999
255
+ 1444,101600,7.8906
256
+ 1448,102000,7.2445
257
+ 1452,102400,7.8529
258
+ 1457,102800,7.3003
259
+ 1463,103200,5.6059
260
+ 1467,103600,7.8356
261
+ 1472,104000,6.6397
262
+ 1477,104400,6.4739
263
+ 1481,104800,6.9889
264
+ 1486,105200,6.2231
265
+ 1490,105600,7.6594
266
+ 1495,106000,6.1959
267
+ 1501,106400,5.118
268
+ 1507,106800,4.5929
269
+ 1513,107200,5.9776
270
+ 1518,107600,6.0486
271
+ 1522,108000,7.2906
272
+ 1526,108400,6.2824
273
+ 1531,108800,5.9725
274
+ 1537,109200,5.3613
275
+ 1541,109600,6.6071
276
+ 1546,110000,6.5305
277
+ 1551,110400,5.922
278
+ 1555,110800,6.7349
279
+ 1561,111200,5.5868
280
+ 1565,111600,6.026
281
+ 1570,112000,6.0956
282
+ 1575,112400,6.1964
283
+ 1580,112800,6.6359
284
+ 1584,113200,6.554
285
+ 1590,113600,6.2213
286
+ 1598,114000,4.2887
287
+ 1604,114400,4.9337
288
+ 1610,114800,5.8288
289
+ 1614,115200,8.2018
290
+ 1618,115600,7.1357
291
+ 1623,116000,7.2247
292
+ 1629,116400,5.1857
293
+ 1633,116800,6.6046
294
+ 1638,117200,5.8635
295
+ 1642,117600,7.5453
296
+ 1648,118000,6.806
297
+ 1652,118400,7.8409
298
+ 1656,118800,8.5085
299
+ 1661,119200,6.7829
300
+ 1666,119600,6.8868
301
+ 1675,120000,4.2945
302
+ 1682,120400,4.4064
303
+ 1689,120800,5.0664
304
+ 1694,121200,6.5454
305
+ 1701,121600,5.6222
306
+ 1708,122000,5.4888
307
+ 1712,122400,11.0368
308
+ 1717,122800,7.0496
309
+ 1722,123200,8.6749
310
+ 1729,123600,6.9978
311
+ 1737,124000,5.4495
312
+ 1747,124400,4.0671
313
+ 1753,124800,7.6818
314
+ 1759,125200,6.4432
315
+ 1763,125600,10.7613
316
+ 1770,126000,6.4108
317
+ 1778,126400,4.3716
318
+ 1784,126800,6.7246
319
+ 1788,127200,10.4382
320
+ 1794,127600,7.1896
321
+ 1800,128000,7.588
322
+ 1805,128400,9.4403
323
+ 1809,128800,11.0176
324
+ 1813,129200,11.7979
325
+ 1818,129600,8.75
326
+ 1827,130000,4.6859
327
+ 1834,130400,6.7395
328
+ 1839,130800,8.993
329
+ 1843,131200,11.3917
330
+ 1849,131600,7.4701
331
+ 1855,132000,5.7999
332
+ 1862,132400,8.5014
333
+ 1866,132800,11.0123
334
+ 1879,133200,3.7213
335
+ 1884,133600,10.6388
336
+ 1892,134000,5.8487
337
+ 1898,134400,7.1305
338
+ 1904,134800,8.8896
339
+ 1910,135200,6.4877
340
+ 1918,135600,6.8896
341
+ 1924,136000,6.4605
342
+ 1930,136400,9.0044
343
+ 1934,136800,10.5351
344
+ 1940,137200,9.2294
345
+ 1945,137600,10.9699
346
+ 1949,138000,10.0906
347
+ 1954,138400,10.2141
348
+ 1959,138800,11.3224
349
+ 1964,139200,9.5563
350
+ 1969,139600,6.9219
351
+ 1974,140000,11.1704
352
+ 1978,140400,11.1142
353
+ 1982,140800,11.9859
354
+ 1986,141200,11.8602
355
+ 1990,141600,11.6316
356
+ 1994,142000,11.6433
357
+ 2000,142400,9.2738
358
+ 2006,142800,7.5194
359
+ 2013,143200,6.4846
360
+ 2019,143600,7.8079
361
+ 2023,144000,9.8408
362
+ 2031,144400,6.9516
363
+ 2035,144800,9.8839
364
+ 2041,145200,7.7938
365
+ 2049,145600,7.6839
366
+ 2054,146000,7.4943
367
+ 2061,146400,8.0578
368
+ 2070,146800,4.8268
369
+ 2075,147200,10.4681
370
+ 2081,147600,7.9008
371
+ 2086,148000,11.0989
372
+ 2091,148400,11.994
373
+ 2095,148800,9.9046
374
+ 2105,149200,4.7372
375
+ 2111,149600,9.8328
376
+ 2116,150000,11.0763
377
+ 2122,150400,10.9598
378
+ 2129,150800,7.7438
379
+ 2135,151200,8.6623
380
+ 2140,151600,12.6654
381
+ 2146,152000,8.1182
382
+ 2151,152400,9.0062
383
+ 2157,152800,8.5988
384
+ 2163,153200,8.9284
385
+ 2167,153600,14.2702
386
+ 2173,154000,10.585
387
+ 2178,154400,11.7006
388
+ 2186,154800,6.7155
389
+ 2192,155200,7.7146
390
+ 2198,155600,8.8118
391
+ 2203,156000,11.7081
392
+ 2212,156400,4.9161
393
+ 2217,156800,11.632
394
+ 2222,157200,12.2152
395
+ 2227,157600,12.1379
396
+ 2236,158000,4.0621
397
+ 2244,158400,7.7841
398
+ 2250,158800,8.4301
399
+ 2260,159200,5.0246
400
+ 2266,159600,7.0603
401
+ 2274,160000,6.5755
402
+ 2282,160400,7.0485
403
+ 2287,160800,9.5418
404
+ 2297,161200,5.8611
405
+ 2305,161600,7.2947
406
+ 2312,162000,7.2398
407
+ 2316,162400,13.7584
408
+ 2322,162800,11.7808
409
+ 2329,163200,11.604
410
+ 2334,163600,13.0979
411
+ 2342,164000,8.5548
412
+ 2347,164400,14.6783
413
+ 2353,164800,12.7043
414
+ 2358,165200,13.3911
415
+ 2363,165600,11.383
416
+ 2371,166000,8.1752
417
+ 2376,166400,11.7496
418
+ 2382,166800,12.7269
419
+ 2388,167200,8.3856
420
+ 2397,167600,7.333
421
+ 2406,168000,6.2778
422
+ 2411,168400,11.0526
423
+ 2418,168800,9.0626
424
+ 2424,169200,7.5325
425
+ 2436,169600,4.0862
426
+ 2441,170000,10.966
427
+ 2446,170400,9.7353
428
+ 2452,170800,8.3233
429
+ 2459,171200,5.748
430
+ 2465,171600,7.1726
431
+ 2472,172000,7.998
432
+ 2481,172400,6.561
433
+ 2488,172800,6.2343
434
+ 2495,173200,7.5135
435
+ 2504,173600,4.6772
436
+ 2509,174000,9.5455
437
+ 2516,174400,7.1717
438
+ 2521,174800,9.9983
439
+ 2526,175200,8.8316
440
+ 2535,175600,4.6613
441
+ 2543,176000,4.4261
442
+ 2551,176400,7.2283
443
+ 2556,176800,14.7301
444
+ 2562,177200,11.5262
445
+ 2569,177600,9.4323
446
+ 2574,178000,14.3105
447
+ 2580,178400,11.4739
448
+ 2584,178800,14.4407
449
+ 2590,179200,13.8673
450
+ 2596,179600,13.3536
451
+ 2600,180000,12.2952
452
+ 2607,180400,11.8378
453
+ 2611,180800,18.9288
454
+ 2615,181200,13.6898
455
+ 2622,181600,10.7245
456
+ 2626,182000,17.0192
457
+ 2631,182400,17.5475
458
+ 2636,182800,14.2294
459
+ 2640,183200,18.8072
460
+ 2646,183600,10.0944
461
+ 2650,184000,14.8466
462
+ 2655,184400,12.8789
463
+ 2663,184800,7.384
464
+ 2668,185200,12.7785
465
+ 2680,185600,5.2209
466
+ 2687,186000,8.8431
467
+ 2695,186400,8.6145
468
+ 2703,186800,6.0656
469
+ 2708,187200,11.2342
470
+ 2714,187600,9.9512
471
+ 2723,188000,6.9898
472
+ 2729,188400,10.1041
473
+ 2739,188800,6.2203
474
+ 2744,189200,12.3465
475
+ 2754,189600,5.1606
476
+ 2761,190000,10.6421
477
+ 2767,190400,10.2982
478
+ 2777,190800,5.2088
479
+ 2782,191200,10.9174
480
+ 2786,191600,13.2552
481
+ 2791,192000,12.1285
482
+ 2798,192400,8.8285
483
+ 2805,192800,7.9108
484
+ 2814,193200,7.3404
485
+ 2819,193600,12.2333
486
+ 2825,194000,9.6662
487
+ 2832,194400,8.0742
488
+ 2837,194800,12.2136
489
+ 2843,195200,12.3488
490
+ 2850,195600,10.1218
491
+ 2856,196000,12.0153
492
+ 2862,196400,11.8017
493
+ 2869,196800,10.381
494
+ 2874,197200,14.0652
495
+ 2879,197600,14.9152
496
+ 2887,198000,8.0053
497
+ 2893,198400,12.0596
498
+ 2897,198800,15.0738
499
+ 2902,199200,16.9263
500
+ 2907,199600,12.2609
501
+ 2912,200000,15.858
code/Lake application/logs/results_1/PPO_frozen_lake_log_4.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 22,400,1.3382
3
+ 40,800,1.8287
4
+ 52,1200,2.5231
5
+ 66,1600,2.103
6
+ 77,2000,3.0155
7
+ 85,2400,3.7714
8
+ 90,2800,5.8035
9
+ 96,3200,5.2274
10
+ 100,3600,6.3303
11
+ 107,4000,4.8019
12
+ 111,4400,6.4724
13
+ 117,4800,5.5563
14
+ 122,5200,6.2998
15
+ 128,5600,5.1646
16
+ 132,6000,7.1287
17
+ 138,6400,5.8314
18
+ 142,6800,7.4052
19
+ 147,7200,6.2875
20
+ 152,7600,5.6015
21
+ 157,8000,6.1754
22
+ 162,8400,7.082
23
+ 167,8800,6.4028
24
+ 171,9200,7.2559
25
+ 176,9600,6.9444
26
+ 181,10000,6.2933
27
+ 186,10400,5.2366
28
+ 195,10800,3.9987
29
+ 201,11200,5.395
30
+ 207,11600,4.9618
31
+ 213,12000,5.1983
32
+ 219,12400,4.4254
33
+ 225,12800,5.7096
34
+ 229,13200,7.1898
35
+ 235,13600,5.3824
36
+ 242,14000,4.5048
37
+ 250,14400,3.7881
38
+ 255,14800,5.2267
39
+ 260,15200,7.031
40
+ 264,15600,6.9285
41
+ 268,16000,7.5476
42
+ 273,16400,5.6767
43
+ 278,16800,6.5262
44
+ 283,17200,6.1492
45
+ 288,17600,5.6468
46
+ 292,18000,7.6928
47
+ 297,18400,6.6286
48
+ 301,18800,7.7454
49
+ 305,19200,6.5254
50
+ 312,19600,4.8168
51
+ 317,20000,6.5406
52
+ 322,20400,6.5504
53
+ 326,20800,6.7464
54
+ 331,21200,6.2836
55
+ 336,21600,6.7817
56
+ 340,22000,6.6796
57
+ 344,22400,7.2005
58
+ 351,22800,4.3842
59
+ 355,23200,7.6366
60
+ 359,23600,7.4904
61
+ 365,24000,6.1521
62
+ 369,24400,7.6973
63
+ 373,24800,7.0403
64
+ 377,25200,6.9523
65
+ 382,25600,7.1983
66
+ 387,26000,6.4586
67
+ 391,26400,6.4252
68
+ 397,26800,5.499
69
+ 402,27200,5.0302
70
+ 407,27600,6.8429
71
+ 413,28000,4.7347
72
+ 418,28400,6.183
73
+ 423,28800,6.8443
74
+ 427,29200,7.5077
75
+ 431,29600,6.0028
76
+ 437,30000,6.0114
77
+ 441,30400,6.4562
78
+ 446,30800,6.7718
79
+ 450,31200,6.276
80
+ 454,31600,7.5279
81
+ 458,32000,7.9159
82
+ 462,32400,8.0556
83
+ 468,32800,5.5321
84
+ 473,33200,5.7863
85
+ 479,33600,5.8919
86
+ 484,34000,5.3791
87
+ 488,34400,7.7837
88
+ 494,34800,5.618
89
+ 498,35200,7.0261
90
+ 502,35600,7.828
91
+ 507,36000,6.4049
92
+ 512,36400,6.4361
93
+ 517,36800,5.7041
94
+ 522,37200,7.132
95
+ 526,37600,7.4088
96
+ 530,38000,7.8924
97
+ 535,38400,5.7086
98
+ 540,38800,6.5723
99
+ 544,39200,7.588
100
+ 548,39600,6.1673
101
+ 552,40000,7.8064
102
+ 557,40400,6.5526
103
+ 562,40800,7.5729
104
+ 566,41200,6.3143
105
+ 570,41600,8.4047
106
+ 575,42000,7.2824
107
+ 579,42400,7.7924
108
+ 583,42800,7.4971
109
+ 587,43200,6.6631
110
+ 592,43600,7.1095
111
+ 596,44000,6.4556
112
+ 600,44400,7.6746
113
+ 604,44800,7.6558
114
+ 609,45200,6.194
115
+ 614,45600,7.1185
116
+ 619,46000,5.7002
117
+ 624,46400,6.5519
118
+ 629,46800,6.8268
119
+ 633,47200,6.9014
120
+ 638,47600,6.301
121
+ 644,48000,6.1982
122
+ 651,48400,4.9436
123
+ 655,48800,6.7595
124
+ 661,49200,6.5292
125
+ 666,49600,5.158
126
+ 671,50000,6.6904
127
+ 675,50400,8.0493
128
+ 679,50800,7.9991
129
+ 687,51200,4.3244
130
+ 692,51600,7.5411
131
+ 696,52000,9.858
132
+ 700,52400,8.5624
133
+ 704,52800,7.9155
134
+ 709,53200,8.9057
135
+ 713,53600,9.5471
136
+ 718,54000,7.1981
137
+ 722,54400,8.6158
138
+ 726,54800,10.1275
139
+ 736,55200,4.2344
140
+ 742,55600,7.8258
141
+ 747,56000,7.7521
142
+ 753,56400,9.0208
143
+ 759,56800,7.7654
144
+ 765,57200,8.6142
145
+ 771,57600,6.8121
146
+ 776,58000,10.9618
147
+ 781,58400,10.3827
148
+ 785,58800,10.0289
149
+ 791,59200,9.8831
150
+ 800,59600,6.9521
151
+ 806,60000,7.7517
152
+ 815,60400,8.8211
153
+ 821,60800,8.9909
154
+ 827,61200,8.7221
155
+ 836,61600,7.7684
156
+ 841,62000,9.6272
157
+ 845,62400,14.9976
158
+ 850,62800,11.6372
159
+ 857,63200,8.8488
160
+ 865,63600,7.6496
161
+ 871,64000,10.0977
162
+ 876,64400,10.3548
163
+ 883,64800,10.7205
164
+ 889,65200,12.5606
165
+ 896,65600,9.843
166
+ 902,66000,11.6978
167
+ 909,66400,10.94
168
+ 915,66800,11.5443
169
+ 920,67200,14.4685
170
+ 927,67600,7.6312
171
+ 934,68000,9.3931
172
+ 941,68400,7.5232
173
+ 946,68800,13.3119
174
+ 952,69200,15.4347
175
+ 956,69600,14.1605
176
+ 963,70000,12.82
177
+ 968,70400,14.5149
178
+ 973,70800,14.5627
179
+ 979,71200,12.8107
180
+ 984,71600,16.1239
181
+ 992,72000,8.8389
182
+ 997,72400,15.4108
183
+ 1008,72800,3.5878
184
+ 1015,73200,11.6583
185
+ 1023,73600,4.6755
186
+ 1029,74000,7.929
187
+ 1034,74400,8.3184
188
+ 1043,74800,4.4589
189
+ 1049,75200,5.8413
190
+ 1055,75600,5.8525
191
+ 1061,76000,5.7395
192
+ 1066,76400,7.915
193
+ 1072,76800,6.9911
194
+ 1077,77200,9.5236
195
+ 1085,77600,6.0534
196
+ 1091,78000,6.3831
197
+ 1098,78400,5.0208
198
+ 1104,78800,8.7623
199
+ 1109,79200,6.9585
200
+ 1117,79600,6.3551
201
+ 1121,80000,14.1976
202
+ 1127,80400,9.871
203
+ 1131,80800,13.4573
204
+ 1138,81200,7.47
205
+ 1143,81600,7.7708
206
+ 1149,82000,10.2425
207
+ 1154,82400,13.5466
208
+ 1160,82800,8.8403
209
+ 1166,83200,9.1941
210
+ 1171,83600,12.7902
211
+ 1179,84000,6.3707
212
+ 1186,84400,8.839
213
+ 1195,84800,6.0659
214
+ 1202,85200,9.7067
215
+ 1210,85600,5.3875
216
+ 1219,86000,7.3249
217
+ 1228,86400,8.1348
218
+ 1240,86800,4.2997
219
+ 1246,87200,7.7715
220
+ 1253,87600,9.9431
221
+ 1261,88000,5.8097
222
+ 1268,88400,9.7243
223
+ 1275,88800,9.9173
224
+ 1281,89200,8.526
225
+ 1291,89600,6.5095
226
+ 1296,90000,10.6242
227
+ 1305,90400,6.8888
228
+ 1313,90800,6.5251
229
+ 1320,91200,7.6252
230
+ 1330,91600,6.4161
231
+ 1339,92000,6.192
232
+ 1345,92400,10.4853
233
+ 1352,92800,6.5388
234
+ 1358,93200,12.3501
235
+ 1365,93600,7.43
236
+ 1373,94000,7.5638
237
+ 1378,94400,15.0633
238
+ 1384,94800,14.7513
239
+ 1396,95200,6.5226
240
+ 1403,95600,8.8394
241
+ 1410,96000,7.9987
242
+ 1420,96400,7.5662
243
+ 1427,96800,8.6258
244
+ 1435,97200,9.7086
245
+ 1444,97600,8.0059
246
+ 1451,98000,10.653
247
+ 1459,98400,7.4015
248
+ 1471,98800,4.6864
249
+ 1479,99200,7.8408
250
+ 1485,99600,10.5011
251
+ 1491,100000,14.4978
252
+ 1498,100400,13.2709
253
+ 1504,100800,11.8503
254
+ 1511,101200,10.111
255
+ 1518,101600,7.1249
256
+ 1524,102000,11.0255
257
+ 1533,102400,9.3004
258
+ 1545,102800,4.3136
259
+ 1549,103200,20.1579
260
+ 1562,103600,5.6588
261
+ 1567,104000,11.4403
262
+ 1574,104400,7.7448
263
+ 1582,104800,8.4585
264
+ 1591,105200,7.9123
265
+ 1596,105600,12.7572
266
+ 1602,106000,9.7458
267
+ 1613,106400,6.0184
268
+ 1621,106800,7.3456
269
+ 1627,107200,10.2534
270
+ 1632,107600,14.4112
271
+ 1637,108000,10.8765
272
+ 1647,108400,8.215
273
+ 1652,108800,14.8476
274
+ 1658,109200,13.0967
275
+ 1666,109600,9.5115
276
+ 1672,110000,12.5662
277
+ 1680,110400,9.9636
278
+ 1685,110800,17.9725
279
+ 1690,111200,21.6135
280
+ 1695,111600,16.7148
281
+ 1702,112000,11.3757
282
+ 1709,112400,11.6943
283
+ 1714,112800,20.2134
284
+ 1718,113200,15.6719
285
+ 1724,113600,17.1174
286
+ 1728,114000,21.9856
287
+ 1733,114400,14.545
288
+ 1741,114800,9.2946
289
+ 1745,115200,13.9682
290
+ 1751,115600,11.2764
291
+ 1758,116000,10.046
292
+ 1765,116400,6.6395
293
+ 1770,116800,12.6278
294
+ 1778,117200,7.5994
295
+ 1786,117600,5.1644
296
+ 1794,118000,6.6251
297
+ 1801,118400,5.2048
298
+ 1806,118800,8.7313
299
+ 1813,119200,6.7291
300
+ 1821,119600,5.8351
301
+ 1829,120000,8.9222
302
+ 1835,120400,8.7741
303
+ 1842,120800,7.2133
304
+ 1849,121200,8.8234
305
+ 1855,121600,9.6317
306
+ 1862,122000,8.2559
307
+ 1867,122400,11.0191
308
+ 1872,122800,13.0496
309
+ 1878,123200,12.1102
310
+ 1884,123600,15.2622
311
+ 1890,124000,14.949
312
+ 1897,124400,14.1959
313
+ 1905,124800,10.2858
314
+ 1912,125200,10.4039
315
+ 1920,125600,9.2182
316
+ 1925,126000,18.0285
317
+ 1931,126400,14.5642
318
+ 1936,126800,15.7313
319
+ 1942,127200,16.071
320
+ 1949,127600,12.4836
321
+ 1955,128000,14.5803
322
+ 1964,128400,6.5171
323
+ 1973,128800,10.3231
324
+ 1979,129200,11.6592
325
+ 1985,129600,13.9441
326
+ 1991,130000,11.8742
327
+ 1997,130400,14.2094
328
+ 2002,130800,16.5023
329
+ 2008,131200,7.9427
330
+ 2013,131600,13.3196
331
+ 2017,132000,22.0345
332
+ 2023,132400,9.6949
333
+ 2029,132800,8.3864
334
+ 2033,133200,15.5384
335
+ 2038,133600,14.2444
336
+ 2043,134000,16.1699
337
+ 2047,134400,14.6686
338
+ 2052,134800,11.1139
339
+ 2057,135200,15.3049
340
+ 2061,135600,18.3799
341
+ 2066,136000,10.9603
342
+ 2070,136400,12.5289
343
+ 2076,136800,12.3745
344
+ 2080,137200,11.4958
345
+ 2086,137600,11.5674
346
+ 2094,138000,8.5145
347
+ 2099,138400,15.1147
348
+ 2104,138800,12.0235
349
+ 2110,139200,10.5698
350
+ 2116,139600,11.9205
351
+ 2123,140000,6.9545
352
+ 2130,140400,10.1053
353
+ 2136,140800,11.9632
354
+ 2143,141200,10.4299
355
+ 2151,141600,8.3429
356
+ 2165,142000,4.3628
357
+ 2172,142400,6.8153
358
+ 2179,142800,12.8716
359
+ 2187,143200,5.8168
360
+ 2196,143600,8.4523
361
+ 2202,144000,11.4057
362
+ 2208,144400,10.0338
363
+ 2219,144800,6.4849
364
+ 2224,145200,12.969
365
+ 2228,145600,18.6417
366
+ 2233,146000,12.536
367
+ 2238,146400,15.4672
368
+ 2243,146800,13.8039
369
+ 2248,147200,14.223
370
+ 2254,147600,12.1347
371
+ 2261,148000,8.5235
372
+ 2267,148400,15.2563
373
+ 2272,148800,11.314
374
+ 2278,149200,12.9462
375
+ 2285,149600,9.7554
376
+ 2291,150000,14.0195
377
+ 2296,150400,15.3385
378
+ 2303,150800,9.973
379
+ 2308,151200,18.1009
380
+ 2312,151600,20.9784
381
+ 2321,152000,10.5002
382
+ 2335,152400,4.4978
383
+ 2342,152800,10.5598
384
+ 2350,153200,9.1714
385
+ 2357,153600,12.176
386
+ 2364,154000,11.5123
387
+ 2368,154400,20.0547
388
+ 2373,154800,17.2117
389
+ 2379,155200,15.459
390
+ 2387,155600,11.3856
391
+ 2393,156000,10.4216
392
+ 2398,156400,18.6778
393
+ 2406,156800,9.8121
394
+ 2413,157200,15.4442
395
+ 2420,157600,8.2398
396
+ 2429,158000,7.9804
397
+ 2436,158400,10.0033
398
+ 2444,158800,11.684
399
+ 2451,159200,11.7212
400
+ 2458,159600,11.1657
401
+ 2463,160000,12.9049
402
+ 2474,160400,7.2923
403
+ 2480,160800,11.1158
404
+ 2488,161200,7.0536
405
+ 2496,161600,12.9054
406
+ 2503,162000,10.8589
407
+ 2510,162400,7.6154
408
+ 2516,162800,15.1175
409
+ 2526,163200,6.0549
410
+ 2535,163600,6.8174
411
+ 2543,164000,11.3806
412
+ 2548,164400,14.9169
413
+ 2555,164800,8.7519
414
+ 2561,165200,12.8943
415
+ 2568,165600,7.4888
416
+ 2577,166000,12.9756
417
+ 2586,166400,9.7037
418
+ 2595,166800,7.6698
419
+ 2604,167200,8.0272
420
+ 2609,167600,17.4422
421
+ 2614,168000,21.842
422
+ 2619,168400,24.0025
423
+ 2624,168800,21.9548
424
+ 2629,169200,16.2903
425
+ 2636,169600,15.3797
426
+ 2644,170000,12.3509
427
+ 2651,170400,17.7225
428
+ 2657,170800,17.4392
429
+ 2667,171200,7.6192
430
+ 2672,171600,19.9947
431
+ 2677,172000,21.4869
432
+ 2682,172400,16.9837
433
+ 2689,172800,15.026
434
+ 2698,173200,8.2427
435
+ 2704,173600,9.9491
436
+ 2712,174000,12.5259
437
+ 2717,174400,17.4708
438
+ 2723,174800,12.1165
439
+ 2729,175200,15.3909
440
+ 2736,175600,12.777
441
+ 2741,176000,21.2902
442
+ 2745,176400,26.1009
443
+ 2751,176800,17.7154
444
+ 2757,177200,14.3114
445
+ 2762,177600,21.2451
446
+ 2766,178000,24.6657
447
+ 2773,178400,11.323
448
+ 2778,178800,18.9868
449
+ 2789,179200,10.2768
450
+ 2795,179600,16.099
451
+ 2802,180000,9.7036
452
+ 2811,180400,10.1027
453
+ 2817,180800,14.6335
454
+ 2821,181200,20.4911
455
+ 2825,181600,23.7041
456
+ 2830,182000,19.1257
457
+ 2834,182400,21.7177
458
+ 2838,182800,22.6593
459
+ 2843,183200,21.3371
460
+ 2849,183600,12.1633
461
+ 2855,184000,18.8629
462
+ 2859,184400,16.2873
463
+ 2866,184800,16.9789
464
+ 2871,185200,13.7611
465
+ 2878,185600,12.7885
466
+ 2883,186000,18.4539
467
+ 2888,186400,19.4485
468
+ 2894,186800,17.3323
469
+ 2899,187200,16.1448
470
+ 2909,187600,9.09
471
+ 2921,188000,5.7227
472
+ 2930,188400,9.5406
473
+ 2939,188800,8.9963
474
+ 2948,189200,9.0039
475
+ 2959,189600,6.1213
476
+ 2968,190000,10.7098
477
+ 2973,190400,27.5424
478
+ 2981,190800,11.4104
479
+ 2988,191200,14.7688
480
+ 2996,191600,8.1336
481
+ 3006,192000,9.3062
482
+ 3013,192400,9.5427
483
+ 3020,192800,12.1444
484
+ 3027,193200,13.9121
485
+ 3033,193600,11.6091
486
+ 3043,194000,9.565
487
+ 3050,194400,12.3977
488
+ 3058,194800,13.0409
489
+ 3064,195200,19.9469
490
+ 3071,195600,12.6553
491
+ 3078,196000,11.394
492
+ 3082,196400,23.2945
493
+ 3087,196800,25.3409
494
+ 3093,197200,20.015
495
+ 3099,197600,16.1347
496
+ 3103,198000,15.8059
497
+ 3108,198400,16.2553
498
+ 3116,198800,12.0655
499
+ 3123,199200,7.5759
500
+ 3133,199600,8.3166
501
+ 3146,200000,5.2906
code/Lake application/logs/results_1/PPO_frozen_lake_log_5.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 24,400,1.1896
3
+ 45,800,1.4148
4
+ 59,1200,2.3332
5
+ 66,1600,5.1201
6
+ 70,2000,6.5847
7
+ 78,2400,5.1845
8
+ 84,2800,5.4028
9
+ 91,3200,4.5706
10
+ 98,3600,4.8473
11
+ 103,4000,6.01
12
+ 108,4400,5.9648
13
+ 114,4800,5.9022
14
+ 118,5200,7.4097
15
+ 123,5600,6.5452
16
+ 128,6000,6.3157
17
+ 133,6400,7.2965
18
+ 143,6800,3.5149
19
+ 147,7200,7.2122
20
+ 151,7600,7.6719
21
+ 156,8000,7.8449
22
+ 160,8400,6.3771
23
+ 168,8800,4.1202
24
+ 172,9200,7.7297
25
+ 179,9600,5.2521
26
+ 184,10000,5.0265
27
+ 190,10400,6.301
28
+ 195,10800,6.4812
29
+ 200,11200,5.9884
30
+ 204,11600,8.2287
31
+ 208,12000,6.8769
32
+ 213,12400,8.2376
33
+ 217,12800,6.7639
34
+ 222,13200,7.491
35
+ 226,13600,7.14
36
+ 234,14000,4.8056
37
+ 238,14400,8.2315
38
+ 244,14800,5.6201
39
+ 250,15200,5.6978
40
+ 254,15600,8.6572
41
+ 259,16000,7.1277
42
+ 264,16400,6.3588
43
+ 276,16800,3.2533
44
+ 285,17200,4.354
45
+ 295,17600,3.6256
46
+ 302,18000,5.482
47
+ 312,18400,4.9095
48
+ 324,18800,3.2072
49
+ 332,19200,4.2376
50
+ 337,19600,7.9829
51
+ 343,20000,7.3154
52
+ 352,20400,5.8945
53
+ 359,20800,5.4014
54
+ 367,21200,6.2058
55
+ 372,21600,8.3138
56
+ 377,22000,7.7193
57
+ 385,22400,5.7761
58
+ 392,22800,6.301
59
+ 397,23200,7.7678
60
+ 402,23600,7.7383
61
+ 408,24000,8.0968
62
+ 414,24400,7.2615
63
+ 420,24800,8.6811
64
+ 428,25200,6.1812
65
+ 434,25600,5.421
66
+ 439,26000,10.3048
67
+ 444,26400,8.6396
68
+ 450,26800,5.8203
69
+ 455,27200,8.955
70
+ 460,27600,10.1203
71
+ 464,28000,10.9113
72
+ 469,28400,10.8469
73
+ 475,28800,8.0835
74
+ 481,29200,8.4523
75
+ 486,29600,9.6786
76
+ 494,30000,5.8029
77
+ 499,30400,9.7283
78
+ 503,30800,10.3865
79
+ 509,31200,7.4219
80
+ 516,31600,6.7019
81
+ 521,32000,8.9637
82
+ 526,32400,8.0468
83
+ 531,32800,9.7588
84
+ 538,33200,7.8747
85
+ 542,33600,8.1689
86
+ 546,34000,11.1529
87
+ 550,34400,11.3951
88
+ 555,34800,9.1521
89
+ 559,35200,10.907
90
+ 563,35600,11.4909
91
+ 569,36000,8.8749
92
+ 573,36400,11.4543
93
+ 577,36800,10.4455
94
+ 583,37200,6.9897
95
+ 587,37600,9.4309
96
+ 592,38000,10.4979
97
+ 596,38400,10.9061
98
+ 600,38800,11.3928
99
+ 606,39200,9.6524
100
+ 610,39600,11.4807
101
+ 616,40000,7.0779
102
+ 620,40400,7.8871
103
+ 630,40800,5.189
104
+ 637,41200,4.9294
105
+ 645,41600,5.8549
106
+ 649,42000,9.1055
107
+ 655,42400,7.5844
108
+ 660,42800,7.9374
109
+ 665,43200,8.81
110
+ 669,43600,11.0942
111
+ 675,44000,8.1994
112
+ 681,44400,5.7097
113
+ 686,44800,5.6185
114
+ 693,45200,6.8876
115
+ 697,45600,9.3624
116
+ 702,46000,10.4714
117
+ 707,46400,8.134
118
+ 711,46800,8.3317
119
+ 717,47200,8.2672
120
+ 722,47600,8.0033
121
+ 729,48000,5.1115
122
+ 737,48400,6.7697
123
+ 743,48800,6.1466
124
+ 750,49200,6.2071
125
+ 756,49600,8.233
126
+ 761,50000,6.4818
127
+ 766,50400,7.5639
128
+ 771,50800,8.9587
129
+ 778,51200,7.244
130
+ 783,51600,8.6157
131
+ 790,52000,6.8854
132
+ 796,52400,6.3743
133
+ 800,52800,7.0537
134
+ 806,53200,5.9831
135
+ 812,53600,7.1028
136
+ 817,54000,7.4769
137
+ 822,54400,6.6734
138
+ 828,54800,7.8816
139
+ 833,55200,7.9481
140
+ 837,55600,8.0056
141
+ 842,56000,8.3952
142
+ 847,56400,7.193
143
+ 852,56800,6.8126
144
+ 857,57200,6.8457
145
+ 861,57600,8.5708
146
+ 867,58000,7.7666
147
+ 872,58400,8.4442
148
+ 877,58800,8.728
149
+ 882,59200,6.7609
150
+ 887,59600,6.3465
151
+ 892,60000,9.3179
152
+ 896,60400,10.0812
153
+ 901,60800,7.6496
154
+ 906,61200,8.6439
155
+ 914,61600,5.8768
156
+ 921,62000,5.4985
157
+ 928,62400,6.8348
158
+ 932,62800,11.426
159
+ 938,63200,7.9831
160
+ 942,63600,11.7064
161
+ 947,64000,8.5804
162
+ 956,64400,4.5109
163
+ 963,64800,6.216
164
+ 969,65200,6.0191
165
+ 976,65600,5.3416
166
+ 983,66000,5.4151
167
+ 989,66400,6.187
168
+ 993,66800,8.3532
169
+ 998,67200,5.5044
170
+ 1003,67600,7.4823
171
+ 1009,68000,6.0798
172
+ 1015,68400,5.3716
173
+ 1021,68800,6.6076
174
+ 1029,69200,4.1927
175
+ 1035,69600,5.568
176
+ 1040,70000,8.2748
177
+ 1044,70400,8.0315
178
+ 1049,70800,7.3339
179
+ 1054,71200,8.792
180
+ 1059,71600,5.9685
181
+ 1064,72000,9.0921
182
+ 1070,72400,6.5296
183
+ 1075,72800,6.9532
184
+ 1080,73200,7.8572
185
+ 1087,73600,6.9338
186
+ 1092,74000,7.5691
187
+ 1096,74400,7.7686
188
+ 1105,74800,5.2799
189
+ 1111,75200,6.5266
190
+ 1121,75600,4.676
191
+ 1131,76000,4.0666
192
+ 1136,76400,7.5432
193
+ 1142,76800,9.6043
194
+ 1147,77200,10.452
195
+ 1153,77600,9.5079
196
+ 1161,78000,5.3496
197
+ 1168,78400,7.87
198
+ 1172,78800,12.6175
199
+ 1177,79200,11.3827
200
+ 1184,79600,9.1898
201
+ 1189,80000,9.0837
202
+ 1194,80400,10.6039
203
+ 1204,80800,5.8527
204
+ 1213,81200,4.7043
205
+ 1221,81600,5.8605
206
+ 1228,82000,6.6981
207
+ 1233,82400,9.605
208
+ 1241,82800,5.4104
209
+ 1246,83200,8.432
210
+ 1251,83600,7.9885
211
+ 1255,84000,10.6539
212
+ 1261,84400,7.0366
213
+ 1268,84800,5.8069
214
+ 1273,85200,6.9464
215
+ 1280,85600,5.2223
216
+ 1286,86000,5.3272
217
+ 1292,86400,6.4975
218
+ 1301,86800,3.7054
219
+ 1307,87200,5.0905
220
+ 1314,87600,5.9251
221
+ 1320,88000,5.9757
222
+ 1325,88400,7.8605
223
+ 1330,88800,6.1258
224
+ 1335,89200,6.4772
225
+ 1342,89600,4.6821
226
+ 1348,90000,6.1312
227
+ 1356,90400,4.4604
228
+ 1360,90800,8.8793
229
+ 1366,91200,5.4831
230
+ 1372,91600,6.6693
231
+ 1377,92000,6.0235
232
+ 1382,92400,7.023
233
+ 1386,92800,7.991
234
+ 1391,93200,9.647
235
+ 1399,93600,4.4356
236
+ 1404,94000,7.8905
237
+ 1413,94400,4.2128
238
+ 1418,94800,7.0636
239
+ 1423,95200,8.5693
240
+ 1430,95600,6.583
241
+ 1436,96000,7.1519
242
+ 1444,96400,4.0687
243
+ 1454,96800,3.9372
244
+ 1460,97200,6.4719
245
+ 1467,97600,7.3715
246
+ 1477,98000,4.4424
247
+ 1486,98400,5.2201
248
+ 1495,98800,5.0595
249
+ 1500,99200,8.9895
250
+ 1507,99600,5.1904
251
+ 1512,100000,7.0454
252
+ 1520,100400,5.0524
253
+ 1526,100800,6.979
254
+ 1530,101200,8.5726
255
+ 1538,101600,5.8817
256
+ 1545,102000,4.4648
257
+ 1552,102400,6.082
258
+ 1560,102800,5.4946
259
+ 1567,103200,5.3969
260
+ 1574,103600,5.1586
261
+ 1580,104000,5.9475
262
+ 1588,104400,5.6989
263
+ 1595,104800,7.2335
264
+ 1602,105200,5.8916
265
+ 1606,105600,9.3439
266
+ 1613,106000,9.341
267
+ 1622,106400,6.1297
268
+ 1626,106800,12.5539
269
+ 1631,107200,8.9207
270
+ 1637,107600,9.1629
271
+ 1643,108000,8.1442
272
+ 1650,108400,6.787
273
+ 1656,108800,8.6608
274
+ 1664,109200,5.6852
275
+ 1673,109600,5.7596
276
+ 1678,110000,11.7301
277
+ 1684,110400,6.4299
278
+ 1691,110800,8.9632
279
+ 1700,111200,5.1691
280
+ 1707,111600,6.5305
281
+ 1717,112000,4.326
282
+ 1723,112400,8.8668
283
+ 1731,112800,6.4173
284
+ 1737,113200,8.5648
285
+ 1741,113600,11.226
286
+ 1750,114000,5.9103
287
+ 1757,114400,8.2391
288
+ 1761,114800,11.72
289
+ 1765,115200,12.409
290
+ 1770,115600,9.8668
291
+ 1775,116000,9.7725
292
+ 1783,116400,5.4277
293
+ 1787,116800,11.838
294
+ 1793,117200,8.9856
295
+ 1802,117600,4.9619
296
+ 1809,118000,7.6212
297
+ 1814,118400,9.6064
298
+ 1820,118800,9.063
299
+ 1826,119200,8.8093
300
+ 1832,119600,8.2819
301
+ 1839,120000,6.0866
302
+ 1844,120400,9.4807
303
+ 1849,120800,11.4309
304
+ 1854,121200,8.7056
305
+ 1859,121600,9.2045
306
+ 1866,122000,7.9125
307
+ 1873,122400,7.3911
308
+ 1883,122800,3.7769
309
+ 1889,123200,7.0604
310
+ 1896,123600,5.8807
311
+ 1902,124000,8.3782
312
+ 1907,124400,9.9949
313
+ 1911,124800,12.1608
314
+ 1916,125200,10.2111
315
+ 1923,125600,6.3728
316
+ 1927,126000,11.6701
317
+ 1932,126400,10.0707
318
+ 1937,126800,9.8646
319
+ 1943,127200,8.6133
320
+ 1948,127600,6.8825
321
+ 1952,128000,11.6098
322
+ 1957,128400,10.7833
323
+ 1967,128800,4.9028
324
+ 1974,129200,4.8043
325
+ 1981,129600,7.1724
326
+ 1988,130000,6.6265
327
+ 1992,130400,12.4505
328
+ 1997,130800,10.4089
329
+ 2002,131200,9.3429
330
+ 2008,131600,8.49
331
+ 2016,132000,5.6689
332
+ 2022,132400,7.8804
333
+ 2027,132800,10.9685
334
+ 2033,133200,7.3669
335
+ 2040,133600,7.722
336
+ 2046,134000,7.4917
337
+ 2050,134400,12.4121
338
+ 2055,134800,8.4809
339
+ 2060,135200,8.2016
340
+ 2065,135600,9.6964
341
+ 2069,136000,11.8806
342
+ 2074,136400,10.897
343
+ 2081,136800,6.2054
344
+ 2088,137200,6.4216
345
+ 2097,137600,5.6088
346
+ 2105,138000,6.3134
347
+ 2111,138400,7.3101
348
+ 2118,138800,7.7567
349
+ 2122,139200,11.3647
350
+ 2127,139600,7.5965
351
+ 2133,140000,8.5471
352
+ 2140,140400,7.6505
353
+ 2148,140800,5.5044
354
+ 2153,141200,10.6672
355
+ 2158,141600,9.3921
356
+ 2164,142000,9.4556
357
+ 2169,142400,8.4184
358
+ 2174,142800,12.2423
359
+ 2180,143200,10.4702
360
+ 2192,143600,3.7695
361
+ 2199,144000,7.5407
362
+ 2209,144400,5.2107
363
+ 2218,144800,5.2279
364
+ 2223,145200,10.1944
365
+ 2232,145600,6.5609
366
+ 2239,146000,7.3471
367
+ 2246,146400,9.7593
368
+ 2256,146800,6.1584
369
+ 2265,147200,6.7017
370
+ 2272,147600,8.3735
371
+ 2278,148000,12.8356
372
+ 2284,148400,10.6073
373
+ 2290,148800,11.7075
374
+ 2294,149200,15.3994
375
+ 2298,149600,14.141
376
+ 2304,150000,12.4809
377
+ 2310,150400,9.9578
378
+ 2319,150800,5.6223
379
+ 2327,151200,9.5834
380
+ 2333,151600,10.7389
381
+ 2341,152000,7.72
382
+ 2346,152400,7.7508
383
+ 2352,152800,12.4856
384
+ 2357,153200,11.9052
385
+ 2363,153600,10.7156
386
+ 2373,154000,4.0923
387
+ 2382,154400,6.618
388
+ 2388,154800,7.4782
389
+ 2399,155200,5.56
390
+ 2409,155600,4.0632
391
+ 2418,156000,6.6121
392
+ 2425,156400,8.951
393
+ 2433,156800,6.1173
394
+ 2439,157200,10.4851
395
+ 2449,157600,6.8041
396
+ 2455,158000,9.7919
397
+ 2461,158400,14.9732
398
+ 2466,158800,17.2664
399
+ 2476,159200,7.0055
400
+ 2480,159600,18.4795
401
+ 2488,160000,7.4399
402
+ 2493,160400,13.4248
403
+ 2497,160800,17.9206
404
+ 2505,161200,10.3671
405
+ 2513,161600,7.8602
406
+ 2520,162000,7.5847
407
+ 2530,162400,6.8028
408
+ 2537,162800,8.7183
409
+ 2544,163200,10.3386
410
+ 2551,163600,11.3944
411
+ 2556,164000,10.885
412
+ 2566,164400,7.0053
413
+ 2575,164800,7.0606
414
+ 2580,165200,14.0458
415
+ 2584,165600,16.733
416
+ 2595,166000,6.4971
417
+ 2606,166400,5.5624
418
+ 2612,166800,12.141
419
+ 2616,167200,20.5602
420
+ 2625,167600,7.7261
421
+ 2634,168000,7.5007
422
+ 2639,168400,13.3516
423
+ 2645,168800,13.6817
424
+ 2651,169200,10.8423
425
+ 2657,169600,11.7492
426
+ 2664,170000,9.1222
427
+ 2670,170400,10.5513
428
+ 2677,170800,10.756
429
+ 2682,171200,14.8444
430
+ 2687,171600,13.6451
431
+ 2692,172000,11.6598
432
+ 2699,172400,10.1388
433
+ 2708,172800,8.8513
434
+ 2720,173200,4.2224
435
+ 2727,173600,8.3196
436
+ 2732,174000,11.9491
437
+ 2740,174400,6.6871
438
+ 2748,174800,7.4226
439
+ 2753,175200,10.0185
440
+ 2760,175600,10.3176
441
+ 2764,176000,11.3026
442
+ 2770,176400,9.9799
443
+ 2776,176800,9.911
444
+ 2782,177200,7.9342
445
+ 2789,177600,6.5082
446
+ 2796,178000,6.9011
447
+ 2801,178400,11.9291
448
+ 2806,178800,9.9604
449
+ 2814,179200,7.9044
450
+ 2820,179600,13.3649
451
+ 2826,180000,9.8404
452
+ 2832,180400,12.8414
453
+ 2836,180800,18.641
454
+ 2843,181200,11.7401
455
+ 2850,181600,12.4612
456
+ 2859,182000,8.9368
457
+ 2864,182400,12.2839
458
+ 2869,182800,15.863
459
+ 2877,183200,8.5494
460
+ 2884,183600,11.6935
461
+ 2896,184000,6.2298
462
+ 2903,184400,10.0828
463
+ 2909,184800,11.6092
464
+ 2914,185200,11.3742
465
+ 2922,185600,10.9442
466
+ 2932,186000,5.9435
467
+ 2936,186400,15.5259
468
+ 2944,186800,8.2002
469
+ 2950,187200,12.5212
470
+ 2957,187600,9.7587
471
+ 2962,188000,12.3502
472
+ 2969,188400,10.8997
473
+ 2976,188800,8.21
474
+ 2984,189200,8.5636
475
+ 2990,189600,12.3993
476
+ 2996,190000,12.8622
477
+ 3000,190400,19.3607
478
+ 3006,190800,9.842
479
+ 3010,191200,14.5131
480
+ 3020,191600,5.0736
481
+ 3030,192000,4.7978
482
+ 3042,192400,3.6577
483
+ 3049,192800,8.4608
484
+ 3056,193200,7.3668
485
+ 3068,193600,4.3313
486
+ 3075,194000,7.6891
487
+ 3080,194400,12.0177
488
+ 3086,194800,8.6174
489
+ 3093,195200,7.7047
490
+ 3099,195600,8.0317
491
+ 3105,196000,8.0931
492
+ 3114,196400,5.4667
493
+ 3122,196800,6.6043
494
+ 3132,197200,5.0706
495
+ 3140,197600,8.1802
496
+ 3149,198000,6.0144
497
+ 3160,198400,5.1743
498
+ 3168,198800,6.8536
499
+ 3178,199200,3.8362
500
+ 3186,199600,6.7096
501
+ 3192,200000,8.9841
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_1.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 22,400,1.2526
3
+ 48,800,1.1762
4
+ 66,1200,1.698
5
+ 86,1600,1.5699
6
+ 100,2000,2.2196
7
+ 114,2400,2.2243
8
+ 130,2800,1.9494
9
+ 144,3200,2.1269
10
+ 161,3600,1.8416
11
+ 173,4000,2.4108
12
+ 186,4400,2.3831
13
+ 195,4800,3.7798
14
+ 204,5200,3.3905
15
+ 212,5600,3.7893
16
+ 221,6000,2.6417
17
+ 230,6400,3.9088
18
+ 245,6800,2.3282
19
+ 256,7200,2.7405
20
+ 263,7600,4.6528
21
+ 273,8000,3.0146
22
+ 281,8400,3.2375
23
+ 291,8800,3.553
24
+ 297,9200,4.7911
25
+ 307,9600,3.4636
26
+ 314,10000,4.4748
27
+ 320,10400,4.6664
28
+ 329,10800,3.4853
29
+ 336,11200,4.7085
30
+ 341,11600,6.6859
31
+ 349,12000,3.6321
32
+ 355,12400,5.3428
33
+ 362,12800,4.1236
34
+ 367,13200,6.1173
35
+ 373,13600,5.3324
36
+ 377,14000,7.2656
37
+ 381,14400,7.7223
38
+ 386,14800,5.9847
39
+ 390,15200,7.6658
40
+ 395,15600,5.8334
41
+ 400,16000,7.1939
42
+ 404,16400,7.1478
43
+ 408,16800,7.5988
44
+ 414,17200,6.1732
45
+ 419,17600,6.1007
46
+ 423,18000,7.2673
47
+ 427,18400,7.41
48
+ 432,18800,7.0193
49
+ 438,19200,5.3807
50
+ 443,19600,5.9541
51
+ 447,20000,7.6451
52
+ 452,20400,7.0583
53
+ 457,20800,7.3468
54
+ 461,21200,7.6243
55
+ 466,21600,6.6661
56
+ 473,22000,4.9625
57
+ 479,22400,4.5857
58
+ 484,22800,6.9212
59
+ 488,23200,6.1686
60
+ 493,23600,7.897
61
+ 497,24000,7.9243
62
+ 501,24400,8.608
63
+ 506,24800,6.6952
64
+ 511,25200,6.2059
65
+ 516,25600,7.1384
66
+ 522,26000,5.7323
67
+ 527,26400,5.9145
68
+ 534,26800,5.6198
69
+ 538,27200,7.6158
70
+ 542,27600,7.7017
71
+ 546,28000,7.8191
72
+ 551,28400,8.2935
73
+ 556,28800,7.3137
74
+ 562,29200,5.8557
75
+ 570,29600,4.3915
76
+ 576,30000,4.9182
77
+ 581,30400,6.3954
78
+ 585,30800,7.2091
79
+ 590,31200,9.0975
80
+ 594,31600,5.9126
81
+ 600,32000,5.1805
82
+ 606,32400,6.9075
83
+ 611,32800,6.2059
84
+ 616,33200,4.6794
85
+ 621,33600,7.0498
86
+ 626,34000,7.4649
87
+ 631,34400,6.6183
88
+ 635,34800,8.8603
89
+ 639,35200,6.275
90
+ 643,35600,9.1126
91
+ 648,36000,6.9308
92
+ 653,36400,6.7373
93
+ 657,36800,7.6857
94
+ 662,37200,7.836
95
+ 666,37600,6.3725
96
+ 671,38000,6.2922
97
+ 676,38400,6.6979
98
+ 680,38800,7.6388
99
+ 684,39200,7.0079
100
+ 688,39600,7.5892
101
+ 692,40000,7.7355
102
+ 697,40400,6.5238
103
+ 702,40800,6.181
104
+ 706,41200,6.5753
105
+ 711,41600,6.499
106
+ 716,42000,6.1642
107
+ 721,42400,6.3709
108
+ 725,42800,6.5148
109
+ 730,43200,6.1071
110
+ 734,43600,7.2231
111
+ 739,44000,6.9884
112
+ 743,44400,7.6752
113
+ 747,44800,7.645
114
+ 751,45200,7.6253
115
+ 756,45600,6.9888
116
+ 760,46000,6.551
117
+ 764,46400,8.8765
118
+ 768,46800,7.644
119
+ 772,47200,7.7078
120
+ 776,47600,7.7402
121
+ 780,48000,7.7096
122
+ 786,48400,6.4542
123
+ 790,48800,6.8511
124
+ 795,49200,6.7727
125
+ 799,49600,7.6417
126
+ 803,50000,7.6993
127
+ 807,50400,7.7043
128
+ 812,50800,6.8261
129
+ 816,51200,6.92
130
+ 820,51600,7.9919
131
+ 825,52000,6.4103
132
+ 830,52400,5.7618
133
+ 834,52800,7.0424
134
+ 838,53200,7.6928
135
+ 842,53600,7.7354
136
+ 847,54000,7.3411
137
+ 852,54400,6.3078
138
+ 856,54800,7.4612
139
+ 860,55200,6.6696
140
+ 865,55600,6.7569
141
+ 869,56000,6.9279
142
+ 874,56400,6.5996
143
+ 878,56800,7.7271
144
+ 882,57200,7.6932
145
+ 886,57600,7.5902
146
+ 890,58000,7.1683
147
+ 897,58400,4.5812
148
+ 903,58800,6.1682
149
+ 908,59200,6.1325
150
+ 912,59600,7.8386
151
+ 916,60000,6.3943
152
+ 921,60400,6.8943
153
+ 925,60800,7.8821
154
+ 932,61200,4.9096
155
+ 937,61600,6.3275
156
+ 941,62000,8.5893
157
+ 945,62400,7.9071
158
+ 949,62800,7.8808
159
+ 953,63200,7.8324
160
+ 957,63600,8.5609
161
+ 961,64000,8.4952
162
+ 965,64400,7.8793
163
+ 971,64800,5.9811
164
+ 975,65200,6.3148
165
+ 979,65600,8.3047
166
+ 985,66000,6.6049
167
+ 991,66400,5.5465
168
+ 995,66800,6.7472
169
+ 1000,67200,6.688
170
+ 1004,67600,8.5059
171
+ 1008,68000,8.0416
172
+ 1012,68400,9.3594
173
+ 1017,68800,7.135
174
+ 1022,69200,7.7882
175
+ 1028,69600,5.1304
176
+ 1032,70000,7.9267
177
+ 1036,70400,7.9253
178
+ 1040,70800,7.522
179
+ 1045,71200,6.7979
180
+ 1050,71600,7.0769
181
+ 1055,72000,7.1814
182
+ 1059,72400,7.9156
183
+ 1063,72800,6.9876
184
+ 1069,73200,5.6939
185
+ 1073,73600,7.603
186
+ 1078,74000,6.9544
187
+ 1085,74400,5.1872
188
+ 1089,74800,8.0712
189
+ 1094,75200,5.4866
190
+ 1099,75600,6.856
191
+ 1104,76000,6.6695
192
+ 1111,76400,5.2366
193
+ 1115,76800,8.231
194
+ 1120,77200,6.3017
195
+ 1125,77600,6.1984
196
+ 1129,78000,8.0981
197
+ 1134,78400,5.6874
198
+ 1139,78800,6.7436
199
+ 1144,79200,6.7781
200
+ 1148,79600,8.035
201
+ 1153,80000,7.0241
202
+ 1157,80400,8.1216
203
+ 1161,80800,7.7847
204
+ 1165,81200,7.0458
205
+ 1172,81600,4.9159
206
+ 1176,82000,8.8252
207
+ 1180,82400,8.1435
208
+ 1184,82800,8.0794
209
+ 1188,83200,8.4439
210
+ 1194,83600,6.5179
211
+ 1198,84000,9.5129
212
+ 1202,84400,9.5982
213
+ 1206,84800,8.1605
214
+ 1213,85200,5.1735
215
+ 1218,85600,6.3955
216
+ 1222,86000,6.5836
217
+ 1227,86400,7.8827
218
+ 1231,86800,9.0583
219
+ 1236,87200,7.4464
220
+ 1242,87600,6.9449
221
+ 1247,88000,7.6785
222
+ 1251,88400,7.7551
223
+ 1255,88800,9.1958
224
+ 1260,89200,7.8329
225
+ 1265,89600,9.6998
226
+ 1269,90000,12.2941
227
+ 1274,90400,7.8743
228
+ 1281,90800,6.7261
229
+ 1285,91200,10.3406
230
+ 1289,91600,10.3629
231
+ 1294,92000,12.8416
232
+ 1298,92400,11.8637
233
+ 1302,92800,10.95
234
+ 1307,93200,12.6815
235
+ 1312,93600,10.2359
236
+ 1316,94000,14.5616
237
+ 1320,94400,12.2057
238
+ 1325,94800,10.3496
239
+ 1331,95200,10.0749
240
+ 1336,95600,12.5332
241
+ 1341,96000,7.9488
242
+ 1347,96400,11.6734
243
+ 1353,96800,8.6258
244
+ 1359,97200,8.6982
245
+ 1363,97600,16.8782
246
+ 1369,98000,13.076
247
+ 1374,98400,12.6862
248
+ 1380,98800,13.0069
249
+ 1385,99200,13.2064
250
+ 1394,99600,7.5732
251
+ 1399,100000,13.571
252
+ 1406,100400,11.8885
253
+ 1411,100800,14.0567
254
+ 1416,101200,14.4459
255
+ 1422,101600,14.7572
256
+ 1430,102000,9.0038
257
+ 1436,102400,11.7543
258
+ 1443,102800,12.3206
259
+ 1448,103200,13.6686
260
+ 1454,103600,14.6563
261
+ 1461,104000,10.5416
262
+ 1467,104400,11.8187
263
+ 1473,104800,12.6766
264
+ 1478,105200,13.234
265
+ 1487,105600,10.4377
266
+ 1494,106000,10.4682
267
+ 1499,106400,14.2136
268
+ 1505,106800,13.3214
269
+ 1512,107200,9.9131
270
+ 1518,107600,11.3795
271
+ 1524,108000,13.6525
272
+ 1531,108400,10.5166
273
+ 1536,108800,15.6816
274
+ 1541,109200,13.5686
275
+ 1546,109600,15.8877
276
+ 1552,110000,10.798
277
+ 1556,110400,15.505
278
+ 1564,110800,8.7457
279
+ 1571,111200,9.8442
280
+ 1576,111600,16.783
281
+ 1581,112000,16.6507
282
+ 1587,112400,12.5058
283
+ 1597,112800,7.0879
284
+ 1604,113200,9.4795
285
+ 1612,113600,8.8247
286
+ 1619,114000,9.8475
287
+ 1626,114400,9.1637
288
+ 1631,114800,14.0166
289
+ 1638,115200,8.0027
290
+ 1642,115600,17.5209
291
+ 1650,116000,7.7073
292
+ 1654,116400,20.1086
293
+ 1661,116800,7.9623
294
+ 1668,117200,12.2066
295
+ 1675,117600,8.7988
296
+ 1681,118000,14.56
297
+ 1686,118400,16.3382
298
+ 1691,118800,12.1992
299
+ 1699,119200,10.4266
300
+ 1703,119600,19.3562
301
+ 1708,120000,11.3536
302
+ 1712,120400,20.2356
303
+ 1716,120800,19.5323
304
+ 1721,121200,17.4551
305
+ 1728,121600,10.1801
306
+ 1732,122000,16.947
307
+ 1739,122400,9.827
308
+ 1745,122800,13.3383
309
+ 1749,123200,19.4348
310
+ 1755,123600,12.4996
311
+ 1761,124000,13.8883
312
+ 1765,124400,20.16
313
+ 1769,124800,19.6822
314
+ 1776,125200,10.0647
315
+ 1780,125600,18.9827
316
+ 1785,126000,10.8816
317
+ 1791,126400,12.6382
318
+ 1797,126800,11.2938
319
+ 1806,127200,8.8997
320
+ 1811,127600,15.7514
321
+ 1817,128000,9.8611
322
+ 1825,128400,10.0803
323
+ 1833,128800,9.5121
324
+ 1841,129200,7.1428
325
+ 1850,129600,8.5931
326
+ 1858,130000,9.1178
327
+ 1866,130400,10.6061
328
+ 1871,130800,15.7645
329
+ 1875,131200,19.0111
330
+ 1881,131600,10.6172
331
+ 1885,132000,20.6998
332
+ 1889,132400,20.2484
333
+ 1894,132800,16.4684
334
+ 1900,133200,13.5329
335
+ 1904,133600,20.9186
336
+ 1910,134000,15.0977
337
+ 1915,134400,14.7358
338
+ 1920,134800,15.5285
339
+ 1928,135200,8.3647
340
+ 1934,135600,11.9479
341
+ 1940,136000,14.4666
342
+ 1947,136400,9.6302
343
+ 1953,136800,13.6634
344
+ 1958,137200,14.0396
345
+ 1966,137600,9.1948
346
+ 1973,138000,9.3788
347
+ 1982,138400,9.0964
348
+ 1991,138800,7.105
349
+ 1998,139200,11.6642
350
+ 2005,139600,8.2024
351
+ 2014,140000,7.7126
352
+ 2021,140400,11.0422
353
+ 2027,140800,11.2108
354
+ 2032,141200,15.9589
355
+ 2039,141600,8.9692
356
+ 2046,142000,12.0016
357
+ 2052,142400,13.3516
358
+ 2056,142800,19.3885
359
+ 2061,143200,14.0276
360
+ 2066,143600,14.164
361
+ 2073,144000,11.3607
362
+ 2082,144400,6.5908
363
+ 2089,144800,10.36
364
+ 2094,145200,14.2824
365
+ 2105,145600,4.9849
366
+ 2111,146000,11.1523
367
+ 2116,146400,12.4602
368
+ 2123,146800,10.201
369
+ 2129,147200,11.5534
370
+ 2138,147600,5.9679
371
+ 2142,148000,16.4718
372
+ 2147,148400,17.5833
373
+ 2152,148800,16.768
374
+ 2156,149200,19.1331
375
+ 2162,149600,12.3424
376
+ 2166,150000,18.0845
377
+ 2170,150400,18.9557
378
+ 2175,150800,14.0846
379
+ 2179,151200,20.1474
380
+ 2184,151600,15.3493
381
+ 2188,152000,18.1315
382
+ 2193,152400,15.8044
383
+ 2197,152800,17.7751
384
+ 2203,153200,11.6984
385
+ 2209,153600,13.0837
386
+ 2213,154000,15.855
387
+ 2218,154400,16.8814
388
+ 2223,154800,16.0311
389
+ 2227,155200,15.3192
390
+ 2234,155600,11.457
391
+ 2239,156000,16.1934
392
+ 2246,156400,11.9881
393
+ 2252,156800,9.2926
394
+ 2257,157200,16.73
395
+ 2261,157600,14.6868
396
+ 2268,158000,7.9747
397
+ 2273,158400,10.7901
398
+ 2277,158800,18.3051
399
+ 2283,159200,14.0301
400
+ 2288,159600,15.5108
401
+ 2294,160000,10.0966
402
+ 2300,160400,8.0898
403
+ 2306,160800,8.0323
404
+ 2315,161200,5.836
405
+ 2321,161600,10.9128
406
+ 2329,162000,5.4973
407
+ 2334,162400,11.0091
408
+ 2340,162800,8.3576
409
+ 2349,163200,6.2928
410
+ 2354,163600,13.008
411
+ 2358,164000,15.7582
412
+ 2364,164400,10.6657
413
+ 2372,164800,7.3352
414
+ 2378,165200,7.9416
415
+ 2385,165600,9.7033
416
+ 2390,166000,14.8003
417
+ 2395,166400,13.5949
418
+ 2399,166800,16.5582
419
+ 2405,167200,13.1281
420
+ 2410,167600,13.6583
421
+ 2415,168000,13.3859
422
+ 2422,168400,12.7336
423
+ 2427,168800,13.4928
424
+ 2432,169200,14.222
425
+ 2438,169600,12.2052
426
+ 2442,170000,17.538
427
+ 2449,170400,13.4975
428
+ 2454,170800,13.4316
429
+ 2460,171200,14.3904
430
+ 2466,171600,14.1914
431
+ 2472,172000,10.7212
432
+ 2478,172400,13.2043
433
+ 2484,172800,12.3505
434
+ 2488,173200,18.4707
435
+ 2493,173600,13.7594
436
+ 2498,174000,15.2434
437
+ 2503,174400,17.6234
438
+ 2509,174800,11.5482
439
+ 2513,175200,18.5919
440
+ 2518,175600,18.2289
441
+ 2525,176000,10.1992
442
+ 2529,176400,19.2187
443
+ 2533,176800,20.1763
444
+ 2538,177200,15.8706
445
+ 2542,177600,20.1437
446
+ 2547,178000,14.6606
447
+ 2552,178400,16.5006
448
+ 2557,178800,17.2947
449
+ 2561,179200,20.4127
450
+ 2566,179600,16.8026
451
+ 2571,180000,16.4537
452
+ 2575,180400,19.0096
453
+ 2580,180800,12.4961
454
+ 2586,181200,13.702
455
+ 2590,181600,19.4121
456
+ 2595,182000,19.1734
457
+ 2599,182400,15.6252
458
+ 2604,182800,17.4543
459
+ 2608,183200,18.4527
460
+ 2613,183600,17.0079
461
+ 2617,184000,15.8574
462
+ 2623,184400,14.6486
463
+ 2627,184800,16.7431
464
+ 2632,185200,16.3204
465
+ 2636,185600,18.1816
466
+ 2640,186000,19.3138
467
+ 2646,186400,13.6693
468
+ 2652,186800,12.1288
469
+ 2656,187200,19.5385
470
+ 2662,187600,12.7194
471
+ 2666,188000,18.8978
472
+ 2671,188400,17.7266
473
+ 2675,188800,20.3572
474
+ 2680,189200,16.1169
475
+ 2685,189600,14.712
476
+ 2692,190000,11.1334
477
+ 2696,190400,17.4639
478
+ 2701,190800,16.8716
479
+ 2705,191200,19.8286
480
+ 2710,191600,18.7337
481
+ 2714,192000,19.8553
482
+ 2719,192400,14.2238
483
+ 2725,192800,12.8373
484
+ 2735,193200,7.0151
485
+ 2739,193600,13.3644
486
+ 2745,194000,13.3385
487
+ 2753,194400,8.739
488
+ 2758,194800,13.8917
489
+ 2764,195200,10.3893
490
+ 2773,195600,4.8033
491
+ 2784,196000,5.5534
492
+ 2792,196400,7.2659
493
+ 2796,196800,13.9818
494
+ 2802,197200,13.2211
495
+ 2807,197600,16.0152
496
+ 2814,198000,9.9562
497
+ 2823,198400,6.7525
498
+ 2828,198800,14.4238
499
+ 2833,199200,10.7252
500
+ 2838,199600,13.7153
501
+ 2842,200000,19.2825
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_2.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 25,400,1.2001
3
+ 50,800,1.1974
4
+ 67,1200,1.7766
5
+ 87,1600,1.4873
6
+ 104,2000,1.7077
7
+ 114,2400,2.5908
8
+ 134,2800,1.8992
9
+ 145,3200,2.8642
10
+ 154,3600,2.9615
11
+ 163,4000,3.7629
12
+ 170,4400,4.1044
13
+ 180,4800,3.4985
14
+ 187,5200,4.3872
15
+ 192,5600,5.6852
16
+ 198,6000,5.7852
17
+ 205,6400,4.5497
18
+ 210,6800,5.1203
19
+ 215,7200,6.3622
20
+ 222,7600,4.9101
21
+ 230,8000,3.7216
22
+ 237,8400,3.9412
23
+ 243,8800,5.1797
24
+ 250,9200,4.9764
25
+ 255,9600,5.0597
26
+ 260,10000,6.6622
27
+ 266,10400,5.4034
28
+ 274,10800,3.7803
29
+ 279,11200,4.8601
30
+ 284,11600,6.8704
31
+ 289,12000,5.884
32
+ 298,12400,3.1085
33
+ 306,12800,3.7402
34
+ 317,13200,2.4474
35
+ 325,13600,4.697
36
+ 333,14000,3.7155
37
+ 340,14400,4.1133
38
+ 345,14800,6.3342
39
+ 354,15200,3.4713
40
+ 363,15600,2.973
41
+ 369,16000,5.3844
42
+ 376,16400,3.9942
43
+ 384,16800,4.3917
44
+ 390,17200,3.9032
45
+ 396,17600,5.5522
46
+ 402,18000,5.0959
47
+ 410,18400,4.0555
48
+ 419,18800,3.8592
49
+ 424,19200,5.5717
50
+ 430,19600,5.4533
51
+ 436,20000,4.4112
52
+ 442,20400,5.2485
53
+ 453,20800,3.6337
54
+ 458,21200,5.7439
55
+ 464,21600,5.8554
56
+ 470,22000,6.1356
57
+ 478,22400,4.1016
58
+ 483,22800,6.9287
59
+ 487,23200,7.9029
60
+ 492,23600,7.2121
61
+ 496,24000,6.8316
62
+ 501,24400,7.3779
63
+ 506,24800,7.0025
64
+ 511,25200,7.3224
65
+ 516,25600,6.0262
66
+ 521,26000,7.3493
67
+ 525,26400,8.5734
68
+ 530,26800,7.505
69
+ 536,27200,5.6146
70
+ 541,27600,7.32
71
+ 547,28000,5.8791
72
+ 551,28400,8.3691
73
+ 555,28800,8.5242
74
+ 560,29200,6.8016
75
+ 564,29600,6.4966
76
+ 570,30000,6.5884
77
+ 574,30400,7.9429
78
+ 579,30800,8.2591
79
+ 586,31200,5.1781
80
+ 590,31600,6.725
81
+ 595,32000,7.3339
82
+ 602,32400,5.5153
83
+ 606,32800,7.294
84
+ 611,33200,7.861
85
+ 617,33600,5.8614
86
+ 622,34000,6.2895
87
+ 626,34400,7.641
88
+ 631,34800,7.1765
89
+ 635,35200,8.3742
90
+ 639,35600,8.5644
91
+ 644,36000,7.5061
92
+ 648,36400,7.137
93
+ 654,36800,6.317
94
+ 659,37200,7.5171
95
+ 663,37600,8.4671
96
+ 667,38000,8.8055
97
+ 672,38400,7.2286
98
+ 677,38800,7.0709
99
+ 682,39200,6.0232
100
+ 687,39600,7.7556
101
+ 691,40000,7.7135
102
+ 695,40400,8.3451
103
+ 701,40800,6.0775
104
+ 705,41200,8.3363
105
+ 709,41600,8.6071
106
+ 715,42000,5.3694
107
+ 720,42400,7.6011
108
+ 724,42800,8.7117
109
+ 729,43200,7.0684
110
+ 735,43600,5.433
111
+ 739,44000,8.6411
112
+ 743,44400,8.5924
113
+ 747,44800,8.9168
114
+ 752,45200,7.2905
115
+ 756,45600,8.9831
116
+ 761,46000,7.4007
117
+ 766,46400,6.664
118
+ 770,46800,7.3928
119
+ 777,47200,5.7024
120
+ 783,47600,5.151
121
+ 788,48000,7.2108
122
+ 793,48400,7.2763
123
+ 799,48800,7.0493
124
+ 803,49200,8.5485
125
+ 808,49600,7.5421
126
+ 812,50000,9.5257
127
+ 816,50400,9.1144
128
+ 822,50800,5.6572
129
+ 828,51200,7.8776
130
+ 832,51600,10.0686
131
+ 838,52000,6.2858
132
+ 842,52400,8.7905
133
+ 847,52800,6.9064
134
+ 852,53200,8.591
135
+ 857,53600,7.9326
136
+ 861,54000,10.7555
137
+ 865,54400,11.0084
138
+ 870,54800,8.5343
139
+ 874,55200,10.2597
140
+ 878,55600,11.0706
141
+ 883,56000,9.7567
142
+ 887,56400,11.0066
143
+ 891,56800,11.0205
144
+ 897,57200,8.6028
145
+ 902,57600,7.8706
146
+ 906,58000,11.6827
147
+ 911,58400,11.5678
148
+ 916,58800,7.3302
149
+ 920,59200,11.8686
150
+ 925,59600,9.6659
151
+ 929,60000,11.8218
152
+ 933,60400,11.753
153
+ 938,60800,10.8838
154
+ 942,61200,11.9606
155
+ 946,61600,11.9891
156
+ 950,62000,10.7408
157
+ 955,62400,9.9649
158
+ 959,62800,10.1192
159
+ 963,63200,11.9243
160
+ 967,63600,12.0996
161
+ 971,64000,12.2641
162
+ 975,64400,12.2119
163
+ 980,64800,9.6896
164
+ 984,65200,11.985
165
+ 988,65600,12.0389
166
+ 993,66000,9.7261
167
+ 998,66400,10.0918
168
+ 1002,66800,11.9409
169
+ 1007,67200,9.7976
170
+ 1011,67600,11.9562
171
+ 1015,68000,11.9393
172
+ 1019,68400,11.9489
173
+ 1023,68800,10.6665
174
+ 1029,69200,8.6481
175
+ 1035,69600,8.2744
176
+ 1040,70000,9.8761
177
+ 1044,70400,12.7648
178
+ 1048,70800,12.0824
179
+ 1053,71200,9.9615
180
+ 1059,71600,7.5596
181
+ 1063,72000,12.066
182
+ 1068,72400,10.3717
183
+ 1072,72800,11.9771
184
+ 1076,73200,11.9456
185
+ 1080,73600,9.1042
186
+ 1084,74000,12.076
187
+ 1090,74400,9.4681
188
+ 1094,74800,11.529
189
+ 1098,75200,12.0008
190
+ 1103,75600,10.1566
191
+ 1108,76000,9.751
192
+ 1112,76400,12.3104
193
+ 1116,76800,9.2405
194
+ 1120,77200,12.0348
195
+ 1125,77600,10.0069
196
+ 1130,78000,10.0386
197
+ 1136,78400,10.4839
198
+ 1142,78800,7.6819
199
+ 1149,79200,5.9398
200
+ 1156,79600,7.5437
201
+ 1163,80000,7.4114
202
+ 1167,80400,9.315
203
+ 1172,80800,11.2425
204
+ 1176,81200,10.7758
205
+ 1180,81600,12.0153
206
+ 1185,82000,10.7721
207
+ 1190,82400,9.8986
208
+ 1194,82800,11.9053
209
+ 1198,83200,11.9558
210
+ 1203,83600,9.3269
211
+ 1207,84000,12.2099
212
+ 1213,84400,8.4628
213
+ 1217,84800,10.7038
214
+ 1221,85200,12.2837
215
+ 1226,85600,9.9915
216
+ 1231,86000,9.9481
217
+ 1236,86400,10.1545
218
+ 1240,86800,12.2327
219
+ 1244,87200,12.075
220
+ 1248,87600,12.01
221
+ 1252,88000,10.5855
222
+ 1256,88400,12.134
223
+ 1260,88800,12.3757
224
+ 1264,89200,11.9803
225
+ 1269,89600,9.3804
226
+ 1274,90000,9.8415
227
+ 1278,90400,11.9713
228
+ 1284,90800,8.3152
229
+ 1289,91200,8.8216
230
+ 1293,91600,12.3776
231
+ 1299,92000,8.471
232
+ 1303,92400,12.5027
233
+ 1307,92800,12.517
234
+ 1313,93200,8.4122
235
+ 1317,93600,13.1839
236
+ 1322,94000,10.4515
237
+ 1326,94400,11.1728
238
+ 1330,94800,13.0548
239
+ 1334,95200,12.0536
240
+ 1338,95600,12.4526
241
+ 1344,96000,9.0872
242
+ 1351,96400,7.9055
243
+ 1355,96800,11.2378
244
+ 1360,97200,11.1121
245
+ 1364,97600,12.7524
246
+ 1368,98000,12.6111
247
+ 1373,98400,11.2153
248
+ 1378,98800,8.2927
249
+ 1382,99200,12.5971
250
+ 1388,99600,9.538
251
+ 1392,100000,12.5941
252
+ 1397,100400,10.2156
253
+ 1401,100800,12.5003
254
+ 1406,101200,9.1204
255
+ 1411,101600,11.3426
256
+ 1418,102000,6.7031
257
+ 1422,102400,13.0065
258
+ 1427,102800,10.4644
259
+ 1431,103200,12.6849
260
+ 1435,103600,12.3948
261
+ 1441,104000,9.7853
262
+ 1446,104400,9.4532
263
+ 1450,104800,10.5596
264
+ 1456,105200,8.6362
265
+ 1461,105600,11.1625
266
+ 1465,106000,9.7383
267
+ 1471,106400,9.5542
268
+ 1476,106800,10.1016
269
+ 1480,107200,11.3806
270
+ 1484,107600,12.8012
271
+ 1488,108000,12.754
272
+ 1493,108400,9.2045
273
+ 1497,108800,12.9866
274
+ 1504,109200,7.3446
275
+ 1509,109600,11.8225
276
+ 1514,110000,8.9346
277
+ 1518,110400,12.7821
278
+ 1523,110800,11.2293
279
+ 1527,111200,12.976
280
+ 1532,111600,8.6775
281
+ 1536,112000,12.7469
282
+ 1540,112400,12.7591
283
+ 1545,112800,10.3778
284
+ 1550,113200,10.3744
285
+ 1554,113600,12.7611
286
+ 1558,114000,11.8172
287
+ 1562,114400,12.8452
288
+ 1566,114800,12.6974
289
+ 1570,115200,12.7894
290
+ 1575,115600,10.9595
291
+ 1579,116000,13.0989
292
+ 1583,116400,12.9278
293
+ 1589,116800,9.0682
294
+ 1593,117200,12.9978
295
+ 1597,117600,10.7087
296
+ 1601,118000,13.5158
297
+ 1605,118400,13.435
298
+ 1610,118800,11.5973
299
+ 1614,119200,13.0533
300
+ 1618,119600,13.0017
301
+ 1623,120000,10.8166
302
+ 1629,120400,9.0293
303
+ 1634,120800,9.2539
304
+ 1639,121200,10.6134
305
+ 1644,121600,11.5216
306
+ 1648,122000,13.3206
307
+ 1652,122400,12.207
308
+ 1657,122800,11.3809
309
+ 1661,123200,10.1333
310
+ 1665,123600,13.4154
311
+ 1669,124000,13.7245
312
+ 1673,124400,13.2766
313
+ 1677,124800,13.6751
314
+ 1681,125200,13.3002
315
+ 1686,125600,11.3154
316
+ 1690,126000,14.442
317
+ 1697,126400,9.1752
318
+ 1701,126800,12.6002
319
+ 1710,127200,8.7327
320
+ 1715,127600,11.4596
321
+ 1722,128000,9.0605
322
+ 1730,128400,6.6071
323
+ 1738,128800,7.8556
324
+ 1743,129200,8.6705
325
+ 1748,129600,13.2619
326
+ 1757,130000,8.0688
327
+ 1762,130400,11.255
328
+ 1769,130800,10.1434
329
+ 1776,131200,7.9637
330
+ 1780,131600,16.234
331
+ 1784,132000,16.009
332
+ 1790,132400,11.5696
333
+ 1795,132800,13.03
334
+ 1801,133200,11.5952
335
+ 1807,133600,10.1351
336
+ 1811,134000,14.54
337
+ 1823,134400,4.607
338
+ 1829,134800,12.6576
339
+ 1833,135200,16.8332
340
+ 1838,135600,11.8201
341
+ 1842,136000,17.1005
342
+ 1846,136400,16.8673
343
+ 1851,136800,15.3565
344
+ 1857,137200,8.8723
345
+ 1862,137600,13.1234
346
+ 1868,138000,12.9135
347
+ 1876,138400,6.6299
348
+ 1883,138800,8.6404
349
+ 1892,139200,8.0842
350
+ 1896,139600,16.6064
351
+ 1900,140000,12.9285
352
+ 1912,140400,5.249
353
+ 1918,140800,9.7863
354
+ 1926,141200,7.8766
355
+ 1932,141600,7.9322
356
+ 1942,142000,5.3181
357
+ 1947,142400,12.7024
358
+ 1956,142800,8.2081
359
+ 1968,143200,2.9574
360
+ 1975,143600,6.7944
361
+ 1981,144000,11.6649
362
+ 1994,144400,5.4107
363
+ 2003,144800,6.3419
364
+ 2012,145200,6.9728
365
+ 2023,145600,5.0183
366
+ 2031,146000,7.8319
367
+ 2041,146400,4.6116
368
+ 2054,146800,4.337
369
+ 2063,147200,5.3691
370
+ 2069,147600,12.9849
371
+ 2078,148000,8.5405
372
+ 2091,148400,4.9827
373
+ 2108,148800,3.026
374
+ 2123,149200,3.3827
375
+ 2132,149600,6.8501
376
+ 2141,150000,7.5638
377
+ 2152,150400,5.2852
378
+ 2162,150800,7.5752
379
+ 2170,151200,7.9508
380
+ 2177,151600,10.2038
381
+ 2191,152000,3.0956
382
+ 2200,152400,7.7211
383
+ 2208,152800,6.9913
384
+ 2215,153200,10.9929
385
+ 2219,153600,15.1937
386
+ 2226,154000,12.1173
387
+ 2235,154400,6.3827
388
+ 2244,154800,9.904
389
+ 2251,155200,10.4535
390
+ 2257,155600,11.1928
391
+ 2262,156000,11.8628
392
+ 2269,156400,11.1129
393
+ 2275,156800,10.8419
394
+ 2282,157200,10.6974
395
+ 2288,157600,12.6491
396
+ 2293,158000,17.1227
397
+ 2297,158400,15.1925
398
+ 2302,158800,15.8225
399
+ 2310,159200,10.5351
400
+ 2314,159600,20.5124
401
+ 2318,160000,20.7472
402
+ 2323,160400,16.1619
403
+ 2328,160800,17.0157
404
+ 2332,161200,19.5865
405
+ 2336,161600,20.3359
406
+ 2340,162000,15.7826
407
+ 2344,162400,20.7786
408
+ 2349,162800,19.027
409
+ 2353,163200,16.8306
410
+ 2358,163600,15.0345
411
+ 2364,164000,13.7065
412
+ 2370,164400,15.8193
413
+ 2375,164800,15.9792
414
+ 2379,165200,16.8467
415
+ 2384,165600,17.832
416
+ 2388,166000,20.4626
417
+ 2393,166400,14.8119
418
+ 2399,166800,12.4114
419
+ 2403,167200,20.9186
420
+ 2408,167600,13.3934
421
+ 2412,168000,20.5788
422
+ 2418,168400,16.2933
423
+ 2422,168800,18.8223
424
+ 2427,169200,19.3578
425
+ 2432,169600,13.3396
426
+ 2437,170000,17.3548
427
+ 2443,170400,16.2848
428
+ 2448,170800,18.1538
429
+ 2453,171200,16.7561
430
+ 2457,171600,17.7607
431
+ 2463,172000,15.1953
432
+ 2469,172400,13.134
433
+ 2476,172800,12.5457
434
+ 2482,173200,14.6165
435
+ 2487,173600,18.5189
436
+ 2492,174000,19.1314
437
+ 2496,174400,17.353
438
+ 2500,174800,22.18
439
+ 2506,175200,15.5115
440
+ 2511,175600,19.8744
441
+ 2515,176000,18.1736
442
+ 2519,176400,21.4187
443
+ 2525,176800,17.5231
444
+ 2531,177200,12.1789
445
+ 2536,177600,17.784
446
+ 2540,178000,20.6193
447
+ 2545,178400,17.4618
448
+ 2549,178800,21.499
449
+ 2553,179200,17.4586
450
+ 2557,179600,20.6227
451
+ 2561,180000,20.1806
452
+ 2567,180400,13.3524
453
+ 2573,180800,14.6734
454
+ 2577,181200,21.1076
455
+ 2582,181600,17.1151
456
+ 2587,182000,12.7979
457
+ 2592,182400,15.9493
458
+ 2599,182800,14.3687
459
+ 2604,183200,15.514
460
+ 2610,183600,12.1476
461
+ 2616,184000,13.7643
462
+ 2622,184400,14.6858
463
+ 2627,184800,13.8197
464
+ 2631,185200,20.2988
465
+ 2637,185600,13.8708
466
+ 2641,186000,21.6494
467
+ 2646,186400,15.6542
468
+ 2653,186800,11.9435
469
+ 2659,187200,13.8883
470
+ 2666,187600,12.5814
471
+ 2671,188000,17.3771
472
+ 2678,188400,11.9801
473
+ 2683,188800,13.1353
474
+ 2692,189200,10.337
475
+ 2698,189600,11.3343
476
+ 2705,190000,12.8742
477
+ 2711,190400,14.6771
478
+ 2717,190800,11.9189
479
+ 2726,191200,8.7059
480
+ 2732,191600,13.1508
481
+ 2739,192000,9.7473
482
+ 2743,192400,19.009
483
+ 2748,192800,16.8758
484
+ 2754,193200,15.7392
485
+ 2758,193600,20.0703
486
+ 2764,194000,12.4249
487
+ 2768,194400,21.7073
488
+ 2773,194800,18.1804
489
+ 2779,195200,15.5854
490
+ 2785,195600,12.3091
491
+ 2790,196000,15.0442
492
+ 2794,196400,20.9159
493
+ 2802,196800,11.6643
494
+ 2811,197200,7.4276
495
+ 2819,197600,8.6076
496
+ 2826,198000,10.6653
497
+ 2831,198400,16.6902
498
+ 2837,198800,13.4796
499
+ 2843,199200,14.7899
500
+ 2848,199600,14.6657
501
+ 2855,200000,11.2761
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_3.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 20,400,1.4511
3
+ 42,800,1.4159
4
+ 60,1200,1.7077
5
+ 73,1600,2.1135
6
+ 84,2000,2.7468
7
+ 105,2400,1.7674
8
+ 113,2800,3.7814
9
+ 122,3200,3.7896
10
+ 134,3600,2.7067
11
+ 146,4000,2.5524
12
+ 152,4400,6.5864
13
+ 157,4800,6.5195
14
+ 162,5200,6.1349
15
+ 168,5600,5.317
16
+ 177,6000,3.6724
17
+ 183,6400,6.1446
18
+ 189,6800,5.5608
19
+ 195,7200,6.1726
20
+ 199,7600,6.344
21
+ 205,8000,5.8716
22
+ 213,8400,4.0247
23
+ 217,8800,6.9902
24
+ 224,9200,6.2118
25
+ 229,9600,6.7983
26
+ 236,10000,6.4537
27
+ 241,10400,9.274
28
+ 246,10800,7.4492
29
+ 253,11200,6.7536
30
+ 257,11600,10.0378
31
+ 264,12000,5.3493
32
+ 270,12400,6.8639
33
+ 274,12800,9.7271
34
+ 278,13200,10.6744
35
+ 284,13600,7.8632
36
+ 290,14000,8.3459
37
+ 296,14400,6.5538
38
+ 301,14800,9.4489
39
+ 307,15200,8.4332
40
+ 313,15600,7.6024
41
+ 318,16000,9.6646
42
+ 323,16400,7.5682
43
+ 327,16800,12.2827
44
+ 332,17200,11.2367
45
+ 336,17600,10.6158
46
+ 341,18000,8.7318
47
+ 347,18400,8.9077
48
+ 352,18800,10.0014
49
+ 357,19200,9.9727
50
+ 362,19600,8.4138
51
+ 368,20000,8.7518
52
+ 372,20400,12.3645
53
+ 376,20800,10.8224
54
+ 381,21200,11.2952
55
+ 387,21600,7.5035
56
+ 392,22000,10.1362
57
+ 398,22400,8.0769
58
+ 402,22800,11.4559
59
+ 408,23200,10.9315
60
+ 412,23600,12.5869
61
+ 416,24000,9.7607
62
+ 421,24400,10.3972
63
+ 427,24800,9.1654
64
+ 434,25200,6.5726
65
+ 440,25600,7.9518
66
+ 445,26000,12.163
67
+ 449,26400,12.5389
68
+ 453,26800,12.4047
69
+ 457,27200,12.8434
70
+ 461,27600,12.508
71
+ 465,28000,12.7977
72
+ 469,28400,12.7722
73
+ 475,28800,7.5211
74
+ 481,29200,8.8252
75
+ 485,29600,11.5555
76
+ 489,30000,11.477
77
+ 493,30400,12.9982
78
+ 497,30800,13.1206
79
+ 504,31200,9.1732
80
+ 509,31600,9.6877
81
+ 515,32000,8.65
82
+ 522,32400,6.6807
83
+ 529,32800,7.7346
84
+ 534,33200,7.8476
85
+ 541,33600,7.9549
86
+ 548,34000,6.9946
87
+ 555,34400,5.7617
88
+ 562,34800,7.4759
89
+ 567,35200,9.4369
90
+ 574,35600,8.9111
91
+ 581,36000,6.5248
92
+ 586,36400,10.6508
93
+ 594,36800,4.8766
94
+ 606,37200,5.0255
95
+ 611,37600,8.7737
96
+ 619,38000,7.297
97
+ 625,38400,8.7064
98
+ 631,38800,7.6392
99
+ 637,39200,8.1329
100
+ 643,39600,8.6996
101
+ 648,40000,12.1593
102
+ 654,40400,8.0097
103
+ 658,40800,13.7355
104
+ 663,41200,10.8767
105
+ 667,41600,13.4623
106
+ 671,42000,13.7128
107
+ 677,42400,7.9521
108
+ 685,42800,7.0389
109
+ 693,43200,7.1228
110
+ 699,43600,7.1906
111
+ 704,44000,10.2499
112
+ 711,44400,8.8439
113
+ 715,44800,10.723
114
+ 722,45200,8.0561
115
+ 726,45600,12.8837
116
+ 730,46000,11.6005
117
+ 735,46400,10.5382
118
+ 740,46800,10.9579
119
+ 744,47200,12.8439
120
+ 749,47600,8.5832
121
+ 755,48000,10.2625
122
+ 759,48400,11.0394
123
+ 764,48800,10.5021
124
+ 771,49200,7.5662
125
+ 775,49600,11.8596
126
+ 779,50000,10.9197
127
+ 785,50400,9.6613
128
+ 791,50800,7.1842
129
+ 797,51200,8.9205
130
+ 810,51600,3.7799
131
+ 814,52000,11.7011
132
+ 818,52400,13.1067
133
+ 825,52800,6.8794
134
+ 837,53200,4.1435
135
+ 844,53600,7.8758
136
+ 850,54000,6.8512
137
+ 857,54400,7.279
138
+ 863,54800,8.8461
139
+ 870,55200,5.1664
140
+ 876,55600,8.291
141
+ 882,56000,7.4186
142
+ 886,56400,12.7099
143
+ 895,56800,5.8991
144
+ 900,57200,10.3267
145
+ 905,57600,10.4081
146
+ 910,58000,8.2201
147
+ 917,58400,7.2555
148
+ 923,58800,8.6902
149
+ 929,59200,8.4311
150
+ 935,59600,9.3899
151
+ 940,60000,9.48
152
+ 947,60400,8.8701
153
+ 952,60800,10.6712
154
+ 957,61200,12.1303
155
+ 963,61600,10.0041
156
+ 967,62000,11.8506
157
+ 975,62400,7.9768
158
+ 981,62800,10.121
159
+ 985,63200,12.7484
160
+ 992,63600,8.6506
161
+ 996,64000,12.0166
162
+ 1003,64400,8.6989
163
+ 1007,64800,13.5841
164
+ 1012,65200,11.8948
165
+ 1020,65600,7.5362
166
+ 1024,66000,14.5183
167
+ 1031,66400,8.4667
168
+ 1039,66800,6.099
169
+ 1044,67200,9.9433
170
+ 1051,67600,10.2187
171
+ 1056,68000,12.4386
172
+ 1065,68400,4.846
173
+ 1071,68800,12.9973
174
+ 1076,69200,11.6068
175
+ 1084,69600,9.6249
176
+ 1090,70000,8.821
177
+ 1098,70400,8.9721
178
+ 1104,70800,10.8627
179
+ 1110,71200,12.9207
180
+ 1117,71600,9.6223
181
+ 1124,72000,9.5217
182
+ 1129,72400,12.7009
183
+ 1137,72800,7.1291
184
+ 1142,73200,15.8915
185
+ 1147,73600,12.773
186
+ 1152,74000,14.0152
187
+ 1158,74400,12.8197
188
+ 1165,74800,7.6715
189
+ 1171,75200,11.6273
190
+ 1176,75600,14.7481
191
+ 1182,76000,10.9798
192
+ 1186,76400,17.6803
193
+ 1193,76800,10.2361
194
+ 1198,77200,11.135
195
+ 1203,77600,13.5392
196
+ 1207,78000,16.3684
197
+ 1213,78400,12.9063
198
+ 1218,78800,11.9918
199
+ 1226,79200,8.0133
200
+ 1231,79600,13.8717
201
+ 1236,80000,13.592
202
+ 1240,80400,16.6494
203
+ 1247,80800,9.0394
204
+ 1253,81200,11.6721
205
+ 1257,81600,12.4117
206
+ 1263,82000,13.1805
207
+ 1268,82400,12.8848
208
+ 1273,82800,13.0017
209
+ 1279,83200,11.932
210
+ 1286,83600,10.3167
211
+ 1292,84000,11.0747
212
+ 1298,84400,12.7847
213
+ 1302,84800,16.4195
214
+ 1307,85200,14.5389
215
+ 1313,85600,11.3515
216
+ 1317,86000,19.0045
217
+ 1324,86400,12.204
218
+ 1328,86800,18.172
219
+ 1334,87200,11.4799
220
+ 1339,87600,15.2817
221
+ 1346,88000,12.8543
222
+ 1351,88400,15.2124
223
+ 1355,88800,20.012
224
+ 1360,89200,15.6753
225
+ 1364,89600,18.9953
226
+ 1369,90000,14.7316
227
+ 1373,90400,18.3781
228
+ 1379,90800,12.0495
229
+ 1383,91200,19.1038
230
+ 1388,91600,15.5228
231
+ 1394,92000,13.1508
232
+ 1399,92400,13.739
233
+ 1404,92800,15.4669
234
+ 1409,93200,12.2052
235
+ 1413,93600,18.4534
236
+ 1417,94000,18.4736
237
+ 1421,94400,19.451
238
+ 1426,94800,17.0253
239
+ 1431,95200,13.6257
240
+ 1436,95600,15.849
241
+ 1447,96000,6.1302
242
+ 1452,96400,13.5995
243
+ 1460,96800,10.4263
244
+ 1466,97200,12.8175
245
+ 1470,97600,19.8985
246
+ 1474,98000,19.4499
247
+ 1480,98400,12.2035
248
+ 1485,98800,15.5921
249
+ 1493,99200,9.945
250
+ 1500,99600,8.3822
251
+ 1504,100000,20.1108
252
+ 1511,100400,13.2678
253
+ 1517,100800,13.3653
254
+ 1521,101200,20.5694
255
+ 1526,101600,12.9576
256
+ 1531,102000,18.6283
257
+ 1537,102400,13.6185
258
+ 1542,102800,13.5109
259
+ 1547,103200,14.7459
260
+ 1555,103600,11.7803
261
+ 1562,104000,8.6873
262
+ 1572,104400,8.425
263
+ 1577,104800,12.4127
264
+ 1583,105200,11.9189
265
+ 1590,105600,12.8694
266
+ 1597,106000,10.6233
267
+ 1604,106400,8.3289
268
+ 1613,106800,9.3008
269
+ 1622,107200,7.9757
270
+ 1627,107600,15.1024
271
+ 1633,108000,12.5267
272
+ 1641,108400,10.7986
273
+ 1646,108800,10.9844
274
+ 1650,109200,20.3921
275
+ 1657,109600,12.9882
276
+ 1664,110000,8.8107
277
+ 1671,110400,13.2145
278
+ 1677,110800,13.7578
279
+ 1683,111200,11.6441
280
+ 1691,111600,10.6352
281
+ 1698,112000,9.8465
282
+ 1704,112400,13.4316
283
+ 1708,112800,19.3443
284
+ 1715,113200,11.1754
285
+ 1722,113600,10.6371
286
+ 1728,114000,9.1292
287
+ 1734,114400,14.4382
288
+ 1741,114800,13.7379
289
+ 1745,115200,20.8923
290
+ 1749,115600,16.3165
291
+ 1755,116000,15.3684
292
+ 1762,116400,9.6267
293
+ 1766,116800,17.2582
294
+ 1772,117200,14.4907
295
+ 1777,117600,16.4743
296
+ 1783,118000,13.0177
297
+ 1792,118400,7.393
298
+ 1799,118800,10.4686
299
+ 1804,119200,17.1223
300
+ 1814,119600,7.6813
301
+ 1820,120000,10.9129
302
+ 1826,120400,15.8259
303
+ 1830,120800,20.2207
304
+ 1837,121200,9.1836
305
+ 1843,121600,14.5043
306
+ 1848,122000,12.8295
307
+ 1853,122400,18.3265
308
+ 1858,122800,16.1205
309
+ 1863,123200,19.6436
310
+ 1869,123600,11.276
311
+ 1873,124000,20.749
312
+ 1882,124400,9.1372
313
+ 1888,124800,13.6346
314
+ 1893,125200,17.135
315
+ 1899,125600,10.655
316
+ 1904,126000,20.1275
317
+ 1908,126400,19.847
318
+ 1913,126800,15.7564
319
+ 1918,127200,14.3012
320
+ 1923,127600,15.9916
321
+ 1928,128000,15.8541
322
+ 1932,128400,20.358
323
+ 1937,128800,16.4342
324
+ 1941,129200,21.0778
325
+ 1946,129600,15.8157
326
+ 1952,130000,14.1684
327
+ 1956,130400,17.5727
328
+ 1962,130800,13.636
329
+ 1967,131200,16.7871
330
+ 1971,131600,19.9122
331
+ 1976,132000,17.2468
332
+ 1984,132400,10.4792
333
+ 1989,132800,15.3806
334
+ 1994,133200,15.9361
335
+ 1998,133600,20.595
336
+ 2003,134000,14.5683
337
+ 2010,134400,12.9443
338
+ 2017,134800,11.1499
339
+ 2021,135200,16.4114
340
+ 2028,135600,11.15
341
+ 2033,136000,16.1641
342
+ 2039,136400,11.8905
343
+ 2045,136800,12.094
344
+ 2051,137200,13.1055
345
+ 2057,137600,12.3794
346
+ 2062,138000,16.7795
347
+ 2066,138400,15.2186
348
+ 2071,138800,17.5386
349
+ 2075,139200,19.7387
350
+ 2080,139600,17.7345
351
+ 2087,140000,9.286
352
+ 2091,140400,20.1843
353
+ 2095,140800,20.6579
354
+ 2099,141200,20.4451
355
+ 2105,141600,12.451
356
+ 2111,142000,13.8034
357
+ 2116,142400,16.789
358
+ 2121,142800,17.3337
359
+ 2127,143200,12.512
360
+ 2132,143600,18.0873
361
+ 2137,144000,16.5148
362
+ 2143,144400,14.2245
363
+ 2150,144800,11.4969
364
+ 2158,145200,6.4741
365
+ 2167,145600,10.4108
366
+ 2173,146000,9.8208
367
+ 2180,146400,14.0351
368
+ 2186,146800,11.3705
369
+ 2191,147200,17.1942
370
+ 2196,147600,16.5483
371
+ 2201,148000,17.2101
372
+ 2206,148400,15.8254
373
+ 2211,148800,16.944
374
+ 2216,149200,13.2711
375
+ 2222,149600,16.0584
376
+ 2227,150000,15.4412
377
+ 2231,150400,20.0904
378
+ 2235,150800,18.9388
379
+ 2241,151200,10.2277
380
+ 2248,151600,9.7938
381
+ 2257,152000,6.8651
382
+ 2265,152400,10.0151
383
+ 2269,152800,16.5873
384
+ 2275,153200,13.0192
385
+ 2280,153600,16.4787
386
+ 2284,154000,19.9945
387
+ 2288,154400,21.1409
388
+ 2293,154800,17.4874
389
+ 2298,155200,16.9422
390
+ 2302,155600,16.5356
391
+ 2307,156000,17.7096
392
+ 2314,156400,11.4627
393
+ 2318,156800,20.033
394
+ 2323,157200,14.9603
395
+ 2327,157600,19.5649
396
+ 2334,158000,10.3648
397
+ 2339,158400,17.1039
398
+ 2343,158800,21.143
399
+ 2348,159200,17.2194
400
+ 2352,159600,21.0071
401
+ 2356,160000,21.8045
402
+ 2361,160400,17.6473
403
+ 2367,160800,15.5177
404
+ 2371,161200,20.4818
405
+ 2376,161600,17.1564
406
+ 2381,162000,14.4102
407
+ 2386,162400,18.4027
408
+ 2390,162800,22.3808
409
+ 2395,163200,14.4186
410
+ 2400,163600,13.4343
411
+ 2404,164000,19.605
412
+ 2409,164400,14.7853
413
+ 2415,164800,11.0992
414
+ 2421,165200,12.6908
415
+ 2426,165600,17.289
416
+ 2431,166000,17.1976
417
+ 2435,166400,21.6014
418
+ 2441,166800,12.793
419
+ 2445,167200,14.232
420
+ 2450,167600,15.6217
421
+ 2457,168000,14.2949
422
+ 2461,168400,16.1677
423
+ 2465,168800,21.0584
424
+ 2470,169200,19.4555
425
+ 2474,169600,16.2403
426
+ 2481,170000,10.8878
427
+ 2491,170400,4.966
428
+ 2497,170800,11.4854
429
+ 2509,171200,4.8386
430
+ 2517,171600,9.0965
431
+ 2532,172000,4.4732
432
+ 2539,172400,10.3322
433
+ 2548,172800,6.617
434
+ 2559,173200,6.6052
435
+ 2566,173600,8.1639
436
+ 2571,174000,14.7295
437
+ 2575,174400,19.1853
438
+ 2581,174800,11.4953
439
+ 2586,175200,15.2959
440
+ 2591,175600,17.3706
441
+ 2597,176000,16.6239
442
+ 2602,176400,16.1638
443
+ 2608,176800,13.7811
444
+ 2614,177200,11.7461
445
+ 2624,177600,8.2969
446
+ 2629,178000,16.7226
447
+ 2636,178400,10.9358
448
+ 2642,178800,16.5572
449
+ 2649,179200,10.4538
450
+ 2655,179600,12.1145
451
+ 2660,180000,17.3922
452
+ 2669,180400,10.0921
453
+ 2675,180800,14.2412
454
+ 2680,181200,16.8869
455
+ 2684,181600,17.4712
456
+ 2689,182000,19.3122
457
+ 2693,182400,22.4468
458
+ 2699,182800,15.3699
459
+ 2704,183200,18.0721
460
+ 2710,183600,13.5026
461
+ 2717,184000,12.2414
462
+ 2721,184400,17.901
463
+ 2726,184800,16.2374
464
+ 2731,185200,17.4196
465
+ 2736,185600,18.1275
466
+ 2740,186000,20.9471
467
+ 2745,186400,20.6423
468
+ 2750,186800,18.2358
469
+ 2755,187200,17.1191
470
+ 2760,187600,17.9405
471
+ 2766,188000,12.8474
472
+ 2770,188400,19.6427
473
+ 2776,188800,15.7317
474
+ 2780,189200,19.7856
475
+ 2787,189600,10.9979
476
+ 2792,190000,18.7782
477
+ 2798,190400,13.124
478
+ 2803,190800,15.1497
479
+ 2808,191200,15.8551
480
+ 2818,191600,6.6157
481
+ 2824,192000,15.3035
482
+ 2831,192400,11.4718
483
+ 2835,192800,22.8164
484
+ 2841,193200,16.0213
485
+ 2846,193600,17.6302
486
+ 2851,194000,14.8716
487
+ 2855,194400,20.6271
488
+ 2862,194800,14.8134
489
+ 2868,195200,10.8108
490
+ 2872,195600,22.52
491
+ 2879,196000,13.9515
492
+ 2885,196400,12.6253
493
+ 2893,196800,8.5209
494
+ 2900,197200,11.9898
495
+ 2906,197600,13.4315
496
+ 2911,198000,18.833
497
+ 2918,198400,11.5714
498
+ 2923,198800,15.1536
499
+ 2931,199200,9.8851
500
+ 2937,199600,12.3961
501
+ 2943,200000,10.9256
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_4.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 27,400,1.0785
3
+ 45,800,1.6504
4
+ 65,1200,1.4004
5
+ 75,1600,2.9704
6
+ 92,2000,1.7748
7
+ 101,2400,3.44
8
+ 112,2800,2.7486
9
+ 118,3200,4.5942
10
+ 124,3600,5.5212
11
+ 132,4000,3.7269
12
+ 139,4400,4.3077
13
+ 146,4800,4.8111
14
+ 151,5200,5.5228
15
+ 157,5600,6.1189
16
+ 163,6000,5.2543
17
+ 168,6400,5.2306
18
+ 175,6800,5.3845
19
+ 182,7200,4.3935
20
+ 187,7600,5.7774
21
+ 192,8000,6.1537
22
+ 197,8400,5.7906
23
+ 202,8800,7.0905
24
+ 209,9200,4.5122
25
+ 213,9600,6.5891
26
+ 218,10000,7.7331
27
+ 223,10400,6.6175
28
+ 227,10800,10.1737
29
+ 231,11200,8.0792
30
+ 238,11600,5.0695
31
+ 243,12000,6.8807
32
+ 247,12400,6.8492
33
+ 252,12800,7.4324
34
+ 256,13200,7.9897
35
+ 263,13600,4.5812
36
+ 270,14000,4.8277
37
+ 275,14400,6.4227
38
+ 281,14800,7.0743
39
+ 287,15200,5.8331
40
+ 292,15600,5.6509
41
+ 297,16000,7.8764
42
+ 303,16400,4.8805
43
+ 308,16800,6.1394
44
+ 312,17200,7.3848
45
+ 319,17600,5.4064
46
+ 326,18000,4.2903
47
+ 333,18400,4.4865
48
+ 338,18800,6.5078
49
+ 344,19200,5.3164
50
+ 348,19600,6.5957
51
+ 352,20000,8.4949
52
+ 357,20400,8.0414
53
+ 361,20800,8.259
54
+ 366,21200,8.7431
55
+ 371,21600,8.1425
56
+ 375,22000,7.6031
57
+ 379,22400,10.6469
58
+ 384,22800,7.4268
59
+ 389,23200,7.8948
60
+ 393,23600,9.7095
61
+ 399,24000,7.9959
62
+ 403,24400,12.6126
63
+ 407,24800,10.4783
64
+ 414,25200,8.8362
65
+ 418,25600,12.1951
66
+ 422,26000,12.6242
67
+ 426,26400,13.1493
68
+ 430,26800,11.1338
69
+ 434,27200,11.6175
70
+ 440,27600,7.7618
71
+ 445,28000,10.8874
72
+ 451,28400,9.7606
73
+ 456,28800,11.5253
74
+ 462,29200,6.7856
75
+ 467,29600,11.7964
76
+ 472,30000,8.8168
77
+ 476,30400,13.8556
78
+ 481,30800,11.2407
79
+ 485,31200,13.1211
80
+ 491,31600,10.3695
81
+ 496,32000,8.6598
82
+ 500,32400,13.3756
83
+ 506,32800,9.0459
84
+ 511,33200,10.4668
85
+ 516,33600,9.0482
86
+ 522,34000,9.8797
87
+ 526,34400,13.0444
88
+ 532,34800,7.3031
89
+ 536,35200,12.3843
90
+ 541,35600,11.6548
91
+ 545,36000,12.1808
92
+ 550,36400,8.937
93
+ 554,36800,13.32
94
+ 559,37200,10.2187
95
+ 563,37600,11.1425
96
+ 569,38000,10.4837
97
+ 573,38400,9.4084
98
+ 579,38800,9.5927
99
+ 585,39200,8.1512
100
+ 591,39600,7.4306
101
+ 598,40000,7.4929
102
+ 603,40400,12.0014
103
+ 609,40800,6.8877
104
+ 615,41200,8.7736
105
+ 622,41600,6.575
106
+ 632,42000,3.9331
107
+ 638,42400,9.7754
108
+ 644,42800,8.4288
109
+ 649,43200,9.7733
110
+ 656,43600,6.2225
111
+ 666,44000,4.1808
112
+ 672,44400,6.0964
113
+ 685,44800,3.3983
114
+ 694,45200,5.6596
115
+ 702,45600,6.6803
116
+ 708,46000,7.2517
117
+ 716,46400,5.9717
118
+ 729,46800,3.6724
119
+ 740,47200,4.6841
120
+ 744,47600,12.0424
121
+ 751,48000,8.4528
122
+ 757,48400,7.1028
123
+ 761,48800,12.6503
124
+ 770,49200,4.9012
125
+ 775,49600,9.6465
126
+ 782,50000,7.875
127
+ 787,50400,12.0186
128
+ 793,50800,9.2714
129
+ 798,51200,9.0638
130
+ 804,51600,9.1859
131
+ 808,52000,13.3962
132
+ 814,52400,9.1845
133
+ 819,52800,8.6704
134
+ 823,53200,12.922
135
+ 827,53600,13.1999
136
+ 832,54000,10.8399
137
+ 837,54400,12.6242
138
+ 842,54800,11.2661
139
+ 847,55200,10.5994
140
+ 853,55600,8.1275
141
+ 858,56000,12.1206
142
+ 866,56400,6.3821
143
+ 871,56800,8.825
144
+ 875,57200,13.9679
145
+ 880,57600,13.0994
146
+ 884,58000,14.0452
147
+ 888,58400,10.9111
148
+ 892,58800,13.7891
149
+ 897,59200,12.1729
150
+ 902,59600,12.7809
151
+ 907,60000,10.9993
152
+ 914,60400,6.609
153
+ 922,60800,6.1909
154
+ 927,61200,10.7844
155
+ 934,61600,7.0903
156
+ 945,62000,4.2601
157
+ 953,62400,5.6534
158
+ 958,62800,10.075
159
+ 963,63200,10.4977
160
+ 967,63600,11.9044
161
+ 971,64000,13.6212
162
+ 975,64400,13.086
163
+ 980,64800,12.0809
164
+ 987,65200,8.3579
165
+ 993,65600,8.2868
166
+ 997,66000,14.3792
167
+ 1001,66400,11.228
168
+ 1008,66800,8.2525
169
+ 1017,67200,6.1501
170
+ 1022,67600,9.1965
171
+ 1027,68000,11.0591
172
+ 1032,68400,11.1621
173
+ 1038,68800,10.1783
174
+ 1042,69200,10.0313
175
+ 1052,69600,5.234
176
+ 1060,70000,7.2449
177
+ 1064,70400,11.1294
178
+ 1070,70800,7.2192
179
+ 1075,71200,10.5247
180
+ 1082,71600,8.6408
181
+ 1087,72000,12.3906
182
+ 1091,72400,10.4689
183
+ 1095,72800,14.3503
184
+ 1100,73200,13.3414
185
+ 1104,73600,14.6343
186
+ 1108,74000,12.0234
187
+ 1113,74400,10.4043
188
+ 1118,74800,13.1958
189
+ 1122,75200,14.1113
190
+ 1128,75600,8.9121
191
+ 1133,76000,9.4241
192
+ 1138,76400,12.1937
193
+ 1142,76800,10.8917
194
+ 1148,77200,9.6075
195
+ 1152,77600,12.5465
196
+ 1157,78000,12.2911
197
+ 1161,78400,14.3991
198
+ 1166,78800,11.7178
199
+ 1170,79200,14.2131
200
+ 1176,79600,9.4352
201
+ 1182,80000,9.8783
202
+ 1187,80400,8.9671
203
+ 1192,80800,11.4625
204
+ 1197,81200,13.3021
205
+ 1201,81600,14.9308
206
+ 1205,82000,11.3369
207
+ 1212,82400,9.6094
208
+ 1217,82800,11.86
209
+ 1221,83200,12.7279
210
+ 1227,83600,10.7913
211
+ 1232,84000,11.0473
212
+ 1236,84400,14.2463
213
+ 1242,84800,9.8461
214
+ 1246,85200,14.7624
215
+ 1252,85600,10.4951
216
+ 1259,86000,8.9123
217
+ 1265,86400,10.1258
218
+ 1271,86800,9.0527
219
+ 1276,87200,12.5009
220
+ 1280,87600,14.9112
221
+ 1286,88000,9.7683
222
+ 1291,88400,12.2766
223
+ 1295,88800,14.6523
224
+ 1303,89200,8.0044
225
+ 1307,89600,14.4251
226
+ 1311,90000,14.7435
227
+ 1316,90400,13.5846
228
+ 1322,90800,11.1363
229
+ 1326,91200,13.1161
230
+ 1330,91600,14.8782
231
+ 1335,92000,12.9898
232
+ 1341,92400,10.5745
233
+ 1349,92800,7.7201
234
+ 1355,93200,9.7362
235
+ 1362,93600,9.0283
236
+ 1371,94000,5.595
237
+ 1376,94400,9.584
238
+ 1384,94800,7.7162
239
+ 1389,95200,9.5471
240
+ 1395,95600,11.8725
241
+ 1400,96000,11.4859
242
+ 1406,96400,10.0133
243
+ 1411,96800,10.0898
244
+ 1419,97200,7.1929
245
+ 1425,97600,10.2308
246
+ 1431,98000,10.8156
247
+ 1436,98400,11.1904
248
+ 1441,98800,13.0126
249
+ 1448,99200,9.5278
250
+ 1456,99600,7.7219
251
+ 1460,100000,11.838
252
+ 1467,100400,10.3963
253
+ 1471,100800,15.1954
254
+ 1477,101200,9.7102
255
+ 1483,101600,10.8302
256
+ 1487,102000,13.6986
257
+ 1491,102400,17.6513
258
+ 1497,102800,11.7696
259
+ 1502,103200,14.6363
260
+ 1506,103600,12.3268
261
+ 1510,104000,15.2753
262
+ 1515,104400,13.8737
263
+ 1520,104800,9.0522
264
+ 1524,105200,15.7886
265
+ 1529,105600,14.429
266
+ 1534,106000,12.0745
267
+ 1539,106400,13.0085
268
+ 1543,106800,16.5044
269
+ 1547,107200,12.8213
270
+ 1552,107600,10.883
271
+ 1557,108000,12.4299
272
+ 1561,108400,12.7274
273
+ 1565,108800,15.701
274
+ 1571,109200,11.4077
275
+ 1576,109600,13.4852
276
+ 1580,110000,15.1326
277
+ 1585,110400,12.6496
278
+ 1592,110800,11.1331
279
+ 1598,111200,10.001
280
+ 1604,111600,9.8463
281
+ 1610,112000,9.5269
282
+ 1615,112400,12.4523
283
+ 1623,112800,8.4184
284
+ 1627,113200,14.1469
285
+ 1634,113600,7.9029
286
+ 1640,114000,11.989
287
+ 1646,114400,10.0607
288
+ 1654,114800,7.935
289
+ 1658,115200,15.8146
290
+ 1662,115600,16.4529
291
+ 1666,116000,16.031
292
+ 1670,116400,16.961
293
+ 1674,116800,16.9752
294
+ 1680,117200,11.4042
295
+ 1687,117600,10.9484
296
+ 1693,118000,11.3393
297
+ 1697,118400,19.1626
298
+ 1702,118800,16.0651
299
+ 1707,119200,12.5378
300
+ 1713,119600,14.3033
301
+ 1720,120000,11.5588
302
+ 1724,120400,15.8731
303
+ 1729,120800,16.0878
304
+ 1734,121200,16.3894
305
+ 1739,121600,14.4359
306
+ 1745,122000,14.7653
307
+ 1752,122400,10.6896
308
+ 1756,122800,17.8908
309
+ 1762,123200,14.3452
310
+ 1768,123600,13.2548
311
+ 1772,124000,19.6647
312
+ 1776,124400,19.1847
313
+ 1780,124800,16.8379
314
+ 1786,125200,14.3642
315
+ 1790,125600,16.4554
316
+ 1794,126000,19.7687
317
+ 1801,126400,10.1183
318
+ 1806,126800,18.1265
319
+ 1812,127200,11.8105
320
+ 1818,127600,11.0402
321
+ 1823,128000,16.0082
322
+ 1830,128400,9.7205
323
+ 1836,128800,10.7887
324
+ 1841,129200,12.7927
325
+ 1845,129600,17.2088
326
+ 1852,130000,11.4728
327
+ 1857,130400,13.019
328
+ 1861,130800,18.7369
329
+ 1868,131200,11.284
330
+ 1873,131600,13.2389
331
+ 1878,132000,15.4627
332
+ 1882,132400,19.8252
333
+ 1889,132800,10.8438
334
+ 1894,133200,12.0874
335
+ 1901,133600,11.009
336
+ 1906,134000,13.7517
337
+ 1915,134400,8.6863
338
+ 1921,134800,10.9831
339
+ 1928,135200,11.1524
340
+ 1934,135600,11.4495
341
+ 1941,136000,9.4189
342
+ 1945,136400,18.9608
343
+ 1952,136800,9.5868
344
+ 1958,137200,10.5218
345
+ 1964,137600,13.0402
346
+ 1972,138000,7.7796
347
+ 1980,138400,7.3364
348
+ 1986,138800,13.7623
349
+ 1991,139200,16.2419
350
+ 1996,139600,11.4699
351
+ 2003,140000,10.983
352
+ 2012,140400,7.5045
353
+ 2016,140800,20.0741
354
+ 2022,141200,12.6223
355
+ 2027,141600,15.3017
356
+ 2033,142000,12.7772
357
+ 2037,142400,20.7311
358
+ 2041,142800,18.8427
359
+ 2046,143200,19.43
360
+ 2051,143600,15.0906
361
+ 2056,144000,13.0377
362
+ 2065,144400,7.2051
363
+ 2071,144800,13.2424
364
+ 2078,145200,11.0955
365
+ 2088,145600,5.3895
366
+ 2093,146000,12.318
367
+ 2100,146400,12.8436
368
+ 2105,146800,14.4873
369
+ 2109,147200,19.8853
370
+ 2117,147600,10.7438
371
+ 2123,148000,13.8923
372
+ 2128,148400,15.6226
373
+ 2132,148800,18.0464
374
+ 2137,149200,16.3704
375
+ 2144,149600,12.576
376
+ 2148,150000,20.3183
377
+ 2156,150400,9.807
378
+ 2161,150800,12.9068
379
+ 2165,151200,21.0426
380
+ 2171,151600,15.371
381
+ 2175,152000,17.7154
382
+ 2180,152400,17.2565
383
+ 2185,152800,18.8765
384
+ 2190,153200,17.2862
385
+ 2194,153600,19.4167
386
+ 2200,154000,13.2546
387
+ 2205,154400,15.271
388
+ 2211,154800,14.3329
389
+ 2215,155200,21.1205
390
+ 2221,155600,12.1206
391
+ 2227,156000,14.4072
392
+ 2232,156400,16.5247
393
+ 2236,156800,21.231
394
+ 2240,157200,20.8302
395
+ 2244,157600,19.4892
396
+ 2248,158000,20.936
397
+ 2252,158400,21.2178
398
+ 2256,158800,21.1056
399
+ 2261,159200,16.8762
400
+ 2267,159600,14.0483
401
+ 2272,160000,13.3591
402
+ 2279,160400,12.6142
403
+ 2285,160800,9.0069
404
+ 2291,161200,13.7843
405
+ 2300,161600,7.8473
406
+ 2307,162000,9.9078
407
+ 2312,162400,15.3595
408
+ 2317,162800,14.6767
409
+ 2324,163200,10.7115
410
+ 2331,163600,9.728
411
+ 2336,164000,14.8431
412
+ 2341,164400,15.2026
413
+ 2346,164800,12.322
414
+ 2351,165200,11.1342
415
+ 2356,165600,14.9371
416
+ 2361,166000,13.2367
417
+ 2365,166400,20.3463
418
+ 2371,166800,10.1723
419
+ 2377,167200,10.4504
420
+ 2381,167600,18.0134
421
+ 2388,168000,10.336
422
+ 2395,168400,11.1223
423
+ 2403,168800,6.6491
424
+ 2410,169200,9.6499
425
+ 2418,169600,7.0802
426
+ 2423,170000,10.1417
427
+ 2431,170400,6.2054
428
+ 2440,170800,7.8601
429
+ 2449,171200,7.7198
430
+ 2454,171600,14.5012
431
+ 2459,172000,15.7605
432
+ 2467,172400,7.7054
433
+ 2473,172800,12.5347
434
+ 2478,173200,15.5908
435
+ 2486,173600,5.7416
436
+ 2494,174000,9.3572
437
+ 2501,174400,9.0669
438
+ 2505,174800,11.2773
439
+ 2512,175200,6.6277
440
+ 2518,175600,4.9397
441
+ 2524,176000,6.4387
442
+ 2529,176400,9.8694
443
+ 2534,176800,8.5331
444
+ 2538,177200,8.5411
445
+ 2545,177600,6.2453
446
+ 2549,178000,11.7878
447
+ 2554,178400,10.175
448
+ 2558,178800,12.4336
449
+ 2565,179200,6.5908
450
+ 2570,179600,6.219
451
+ 2577,180000,6.0009
452
+ 2582,180400,5.8638
453
+ 2588,180800,6.9441
454
+ 2592,181200,6.8218
455
+ 2598,181600,9.0203
456
+ 2604,182000,7.8072
457
+ 2609,182400,6.4259
458
+ 2616,182800,6.3331
459
+ 2622,183200,9.3925
460
+ 2631,183600,6.0891
461
+ 2639,184000,5.3467
462
+ 2645,184400,5.1621
463
+ 2652,184800,7.0998
464
+ 2656,185200,8.041
465
+ 2663,185600,4.7481
466
+ 2670,186000,5.0002
467
+ 2677,186400,4.4118
468
+ 2682,186800,5.4232
469
+ 2689,187200,7.2963
470
+ 2693,187600,6.6073
471
+ 2699,188000,8.326
472
+ 2707,188400,5.5025
473
+ 2713,188800,5.5795
474
+ 2721,189200,3.796
475
+ 2728,189600,5.6303
476
+ 2734,190000,6.2013
477
+ 2740,190400,6.0202
478
+ 2746,190800,4.2373
479
+ 2751,191200,8.5135
480
+ 2758,191600,3.9532
481
+ 2763,192000,7.6742
482
+ 2770,192400,4.637
483
+ 2776,192800,4.4964
484
+ 2781,193200,8.2152
485
+ 2789,193600,4.0498
486
+ 2793,194000,7.944
487
+ 2798,194400,6.7629
488
+ 2802,194800,6.5532
489
+ 2807,195200,7.4252
490
+ 2811,195600,8.0144
491
+ 2815,196000,7.9792
492
+ 2819,196400,7.9971
493
+ 2824,196800,5.5573
494
+ 2828,197200,7.3163
495
+ 2833,197600,8.0791
496
+ 2838,198000,5.7746
497
+ 2843,198400,7.1306
498
+ 2848,198800,7.198
499
+ 2852,199200,8.5707
500
+ 2858,199600,5.5788
501
+ 2862,200000,8.7377
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_5.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 22,400,1.2943
3
+ 41,800,1.6015
4
+ 55,1200,1.9737
5
+ 64,1600,3.5487
6
+ 73,2000,3.146
7
+ 82,2400,3.2991
8
+ 92,2800,2.7673
9
+ 100,3200,3.8523
10
+ 107,3600,3.7979
11
+ 115,4000,4.256
12
+ 119,4400,6.4793
13
+ 124,4800,5.5414
14
+ 128,5200,7.3476
15
+ 133,5600,5.8006
16
+ 138,6000,6.1344
17
+ 144,6400,5.8326
18
+ 150,6800,4.8366
19
+ 154,7200,6.7648
20
+ 159,7600,6.5947
21
+ 163,8000,7.3957
22
+ 167,8400,7.3241
23
+ 172,8800,5.9961
24
+ 177,9200,6.0296
25
+ 182,9600,7.2685
26
+ 187,10000,6.637
27
+ 192,10400,5.999
28
+ 196,10800,6.4386
29
+ 201,11200,6.6646
30
+ 207,11600,5.4069
31
+ 212,12000,6.6252
32
+ 216,12400,6.4882
33
+ 223,12800,5.5298
34
+ 228,13200,6.2035
35
+ 233,13600,5.9536
36
+ 237,14000,6.9952
37
+ 244,14400,4.9771
38
+ 249,14800,6.1157
39
+ 253,15200,7.8858
40
+ 257,15600,7.8068
41
+ 261,16000,7.9837
42
+ 266,16400,6.6484
43
+ 270,16800,8.0041
44
+ 274,17200,6.6656
45
+ 278,17600,8.0284
46
+ 283,18000,7.3533
47
+ 290,18400,4.5723
48
+ 295,18800,6.8589
49
+ 299,19200,8.1765
50
+ 303,19600,8.1
51
+ 307,20000,8.1549
52
+ 312,20400,5.818
53
+ 318,20800,6.0381
54
+ 323,21200,5.8448
55
+ 327,21600,8.4165
56
+ 332,22000,6.3011
57
+ 336,22400,8.2937
58
+ 343,22800,5.503
59
+ 347,23200,8.5968
60
+ 353,23600,5.1292
61
+ 358,24000,7.4338
62
+ 363,24400,6.6991
63
+ 368,24800,6.5642
64
+ 373,25200,6.8225
65
+ 377,25600,8.3731
66
+ 382,26000,7.4055
67
+ 387,26400,7.1132
68
+ 393,26800,5.6948
69
+ 399,27200,5.2845
70
+ 404,27600,5.9669
71
+ 408,28000,8.3363
72
+ 415,28400,5.6672
73
+ 420,28800,5.4611
74
+ 425,29200,7.714
75
+ 429,29600,7.5512
76
+ 434,30000,7.2347
77
+ 439,30400,6.5507
78
+ 443,30800,8.4242
79
+ 449,31200,6.0604
80
+ 453,31600,8.2476
81
+ 458,32000,8.0065
82
+ 462,32400,8.2966
83
+ 467,32800,6.7805
84
+ 471,33200,9.484
85
+ 475,33600,8.7649
86
+ 481,34000,8.2474
87
+ 485,34400,9.5795
88
+ 490,34800,8.6045
89
+ 496,35200,7.4124
90
+ 501,35600,7.6157
91
+ 505,36000,10.9111
92
+ 510,36400,8.8556
93
+ 514,36800,9.9951
94
+ 519,37200,11.3025
95
+ 523,37600,11.5043
96
+ 527,38000,11.7302
97
+ 532,38400,10.3442
98
+ 536,38800,12.0948
99
+ 541,39200,10.2846
100
+ 545,39600,12.2089
101
+ 549,40000,11.8314
102
+ 554,40400,9.8942
103
+ 559,40800,10.0485
104
+ 563,41200,12.4629
105
+ 567,41600,12.2332
106
+ 571,42000,11.1847
107
+ 575,42400,12.4524
108
+ 580,42800,10.2353
109
+ 587,43200,6.303
110
+ 591,43600,12.3469
111
+ 596,44000,11.1024
112
+ 600,44400,12.4106
113
+ 604,44800,11.5728
114
+ 608,45200,10.9579
115
+ 614,45600,9.6399
116
+ 618,46000,11.3217
117
+ 622,46400,12.529
118
+ 628,46800,9.0714
119
+ 633,47200,9.9013
120
+ 638,47600,10.0751
121
+ 643,48000,10.2499
122
+ 647,48400,12.5708
123
+ 652,48800,8.7569
124
+ 656,49200,12.3604
125
+ 660,49600,12.5165
126
+ 664,50000,12.3671
127
+ 668,50400,12.618
128
+ 672,50800,12.6543
129
+ 677,51200,10.6911
130
+ 681,51600,11.8317
131
+ 685,52000,12.6955
132
+ 690,52400,10.1157
133
+ 695,52800,10.0116
134
+ 700,53200,10.2901
135
+ 705,53600,8.1824
136
+ 710,54000,9.8285
137
+ 716,54400,8.8717
138
+ 721,54800,9.9854
139
+ 725,55200,12.9736
140
+ 731,55600,8.6973
141
+ 737,56000,8.4719
142
+ 742,56400,10.3744
143
+ 747,56800,8.9466
144
+ 752,57200,10.6086
145
+ 758,57600,10.0045
146
+ 763,58000,9.8052
147
+ 768,58400,11.2697
148
+ 774,58800,7.2144
149
+ 780,59200,9.6594
150
+ 784,59600,12.3888
151
+ 789,60000,10.4215
152
+ 795,60400,8.8573
153
+ 800,60800,10.8871
154
+ 807,61200,5.8992
155
+ 812,61600,10.12
156
+ 817,62000,10.8824
157
+ 821,62400,13.1221
158
+ 826,62800,10.7535
159
+ 830,63200,13.1014
160
+ 835,63600,10.7345
161
+ 839,64000,13.082
162
+ 843,64400,13.2581
163
+ 847,64800,13.4413
164
+ 851,65200,13.5097
165
+ 855,65600,13.3641
166
+ 859,66000,13.3583
167
+ 863,66400,13.1701
168
+ 868,66800,11.1134
169
+ 872,67200,13.6245
170
+ 880,67600,5.9886
171
+ 886,68000,10.2475
172
+ 890,68400,14.6202
173
+ 894,68800,10.0685
174
+ 900,69200,11.0053
175
+ 904,69600,13.6849
176
+ 909,70000,9.6346
177
+ 913,70400,13.5021
178
+ 917,70800,13.1414
179
+ 922,71200,10.8646
180
+ 926,71600,13.4908
181
+ 932,72000,9.1396
182
+ 936,72400,11.6489
183
+ 940,72800,13.2407
184
+ 944,73200,13.0342
185
+ 948,73600,13.5329
186
+ 952,74000,13.4403
187
+ 956,74400,13.7446
188
+ 960,74800,13.1605
189
+ 964,75200,13.3433
190
+ 969,75600,10.9952
191
+ 974,76000,11.5741
192
+ 980,76400,9.3918
193
+ 985,76800,11.7496
194
+ 992,77200,8.3268
195
+ 997,77600,10.0094
196
+ 1002,78000,10.6563
197
+ 1007,78400,11.5348
198
+ 1011,78800,15.119
199
+ 1017,79200,12.0912
200
+ 1023,79600,8.9712
201
+ 1027,80000,11.6409
202
+ 1031,80400,15.629
203
+ 1036,80800,13.3766
204
+ 1040,81200,15.2536
205
+ 1045,81600,12.2289
206
+ 1049,82000,15.4839
207
+ 1055,82400,9.1355
208
+ 1060,82800,12.202
209
+ 1064,83200,14.748
210
+ 1068,83600,14.5001
211
+ 1072,84000,15.0432
212
+ 1080,84400,7.0194
213
+ 1085,84800,10.1806
214
+ 1090,85200,11.795
215
+ 1095,85600,12.7322
216
+ 1100,86000,10.2449
217
+ 1109,86400,6.4535
218
+ 1117,86800,6.893
219
+ 1123,87200,8.8297
220
+ 1129,87600,9.9537
221
+ 1134,88000,9.4762
222
+ 1138,88400,15.7652
223
+ 1144,88800,12.4334
224
+ 1153,89200,6.025
225
+ 1157,89600,14.0408
226
+ 1162,90000,12.8277
227
+ 1167,90400,12.4492
228
+ 1173,90800,8.5448
229
+ 1180,91200,9.9864
230
+ 1184,91600,15.1804
231
+ 1190,92000,8.7061
232
+ 1196,92400,11.9629
233
+ 1202,92800,9.8801
234
+ 1207,93200,10.6953
235
+ 1211,93600,16.5854
236
+ 1221,94000,6.0867
237
+ 1229,94400,9.2746
238
+ 1234,94800,12.5134
239
+ 1240,95200,8.38
240
+ 1245,95600,14.4995
241
+ 1249,96000,16.0927
242
+ 1255,96400,8.763
243
+ 1261,96800,11.1095
244
+ 1265,97200,15.5608
245
+ 1271,97600,12.1043
246
+ 1278,98000,9.2772
247
+ 1283,98400,10.4808
248
+ 1291,98800,9.4265
249
+ 1296,99200,13.4881
250
+ 1303,99600,8.7073
251
+ 1311,100000,7.7693
252
+ 1323,100400,5.2588
253
+ 1328,100800,12.0091
254
+ 1338,101200,6.3879
255
+ 1346,101600,9.2148
256
+ 1354,102000,6.5712
257
+ 1359,102400,15.4022
258
+ 1363,102800,17.8751
259
+ 1372,103200,5.6517
260
+ 1379,103600,11.7892
261
+ 1383,104000,13.7202
262
+ 1390,104400,12.0935
263
+ 1398,104800,8.3031
264
+ 1403,105200,11.4273
265
+ 1409,105600,13.6581
266
+ 1414,106000,12.8389
267
+ 1421,106400,10.5737
268
+ 1426,106800,14.0672
269
+ 1431,107200,15.1905
270
+ 1435,107600,16.499
271
+ 1441,108000,14.9027
272
+ 1447,108400,12.2743
273
+ 1453,108800,12.5351
274
+ 1458,109200,13.3324
275
+ 1464,109600,10.1987
276
+ 1470,110000,14.6681
277
+ 1480,110400,6.3981
278
+ 1484,110800,17.443
279
+ 1489,111200,11.7544
280
+ 1494,111600,12.3627
281
+ 1500,112000,14.2433
282
+ 1507,112400,9.9183
283
+ 1514,112800,10.1194
284
+ 1520,113200,11.6173
285
+ 1526,113600,12.6407
286
+ 1530,114000,14.9395
287
+ 1537,114400,10.6247
288
+ 1546,114800,8.2285
289
+ 1555,115200,7.4419
290
+ 1565,115600,6.9423
291
+ 1571,116000,11.8016
292
+ 1576,116400,13.4006
293
+ 1582,116800,12.1714
294
+ 1589,117200,9.5481
295
+ 1594,117600,17.1002
296
+ 1599,118000,13.3729
297
+ 1605,118400,14.4748
298
+ 1610,118800,15.9086
299
+ 1616,119200,13.515
300
+ 1621,119600,13.6216
301
+ 1627,120000,13.3638
302
+ 1631,120400,15.8724
303
+ 1637,120800,13.1813
304
+ 1642,121200,16.7492
305
+ 1647,121600,16.2201
306
+ 1652,122000,14.2471
307
+ 1658,122400,11.9562
308
+ 1663,122800,14.3429
309
+ 1670,123200,8.2829
310
+ 1675,123600,12.7689
311
+ 1679,124000,17.9555
312
+ 1686,124400,10.3992
313
+ 1693,124800,9.3774
314
+ 1700,125200,10.5229
315
+ 1705,125600,13.095
316
+ 1709,126000,16.827
317
+ 1716,126400,11.5151
318
+ 1720,126800,15.0954
319
+ 1725,127200,17.1223
320
+ 1729,127600,19.2025
321
+ 1734,128000,16.3688
322
+ 1740,128400,13.0476
323
+ 1744,128800,17.5761
324
+ 1751,129200,9.9195
325
+ 1758,129600,12.3492
326
+ 1763,130000,12.9946
327
+ 1768,130400,15.0853
328
+ 1774,130800,12.7493
329
+ 1780,131200,13.7049
330
+ 1784,131600,16.4027
331
+ 1791,132000,10.02
332
+ 1796,132400,13.2953
333
+ 1802,132800,13.2571
334
+ 1807,133200,16.9227
335
+ 1816,133600,7.5362
336
+ 1823,134000,9.0337
337
+ 1831,134400,8.2112
338
+ 1839,134800,9.5222
339
+ 1845,135200,10.5068
340
+ 1851,135600,15.2168
341
+ 1860,136000,7.9646
342
+ 1866,136400,9.9186
343
+ 1872,136800,11.7983
344
+ 1878,137200,15.5265
345
+ 1884,137600,11.4403
346
+ 1889,138000,14.1125
347
+ 1893,138400,18.7814
348
+ 1898,138800,15.9716
349
+ 1907,139200,6.6898
350
+ 1916,139600,8.6765
351
+ 1926,140000,6.0023
352
+ 1930,140400,18.1008
353
+ 1938,140800,8.4123
354
+ 1946,141200,8.4666
355
+ 1953,141600,9.8084
356
+ 1958,142000,12.6225
357
+ 1962,142400,16.6253
358
+ 1969,142800,11.879
359
+ 1979,143200,5.891
360
+ 1983,143600,14.6341
361
+ 1991,144000,10.6154
362
+ 1995,144400,19.3371
363
+ 2000,144800,14.6999
364
+ 2005,145200,17.3212
365
+ 2010,145600,13.511
366
+ 2019,146000,8.9176
367
+ 2025,146400,11.9234
368
+ 2029,146800,18.7508
369
+ 2035,147200,11.3652
370
+ 2042,147600,10.7875
371
+ 2047,148000,17.2345
372
+ 2052,148400,13.469
373
+ 2057,148800,15.0187
374
+ 2062,149200,18.2483
375
+ 2067,149600,15.1589
376
+ 2072,150000,13.8506
377
+ 2078,150400,13.5154
378
+ 2083,150800,17.4931
379
+ 2088,151200,13.0434
380
+ 2094,151600,12.4448
381
+ 2098,152000,16.2641
382
+ 2103,152400,18.5631
383
+ 2109,152800,13.4721
384
+ 2114,153200,17.5024
385
+ 2118,153600,17.2629
386
+ 2123,154000,16.0443
387
+ 2130,154400,12.4068
388
+ 2134,154800,19.7433
389
+ 2141,155200,12.1725
390
+ 2145,155600,17.4482
391
+ 2151,156000,14.9725
392
+ 2158,156400,8.3286
393
+ 2163,156800,15.3569
394
+ 2168,157200,16.8249
395
+ 2176,157600,10.7159
396
+ 2181,158000,13.5756
397
+ 2186,158400,16.8722
398
+ 2191,158800,16.1566
399
+ 2196,159200,16.4402
400
+ 2200,159600,19.2901
401
+ 2206,160000,13.408
402
+ 2216,160400,6.5277
403
+ 2221,160800,16.5398
404
+ 2225,161200,17.9313
405
+ 2229,161600,16.0893
406
+ 2234,162000,13.5297
407
+ 2239,162400,18.5255
408
+ 2244,162800,15.412
409
+ 2249,163200,16.7656
410
+ 2254,163600,12.2883
411
+ 2260,164000,12.8931
412
+ 2268,164400,9.994
413
+ 2273,164800,14.903
414
+ 2278,165200,14.9247
415
+ 2284,165600,13.8873
416
+ 2288,166000,15.893
417
+ 2294,166400,16.0733
418
+ 2298,166800,20.3023
419
+ 2306,167200,8.823
420
+ 2311,167600,14.7959
421
+ 2316,168000,15.5506
422
+ 2320,168400,18.9412
423
+ 2328,168800,9.9269
424
+ 2337,169200,7.8662
425
+ 2342,169600,13.8098
426
+ 2347,170000,16.0699
427
+ 2352,170400,14.2326
428
+ 2357,170800,14.3457
429
+ 2365,171200,7.755
430
+ 2372,171600,11.1969
431
+ 2377,172000,17.7461
432
+ 2382,172400,14.5072
433
+ 2387,172800,15.6355
434
+ 2392,173200,16.9889
435
+ 2397,173600,16.7953
436
+ 2402,174000,14.2381
437
+ 2407,174400,15.2855
438
+ 2414,174800,9.9614
439
+ 2422,175200,6.716
440
+ 2429,175600,9.1914
441
+ 2433,176000,15.9262
442
+ 2439,176400,13.2029
443
+ 2444,176800,12.0649
444
+ 2448,177200,11.2086
445
+ 2453,177600,17.596
446
+ 2458,178000,10.4265
447
+ 2464,178400,12.7507
448
+ 2470,178800,10.5018
449
+ 2477,179200,9.6174
450
+ 2484,179600,12.3463
451
+ 2493,180000,7.463
452
+ 2502,180400,7.0977
453
+ 2511,180800,6.566
454
+ 2516,181200,12.4002
455
+ 2522,181600,12.8125
456
+ 2526,182000,16.8705
457
+ 2532,182400,15.6621
458
+ 2537,182800,15.5005
459
+ 2545,183200,7.1256
460
+ 2549,183600,19.091
461
+ 2553,184000,20.7269
462
+ 2558,184400,17.8736
463
+ 2562,184800,16.5145
464
+ 2568,185200,12.29
465
+ 2574,185600,11.165
466
+ 2580,186000,14.862
467
+ 2585,186400,12.9498
468
+ 2590,186800,16.6379
469
+ 2597,187200,12.0999
470
+ 2601,187600,19.986
471
+ 2605,188000,21.4165
472
+ 2610,188400,12.3688
473
+ 2614,188800,17.9821
474
+ 2618,189200,20.5196
475
+ 2623,189600,19.5269
476
+ 2629,190000,11.2376
477
+ 2633,190400,20.1738
478
+ 2637,190800,20.5599
479
+ 2642,191200,15.8952
480
+ 2649,191600,12.2397
481
+ 2654,192000,16.6287
482
+ 2659,192400,14.966
483
+ 2665,192800,13.1073
484
+ 2671,193200,11.1274
485
+ 2676,193600,15.6753
486
+ 2681,194000,15.8626
487
+ 2688,194400,9.1965
488
+ 2693,194800,15.0967
489
+ 2697,195200,19.8348
490
+ 2701,195600,14.855
491
+ 2706,196000,13.0618
492
+ 2715,196400,9.1275
493
+ 2721,196800,7.1574
494
+ 2726,197200,16.5246
495
+ 2732,197600,10.026
496
+ 2739,198000,11.8092
497
+ 2745,198400,10.2368
498
+ 2751,198800,11.7544
499
+ 2757,199200,11.6034
500
+ 2764,199600,9.3743
501
+ 2769,200000,15.3278
code/Lake application/logs/results_2/PPO_frozen_lake_log_1.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 15,400,1.9345
3
+ 32,800,1.8784
4
+ 42,1200,3.0732
5
+ 52,1600,2.9835
6
+ 65,2000,2.3838
7
+ 69,2400,7.2159
8
+ 75,2800,4.758
9
+ 80,3200,5.6732
10
+ 85,3600,7.076
11
+ 89,4000,5.77
12
+ 95,4400,5.4889
13
+ 101,4800,4.8132
14
+ 106,5200,7.4812
15
+ 115,5600,2.816
16
+ 123,6000,4.0168
17
+ 129,6400,5.4723
18
+ 137,6800,3.675
19
+ 142,7200,5.8091
20
+ 147,7600,5.6523
21
+ 151,8000,7.3976
22
+ 158,8400,4.279
23
+ 164,8800,4.7596
24
+ 168,9200,7.4893
25
+ 173,9600,6.204
26
+ 177,10000,7.3342
27
+ 183,10400,4.9965
28
+ 188,10800,6.2773
29
+ 193,11200,6.1197
30
+ 197,11600,7.2967
31
+ 201,12000,7.4756
32
+ 205,12400,7.1791
33
+ 209,12800,7.3157
34
+ 213,13200,7.0592
35
+ 218,13600,5.8954
36
+ 222,14000,7.1992
37
+ 227,14400,6.5493
38
+ 232,14800,5.9315
39
+ 237,15200,7.0655
40
+ 243,15600,5.2935
41
+ 251,16000,3.6833
42
+ 258,16400,4.511
43
+ 267,16800,4.1031
44
+ 271,17200,7.4993
45
+ 275,17600,9.0943
46
+ 279,18000,7.6642
47
+ 285,18400,6.6167
48
+ 291,18800,5.4259
49
+ 295,19200,7.9226
50
+ 299,19600,8.2753
51
+ 304,20000,6.8015
52
+ 308,20400,7.8775
53
+ 315,20800,4.5316
54
+ 321,21200,5.4028
55
+ 326,21600,6.3314
56
+ 330,22000,8.1528
57
+ 334,22400,7.4973
58
+ 339,22800,6.6922
59
+ 343,23200,7.7072
60
+ 347,23600,7.2187
61
+ 351,24000,7.7859
62
+ 356,24400,7.0575
63
+ 360,24800,7.2281
64
+ 366,25200,5.8797
65
+ 372,25600,4.8101
66
+ 378,26000,5.0378
67
+ 384,26400,5.6277
68
+ 389,26800,5.2954
69
+ 400,27200,2.9769
70
+ 406,27600,4.6418
71
+ 416,28000,3.4303
72
+ 423,28400,4.0932
73
+ 430,28800,4.505
74
+ 435,29200,5.5546
75
+ 439,29600,10.5906
76
+ 445,30000,6.1133
77
+ 451,30400,5.9823
78
+ 455,30800,8.4017
79
+ 462,31200,5.0732
80
+ 468,31600,5.7932
81
+ 474,32000,6.708
82
+ 479,32400,6.8853
83
+ 485,32800,9.0958
84
+ 490,33200,6.4707
85
+ 495,33600,7.4192
86
+ 501,34000,6.0651
87
+ 506,34400,6.7035
88
+ 511,34800,6.8316
89
+ 518,35200,5.9218
90
+ 524,35600,7.2932
91
+ 528,36000,8.8676
92
+ 534,36400,8.8016
93
+ 538,36800,8.7465
94
+ 544,37200,8.41
95
+ 549,37600,8.3048
96
+ 555,38000,8.0676
97
+ 561,38400,6.0523
98
+ 566,38800,8.8973
99
+ 571,39200,8.7871
100
+ 577,39600,6.6108
101
+ 582,40000,10.0771
102
+ 587,40400,7.6992
103
+ 594,40800,5.8859
104
+ 602,41200,5.2757
105
+ 607,41600,7.4604
106
+ 614,42000,6.3297
107
+ 624,42400,4.3
108
+ 630,42800,7.3462
109
+ 636,43200,7.7832
110
+ 642,43600,7.2278
111
+ 649,44000,7.0903
112
+ 656,44400,7.2864
113
+ 660,44800,11.4337
114
+ 664,45200,9.4219
115
+ 669,45600,10.1337
116
+ 674,46000,10.8808
117
+ 679,46400,9.5832
118
+ 686,46800,6.8856
119
+ 692,47200,7.9853
120
+ 699,47600,5.4185
121
+ 705,48000,8.51
122
+ 711,48400,8.7795
123
+ 717,48800,8.1386
124
+ 721,49200,12.1496
125
+ 727,49600,9.671
126
+ 734,50000,4.7004
127
+ 742,50400,5.4595
128
+ 751,50800,7.1566
129
+ 756,51200,11.0405
130
+ 763,51600,7.8217
131
+ 769,52000,6.9623
132
+ 774,52400,9.7564
133
+ 780,52800,8.2737
134
+ 786,53200,7.7905
135
+ 792,53600,8.2016
136
+ 798,54000,11.1113
137
+ 803,54400,9.71
138
+ 811,54800,6.3473
139
+ 816,55200,10.3967
140
+ 822,55600,8.5624
141
+ 829,56000,7.9157
142
+ 836,56400,7.3681
143
+ 843,56800,7.9999
144
+ 848,57200,12.6498
145
+ 855,57600,6.7851
146
+ 864,58000,5.926
147
+ 871,58400,8.3211
148
+ 877,58800,8.4122
149
+ 885,59200,5.5055
150
+ 892,59600,4.8152
151
+ 896,60000,10.2298
152
+ 903,60400,8.8526
153
+ 908,60800,10.4385
154
+ 914,61200,6.8265
155
+ 920,61600,7.4231
156
+ 926,62000,7.7046
157
+ 932,62400,8.744
158
+ 939,62800,6.815
159
+ 944,63200,8.9794
160
+ 950,63600,9.0955
161
+ 960,64000,3.9011
162
+ 966,64400,9.1169
163
+ 970,64800,9.8831
164
+ 975,65200,10.7317
165
+ 983,65600,5.231
166
+ 988,66000,9.8312
167
+ 993,66400,8.1545
168
+ 999,66800,8.3283
169
+ 1008,67200,3.3866
170
+ 1014,67600,6.2135
171
+ 1018,68000,8.4834
172
+ 1025,68400,4.2607
173
+ 1031,68800,7.755
174
+ 1035,69200,6.7764
175
+ 1043,69600,4.8484
176
+ 1049,70000,6.8767
177
+ 1055,70400,7.2876
178
+ 1061,70800,6.5337
179
+ 1066,71200,8.7095
180
+ 1071,71600,8.5315
181
+ 1076,72000,8.1106
182
+ 1082,72400,6.5303
183
+ 1088,72800,5.1858
184
+ 1092,73200,8.2166
185
+ 1097,73600,7.7642
186
+ 1103,74000,5.7689
187
+ 1109,74400,6.5443
188
+ 1113,74800,6.0629
189
+ 1119,75200,6.9554
190
+ 1124,75600,6.5107
191
+ 1131,76000,5.8779
192
+ 1136,76400,7.0196
193
+ 1141,76800,8.221
194
+ 1146,77200,7.0804
195
+ 1150,77600,7.0824
196
+ 1158,78000,4.9257
197
+ 1165,78400,4.3861
198
+ 1171,78800,5.412
199
+ 1178,79200,5.8524
200
+ 1186,79600,4.6331
201
+ 1193,80000,5.5782
202
+ 1199,80400,7.2403
203
+ 1204,80800,8.5925
204
+ 1208,81200,9.0404
205
+ 1212,81600,10.5095
206
+ 1220,82000,7.0118
207
+ 1225,82400,8.1882
208
+ 1229,82800,10.863
209
+ 1233,83200,10.7497
210
+ 1239,83600,6.4536
211
+ 1243,84000,11.2907
212
+ 1247,84400,11.783
213
+ 1252,84800,11.0893
214
+ 1257,85200,9.8707
215
+ 1261,85600,13.0076
216
+ 1265,86000,12.8858
217
+ 1269,86400,13.1408
218
+ 1273,86800,11.8421
219
+ 1277,87200,9.9549
220
+ 1284,87600,6.7063
221
+ 1288,88000,12.4549
222
+ 1292,88400,13.2602
223
+ 1299,88800,7.2956
224
+ 1303,89200,15.6022
225
+ 1308,89600,12.0928
226
+ 1317,90000,3.9969
227
+ 1323,90400,6.6326
228
+ 1331,90800,6.7157
229
+ 1337,91200,6.7272
230
+ 1346,91600,4.2204
231
+ 1351,92000,10.5966
232
+ 1357,92400,6.7511
233
+ 1364,92800,6.8195
234
+ 1371,93200,6.92
235
+ 1381,93600,4.6135
236
+ 1387,94000,9.0048
237
+ 1395,94400,4.8047
238
+ 1401,94800,9.5174
239
+ 1407,95200,11.6457
240
+ 1414,95600,6.9109
241
+ 1421,96000,5.7336
242
+ 1430,96400,5.3074
243
+ 1434,96800,9.2165
244
+ 1439,97200,9.5083
245
+ 1447,97600,5.2046
246
+ 1456,98000,4.3764
247
+ 1462,98400,5.923
248
+ 1467,98800,8.1644
249
+ 1473,99200,6.7967
250
+ 1480,99600,7.7647
251
+ 1484,100000,12.2281
252
+ 1488,100400,9.1569
253
+ 1494,100800,8.54
254
+ 1499,101200,9.4563
255
+ 1503,101600,9.5965
256
+ 1509,102000,8.6554
257
+ 1514,102400,11.0492
258
+ 1518,102800,10.4916
259
+ 1525,103200,5.7317
260
+ 1534,103600,5.4161
261
+ 1539,104000,9.7729
262
+ 1546,104400,6.9356
263
+ 1552,104800,7.0423
264
+ 1556,105200,12.5092
265
+ 1563,105600,8.7948
266
+ 1567,106000,12.3567
267
+ 1573,106400,7.8177
268
+ 1577,106800,12.8039
269
+ 1584,107200,6.8714
270
+ 1589,107600,8.3869
271
+ 1593,108000,12.6683
272
+ 1601,108400,6.5902
273
+ 1607,108800,8.628
274
+ 1615,109200,5.4762
275
+ 1621,109600,9.3018
276
+ 1627,110000,7.4751
277
+ 1640,110400,4.139
278
+ 1647,110800,7.7327
279
+ 1652,111200,10.3567
280
+ 1662,111600,4.9881
281
+ 1668,112000,8.8983
282
+ 1673,112400,9.1326
283
+ 1683,112800,3.9693
284
+ 1693,113200,4.1639
285
+ 1699,113600,8.137
286
+ 1705,114000,8.6208
287
+ 1710,114400,9.1387
288
+ 1717,114800,6.4969
289
+ 1724,115200,5.8328
290
+ 1729,115600,9.6428
291
+ 1736,116000,7.7885
292
+ 1742,116400,6.1858
293
+ 1751,116800,5.3759
294
+ 1760,117200,4.9088
295
+ 1764,117600,12.8121
296
+ 1773,118000,5.528
297
+ 1781,118400,4.8728
298
+ 1786,118800,9.4107
299
+ 1794,119200,5.6412
300
+ 1800,119600,8.3779
301
+ 1804,120000,14.4553
302
+ 1814,120400,4.32
303
+ 1819,120800,12.7509
304
+ 1828,121200,6.4023
305
+ 1833,121600,15.743
306
+ 1841,122000,6.9661
307
+ 1847,122400,11.3894
308
+ 1852,122800,12.2086
309
+ 1857,123200,16.0304
310
+ 1862,123600,16.1933
311
+ 1870,124000,8.6231
312
+ 1875,124400,13.6078
313
+ 1880,124800,14.3245
314
+ 1889,125200,5.8726
315
+ 1896,125600,9.655
316
+ 1904,126000,6.397
317
+ 1912,126400,5.4703
318
+ 1917,126800,13.7367
319
+ 1923,127200,10.1322
320
+ 1928,127600,12.9031
321
+ 1932,128000,11.6047
322
+ 1941,128400,8.1528
323
+ 1948,128800,10.0696
324
+ 1953,129200,12.8557
325
+ 1964,129600,6.1629
326
+ 1974,130000,4.7264
327
+ 1983,130400,5.0263
328
+ 1989,130800,13.0645
329
+ 2000,131200,4.9296
330
+ 2006,131600,9.139
331
+ 2016,132000,6.4189
332
+ 2025,132400,4.758
333
+ 2033,132800,7.2135
334
+ 2038,133200,8.544
335
+ 2045,133600,9.7969
336
+ 2051,134000,7.3074
337
+ 2063,134400,4.5675
338
+ 2072,134800,4.937
339
+ 2080,135200,6.392
340
+ 2088,135600,4.6451
341
+ 2093,136000,9.6252
342
+ 2100,136400,5.984
343
+ 2106,136800,7.3098
344
+ 2114,137200,6.8205
345
+ 2122,137600,5.1513
346
+ 2130,138000,6.8115
347
+ 2140,138400,4.8287
348
+ 2149,138800,3.6011
349
+ 2157,139200,5.5075
350
+ 2163,139600,9.1481
351
+ 2171,140000,6.788
352
+ 2178,140400,7.9736
353
+ 2186,140800,8.5882
354
+ 2191,141200,9.2546
355
+ 2196,141600,15.4158
356
+ 2206,142000,7.1935
357
+ 2211,142400,13.2408
358
+ 2223,142800,4.9066
359
+ 2235,143200,4.5763
360
+ 2241,143600,10.3808
361
+ 2246,144000,10.8564
362
+ 2254,144400,7.7899
363
+ 2261,144800,8.8404
364
+ 2268,145200,8.8255
365
+ 2276,145600,7.4552
366
+ 2282,146000,11.216
367
+ 2289,146400,10.299
368
+ 2295,146800,9.2089
369
+ 2302,147200,8.5813
370
+ 2311,147600,5.5125
371
+ 2318,148000,7.7001
372
+ 2323,148400,11.1793
373
+ 2333,148800,3.7442
374
+ 2340,149200,10.413
375
+ 2345,149600,11.7132
376
+ 2350,150000,9.5357
377
+ 2357,150400,8.7914
378
+ 2363,150800,8.3319
379
+ 2370,151200,7.1484
380
+ 2376,151600,5.6768
381
+ 2385,152000,4.1424
382
+ 2390,152400,6.528
383
+ 2394,152800,7.7268
384
+ 2399,153200,8.4871
385
+ 2406,153600,6.8809
386
+ 2411,154000,8.3506
387
+ 2415,154400,11.0307
388
+ 2419,154800,10.397
389
+ 2427,155200,5.1522
390
+ 2434,155600,5.8036
391
+ 2439,156000,10.0986
392
+ 2445,156400,7.7494
393
+ 2452,156800,8.7844
394
+ 2458,157200,9.1953
395
+ 2464,157600,9.7125
396
+ 2469,158000,14.3923
397
+ 2475,158400,10.5696
398
+ 2479,158800,19.5742
399
+ 2488,159200,6.9904
400
+ 2494,159600,12.7756
401
+ 2499,160000,12.9402
402
+ 2506,160400,11.8509
403
+ 2510,160800,14.1441
404
+ 2522,161200,5.8916
405
+ 2530,161600,8.0032
406
+ 2536,162000,11.8227
407
+ 2540,162400,14.6518
408
+ 2548,162800,9.0002
409
+ 2555,163200,5.4754
410
+ 2560,163600,15.7747
411
+ 2568,164000,9.1886
412
+ 2572,164400,18.4005
413
+ 2580,164800,6.8589
414
+ 2586,165200,13.1998
415
+ 2591,165600,12.0732
416
+ 2599,166000,7.623
417
+ 2608,166400,6.2975
418
+ 2614,166800,10.325
419
+ 2622,167200,7.3189
420
+ 2628,167600,11.0537
421
+ 2636,168000,8.6657
422
+ 2642,168400,13.2533
423
+ 2650,168800,5.9973
424
+ 2660,169200,7.9659
425
+ 2665,169600,14.8961
426
+ 2673,170000,9.1592
427
+ 2680,170400,12.4329
428
+ 2687,170800,12.6719
429
+ 2694,171200,11.0606
430
+ 2702,171600,10.5086
431
+ 2709,172000,10.1179
432
+ 2716,172400,9.1879
433
+ 2723,172800,11.2291
434
+ 2728,173200,12.3325
435
+ 2734,173600,12.1718
436
+ 2741,174000,10.1937
437
+ 2748,174400,9.3497
438
+ 2753,174800,10.5969
439
+ 2761,175200,10.1798
440
+ 2767,175600,10.9461
441
+ 2772,176000,13.9387
442
+ 2777,176400,14.2562
443
+ 2783,176800,8.5326
444
+ 2789,177200,9.8383
445
+ 2794,177600,12.2766
446
+ 2800,178000,14.2094
447
+ 2804,178400,15.156
448
+ 2811,178800,6.4663
449
+ 2816,179200,15.0295
450
+ 2823,179600,10.1727
451
+ 2828,180000,11.4667
452
+ 2834,180400,11.0373
453
+ 2840,180800,11.0221
454
+ 2844,181200,18.4976
455
+ 2850,181600,9.7416
456
+ 2857,182000,8.77
457
+ 2863,182400,7.9617
458
+ 2867,182800,21.1088
459
+ 2872,183200,15.0862
460
+ 2880,183600,9.2334
461
+ 2885,184000,15.0575
462
+ 2891,184400,8.7998
463
+ 2898,184800,9.4749
464
+ 2903,185200,15.2583
465
+ 2908,185600,15.9073
466
+ 2913,186000,18.1103
467
+ 2920,186400,10.0962
468
+ 2925,186800,14.1606
469
+ 2931,187200,13.2483
470
+ 2936,187600,9.6115
471
+ 2942,188000,10.5395
472
+ 2948,188400,13.0603
473
+ 2952,188800,15.9296
474
+ 2958,189200,11.0247
475
+ 2964,189600,13.2235
476
+ 2973,190000,6.3575
477
+ 2978,190400,11.5839
478
+ 2983,190800,13.9557
479
+ 2989,191200,9.2667
480
+ 2995,191600,10.0113
481
+ 3000,192000,10.4833
482
+ 3006,192400,11.3706
483
+ 3012,192800,10.4763
484
+ 3018,193200,10.3391
485
+ 3022,193600,16.8413
486
+ 3030,194000,9.8189
487
+ 3042,194400,4.0255
488
+ 3049,194800,8.3469
489
+ 3058,195200,6.7786
490
+ 3066,195600,9.455
491
+ 3072,196000,9.361
492
+ 3078,196400,11.942
493
+ 3086,196800,8.1976
494
+ 3093,197200,8.8319
495
+ 3098,197600,10.2572
496
+ 3105,198000,10.4746
497
+ 3111,198400,9.7314
498
+ 3117,198800,13.439
499
+ 3121,199200,15.2623
500
+ 3127,199600,15.3384
501
+ 3137,200000,8.1123
code/Lake application/logs/results_2/PPO_frozen_lake_log_2.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 19,400,1.6097
3
+ 38,800,1.6416
4
+ 61,1200,1.4003
5
+ 73,1600,2.5901
6
+ 81,2000,3.7322
7
+ 90,2400,3.1764
8
+ 95,2800,6.9871
9
+ 102,3200,4.2802
10
+ 109,3600,4.7436
11
+ 116,4000,4.2818
12
+ 120,4400,7.5307
13
+ 124,4800,6.3049
14
+ 132,5200,4.3085
15
+ 137,5600,6.2239
16
+ 141,6000,7.0954
17
+ 145,6400,6.7039
18
+ 150,6800,6.2469
19
+ 155,7200,6.36
20
+ 160,7600,6.1549
21
+ 164,8000,6.9545
22
+ 169,8400,7.0148
23
+ 175,8800,4.7251
24
+ 179,9200,7.9361
25
+ 184,9600,6.5351
26
+ 189,10000,6.2774
27
+ 195,10400,5.7017
28
+ 199,10800,7.825
29
+ 203,11200,6.8724
30
+ 207,11600,7.7336
31
+ 211,12000,7.5364
32
+ 215,12400,7.8172
33
+ 219,12800,7.9367
34
+ 223,13200,7.7311
35
+ 228,13600,7.2277
36
+ 232,14000,7.0049
37
+ 238,14400,5.1928
38
+ 242,14800,7.6765
39
+ 246,15200,7.4842
40
+ 250,15600,7.5239
41
+ 254,16000,7.1122
42
+ 260,16400,5.5361
43
+ 265,16800,7.1787
44
+ 272,17200,4.4245
45
+ 276,17600,6.2406
46
+ 280,18000,7.7256
47
+ 286,18400,5.6842
48
+ 291,18800,6.646
49
+ 297,19200,5.5573
50
+ 302,19600,4.9828
51
+ 308,20000,5.9812
52
+ 313,20400,6.4346
53
+ 320,20800,4.4117
54
+ 324,21200,6.6082
55
+ 330,21600,6.2485
56
+ 334,22000,7.8137
57
+ 339,22400,5.5175
58
+ 343,22800,7.8811
59
+ 347,23200,7.1971
60
+ 353,23600,5.196
61
+ 358,24000,6.0231
62
+ 364,24400,6.6306
63
+ 369,24800,5.7511
64
+ 374,25200,6.1309
65
+ 378,25600,7.6626
66
+ 382,26000,6.8129
67
+ 387,26400,6.4101
68
+ 391,26800,8.1621
69
+ 395,27200,7.6024
70
+ 399,27600,7.6139
71
+ 404,28000,7.434
72
+ 409,28400,7.5108
73
+ 413,28800,10.0048
74
+ 417,29200,8.9736
75
+ 423,29600,5.5721
76
+ 428,30000,5.6607
77
+ 433,30400,7.3198
78
+ 437,30800,7.5887
79
+ 441,31200,7.1714
80
+ 446,31600,5.5443
81
+ 452,32000,6.0095
82
+ 456,32400,6.5919
83
+ 461,32800,6.9332
84
+ 465,33200,5.9742
85
+ 472,33600,4.6549
86
+ 476,34000,6.7065
87
+ 481,34400,6.6351
88
+ 485,34800,7.3781
89
+ 491,35200,5.3901
90
+ 495,35600,7.481
91
+ 499,36000,7.3904
92
+ 504,36400,5.5203
93
+ 508,36800,6.6256
94
+ 513,37200,6.2715
95
+ 519,37600,5.4467
96
+ 523,38000,7.5914
97
+ 528,38400,5.8271
98
+ 535,38800,4.7497
99
+ 539,39200,6.705
100
+ 543,39600,7.2193
101
+ 547,40000,7.6591
102
+ 553,40400,5.4713
103
+ 559,40800,5.6278
104
+ 563,41200,7.0377
105
+ 567,41600,6.5975
106
+ 572,42000,6.4589
107
+ 577,42400,6.4962
108
+ 581,42800,7.5891
109
+ 585,43200,7.5598
110
+ 593,43600,3.9839
111
+ 597,44000,6.7188
112
+ 601,44400,6.9495
113
+ 606,44800,7.4883
114
+ 613,45200,3.7567
115
+ 617,45600,7.7218
116
+ 622,46000,6.69
117
+ 626,46400,7.9262
118
+ 630,46800,6.3034
119
+ 635,47200,6.0545
120
+ 639,47600,7.4195
121
+ 644,48000,7.13
122
+ 648,48400,7.0398
123
+ 652,48800,7.6392
124
+ 660,49200,3.7038
125
+ 667,49600,4.3578
126
+ 672,50000,5.9194
127
+ 678,50400,4.8665
128
+ 683,50800,6.3008
129
+ 691,51200,4.5975
130
+ 697,51600,5.1888
131
+ 702,52000,5.7734
132
+ 707,52400,7.4679
133
+ 715,52800,3.3284
134
+ 721,53200,6.0641
135
+ 727,53600,6.0441
136
+ 731,54000,7.6702
137
+ 736,54400,7.4219
138
+ 740,54800,6.222
139
+ 746,55200,6.4839
140
+ 751,55600,5.6146
141
+ 755,56000,7.5972
142
+ 761,56400,5.612
143
+ 765,56800,7.8148
144
+ 770,57200,5.7253
145
+ 774,57600,7.9334
146
+ 778,58000,8.3276
147
+ 783,58400,6.8913
148
+ 787,58800,7.2828
149
+ 793,59200,6.9596
150
+ 797,59600,8.2329
151
+ 803,60000,6.4227
152
+ 808,60400,6.7123
153
+ 812,60800,7.9976
154
+ 819,61200,5.5859
155
+ 826,61600,4.4553
156
+ 832,62000,6.7451
157
+ 837,62400,6.241
158
+ 843,62800,6.4673
159
+ 848,63200,6.9543
160
+ 855,63600,5.0913
161
+ 861,64000,7.2159
162
+ 865,64400,7.9432
163
+ 870,64800,7.8044
164
+ 875,65200,7.2243
165
+ 880,65600,7.7488
166
+ 886,66000,6.3843
167
+ 890,66400,9.9507
168
+ 894,66800,9.063
169
+ 899,67200,7.5112
170
+ 903,67600,7.6428
171
+ 908,68000,8.2787
172
+ 912,68400,8.5889
173
+ 918,68800,5.723
174
+ 922,69200,8.2975
175
+ 926,69600,7.3824
176
+ 931,70000,8.2068
177
+ 937,70400,7.7218
178
+ 941,70800,8.7229
179
+ 945,71200,10.2961
180
+ 950,71600,7.6121
181
+ 955,72000,7.8974
182
+ 960,72400,7.8978
183
+ 967,72800,5.6801
184
+ 979,73200,2.1174
185
+ 990,73600,3.7935
186
+ 995,74000,10.1194
187
+ 1000,74400,6.8977
188
+ 1006,74800,7.6098
189
+ 1011,75200,7.2811
190
+ 1017,75600,7.2474
191
+ 1023,76000,8.5673
192
+ 1029,76400,6.6197
193
+ 1035,76800,8.3262
194
+ 1041,77200,7.9771
195
+ 1047,77600,7.1542
196
+ 1051,78000,11.2335
197
+ 1056,78400,8.8322
198
+ 1062,78800,7.5779
199
+ 1066,79200,13.4042
200
+ 1071,79600,10.4175
201
+ 1076,80000,13.3333
202
+ 1080,80400,12.734
203
+ 1086,80800,11.1026
204
+ 1090,81200,14.7759
205
+ 1096,81600,8.6412
206
+ 1104,82000,6.3892
207
+ 1110,82400,9.8357
208
+ 1118,82800,7.1741
209
+ 1125,83200,7.8912
210
+ 1131,83600,8.455
211
+ 1135,84000,14.1645
212
+ 1140,84400,9.4645
213
+ 1146,84800,10.1664
214
+ 1153,85200,5.8022
215
+ 1160,85600,6.215
216
+ 1168,86000,4.1062
217
+ 1175,86400,6.1067
218
+ 1180,86800,6.3874
219
+ 1187,87200,6.3452
220
+ 1192,87600,6.9666
221
+ 1198,88000,5.1382
222
+ 1203,88400,6.1001
223
+ 1212,88800,4.8099
224
+ 1218,89200,7.4769
225
+ 1226,89600,7.5724
226
+ 1231,90000,10.0412
227
+ 1238,90400,8.6996
228
+ 1242,90800,13.2195
229
+ 1247,91200,11.4526
230
+ 1252,91600,12.0908
231
+ 1260,92000,6.7688
232
+ 1265,92400,10.449
233
+ 1270,92800,10.2558
234
+ 1277,93200,5.5992
235
+ 1284,93600,9.3672
236
+ 1292,94000,7.979
237
+ 1302,94400,6.4251
238
+ 1308,94800,8.6478
239
+ 1314,95200,12.1168
240
+ 1322,95600,6.8864
241
+ 1333,96000,5.5147
242
+ 1339,96400,9.0327
243
+ 1347,96800,6.0256
244
+ 1356,97200,4.7894
245
+ 1365,97600,5.5282
246
+ 1371,98000,7.7578
247
+ 1380,98400,8.3785
248
+ 1389,98800,5.239
249
+ 1395,99200,8.3681
250
+ 1400,99600,9.7949
251
+ 1406,100000,9.5216
252
+ 1414,100400,5.3299
253
+ 1420,100800,7.8871
254
+ 1428,101200,6.0441
255
+ 1434,101600,6.6909
256
+ 1444,102000,5.0341
257
+ 1450,102400,7.7649
258
+ 1461,102800,4.1021
259
+ 1472,103200,2.9832
260
+ 1482,103600,4.4576
261
+ 1489,104000,7.3442
262
+ 1497,104400,5.6498
263
+ 1505,104800,6.4131
264
+ 1511,105200,10.5803
265
+ 1516,105600,11.3073
266
+ 1520,106000,13.2675
267
+ 1527,106400,10.6688
268
+ 1535,106800,5.8265
269
+ 1540,107200,10.7056
270
+ 1545,107600,7.6742
271
+ 1553,108000,5.7264
272
+ 1560,108400,6.6821
273
+ 1567,108800,5.4216
274
+ 1573,109200,8.4766
275
+ 1578,109600,8.6739
276
+ 1582,110000,11.4666
277
+ 1586,110400,9.0832
278
+ 1594,110800,6.2276
279
+ 1599,111200,8.6866
280
+ 1606,111600,6.2615
281
+ 1614,112000,5.6982
282
+ 1621,112400,5.8051
283
+ 1626,112800,7.93
284
+ 1631,113200,10.595
285
+ 1636,113600,7.6407
286
+ 1640,114000,11.1847
287
+ 1647,114400,5.6078
288
+ 1651,114800,9.1446
289
+ 1658,115200,6.8432
290
+ 1662,115600,12.9911
291
+ 1671,116000,4.613
292
+ 1676,116400,11.0305
293
+ 1681,116800,8.694
294
+ 1688,117200,6.11
295
+ 1698,117600,6.4954
296
+ 1703,118000,9.7062
297
+ 1712,118400,5.8668
298
+ 1717,118800,7.5547
299
+ 1724,119200,8.3224
300
+ 1729,119600,8.329
301
+ 1737,120000,7.1094
302
+ 1742,120400,9.2663
303
+ 1747,120800,9.6127
304
+ 1757,121200,5.6769
305
+ 1762,121600,8.5658
306
+ 1770,122000,7.4468
307
+ 1776,122400,8.0238
308
+ 1782,122800,8.0927
309
+ 1790,123200,5.6296
310
+ 1798,123600,6.9246
311
+ 1806,124000,6.5561
312
+ 1813,124400,7.4058
313
+ 1821,124800,6.9491
314
+ 1833,125200,4.4584
315
+ 1839,125600,8.2732
316
+ 1848,126000,6.3892
317
+ 1853,126400,10.4589
318
+ 1858,126800,12.1972
319
+ 1867,127200,5.6719
320
+ 1874,127600,7.9428
321
+ 1880,128000,7.5333
322
+ 1885,128400,12.1847
323
+ 1889,128800,11.591
324
+ 1895,129200,9.8666
325
+ 1902,129600,9.6165
326
+ 1910,130000,8.469
327
+ 1917,130400,5.6391
328
+ 1924,130800,8.5008
329
+ 1931,131200,9.7077
330
+ 1935,131600,13.5229
331
+ 1939,132000,16.4664
332
+ 1944,132400,13.0046
333
+ 1949,132800,8.4371
334
+ 1955,133200,8.8647
335
+ 1959,133600,14.3521
336
+ 1963,134000,11.7871
337
+ 1968,134400,10.4688
338
+ 1974,134800,9.3431
339
+ 1979,135200,8.226
340
+ 1984,135600,10.8513
341
+ 1988,136000,10.6682
342
+ 1998,136400,4.4273
343
+ 2003,136800,12.778
344
+ 2011,137200,8.0067
345
+ 2017,137600,9.4886
346
+ 2027,138000,5.6532
347
+ 2033,138400,7.9827
348
+ 2039,138800,5.9282
349
+ 2045,139200,9.6567
350
+ 2053,139600,7.7935
351
+ 2059,140000,5.9489
352
+ 2064,140400,6.9939
353
+ 2070,140800,7.3466
354
+ 2075,141200,6.8939
355
+ 2079,141600,9.1796
356
+ 2085,142000,9.0719
357
+ 2089,142400,13.9325
358
+ 2094,142800,11.0623
359
+ 2099,143200,6.7284
360
+ 2107,143600,5.6728
361
+ 2115,144000,6.321
362
+ 2119,144400,10.3033
363
+ 2124,144800,8.3466
364
+ 2130,145200,6.2657
365
+ 2136,145600,5.7428
366
+ 2142,146000,7.4074
367
+ 2147,146400,7.8209
368
+ 2153,146800,7.3415
369
+ 2161,147200,5.2917
370
+ 2166,147600,12.5109
371
+ 2174,148000,5.0395
372
+ 2179,148400,10.3555
373
+ 2187,148800,5.349
374
+ 2192,149200,10.0253
375
+ 2198,149600,7.0379
376
+ 2205,150000,7.5125
377
+ 2211,150400,7.8038
378
+ 2217,150800,7.8167
379
+ 2221,151200,11.7039
380
+ 2229,151600,5.489
381
+ 2236,152000,6.0868
382
+ 2243,152400,6.2941
383
+ 2249,152800,6.6384
384
+ 2254,153200,8.3705
385
+ 2259,153600,8.5642
386
+ 2265,154000,6.8584
387
+ 2272,154400,7.3834
388
+ 2278,154800,8.2766
389
+ 2286,155200,5.1656
390
+ 2290,155600,12.9118
391
+ 2294,156000,11.8071
392
+ 2299,156400,8.9169
393
+ 2303,156800,8.9791
394
+ 2308,157200,9.3741
395
+ 2312,157600,11.1361
396
+ 2316,158000,11.7926
397
+ 2320,158400,11.7151
398
+ 2326,158800,8.0207
399
+ 2333,159200,6.0691
400
+ 2338,159600,9.577
401
+ 2344,160000,6.4232
402
+ 2350,160400,8.8049
403
+ 2354,160800,9.1235
404
+ 2359,161200,8.3861
405
+ 2365,161600,7.0742
406
+ 2369,162000,7.6221
407
+ 2373,162400,7.9897
408
+ 2378,162800,5.8857
409
+ 2384,163200,7.0723
410
+ 2389,163600,6.654
411
+ 2395,164000,7.1041
412
+ 2399,164400,8.4728
413
+ 2407,164800,4.6772
414
+ 2411,165200,7.5167
415
+ 2417,165600,7.8597
416
+ 2422,166000,9.3692
417
+ 2427,166400,8.0704
418
+ 2431,166800,10.7773
419
+ 2435,167200,9.905
420
+ 2440,167600,8.5513
421
+ 2444,168000,8.9629
422
+ 2450,168400,7.6352
423
+ 2457,168800,6.9678
424
+ 2463,169200,7.899
425
+ 2469,169600,7.6206
426
+ 2474,170000,8.0358
427
+ 2481,170400,7.7953
428
+ 2486,170800,10.2047
429
+ 2492,171200,12.0048
430
+ 2500,171600,5.3765
431
+ 2509,172000,5.1863
432
+ 2513,172400,12.2984
433
+ 2517,172800,11.7797
434
+ 2524,173200,7.875
435
+ 2534,173600,3.9569
436
+ 2539,174000,11.8781
437
+ 2544,174400,10.3608
438
+ 2550,174800,8.7275
439
+ 2556,175200,8.4987
440
+ 2561,175600,10.7181
441
+ 2566,176000,8.2619
442
+ 2571,176400,8.8454
443
+ 2575,176800,8.7559
444
+ 2580,177200,8.4039
445
+ 2584,177600,11.9876
446
+ 2592,178000,6.4634
447
+ 2597,178400,7.8467
448
+ 2602,178800,6.8881
449
+ 2607,179200,7.1522
450
+ 2612,179600,7.6053
451
+ 2620,180000,5.2739
452
+ 2628,180400,4.7562
453
+ 2636,180800,3.7301
454
+ 2641,181200,10.6831
455
+ 2645,181600,8.9943
456
+ 2652,182000,5.277
457
+ 2661,182400,4.275
458
+ 2671,182800,3.453
459
+ 2676,183200,8.5581
460
+ 2681,183600,7.2929
461
+ 2686,184000,12.0382
462
+ 2690,184400,12.133
463
+ 2694,184800,12.2167
464
+ 2701,185200,5.3062
465
+ 2706,185600,10.3605
466
+ 2714,186000,5.9351
467
+ 2718,186400,9.3307
468
+ 2723,186800,9.5625
469
+ 2730,187200,7.1449
470
+ 2735,187600,7.7182
471
+ 2741,188000,6.4756
472
+ 2745,188400,8.736
473
+ 2751,188800,8.123
474
+ 2757,189200,8.8964
475
+ 2761,189600,10.7888
476
+ 2765,190000,12.0779
477
+ 2769,190400,10.5991
478
+ 2775,190800,8.0868
479
+ 2780,191200,8.917
480
+ 2785,191600,9.9389
481
+ 2789,192000,11.5622
482
+ 2795,192400,8.386
483
+ 2800,192800,8.3721
484
+ 2805,193200,8.8166
485
+ 2811,193600,8.1763
486
+ 2817,194000,8.3255
487
+ 2823,194400,8.8563
488
+ 2827,194800,12.6773
489
+ 2833,195200,7.9132
490
+ 2837,195600,10.4681
491
+ 2844,196000,8.2131
492
+ 2849,196400,9.2795
493
+ 2855,196800,7.094
494
+ 2860,197200,10.5194
495
+ 2864,197600,12.9269
496
+ 2870,198000,7.9327
497
+ 2876,198400,7.223
498
+ 2881,198800,10.8269
499
+ 2885,199200,12.3889
500
+ 2892,199600,6.5769
501
+ 2897,200000,11.076
code/Lake application/logs/results_2/PPO_frozen_lake_log_3.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 15,400,1.8261
3
+ 32,800,1.7685
4
+ 45,1200,2.4142
5
+ 51,1600,4.6704
6
+ 61,2000,3.1899
7
+ 67,2400,5.3735
8
+ 75,2800,4.1154
9
+ 82,3200,3.7725
10
+ 92,3600,3.7704
11
+ 103,4000,2.6905
12
+ 110,4400,4.0276
13
+ 117,4800,5.8064
14
+ 124,5200,4.4713
15
+ 132,5600,4.0093
16
+ 137,6000,5.9495
17
+ 142,6400,5.8014
18
+ 148,6800,5.8551
19
+ 155,7200,4.6169
20
+ 162,7600,4.2494
21
+ 170,8000,3.9722
22
+ 177,8400,3.9192
23
+ 186,8800,3.8894
24
+ 192,9200,5.2858
25
+ 196,9600,6.1068
26
+ 201,10000,7.4978
27
+ 206,10400,6.5813
28
+ 210,10800,7.0333
29
+ 216,11200,5.7853
30
+ 220,11600,6.9548
31
+ 226,12000,6.9295
32
+ 231,12400,8.6606
33
+ 235,12800,8.1602
34
+ 241,13200,5.5479
35
+ 248,13600,5.2991
36
+ 253,14000,5.5222
37
+ 259,14400,6.6097
38
+ 266,14800,5.7681
39
+ 272,15200,5.1424
40
+ 281,15600,3.9703
41
+ 289,16000,5.446
42
+ 296,16400,5.6469
43
+ 301,16800,8.1023
44
+ 309,17200,5.4118
45
+ 313,17600,10.0884
46
+ 318,18000,7.85
47
+ 324,18400,7.1867
48
+ 334,18800,4.1973
49
+ 341,19200,6.5775
50
+ 346,19600,8.8721
51
+ 353,20000,8.3356
52
+ 358,20400,8.4092
53
+ 364,20800,7.0598
54
+ 369,21200,9.7722
55
+ 375,21600,8.7407
56
+ 381,22000,7.1701
57
+ 387,22400,6.8198
58
+ 391,22800,11.2731
59
+ 399,23200,6.9213
60
+ 404,23600,7.6675
61
+ 408,24000,8.5927
62
+ 412,24400,9.5454
63
+ 416,24800,11.0606
64
+ 421,25200,11.0094
65
+ 426,25600,9.2321
66
+ 431,26000,7.2778
67
+ 436,26400,10.7602
68
+ 440,26800,10.2041
69
+ 444,27200,10.7586
70
+ 448,27600,8.5868
71
+ 452,28000,9.4227
72
+ 458,28400,8.901
73
+ 462,28800,9.059
74
+ 467,29200,9.2612
75
+ 472,29600,10.802
76
+ 478,30000,7.4041
77
+ 483,30400,8.8905
78
+ 489,30800,6.3011
79
+ 493,31200,12.0364
80
+ 497,31600,11.5981
81
+ 501,32000,11.5024
82
+ 505,32400,11.9104
83
+ 509,32800,11.9212
84
+ 513,33200,12.0185
85
+ 518,33600,9.4035
86
+ 522,34000,11.5176
87
+ 526,34400,11.0239
88
+ 534,34800,6.1402
89
+ 538,35200,9.7348
90
+ 546,35600,6.35
91
+ 551,36000,7.8883
92
+ 558,36400,7.7455
93
+ 563,36800,7.1019
94
+ 571,37200,6.6505
95
+ 576,37600,7.5348
96
+ 581,38000,12.1861
97
+ 586,38400,9.1601
98
+ 591,38800,8.1292
99
+ 596,39200,7.3226
100
+ 602,39600,9.3
101
+ 608,40000,10.1455
102
+ 614,40400,7.404
103
+ 620,40800,9.5543
104
+ 627,41200,8.0328
105
+ 636,41600,5.051
106
+ 648,42000,4.3144
107
+ 654,42400,8.7103
108
+ 661,42800,8.5619
109
+ 666,43200,9.0912
110
+ 671,43600,12.1562
111
+ 679,44000,6.8929
112
+ 683,44400,12.4673
113
+ 690,44800,7.4547
114
+ 700,45200,6.1627
115
+ 708,45600,5.2344
116
+ 712,46000,14.522
117
+ 718,46400,9.7264
118
+ 724,46800,9.4083
119
+ 731,47200,7.3673
120
+ 735,47600,10.918
121
+ 741,48000,9.7135
122
+ 746,48400,11.6226
123
+ 753,48800,6.5335
124
+ 760,49200,6.1922
125
+ 765,49600,11.59
126
+ 772,50000,7.6406
127
+ 779,50400,7.3931
128
+ 785,50800,8.8649
129
+ 790,51200,13.0236
130
+ 796,51600,9.1355
131
+ 802,52000,9.2798
132
+ 812,52400,4.6073
133
+ 818,52800,8.5625
134
+ 823,53200,8.0732
135
+ 829,53600,8.2494
136
+ 837,54000,5.0721
137
+ 849,54400,3.926
138
+ 857,54800,5.9843
139
+ 866,55200,5.4496
140
+ 872,55600,9.6436
141
+ 877,56000,9.8259
142
+ 882,56400,12.0831
143
+ 886,56800,11.8707
144
+ 892,57200,9.3723
145
+ 897,57600,8.75
146
+ 902,58000,9.1673
147
+ 908,58400,8.2213
148
+ 919,58800,3.6353
149
+ 929,59200,3.9628
150
+ 935,59600,6.8984
151
+ 942,60000,6.928
152
+ 948,60400,8.007
153
+ 954,60800,7.1696
154
+ 962,61200,6.8068
155
+ 970,61600,5.7813
156
+ 979,62000,6.7075
157
+ 990,62400,4.5979
158
+ 995,62800,11.0131
159
+ 1001,63200,9.9881
160
+ 1007,63600,8.771
161
+ 1013,64000,6.4708
162
+ 1020,64400,8.4602
163
+ 1024,64800,12.5658
164
+ 1029,65200,12.6734
165
+ 1033,65600,13.9195
166
+ 1037,66000,10.7454
167
+ 1043,66400,10.9443
168
+ 1048,66800,10.9429
169
+ 1053,67200,9.4126
170
+ 1059,67600,8.414
171
+ 1066,68000,6.8977
172
+ 1071,68400,8.4342
173
+ 1077,68800,6.9781
174
+ 1081,69200,9.3134
175
+ 1087,69600,8.1705
176
+ 1091,70000,8.8618
177
+ 1096,70400,10.7669
178
+ 1100,70800,10.662
179
+ 1104,71200,9.404
180
+ 1108,71600,10.9212
181
+ 1114,72000,7.7906
182
+ 1120,72400,6.9977
183
+ 1124,72800,10.0251
184
+ 1130,73200,8.4128
185
+ 1134,73600,9.9691
186
+ 1138,74000,11.4181
187
+ 1143,74400,8.6228
188
+ 1152,74800,5.3614
189
+ 1157,75200,8.1655
190
+ 1164,75600,7.1774
191
+ 1171,76000,5.9159
192
+ 1180,76400,4.0023
193
+ 1189,76800,4.7476
194
+ 1197,77200,5.5766
195
+ 1202,77600,8.0878
196
+ 1209,78000,6.6897
197
+ 1213,78400,13.7633
198
+ 1221,78800,6.482
199
+ 1230,79200,6.0141
200
+ 1234,79600,12.2026
201
+ 1242,80000,5.6537
202
+ 1251,80400,4.3695
203
+ 1259,80800,7.4921
204
+ 1264,81200,9.8077
205
+ 1269,81600,10.9606
206
+ 1275,82000,9.6273
207
+ 1280,82400,12.195
208
+ 1287,82800,7.4125
209
+ 1292,83200,12.1273
210
+ 1296,83600,13.0822
211
+ 1303,84000,7.0237
212
+ 1308,84400,11.6651
213
+ 1313,84800,9.4606
214
+ 1318,85200,12.5532
215
+ 1324,85600,9.9701
216
+ 1329,86000,11.7337
217
+ 1333,86400,14.7119
218
+ 1339,86800,9.0294
219
+ 1345,87200,7.5184
220
+ 1353,87600,7.3597
221
+ 1359,88000,10.1495
222
+ 1365,88400,8.7107
223
+ 1375,88800,4.0411
224
+ 1383,89200,5.6476
225
+ 1389,89600,6.5996
226
+ 1397,90000,8.0321
227
+ 1404,90400,8.9973
228
+ 1410,90800,8.9065
229
+ 1417,91200,5.712
230
+ 1427,91600,4.6027
231
+ 1436,92000,5.296
232
+ 1441,92400,7.6209
233
+ 1449,92800,7.4688
234
+ 1456,93200,5.0893
235
+ 1464,93600,6.9208
236
+ 1472,94000,7.4646
237
+ 1479,94400,8.2095
238
+ 1484,94800,9.7461
239
+ 1490,95200,8.781
240
+ 1494,95600,8.3202
241
+ 1499,96000,9.0412
242
+ 1506,96400,6.9789
243
+ 1511,96800,7.9199
244
+ 1517,97200,10.6694
245
+ 1524,97600,6.481
246
+ 1532,98000,6.7254
247
+ 1541,98400,6.2726
248
+ 1547,98800,7.5851
249
+ 1555,99200,5.3696
250
+ 1560,99600,10.4877
251
+ 1567,100000,8.8012
252
+ 1577,100400,5.5168
253
+ 1584,100800,10.155
254
+ 1588,101200,8.5114
255
+ 1595,101600,6.3359
256
+ 1602,102000,6.6452
257
+ 1608,102400,6.8052
258
+ 1612,102800,8.0109
259
+ 1617,103200,5.8693
260
+ 1621,103600,8.5857
261
+ 1626,104000,9.6799
262
+ 1634,104400,5.6426
263
+ 1639,104800,6.5151
264
+ 1644,105200,7.8849
265
+ 1648,105600,8.9338
266
+ 1654,106000,5.6678
267
+ 1659,106400,6.3244
268
+ 1665,106800,5.6218
269
+ 1669,107200,9.5578
270
+ 1676,107600,5.3338
271
+ 1686,108000,4.6969
272
+ 1693,108400,5.4701
273
+ 1698,108800,10.4455
274
+ 1704,109200,8.6891
275
+ 1710,109600,10.0138
276
+ 1717,110000,7.5448
277
+ 1726,110400,7.8171
278
+ 1731,110800,10.4446
279
+ 1739,111200,8.1385
280
+ 1748,111600,6.0488
281
+ 1756,112000,7.1346
282
+ 1761,112400,9.6157
283
+ 1767,112800,9.1395
284
+ 1774,113200,8.8882
285
+ 1782,113600,8.0369
286
+ 1789,114000,9.7018
287
+ 1795,114400,8.5519
288
+ 1802,114800,12.4735
289
+ 1810,115200,6.0752
290
+ 1815,115600,11.7469
291
+ 1824,116000,5.4505
292
+ 1829,116400,9.3351
293
+ 1835,116800,10.7987
294
+ 1840,117200,15.1342
295
+ 1846,117600,14.5398
296
+ 1853,118000,10.6334
297
+ 1861,118400,11.3101
298
+ 1866,118800,14.8907
299
+ 1873,119200,9.6076
300
+ 1882,119600,7.7126
301
+ 1893,120000,4.7907
302
+ 1901,120400,6.0066
303
+ 1906,120800,15.4955
304
+ 1911,121200,13.3978
305
+ 1919,121600,9.6642
306
+ 1924,122000,15.7393
307
+ 1928,122400,18.9361
308
+ 1932,122800,19.4331
309
+ 1940,123200,8.6073
310
+ 1947,123600,10.161
311
+ 1954,124000,8.8061
312
+ 1959,124400,13.5384
313
+ 1966,124800,9.9289
314
+ 1981,125200,4.1506
315
+ 1991,125600,8.5539
316
+ 1995,126000,20.6911
317
+ 2002,126400,11.9305
318
+ 2007,126800,16.2045
319
+ 2016,127200,6.5353
320
+ 2021,127600,17.3839
321
+ 2027,128000,13.1571
322
+ 2035,128400,11.3393
323
+ 2039,128800,22.3028
324
+ 2047,129200,12.8433
325
+ 2055,129600,12.7583
326
+ 2066,130000,6.9837
327
+ 2071,130400,18.0303
328
+ 2076,130800,18.2896
329
+ 2084,131200,8.7988
330
+ 2093,131600,8.7713
331
+ 2098,132000,11.0877
332
+ 2109,132400,7.8183
333
+ 2115,132800,12.4679
334
+ 2124,133200,10.5047
335
+ 2137,133600,6.17
336
+ 2142,134000,20.3565
337
+ 2148,134400,10.4538
338
+ 2154,134800,12.7331
339
+ 2161,135200,12.5367
340
+ 2171,135600,5.7754
341
+ 2178,136000,13.3435
342
+ 2182,136400,13.3376
343
+ 2195,136800,5.0278
344
+ 2203,137200,8.039
345
+ 2215,137600,5.0622
346
+ 2225,138000,7.6281
347
+ 2232,138400,12.4199
348
+ 2243,138800,5.7324
349
+ 2249,139200,14.5818
350
+ 2255,139600,14.0929
351
+ 2262,140000,13.6329
352
+ 2267,140400,18.3515
353
+ 2272,140800,18.0695
354
+ 2280,141200,12.0349
355
+ 2287,141600,13.6652
356
+ 2296,142000,9.2929
357
+ 2305,142400,10.1985
358
+ 2312,142800,12.7522
359
+ 2323,143200,7.2459
360
+ 2331,143600,8.9751
361
+ 2338,144000,11.4881
362
+ 2344,144400,15.2227
363
+ 2351,144800,12.8927
364
+ 2358,145200,10.6543
365
+ 2362,145600,22.496
366
+ 2368,146000,13.9616
367
+ 2373,146400,18.1932
368
+ 2378,146800,16.1787
369
+ 2382,147200,21.2142
370
+ 2386,147600,22.1002
371
+ 2396,148000,8.9528
372
+ 2401,148400,15.8869
373
+ 2408,148800,13.7149
374
+ 2413,149200,13.7033
375
+ 2419,149600,17.2193
376
+ 2425,150000,11.3894
377
+ 2432,150400,13.8544
378
+ 2437,150800,17.5939
379
+ 2444,151200,12.8075
380
+ 2449,151600,12.1515
381
+ 2457,152000,10.4033
382
+ 2465,152400,11.4859
383
+ 2470,152800,14.4762
384
+ 2477,153200,12.3627
385
+ 2483,153600,14.8347
386
+ 2488,154000,18.2382
387
+ 2497,154400,9.2311
388
+ 2501,154800,19.7235
389
+ 2509,155200,13.3697
390
+ 2515,155600,11.9598
391
+ 2525,156000,7.2526
392
+ 2534,156400,9.3025
393
+ 2545,156800,8.9835
394
+ 2551,157200,12.1765
395
+ 2558,157600,14.0303
396
+ 2564,158000,13.4739
397
+ 2573,158400,9.8322
398
+ 2578,158800,19.6338
399
+ 2584,159200,15.7125
400
+ 2588,159600,17.0086
401
+ 2594,160000,14.7127
402
+ 2598,160400,23.1588
403
+ 2607,160800,11.0373
404
+ 2615,161200,9.348
405
+ 2619,161600,21.6514
406
+ 2624,162000,12.9316
407
+ 2631,162400,12.1088
408
+ 2636,162800,20.0918
409
+ 2640,163200,18.6887
410
+ 2644,163600,19.3577
411
+ 2653,164000,8.5057
412
+ 2662,164400,8.0083
413
+ 2668,164800,15.0007
414
+ 2676,165200,8.8861
415
+ 2682,165600,15.3621
416
+ 2689,166000,13.6995
417
+ 2696,166400,10.5381
418
+ 2701,166800,19.4263
419
+ 2708,167200,12.0695
420
+ 2713,167600,11.9025
421
+ 2719,168000,10.0897
422
+ 2725,168400,15.0383
423
+ 2731,168800,14.8992
424
+ 2735,169200,14.9242
425
+ 2739,169600,20.4302
426
+ 2745,170000,15.2987
427
+ 2750,170400,16.7812
428
+ 2754,170800,16.0345
429
+ 2759,171200,16.4285
430
+ 2768,171600,9.1208
431
+ 2780,172000,5.583
432
+ 2787,172400,8.2014
433
+ 2793,172800,10.1961
434
+ 2798,173200,17.1725
435
+ 2806,173600,9.115
436
+ 2814,174000,9.2754
437
+ 2821,174400,10.8946
438
+ 2827,174800,11.5879
439
+ 2834,175200,10.3869
440
+ 2840,175600,13.8918
441
+ 2845,176000,12.5769
442
+ 2854,176400,10.322
443
+ 2863,176800,6.8967
444
+ 2869,177200,17.4846
445
+ 2874,177600,19.6151
446
+ 2881,178000,12.9361
447
+ 2886,178400,18.2368
448
+ 2892,178800,12.8876
449
+ 2898,179200,12.3181
450
+ 2903,179600,17.6907
451
+ 2908,180000,15.7174
452
+ 2915,180400,11.7662
453
+ 2920,180800,17.438
454
+ 2925,181200,14.2649
455
+ 2931,181600,12.5882
456
+ 2936,182000,17.2888
457
+ 2942,182400,15.7864
458
+ 2946,182800,19.7236
459
+ 2952,183200,14.7757
460
+ 2957,183600,13.2554
461
+ 2962,184000,16.9161
462
+ 2966,184400,19.4477
463
+ 2971,184800,14.806
464
+ 2976,185200,14.7174
465
+ 2981,185600,14.6584
466
+ 2985,186000,13.1555
467
+ 2993,186400,8.2998
468
+ 2999,186800,10.4079
469
+ 3004,187200,14.5865
470
+ 3011,187600,9.0036
471
+ 3015,188000,13.7298
472
+ 3022,188400,8.8899
473
+ 3026,188800,15.7034
474
+ 3032,189200,11.7676
475
+ 3036,189600,17.0897
476
+ 3044,190000,10.1182
477
+ 3049,190400,13.9028
478
+ 3054,190800,16.7113
479
+ 3059,191200,16.4022
480
+ 3064,191600,18.3592
481
+ 3069,192000,17.6439
482
+ 3074,192400,15.5535
483
+ 3079,192800,15.6137
484
+ 3085,193200,14.7975
485
+ 3091,193600,11.9988
486
+ 3099,194000,10.8644
487
+ 3106,194400,14.6474
488
+ 3113,194800,12.833
489
+ 3117,195200,22.6677
490
+ 3121,195600,21.1913
491
+ 3126,196000,17.2308
492
+ 3131,196400,14.7737
493
+ 3140,196800,6.5658
494
+ 3147,197200,8.1251
495
+ 3155,197600,8.3248
496
+ 3162,198000,10.0096
497
+ 3168,198400,9.34
498
+ 3175,198800,9.7054
499
+ 3181,199200,9.9748
500
+ 3186,199600,11.6185
501
+ 3191,200000,10.6864
code/Lake application/logs/results_2/PPO_frozen_lake_log_4.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 30,400,0.8897
3
+ 47,800,1.6985
4
+ 70,1200,1.5344
5
+ 81,1600,2.8837
6
+ 93,2000,2.7853
7
+ 99,2400,4.9228
8
+ 105,2800,4.8879
9
+ 109,3200,6.7205
10
+ 116,3600,5.1037
11
+ 121,4000,6.8188
12
+ 126,4400,5.7358
13
+ 130,4800,7.5711
14
+ 136,5200,5.2419
15
+ 141,5600,5.8543
16
+ 147,6000,5.2331
17
+ 152,6400,4.9293
18
+ 161,6800,4.0985
19
+ 166,7200,5.2082
20
+ 172,7600,5.3103
21
+ 177,8000,6.0557
22
+ 181,8400,6.9039
23
+ 188,8800,4.5342
24
+ 192,9200,9.1478
25
+ 199,9600,5.3605
26
+ 206,10000,5.1273
27
+ 211,10400,7.8887
28
+ 216,10800,6.2693
29
+ 220,11200,7.3964
30
+ 224,11600,7.1054
31
+ 230,12000,5.4118
32
+ 234,12400,7.4287
33
+ 238,12800,7.4478
34
+ 245,13200,4.4117
35
+ 249,13600,7.2105
36
+ 253,14000,6.7015
37
+ 257,14400,7.4679
38
+ 262,14800,6.5292
39
+ 267,15200,6.025
40
+ 271,15600,7.6356
41
+ 275,16000,7.6501
42
+ 280,16400,7.0316
43
+ 284,16800,8.6843
44
+ 289,17200,6.7495
45
+ 294,17600,6.023
46
+ 299,18000,6.415
47
+ 305,18400,5.4163
48
+ 311,18800,5.4882
49
+ 316,19200,6.2097
50
+ 320,19600,8.0329
51
+ 324,20000,7.5882
52
+ 329,20400,7.3095
53
+ 333,20800,7.5687
54
+ 339,21200,6.6201
55
+ 343,21600,9.5695
56
+ 349,22000,6.358
57
+ 354,22400,7.729
58
+ 362,22800,3.8919
59
+ 367,23200,6.2263
60
+ 372,23600,8.9296
61
+ 377,24000,8.9107
62
+ 383,24400,6.8109
63
+ 391,24800,4.3733
64
+ 397,25200,7.8942
65
+ 402,25600,8.1473
66
+ 408,26000,6.342
67
+ 414,26400,6.6643
68
+ 419,26800,8.5145
69
+ 423,27200,9.8487
70
+ 427,27600,8.3884
71
+ 432,28000,8.1417
72
+ 438,28400,6.5363
73
+ 442,28800,8.3084
74
+ 447,29200,7.5203
75
+ 451,29600,8.0109
76
+ 455,30000,7.1375
77
+ 459,30400,6.972
78
+ 464,30800,7.2792
79
+ 468,31200,8.3772
80
+ 472,31600,8.6912
81
+ 476,32000,7.6424
82
+ 480,32400,8.1047
83
+ 484,32800,6.6454
84
+ 489,33200,7.4736
85
+ 493,33600,8.1904
86
+ 497,34000,7.1456
87
+ 503,34400,6.2541
88
+ 507,34800,7.7885
89
+ 511,35200,7.3507
90
+ 515,35600,8.0471
91
+ 520,36000,8.5436
92
+ 524,36400,6.8725
93
+ 529,36800,8.4028
94
+ 535,37200,5.2433
95
+ 542,37600,4.7139
96
+ 546,38000,7.3213
97
+ 555,38400,3.8831
98
+ 561,38800,5.6601
99
+ 568,39200,4.7948
100
+ 576,39600,4.6981
101
+ 584,40000,4.3181
102
+ 589,40400,7.5472
103
+ 593,40800,9.3392
104
+ 602,41200,3.9924
105
+ 609,41600,7.1339
106
+ 615,42000,6.7132
107
+ 620,42400,6.7015
108
+ 628,42800,5.4925
109
+ 636,43200,3.7468
110
+ 644,43600,4.3569
111
+ 651,44000,5.8671
112
+ 655,44400,8.3115
113
+ 660,44800,9.1009
114
+ 665,45200,7.2625
115
+ 672,45600,5.378
116
+ 678,46000,5.686
117
+ 684,46400,5.8378
118
+ 688,46800,7.674
119
+ 693,47200,7.7574
120
+ 697,47600,9.4904
121
+ 706,48000,4.0155
122
+ 712,48400,9.193
123
+ 718,48800,6.0672
124
+ 723,49200,8.703
125
+ 729,49600,6.4219
126
+ 737,50000,5.2146
127
+ 742,50400,7.6968
128
+ 747,50800,9.559
129
+ 753,51200,6.7111
130
+ 758,51600,9.7201
131
+ 764,52000,7.5954
132
+ 770,52400,8.0675
133
+ 775,52800,7.1163
134
+ 782,53200,5.3886
135
+ 786,53600,10.9581
136
+ 791,54000,9.5825
137
+ 800,54400,4.9313
138
+ 808,54800,3.2748
139
+ 813,55200,9.4975
140
+ 819,55600,8.5919
141
+ 828,56000,4.0659
142
+ 834,56400,6.4677
143
+ 839,56800,8.6157
144
+ 847,57200,7.6231
145
+ 854,57600,6.1867
146
+ 864,58000,5.138
147
+ 875,58400,4.1107
148
+ 884,58800,4.6541
149
+ 890,59200,8.6775
150
+ 898,59600,4.5193
151
+ 903,60000,10.8015
152
+ 909,60400,7.2792
153
+ 916,60800,6.9898
154
+ 920,61200,9.2429
155
+ 926,61600,7.8279
156
+ 930,62000,9.559
157
+ 938,62400,6.2201
158
+ 942,62800,12.4695
159
+ 949,63200,6.0011
160
+ 955,63600,7.5678
161
+ 960,64000,8.5841
162
+ 965,64400,8.8059
163
+ 969,64800,9.559
164
+ 974,65200,8.137
165
+ 979,65600,6.2258
166
+ 985,66000,6.0418
167
+ 990,66400,7.6972
168
+ 994,66800,10.6031
169
+ 999,67200,6.4527
170
+ 1004,67600,7.6003
171
+ 1009,68000,8.036
172
+ 1014,68400,11.559
173
+ 1018,68800,9.9028
174
+ 1024,69200,8.8209
175
+ 1030,69600,6.8682
176
+ 1034,70000,9.1513
177
+ 1039,70400,8.8808
178
+ 1045,70800,6.2892
179
+ 1052,71200,6.6137
180
+ 1056,71600,9.1258
181
+ 1061,72000,8.2712
182
+ 1069,72400,5.1346
183
+ 1073,72800,9.3301
184
+ 1079,73200,8.6006
185
+ 1083,73600,9.7199
186
+ 1088,74000,8.5393
187
+ 1093,74400,9.4136
188
+ 1098,74800,9.2309
189
+ 1104,75200,9.9483
190
+ 1111,75600,6.868
191
+ 1117,76000,7.3642
192
+ 1123,76400,8.7512
193
+ 1128,76800,7.7363
194
+ 1133,77200,11.2048
195
+ 1138,77600,8.7672
196
+ 1142,78000,11.2022
197
+ 1148,78400,8.6267
198
+ 1156,78800,5.7085
199
+ 1165,79200,5.2502
200
+ 1171,79600,9.1847
201
+ 1181,80000,3.8257
202
+ 1187,80400,9.0944
203
+ 1193,80800,7.8396
204
+ 1204,81200,4.6099
205
+ 1214,81600,4.2423
206
+ 1223,82000,4.2463
207
+ 1231,82400,6.0866
208
+ 1236,82800,9.3631
209
+ 1244,83200,6.0139
210
+ 1250,83600,9.3664
211
+ 1258,84000,5.5445
212
+ 1263,84400,9.2419
213
+ 1270,84800,6.5126
214
+ 1277,85200,5.3284
215
+ 1284,85600,4.7442
216
+ 1289,86000,6.6238
217
+ 1295,86400,5.9787
218
+ 1300,86800,6.273
219
+ 1304,87200,7.7552
220
+ 1310,87600,6.0625
221
+ 1315,88000,6.2442
222
+ 1320,88400,7.2193
223
+ 1326,88800,5.8607
224
+ 1333,89200,5.8177
225
+ 1338,89600,5.6992
226
+ 1344,90000,6.309
227
+ 1350,90400,7.4904
228
+ 1357,90800,5.3341
229
+ 1362,91200,9.1276
230
+ 1370,91600,5.6335
231
+ 1377,92000,5.5404
232
+ 1382,92400,10.4014
233
+ 1387,92800,8.972
234
+ 1393,93200,7.6199
235
+ 1400,93600,7.0028
236
+ 1408,94000,6.7953
237
+ 1417,94400,4.607
238
+ 1425,94800,6.7686
239
+ 1431,95200,6.3672
240
+ 1437,95600,7.3133
241
+ 1442,96000,5.5286
242
+ 1449,96400,8.1326
243
+ 1454,96800,6.6459
244
+ 1459,97200,9.6138
245
+ 1465,97600,8.3167
246
+ 1470,98000,11.7781
247
+ 1474,98400,17.2643
248
+ 1481,98800,9.2478
249
+ 1490,99200,8.6222
250
+ 1497,99600,10.0905
251
+ 1504,100000,7.0319
252
+ 1510,100400,11.7434
253
+ 1515,100800,12.7016
254
+ 1519,101200,14.5775
255
+ 1525,101600,7.4347
256
+ 1531,102000,17.0148
257
+ 1535,102400,21.5497
258
+ 1541,102800,13.3595
259
+ 1546,103200,20.2214
260
+ 1553,103600,12.2627
261
+ 1562,104000,10.1528
262
+ 1572,104400,7.0324
263
+ 1577,104800,13.988
264
+ 1587,105200,9.7205
265
+ 1591,105600,22.2021
266
+ 1599,106000,12.8697
267
+ 1611,106400,6.7423
268
+ 1618,106800,9.1311
269
+ 1624,107200,13.5325
270
+ 1630,107600,15.3574
271
+ 1637,108000,11.4605
272
+ 1647,108400,8.1593
273
+ 1657,108800,7.3692
274
+ 1664,109200,12.4204
275
+ 1671,109600,16.0635
276
+ 1677,110000,11.0747
277
+ 1687,110400,10.0776
278
+ 1695,110800,9.713
279
+ 1704,111200,6.6402
280
+ 1709,111600,16.4947
281
+ 1714,112000,12.0573
282
+ 1720,112400,12.4928
283
+ 1726,112800,16.9818
284
+ 1731,113200,16.4082
285
+ 1735,113600,19.4684
286
+ 1741,114000,17.2942
287
+ 1746,114400,17.3803
288
+ 1752,114800,14.3429
289
+ 1759,115200,15.4686
290
+ 1764,115600,18.3797
291
+ 1771,116000,10.6607
292
+ 1778,116400,12.9278
293
+ 1783,116800,21.2477
294
+ 1789,117200,12.1737
295
+ 1795,117600,15.1248
296
+ 1801,118000,11.4594
297
+ 1808,118400,11.8572
298
+ 1816,118800,8.6953
299
+ 1822,119200,12.9991
300
+ 1830,119600,8.346
301
+ 1835,120000,16.5775
302
+ 1840,120400,18.7012
303
+ 1845,120800,18.5211
304
+ 1852,121200,12.3151
305
+ 1860,121600,11.191
306
+ 1868,122000,10.6305
307
+ 1875,122400,12.1361
308
+ 1883,122800,12.0561
309
+ 1887,123200,21.1206
310
+ 1895,123600,10.3102
311
+ 1901,124000,15.5468
312
+ 1905,124400,21.1214
313
+ 1913,124800,12.598
314
+ 1919,125200,8.6702
315
+ 1923,125600,19.5976
316
+ 1928,126000,17.347
317
+ 1936,126400,12.0519
318
+ 1944,126800,6.2953
319
+ 1949,127200,13.6435
320
+ 1956,127600,9.3424
321
+ 1960,128000,22.6692
322
+ 1966,128400,12.2863
323
+ 1973,128800,15.4013
324
+ 1978,129200,17.9858
325
+ 1988,129600,7.2154
326
+ 1996,130000,10.964
327
+ 2004,130400,10.9658
328
+ 2009,130800,16.1921
329
+ 2015,131200,19.8994
330
+ 2020,131600,12.5598
331
+ 2026,132000,18.5603
332
+ 2034,132400,8.9442
333
+ 2039,132800,15.7247
334
+ 2044,133200,19.6043
335
+ 2048,133600,22.708
336
+ 2055,134000,12.1769
337
+ 2059,134400,30.2886
338
+ 2064,134800,19.3976
339
+ 2069,135200,24.011
340
+ 2075,135600,22.3232
341
+ 2079,136000,22.4054
342
+ 2087,136400,14.8207
343
+ 2095,136800,14.1154
344
+ 2102,137200,13.3378
345
+ 2106,137600,22.9892
346
+ 2112,138000,19.1975
347
+ 2119,138400,16.2562
348
+ 2125,138800,16.5325
349
+ 2134,139200,9.7804
350
+ 2143,139600,12.9261
351
+ 2149,140000,15.1729
352
+ 2157,140400,11.4505
353
+ 2163,140800,16.225
354
+ 2168,141200,15.0464
355
+ 2175,141600,12.2286
356
+ 2181,142000,14.5324
357
+ 2187,142400,17.9193
358
+ 2192,142800,21.9792
359
+ 2202,143200,7.5693
360
+ 2214,143600,7.0395
361
+ 2219,144000,20.2988
362
+ 2230,144400,8.1503
363
+ 2237,144800,12.8959
364
+ 2246,145200,11.8272
365
+ 2254,145600,15.8534
366
+ 2259,146000,20.5079
367
+ 2266,146400,10.7379
368
+ 2271,146800,21.599
369
+ 2279,147200,12.679
370
+ 2284,147600,14.8514
371
+ 2291,148000,8.6118
372
+ 2297,148400,7.5502
373
+ 2306,148800,5.1645
374
+ 2313,149200,10.6152
375
+ 2319,149600,15.1497
376
+ 2324,150000,13.6594
377
+ 2331,150400,10.1251
378
+ 2337,150800,10.9294
379
+ 2345,151200,5.0712
380
+ 2350,151600,13.3293
381
+ 2359,152000,6.394
382
+ 2370,152400,4.9969
383
+ 2379,152800,7.9595
384
+ 2391,153200,4.0272
385
+ 2398,153600,7.3762
386
+ 2407,154000,7.3333
387
+ 2413,154400,10.8586
388
+ 2423,154800,9.7345
389
+ 2432,155200,7.9822
390
+ 2439,155600,10.3486
391
+ 2450,156000,6.3284
392
+ 2458,156400,9.4372
393
+ 2473,156800,3.132
394
+ 2481,157200,10.3754
395
+ 2487,157600,10.447
396
+ 2493,158000,15.9101
397
+ 2503,158400,8.3842
398
+ 2510,158800,15.4866
399
+ 2518,159200,11.682
400
+ 2526,159600,11.0361
401
+ 2537,160000,8.7871
402
+ 2545,160400,11.1971
403
+ 2555,160800,4.4022
404
+ 2562,161200,13.1779
405
+ 2568,161600,12.9045
406
+ 2579,162000,8.0949
407
+ 2587,162400,11.4998
408
+ 2597,162800,7.2315
409
+ 2604,163200,14.4484
410
+ 2609,163600,15.2563
411
+ 2622,164000,7.1052
412
+ 2631,164400,8.0708
413
+ 2636,164800,15.3412
414
+ 2646,165200,7.0698
415
+ 2654,165600,10.9479
416
+ 2661,166000,12.6783
417
+ 2666,166400,16.1794
418
+ 2671,166800,18.6718
419
+ 2676,167200,13.5239
420
+ 2681,167600,12.3507
421
+ 2691,168000,7.2502
422
+ 2700,168400,6.2099
423
+ 2707,168800,13.9091
424
+ 2712,169200,16.7988
425
+ 2717,169600,22.2866
426
+ 2728,170000,8.0224
427
+ 2738,170400,11.7132
428
+ 2743,170800,22.9338
429
+ 2750,171200,15.1354
430
+ 2762,171600,8.9432
431
+ 2768,172000,14.02
432
+ 2773,172400,19.0923
433
+ 2780,172800,14.6205
434
+ 2787,173200,12.9528
435
+ 2796,173600,11.7497
436
+ 2803,174000,14.0874
437
+ 2808,174400,21.8773
438
+ 2813,174800,16.507
439
+ 2819,175200,19.8032
440
+ 2827,175600,13.8242
441
+ 2833,176000,19.0166
442
+ 2838,176400,27.172
443
+ 2846,176800,11.3364
444
+ 2851,177200,19.2692
445
+ 2857,177600,21.4003
446
+ 2865,178000,13.1762
447
+ 2871,178400,20.7159
448
+ 2878,178800,12.615
449
+ 2882,179200,17.292
450
+ 2886,179600,22.37
451
+ 2893,180000,16.9824
452
+ 2903,180400,7.4276
453
+ 2912,180800,12.7024
454
+ 2918,181200,14.4444
455
+ 2926,181600,14.2302
456
+ 2932,182000,18.124
457
+ 2938,182400,13.5542
458
+ 2943,182800,33.0073
459
+ 2950,183200,12.0173
460
+ 2954,183600,22.766
461
+ 2959,184000,16.2093
462
+ 2965,184400,15.9019
463
+ 2972,184800,13.7782
464
+ 2976,185200,26.2554
465
+ 2985,185600,10.2687
466
+ 2991,186000,14.767
467
+ 2998,186400,12.8508
468
+ 3002,186800,19.9929
469
+ 3008,187200,16.2693
470
+ 3016,187600,14.6283
471
+ 3026,188000,9.5896
472
+ 3034,188400,11.7475
473
+ 3041,188800,12.2039
474
+ 3045,189200,23.8742
475
+ 3056,189600,8.3229
476
+ 3063,190000,11.436
477
+ 3070,190400,10.1774
478
+ 3077,190800,10.9202
479
+ 3082,191200,16.139
480
+ 3087,191600,17.0197
481
+ 3096,192000,6.3408
482
+ 3103,192400,10.6383
483
+ 3108,192800,14.7467
484
+ 3113,193200,16.4579
485
+ 3121,193600,9.5428
486
+ 3125,194000,22.9954
487
+ 3129,194400,11.4853
488
+ 3136,194800,8.377
489
+ 3141,195200,13.0133
490
+ 3145,195600,14.8357
491
+ 3150,196000,20.832
492
+ 3159,196400,8.4116
493
+ 3166,196800,16.5597
494
+ 3178,197200,6.8153
495
+ 3186,197600,11.2551
496
+ 3197,198000,9.8681
497
+ 3202,198400,16.0912
498
+ 3210,198800,13.4439
499
+ 3216,199200,11.4534
500
+ 3222,199600,16.3593
501
+ 3229,200000,11.4777
code/Lake application/logs/results_2/PPO_frozen_lake_log_5.csv ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 25,400,1.1394
3
+ 48,800,1.3002
4
+ 55,1200,4.2174
5
+ 65,1600,3.1185
6
+ 72,2000,4.6995
7
+ 77,2400,5.4912
8
+ 83,2800,5.3572
9
+ 87,3200,6.4659
10
+ 93,3600,5.7893
11
+ 97,4000,7.5009
12
+ 101,4400,7.0671
13
+ 105,4800,6.9336
14
+ 109,5200,7.0397
15
+ 114,5600,6.2202
16
+ 120,6000,4.966
17
+ 126,6400,5.4569
18
+ 131,6800,5.7776
19
+ 135,7200,7.3573
20
+ 139,7600,6.3645
21
+ 144,8000,6.1568
22
+ 148,8400,7.3496
23
+ 152,8800,6.4834
24
+ 157,9200,6.0306
25
+ 162,9600,6.5141
26
+ 167,10000,6.2576
27
+ 172,10400,5.8399
28
+ 176,10800,5.7246
29
+ 182,11200,5.6336
30
+ 186,11600,6.3594
31
+ 192,12000,5.9243
32
+ 196,12400,7.3959
33
+ 200,12800,7.3087
34
+ 204,13200,7.4288
35
+ 208,13600,6.0092
36
+ 212,14000,7.4051
37
+ 219,14400,5.0751
38
+ 223,14800,6.1337
39
+ 228,15200,6.0328
40
+ 233,15600,6.2323
41
+ 237,16000,7.2947
42
+ 242,16400,6.21
43
+ 247,16800,5.2467
44
+ 251,17200,7.301
45
+ 256,17600,6.7575
46
+ 260,18000,7.3051
47
+ 264,18400,7.3659
48
+ 270,18800,4.9364
49
+ 275,19200,4.7764
50
+ 280,19600,6.6525
51
+ 286,20000,5.1666
52
+ 290,20400,7.305
53
+ 295,20800,5.3332
54
+ 301,21200,5.8009
55
+ 306,21600,6.3444
56
+ 311,22000,5.38
57
+ 316,22400,5.8886
58
+ 321,22800,5.9808
59
+ 326,23200,6.1518
60
+ 331,23600,6.2596
61
+ 335,24000,7.4699
62
+ 340,24400,5.0459
63
+ 344,24800,7.1979
64
+ 348,25200,7.1906
65
+ 354,25600,5.5289
66
+ 358,26000,7.4249
67
+ 362,26400,7.3209
68
+ 366,26800,6.8339
69
+ 371,27200,6.2896
70
+ 375,27600,5.8839
71
+ 382,28000,4.5678
72
+ 387,28400,6.4948
73
+ 391,28800,7.3334
74
+ 395,29200,6.8561
75
+ 400,29600,5.7921
76
+ 405,30000,5.775
77
+ 411,30400,5.2429
78
+ 416,30800,6.3733
79
+ 420,31200,6.9394
80
+ 424,31600,7.4063
81
+ 430,32000,5.6989
82
+ 434,32400,6.6094
83
+ 439,32800,6.4591
84
+ 443,33200,7.5158
85
+ 448,33600,5.1683
86
+ 457,34000,3.2012
87
+ 462,34400,6.4385
88
+ 466,34800,7.6264
89
+ 471,35200,6.8552
90
+ 475,35600,6.6808
91
+ 479,36000,7.5093
92
+ 485,36400,6.0369
93
+ 491,36800,5.9012
94
+ 497,37200,4.7016
95
+ 507,37600,3.1521
96
+ 513,38000,5.874
97
+ 517,38400,7.713
98
+ 522,38800,6.4352
99
+ 526,39200,7.6985
100
+ 530,39600,7.8557
101
+ 540,40000,3.9398
102
+ 546,40400,5.5794
103
+ 551,40800,6.7952
104
+ 555,41200,7.4607
105
+ 561,41600,6.2385
106
+ 565,42000,6.9421
107
+ 571,42400,5.5255
108
+ 577,42800,7.0349
109
+ 583,43200,7.038
110
+ 587,43600,8.1308
111
+ 592,44000,6.715
112
+ 598,44400,6.6102
113
+ 603,44800,5.7423
114
+ 609,45200,5.7222
115
+ 616,45600,4.6904
116
+ 621,46000,6.6802
117
+ 627,46400,5.1794
118
+ 631,46800,7.8428
119
+ 636,47200,6.6569
120
+ 640,47600,6.0637
121
+ 645,48000,6.6306
122
+ 649,48400,7.8827
123
+ 654,48800,7.2829
124
+ 660,49200,5.3484
125
+ 666,49600,4.965
126
+ 671,50000,6.2847
127
+ 675,50400,7.2723
128
+ 681,50800,5.6506
129
+ 689,51200,4.2316
130
+ 693,51600,6.588
131
+ 699,52000,5.8609
132
+ 704,52400,6.7229
133
+ 710,52800,5.2104
134
+ 716,53200,5.5105
135
+ 720,53600,6.7428
136
+ 728,54000,4.2067
137
+ 733,54400,6.1901
138
+ 737,54800,6.1446
139
+ 743,55200,6.4328
140
+ 747,55600,7.5352
141
+ 753,56000,4.6297
142
+ 758,56400,6.7488
143
+ 765,56800,4.4836
144
+ 769,57200,7.332
145
+ 773,57600,7.3174
146
+ 778,58000,7.5865
147
+ 784,58400,5.1354
148
+ 788,58800,7.5499
149
+ 792,59200,7.6182
150
+ 802,59600,2.9436
151
+ 807,60000,6.7174
152
+ 812,60400,6.5712
153
+ 820,60800,3.8267
154
+ 827,61200,5.0311
155
+ 831,61600,7.6775
156
+ 835,62000,6.2429
157
+ 842,62400,5.1519
158
+ 847,62800,5.1346
159
+ 858,63200,3.2845
160
+ 862,63600,6.7612
161
+ 868,64000,5.4555
162
+ 874,64400,5.6836
163
+ 879,64800,5.4058
164
+ 883,65200,7.5245
165
+ 887,65600,7.5208
166
+ 892,66000,7.063
167
+ 897,66400,6.6028
168
+ 903,66800,6.9216
169
+ 908,67200,7.874
170
+ 912,67600,6.5846
171
+ 918,68000,6.0015
172
+ 923,68400,6.5042
173
+ 927,68800,6.4518
174
+ 932,69200,7.5216
175
+ 937,69600,7.0083
176
+ 942,70000,6.8853
177
+ 948,70400,5.5392
178
+ 954,70800,8.848
179
+ 960,71200,11.4058
180
+ 967,71600,7.5975
181
+ 972,72000,8.9093
182
+ 979,72400,9.266
183
+ 985,72800,9.023
184
+ 991,73200,11.5379
185
+ 1000,73600,7.4839
186
+ 1006,74000,10.8982
187
+ 1011,74400,9.6794
188
+ 1016,74800,11.5398
189
+ 1022,75200,12.7577
190
+ 1032,75600,7.5257
191
+ 1040,76000,10.313
192
+ 1052,76400,4.9592
193
+ 1057,76800,9.929
194
+ 1065,77200,9.0269
195
+ 1074,77600,10.0283
196
+ 1080,78000,9.1994
197
+ 1084,78400,17.318
198
+ 1090,78800,8.6919
199
+ 1099,79200,8.3069
200
+ 1104,79600,14.1304
201
+ 1110,80000,13.6171
202
+ 1114,80400,16.2281
203
+ 1119,80800,13.7672
204
+ 1127,81200,9.2344
205
+ 1133,81600,12.9287
206
+ 1139,82000,9.0991
207
+ 1146,82400,10.4085
208
+ 1154,82800,6.4297
209
+ 1161,83200,11.2657
210
+ 1170,83600,8.6181
211
+ 1176,84000,10.1173
212
+ 1182,84400,10.5116
213
+ 1189,84800,7.5418
214
+ 1197,85200,7.8979
215
+ 1204,85600,10.4355
216
+ 1214,86000,5.9039
217
+ 1228,86400,4.1987
218
+ 1238,86800,6.374
219
+ 1246,87200,5.9424
220
+ 1251,87600,15.9749
221
+ 1257,88000,14.0111
222
+ 1261,88400,19.8135
223
+ 1270,88800,7.7016
224
+ 1276,89200,10.2966
225
+ 1281,89600,12.6069
226
+ 1288,90000,10.6588
227
+ 1293,90400,17.1633
228
+ 1300,90800,9.8388
229
+ 1308,91200,9.1061
230
+ 1314,91600,10.2858
231
+ 1319,92000,15.5991
232
+ 1323,92400,19.9744
233
+ 1329,92800,13.7349
234
+ 1333,93200,16.2973
235
+ 1340,93600,12.1433
236
+ 1346,94000,15.6216
237
+ 1350,94400,16.604
238
+ 1356,94800,17.9473
239
+ 1360,95200,22.7261
240
+ 1367,95600,9.7798
241
+ 1372,96000,18.8177
242
+ 1376,96400,23.3835
243
+ 1380,96800,22.7676
244
+ 1385,97200,17.9196
245
+ 1391,97600,11.0045
246
+ 1398,98000,12.9032
247
+ 1403,98400,19.9428
248
+ 1411,98800,10.0649
249
+ 1415,99200,23.4105
250
+ 1421,99600,14.4659
251
+ 1427,100000,12.6086
252
+ 1434,100400,11.2016
253
+ 1440,100800,16.4195
254
+ 1445,101200,14.8528
255
+ 1451,101600,13.3423
256
+ 1457,102000,15.6651
257
+ 1462,102400,19.3589
258
+ 1467,102800,19.2814
259
+ 1472,103200,17.4048
260
+ 1479,103600,10.9156
261
+ 1484,104000,19.1347
262
+ 1489,104400,16.7974
263
+ 1500,104800,7.7077
264
+ 1508,105200,8.3873
265
+ 1515,105600,11.8502
266
+ 1522,106000,12.4617
267
+ 1529,106400,10.997
268
+ 1534,106800,18.1086
269
+ 1538,107200,21.5753
270
+ 1542,107600,18.1229
271
+ 1548,108000,19.0807
272
+ 1553,108400,19.9151
273
+ 1557,108800,24.3347
274
+ 1565,109200,11.5838
275
+ 1571,109600,10.4892
276
+ 1576,110000,18.4124
277
+ 1583,110400,9.6659
278
+ 1589,110800,15.3845
279
+ 1594,111200,19.4332
280
+ 1603,111600,9.1848
281
+ 1608,112000,19.8579
282
+ 1614,112400,14.6327
283
+ 1620,112800,15.4716
284
+ 1628,113200,7.6968
285
+ 1633,113600,14.4689
286
+ 1637,114000,19.6793
287
+ 1642,114400,20.0721
288
+ 1647,114800,15.0668
289
+ 1652,115200,17.4454
290
+ 1657,115600,19.6026
291
+ 1663,116000,14.572
292
+ 1669,116400,12.3857
293
+ 1675,116800,15.0434
294
+ 1679,117200,23.0521
295
+ 1685,117600,15.9115
296
+ 1691,118000,16.4641
297
+ 1695,118400,18.5005
298
+ 1701,118800,13.3055
299
+ 1705,119200,20.5855
300
+ 1711,119600,15.2568
301
+ 1716,120000,17.1653
302
+ 1721,120400,16.2964
303
+ 1726,120800,17.3911
304
+ 1731,121200,18.9176
305
+ 1735,121600,20.2643
306
+ 1741,122000,21.9711
307
+ 1748,122400,15.1474
308
+ 1752,122800,21.0002
309
+ 1756,123200,27.017
310
+ 1761,123600,21.0847
311
+ 1768,124000,16.9835
312
+ 1774,124400,16.988
313
+ 1780,124800,17.102
314
+ 1784,125200,27.0598
315
+ 1790,125600,18.8929
316
+ 1795,126000,18.4346
317
+ 1799,126400,27.4704
318
+ 1803,126800,20.451
319
+ 1812,127200,12.5548
320
+ 1816,127600,24.9355
321
+ 1821,128000,14.002
322
+ 1826,128400,16.9177
323
+ 1834,128800,9.9734
324
+ 1847,129200,4.5403
325
+ 1853,129600,10.6147
326
+ 1865,130000,6.26
327
+ 1872,130400,12.1423
328
+ 1877,130800,16.8818
329
+ 1882,131200,14.1034
330
+ 1887,131600,19.5902
331
+ 1894,132000,12.8515
332
+ 1899,132400,16.2843
333
+ 1904,132800,15.5745
334
+ 1914,133200,8.3905
335
+ 1922,133600,13.0687
336
+ 1929,134000,13.9548
337
+ 1935,134400,11.7435
338
+ 1944,134800,12.2644
339
+ 1949,135200,18.9015
340
+ 1957,135600,10.5449
341
+ 1968,136000,6.614
342
+ 1979,136400,7.9006
343
+ 1988,136800,8.6919
344
+ 1993,137200,19.6558
345
+ 1999,137600,13.7705
346
+ 2004,138000,19.7431
347
+ 2010,138400,16.1015
348
+ 2018,138800,7.969
349
+ 2024,139200,10.7627
350
+ 2033,139600,9.8075
351
+ 2038,140000,15.1353
352
+ 2044,140400,14.33
353
+ 2051,140800,13.0915
354
+ 2059,141200,11.0496
355
+ 2067,141600,8.5425
356
+ 2074,142000,12.6574
357
+ 2079,142400,18.6865
358
+ 2083,142800,18.9614
359
+ 2091,143200,7.6956
360
+ 2097,143600,19.3319
361
+ 2106,144000,6.1586
362
+ 2112,144400,11.7879
363
+ 2117,144800,14.6574
364
+ 2124,145200,9.78
365
+ 2131,145600,8.172
366
+ 2138,146000,9.3161
367
+ 2145,146400,10.1464
368
+ 2151,146800,13.3546
369
+ 2158,147200,10.2643
370
+ 2162,147600,17.7297
371
+ 2167,148000,12.2066
372
+ 2174,148400,11.723
373
+ 2181,148800,12.61
374
+ 2185,149200,20.9512
375
+ 2192,149600,9.257
376
+ 2200,150000,13.0471
377
+ 2206,150400,10.6689
378
+ 2212,150800,16.0447
379
+ 2219,151200,13.6559
380
+ 2225,151600,13.2487
381
+ 2235,152000,7.2764
382
+ 2242,152400,11.6686
383
+ 2248,152800,12.3615
384
+ 2255,153200,13.5621
385
+ 2263,153600,9.6251
386
+ 2269,154000,9.0672
387
+ 2276,154400,13.0372
388
+ 2281,154800,16.5969
389
+ 2286,155200,18.0225
390
+ 2292,155600,14.2052
391
+ 2298,156000,11.6988
392
+ 2304,156400,9.5336
393
+ 2312,156800,8.0191
394
+ 2324,157200,5.3825
395
+ 2330,157600,8.1571
396
+ 2337,158000,10.3493
397
+ 2344,158400,10.4621
398
+ 2350,158800,10.5959
399
+ 2356,159200,7.2691
400
+ 2364,159600,6.3992
401
+ 2372,160000,7.9295
402
+ 2377,160400,7.4555
403
+ 2384,160800,7.9996
404
+ 2389,161200,13.726
405
+ 2395,161600,7.7046
406
+ 2399,162000,16.8889
407
+ 2410,162400,5.528
408
+ 2422,162800,4.9575
409
+ 2429,163200,8.7608
410
+ 2438,163600,7.2575
411
+ 2446,164000,7.0835
412
+ 2452,164400,10.8246
413
+ 2459,164800,7.831
414
+ 2467,165200,6.133
415
+ 2476,165600,7.8923
416
+ 2483,166000,8.0733
417
+ 2489,166400,11.0754
418
+ 2493,166800,20.1624
419
+ 2500,167200,12.4293
420
+ 2504,167600,15.0355
421
+ 2510,168000,13.0286
422
+ 2518,168400,7.4877
423
+ 2523,168800,12.2261
424
+ 2531,169200,7.3993
425
+ 2536,169600,8.9622
426
+ 2542,170000,10.8549
427
+ 2547,170400,11.6566
428
+ 2555,170800,8.8997
429
+ 2562,171200,8.861
430
+ 2568,171600,13.2091
431
+ 2574,172000,10.3659
432
+ 2580,172400,11.7853
433
+ 2585,172800,14.792
434
+ 2592,173200,10.6782
435
+ 2602,173600,6.9546
436
+ 2609,174000,9.9301
437
+ 2614,174400,17.7772
438
+ 2623,174800,6.2142
439
+ 2630,175200,12.9292
440
+ 2637,175600,10.1204
441
+ 2645,176000,9.0597
442
+ 2651,176400,15.3755
443
+ 2657,176800,13.128
444
+ 2662,177200,19.3868
445
+ 2670,177600,10.7437
446
+ 2679,178000,6.904
447
+ 2686,178400,9.2907
448
+ 2696,178800,6.4837
449
+ 2705,179200,8.2248
450
+ 2711,179600,12.1069
451
+ 2720,180000,8.3973
452
+ 2724,180400,22.0167
453
+ 2729,180800,15.3768
454
+ 2734,181200,15.6707
455
+ 2738,181600,21.4503
456
+ 2744,182000,14.5199
457
+ 2750,182400,16.9138
458
+ 2756,182800,12.2078
459
+ 2762,183200,15.9948
460
+ 2769,183600,12.4933
461
+ 2775,184000,14.7625
462
+ 2781,184400,16.4597
463
+ 2786,184800,12.6036
464
+ 2793,185200,11.1748
465
+ 2799,185600,13.5976
466
+ 2805,186000,13.175
467
+ 2811,186400,14.25
468
+ 2816,186800,22.0337
469
+ 2822,187200,17.4297
470
+ 2827,187600,17.1395
471
+ 2832,188000,18.1786
472
+ 2837,188400,16.0257
473
+ 2844,188800,11.8928
474
+ 2850,189200,16.6968
475
+ 2855,189600,19.1383
476
+ 2860,190000,21.8792
477
+ 2864,190400,27.2875
478
+ 2868,190800,25.2937
479
+ 2873,191200,20.9754
480
+ 2882,191600,12.5236
481
+ 2886,192000,26.9158
482
+ 2896,192400,9.8619
483
+ 2912,192800,5.0885
484
+ 2923,193200,4.6341
485
+ 2930,193600,13.8767
486
+ 2937,194000,11.1766
487
+ 2944,194400,14.2145
488
+ 2952,194800,7.6092
489
+ 2961,195200,9.0705
490
+ 2968,195600,10.5332
491
+ 2973,196000,13.6747
492
+ 2979,196400,17.6262
493
+ 2986,196800,12.0028
494
+ 2997,197200,5.1463
495
+ 3008,197600,9.2429
496
+ 3015,198000,11.3805
497
+ 3022,198400,7.9289
498
+ 3029,198800,12.3534
499
+ 3033,199200,26.1072
500
+ 3041,199600,13.3515
501
+ 3048,200000,11.5205
code/Lake application/plot_figure.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Mar 6 16:30:32 2023
4
+
5
+ @author: leona
6
+ """
7
+
8
+ import os
9
+ import pandas as pd
10
+ import matplotlib.pyplot as plt
11
+ import seaborn as sns
12
+
13
+
14
+
15
+
16
def _default_base_dir():
    """Return the directory that contains this script.

    Falls back to the current working directory when ``__file__`` is not
    defined (for example when the code is pasted into an interactive console).
    """
    try:
        return os.path.dirname(os.path.abspath(__file__))
    except NameError:
        return os.getcwd()


def _find_run_logs(log_dir, prefix):
    """Return the per-run CSV log paths in *log_dir*, ordered by run number.

    Only files named ``<prefix><run_number>.csv`` are returned, so other
    files living in the same directory (model checkpoints, placeholders)
    never influence how many runs are loaded.
    """
    suffix = ".csv"
    numbered_paths = []
    for file_name in os.listdir(log_dir):
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue
        run_id = file_name[len(prefix):-len(suffix)]
        if run_id.isdigit():
            numbered_paths.append((int(run_id), os.path.join(log_dir, file_name)))
    return [path for _, path in sorted(numbered_paths)]


def _load_smoothed_runs(log_dir, algo_name, env_name, rolling_window):
    """Load every run log of one algorithm and smooth the reward of each run.

    The rolling mean is computed per run, *before* the runs are concatenated,
    so that the window never mixes the end of one run with the start of the
    next one.

    Returns a single DataFrame holding all runs, with an extra
    ``reward_mean`` column.

    Raises FileNotFoundError when the directory is missing or contains no
    matching log file.
    """
    prefix = f"{algo_name}_{env_name}_log_"
    log_paths = _find_run_logs(log_dir, prefix)
    if not log_paths:
        raise FileNotFoundError(
            f"no '{prefix}<run>.csv' log files found in {log_dir}"
        )

    runs = []
    for log_path in log_paths:
        print("loading data from : " + log_path)
        data = pd.read_csv(log_path)
        print("data shape : ", data.shape)

        # Apply rolling mean to the reward values of this run only
        data['reward_mean'] = data['reward'].rolling(
            window=rolling_window, win_type='triang', min_periods=1
        ).mean()

        runs.append(data)
        print("--------------------------------------------------------------------------------------------")

    # Drop rows containing NaN values and renumber the rows of the stacked runs
    return pd.concat(runs).dropna().reset_index(drop=True)


def _plot_reward_band(ax, runs, label, rolling_window):
    """Plot the across-run mean reward per timestep with a +/- 1 std band."""
    grouped = runs.groupby('timestep')['reward_mean']

    # Skip the first `rolling_window` timesteps (partial smoothing windows)
    reward_mean = grouped.mean().iloc[rolling_window:]
    reward_std = grouped.std().iloc[rolling_window:]

    sns.lineplot(x=reward_mean.index, y=reward_mean, ax=ax, label=label)
    ax.fill_between(
        reward_mean.index,
        reward_mean - reward_std,
        reward_mean + reward_std,
        alpha=0.2,
    )


def save_graph(base_dir=None, experiment_name='frozen_lake', rolling_window=10):
    """Plot the PPO and PDPPO learning curves and save them as a PDF.

    Reads ``<base_dir>/logs/<experiment_name>_PPO/PPO_<experiment_name>_log_<n>.csv``
    and the matching ``PDPPO`` files, smooths the ``reward`` column of every
    run, averages the runs per ``timestep`` and writes
    ``<base_dir>/results/<experiment_name>_PPO/<experiment_name>.pdf``.

    Parameters
    ----------
    base_dir : str, optional
        Directory that contains the ``logs`` folder and receives the
        ``results`` folder. Defaults to the directory of this script.
    experiment_name : str, optional
        Environment / experiment name used in directory and file names.
    rolling_window : int, optional
        Size of the triangular rolling window used to smooth the rewards;
        the same number of leading timesteps is left out of the plot.

    Raises
    ------
    FileNotFoundError
        If a log directory is missing or holds no run log.
    """
    print("============================================================================================")

    env_name = experiment_name
    if base_dir is None:
        base_dir = _default_base_dir()

    # make environment directory for saving figures
    # (both curves are stored in the '<env>_PPO' results folder)
    figures_dir = os.path.join(base_dir, 'results', env_name + '_PPO')
    os.makedirs(figures_dir, exist_ok=True)

    # Use the logs folder located next to the results folder.
    logs_root = os.path.join(base_dir, 'logs')

    # Set up plot using seaborn
    sns.set_style("whitegrid")
    fig, ax = plt.subplots(figsize=(15, 4))

    for algo_name in ('PPO', 'PDPPO'):
        log_dir = os.path.join(logs_root, env_name + '_' + algo_name)
        runs = _load_smoothed_runs(log_dir, algo_name, env_name, rolling_window)
        _plot_reward_band(ax, runs, algo_name, rolling_window)

    ax.legend()
    ax.grid(color='gray', linestyle='-', linewidth=1, alpha=0.2)
    ax.set_xlabel("Timesteps", fontsize=12)
    ax.set_ylabel("Rewards", fontsize=12)

    print("============================================================================================")
    fig.savefig(os.path.join(figures_dir, f'{experiment_name}.pdf'), dpi=300, bbox_inches='tight')
    print("figure saved at : ", figures_dir)
    print("============================================================================================")


if __name__ == '__main__':

    save_graph()
157
+