first commit
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- README.md +148 -2
- code/Lake application/__pycache__/PDPPO.cpython-38.pyc +0 -0
- code/Lake application/__pycache__/PPO.cpython-38.pyc +0 -0
- code/Lake application/agents/PDPPO one critic.py +321 -0
- code/Lake application/agents/PDPPO two critics.py +345 -0
- code/Lake application/agents/PDPPO.py +301 -0
- code/Lake application/agents/PDPPOAgent two critics.py +394 -0
- code/Lake application/agents/PDPPOAgent.py +402 -0
- code/Lake application/agents/PDPPO_two_actors.py +353 -0
- code/Lake application/agents/PDPPO_two_critics_two_actors.py +377 -0
- code/Lake application/agents/PDPPO_v0.py +328 -0
- code/Lake application/agents/PPO.py +248 -0
- code/Lake application/agents/__init__.py +8 -0
- code/Lake application/envs/frozen_lake.py +301 -0
- code/Lake application/experiments.py +117 -0
- code/Lake application/generate_tables.py +78 -0
- code/Lake application/logs/.gitkeep +0 -0
- code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_0_0.pth +3 -0
- code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_1.csv +501 -0
- code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_2.csv +501 -0
- code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_3.csv +501 -0
- code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_4.csv +501 -0
- code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_5.csv +501 -0
- code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_0_0.pth +3 -0
- code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_1.csv +501 -0
- code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_2.csv +501 -0
- code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_3.csv +501 -0
- code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_4.csv +501 -0
- code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_5.csv +501 -0
- code/Lake application/logs/results_1/PDPPO_frozen_lake_log_1.csv +501 -0
- code/Lake application/logs/results_1/PDPPO_frozen_lake_log_2.csv +501 -0
- code/Lake application/logs/results_1/PDPPO_frozen_lake_log_3.csv +501 -0
- code/Lake application/logs/results_1/PDPPO_frozen_lake_log_4.csv +501 -0
- code/Lake application/logs/results_1/PDPPO_frozen_lake_log_5.csv +501 -0
- code/Lake application/logs/results_1/PPO_frozen_lake_log_1.csv +501 -0
- code/Lake application/logs/results_1/PPO_frozen_lake_log_2.csv +501 -0
- code/Lake application/logs/results_1/PPO_frozen_lake_log_3.csv +501 -0
- code/Lake application/logs/results_1/PPO_frozen_lake_log_4.csv +501 -0
- code/Lake application/logs/results_1/PPO_frozen_lake_log_5.csv +501 -0
- code/Lake application/logs/results_2/PDPPO_frozen_lake_log_1.csv +501 -0
- code/Lake application/logs/results_2/PDPPO_frozen_lake_log_2.csv +501 -0
- code/Lake application/logs/results_2/PDPPO_frozen_lake_log_3.csv +501 -0
- code/Lake application/logs/results_2/PDPPO_frozen_lake_log_4.csv +501 -0
- code/Lake application/logs/results_2/PDPPO_frozen_lake_log_5.csv +501 -0
- code/Lake application/logs/results_2/PPO_frozen_lake_log_1.csv +501 -0
- code/Lake application/logs/results_2/PPO_frozen_lake_log_2.csv +501 -0
- code/Lake application/logs/results_2/PPO_frozen_lake_log_3.csv +501 -0
- code/Lake application/logs/results_2/PPO_frozen_lake_log_4.csv +501 -0
- code/Lake application/logs/results_2/PPO_frozen_lake_log_5.csv +501 -0
- code/Lake application/plot_figure.py +157 -0
README.md
CHANGED
|
@@ -1,2 +1,148 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reinforcement Learning for Stochastic Discrete Lot-Sizing Problem and Frozen-Lake game
|
| 2 |
+
|
| 3 |
+
This repository contains the code and resources for research conducted at the University of São Paulo (USP) and Politecnico di Torino (Polito) on reinforcement learning, in particular Post-Decision Proximal Policy Optimization (PDPPO), applied to the Stochastic Discrete Lot-Sizing Problem and a Frozen-Lake game.
|
| 4 |
+
|
| 5 |
+
## Project Structure
|
| 6 |
+
This repository consists of two main directories: `Lot-sizing` and `Lake application`, each containing the related files and folders.
|
| 7 |
+
|
| 8 |
+
### Lot-Sizing
|
| 9 |
+
|
| 10 |
+
`Lot-sizing` directory holds the following subdirectories:
|
| 11 |
+
|
| 12 |
+
- **agents**: Holds various versions of PDPPO agent implementations and utility functions.
|
| 13 |
+
|
| 14 |
+
- **cfg_env**: Includes the environment settings and configuration files for the project in JSON format. Additionally, `generate_setting.py` is used for generating new environment settings.
|
| 15 |
+
|
| 16 |
+
- **cfg_sol**: Stores the solution settings in `sol_setting.json`.
|
| 17 |
+
|
| 18 |
+
- **envs**: Contains different environment definitions for the problem, like `simplePlant.py` and `singleSequenceDependentMachinePlant.py`.
|
| 19 |
+
|
| 20 |
+
- **logs**: Keeps the log files for the model training and evaluation.
|
| 21 |
+
|
| 22 |
+
- **models**: Stores various optimization models.
|
| 23 |
+
|
| 24 |
+
- **results**: After executing the experiments, the results are saved in this directory.
|
| 25 |
+
|
| 26 |
+
- **scenarioManager**: Manages different scenario setups.
|
| 27 |
+
|
| 28 |
+
- **test_functions**: Stores functions to validate the models and generate plots and tables.
|
| 29 |
+
|
| 30 |
+
### Lake Application
|
| 31 |
+
|
| 32 |
+
`Lake application` directory holds the following subdirectories:
|
| 33 |
+
|
| 34 |
+
- **agents**: Contains various versions of PDPPO agent implementations for the Lake problem.
|
| 35 |
+
|
| 36 |
+
- **envs**: Contains environment definitions, like `frozen_lake.py`.
|
| 37 |
+
|
| 38 |
+
- **logs**: Contains the log files and results from the model training and evaluation for different scenarios.
|
| 39 |
+
|
| 40 |
+
- **results**: Stores the output from experiments and relevant figures.
|
| 41 |
+
|
| 42 |
+
- Root level scripts `experiments.py`, `generate_tables.py` and `plot_figure.py` are used for running experiments, generating output tables and plotting results respectively.
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
## Repository structure:
|
| 46 |
+
|
| 47 |
+
The main components of the repository are as follows:
|
| 48 |
+
|
| 49 |
+
```text
|
| 50 |
+
|
| 51 |
+
├───Lake application
|
| 52 |
+
│ ├───agents # contains the implementations of various agents
|
| 53 |
+
│ ├───envs # contains the FrozenLake environment implementation
|
| 54 |
+
│ ├───logs # contains the logs of the agent's performance
|
| 55 |
+
│ │ ├───frozen_lake_PDPPO
|
| 56 |
+
│ │ ├───frozen_lake_PPO
|
| 57 |
+
│ │ ├───results_1
|
| 58 |
+
│ │ └───results_2
|
| 59 |
+
│ └───results # contains the results of the agent's performance
|
| 60 |
+
│ └───frozen_lake_PPO
|
| 61 |
+
└───Lot-sizing
|
| 62 |
+
├───.vscode
|
| 63 |
+
├───agents # contains the implementations of various agents
|
| 64 |
+
│ ├───utils # utility functions for the agents
|
| 65 |
+
│ │ └───__pycache__
|
| 66 |
+
│ └───__pycache__
|
| 67 |
+
├───cfg_env # contains the settings for the Lot-sizing environment
|
| 68 |
+
│ └───setting file
|
| 69 |
+
├───cfg_sol
|
| 70 |
+
├───envs # contains the Lot-sizing environment implementation
|
| 71 |
+
├───logs # contains the logs of the agent's performance
|
| 72 |
+
├───models # contains the models for the optimization problems
|
| 73 |
+
├───results # contains the results of the agent's performance
|
| 74 |
+
├───scenarioManager # manages different scenarios for the Lot-sizing environment
|
| 75 |
+
└───test_functions # contains test functions for the Lot-sizing environment
|
| 76 |
+
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
## Requirements:
|
| 80 |
+
|
| 81 |
+
This project uses the following main dependencies:
|
| 82 |
+
|
| 83 |
+
- [Python 3.8](https://www.python.org/downloads/)
|
| 84 |
+
- [numpy](https://numpy.org/)
|
| 85 |
+
- [gym](https://gym.openai.com/)
|
| 86 |
+
- [matplotlib](https://matplotlib.org/)
|
| 87 |
+
- [torch](https://pytorch.org/)
|
| 88 |
+
- gurobipy (not included in `requirements.txt` due to separate licensing)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
## How to Reproduce
|
| 92 |
+
|
| 93 |
+
1. Clone the repository:
|
| 94 |
+
|
| 95 |
+
```
|
| 96 |
+
git clone https://github.com/username/repository.git
|
| 97 |
+
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
2. Navigate into the project directory:
|
| 101 |
+
|
| 102 |
+
```
|
| 103 |
+
cd repository
|
| 104 |
+
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Install the required Python packages (this project was developed with Python 3.8). If your requirements file has a different name, use it in place of `requirements.txt`:
|
| 108 |
+
|
| 109 |
+
```
|
| 110 |
+
pip install -r requirements.txt
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
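NOTE: For the Lake application to work properly, you might need to replace the `frozen_lake.py` environment file installed in your Python environment (presumably the one shipped with `gym`) with the `frozen_lake.py` provided in this repository under `code/Lake application/envs/`.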
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
Run the experiments:
|
| 117 |
+
|
| 118 |
+
```
|
| 119 |
+
python ./code/Lot-sizing/experiments.py
|
| 120 |
+
python "./code/Lake application/experiments.py"
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
Generate the tables:
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
```
|
| 127 |
+
python ./code/Lot-sizing/generate_tables.py
|
| 128 |
+
python "./code/Lake application/generate_tables.py"
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
Plot the figures:
|
| 132 |
+
|
| 133 |
+
```
|
| 134 |
+
python ./code/Lot-sizing/plot_figure.py
|
| 135 |
+
python "./code/Lake application/plot_figure.py"
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
You can find the results of the experiments in the results directories in both Lot-sizing and Lake application directories.
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
## Reproducing Results
|
| 143 |
+
To reproduce the results in the logs and results folders, you would need to run the experiments with the same hyperparameters and seeds.
|
| 144 |
+
|
| 145 |
+
Please note that due to the stochastic nature of the environments and training process, the results might not be identical, but they should be within a similar range.
|
| 146 |
+
|
| 147 |
+
## Contact
|
| 148 |
+
For any additional questions, you can reach me at email@example.com
|
code/Lake application/__pycache__/PDPPO.cpython-38.pyc
ADDED
|
Binary file (7.56 kB). View file
|
|
|
code/Lake application/__pycache__/PPO.cpython-38.pyc
ADDED
|
Binary file (7.19 kB). View file
|
|
|
code/Lake application/agents/PDPPO one critic.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Wed Mar 1 00:43:49 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import torch.nn.init as init
|
| 12 |
+
from torch.distributions import MultivariateNormal
|
| 13 |
+
from torch.distributions import Categorical
|
| 14 |
+
|
| 15 |
+
################################## set device ##################################
# Select the torch device once at import time; the classes below read the
# module-level name `device` when they create or move tensors.
print("============================================================================================")
# Default to the CPU and switch to the first CUDA device when one is available.
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    # Release cached GPU memory before any network is allocated.
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
print("============================================================================================")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
################################## PDPPO Policy ##################################
|
| 29 |
+
class RolloutBuffer:
    """Plain container for the transitions collected between two policy updates.

    Each attribute is a Python list that the agent appends to step by step;
    entries at the same index are meant to describe the same transition.
    """

    def __init__(self):
        self.actions = []
        self.states = []
        self.post_states = []
        self.logprobs = []
        self.rewards = []
        self.state_values = []
        self.state_values_post = []
        self.is_terminals = []

    def clear(self):
        """Empty every list in place.

        The list objects themselves are kept (not rebound), so any external
        reference to one of them observes the emptied list.
        """
        for storage in (
            self.actions,
            self.states,
            self.post_states,
            self.logprobs,
            self.rewards,
            self.state_values,
            self.state_values_post,
            self.is_terminals,
        ):
            del storage[:]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class ActorCritic(nn.Module):
    """Actor and critic networks for PDPPO.

    Continuous action spaces use a tanh MLP that outputs the mean of a
    Gaussian with a fixed diagonal covariance. Discrete action spaces use a
    two-layer ReLU trunk (``fc1``, ``fc2``) followed by a linear head whose
    width is ``action_dim.nvec.sum()``; the head output is split into
    ``len(action_dim.nvec)`` equally sized categorical distributions.

    Args:
        state_dim: Size of the state vector fed to actor and critic.
        action_dim: An ``int`` for continuous action spaces; for discrete
            ones an object exposing ``nvec`` (presumably a gym
            ``MultiDiscrete`` space, to be confirmed against the caller).
        has_continuous_action_space: Selects which actor is built.
        action_std_init: Initial standard deviation of the Gaussian policy
            (read only in the continuous case).
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        # actor
        if has_continuous_action_space:
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh()
            )
        else:
            self.fc1 = nn.Linear(state_dim, 128)
            self.fc2 = nn.Linear(128, 128)
            # nvec.sum() is a numpy integer; nn.Linear is given a plain int.
            self.actor = nn.Linear(128, int(self.action_dim.nvec.sum()))

        # critic
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        """Not used; call ``act`` or ``evaluate`` instead."""
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the Gaussian variance; only warns for discrete policies."""
        if self.has_continuous_action_space:
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def _discrete_action_probs(self, state):
        """Run the discrete actor and return a softmax over the whole head output."""
        hidden = nn.functional.relu(self.fc2(nn.functional.relu(self.fc1(state))))
        logits = self.actor(hidden)
        return nn.functional.softmax(logits, dim=-1)

    def act(self, state, tau):
        """Sample an action for a single state.

        Args:
            state: State tensor (unbatched in the discrete case, since the
                head output is reshaped to ``(len(nvec), -1)``).
            tau: Accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(action, action_logprob)``, both detached.
        """
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(action_mean, cov_mat)
        else:
            action_probs = self._discrete_action_probs(state)
            # One row per action component; Categorical renormalises each row.
            # The reshape only works when all entries of nvec are equal.
            dist = Categorical(action_probs.view(len(self.action_dim.nvec), -1))

        action = dist.sample()
        action_logprob = dist.log_prob(action)

        return action.detach(), action_logprob.detach()

    def evaluate(self, state, post_state, action, tau):
        """Score a batch of stored actions under the current policy.

        Args:
            state: Batch of pre-decision states used by the actor.
            post_state: Batch of post-decision states used by the critic.
            action: Batch of actions taken in ``state``.
            tau: Accepted for interface compatibility; not read here.

        Returns:
            Tuple ``(action_logprobs, state_values, dist_entropy)`` where
            ``state_values`` is the critic output for ``post_state``.
        """
        if self.has_continuous_action_space:
            action_mean = self.actor(state)

            action_var = self.action_var.expand_as(action_mean)
            cov_mat = torch.diag_embed(action_var).to(device)
            dist = MultivariateNormal(action_mean, cov_mat)

            # For Single Action Environments.
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            action_probs = self._discrete_action_probs(state)
            # (batch, n_components, n_choices): one categorical per component.
            dist = Categorical(action_probs.view(state.shape[0], len(self.action_dim.nvec), -1))

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        # The critic values the post-decision state, not the raw state.
        state_values = self.critic(post_state)

        return action_logprobs, state_values, dist_entropy
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
class PDPPO:
    """Post-Decision PPO agent with a single critic evaluated on post-decision states.

    Args:
        state_dim: Size of the state vector.
        action_dim: Action-space description forwarded to ``ActorCritic``.
        lr_actor: Learning rate of the actor parameters.
        lr_critic: Learning rate of the critic parameters.
        gamma: Discount factor for the Monte Carlo returns.
        K_epochs: Number of optimisation passes per ``update`` call.
        eps_clip: PPO clipping range.
        env: Environment; ``get_post_state`` and ``select_action`` read
            ``n_items``, ``n_machines``, ``setup_costs``, ``setup_loss`` and
            ``machine_production_matrix`` from it.
        has_continuous_action_space: Selects the continuous or discrete policy.
        tau: Stored and forwarded to ``ActorCritic.evaluate``.
        action_std_init: Initial action std for continuous policies.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.tau = tau
        self.env = env
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)

        # In the discrete case the actor is fc1 -> fc2 -> actor. Only `actor`
        # used to be handed to the optimizer, which left the two hidden layers
        # frozen at their random initialisation.
        actor_params = list(self.policy.actor.parameters())
        if not has_continuous_action_space:
            actor_params += list(self.policy.fc1.parameters())
            actor_params += list(self.policy.fc2.parameters())

        self.optimizer = torch.optim.Adam([
            {'params': actor_params, 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
        ], weight_decay=0.001)

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action std on both policies; only warns for discrete policies."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action std by ``action_std_decay_rate``, never below ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if self.action_std <= min_action_std:
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)
        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply the deterministic effect of ``action`` (setup change and production).

        ``machine_setup`` and ``inventory_level`` are modified in place, so
        callers that need the originals must pass copies.

        Args:
            action: Per-machine choice; ``0`` means idle, ``k > 0`` selects
                item ``k - 1``.
            machine_setup: Current setup of each machine.
            inventory_level: Current inventory of each item.

        Returns:
            Tuple ``(machine_setup, inventory_level, setup_costs)`` where
            ``setup_costs`` is a float array with one entry per machine.
        """
        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        for m in range(self.env.n_machines):
            if action[m] != 0:  # the machine is not idle
                # 1. Change the setup if needed; a change costs money and
                #    part of this period's production.
                if machine_setup[m] != action[m]:
                    setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
                    setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
                machine_setup[m] = action[m]
                # 2. Production, reduced by the setup loss (zero if unchanged).
                production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
                inventory_level[action[m] - 1] += production
            else:
                machine_setup[m] = 0
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state, tau):
        """Sample an action from the old policy and record the step in the buffer.

        Args:
            state: Current observation, convertible by ``torch.FloatTensor``.
            tau: Forwarded to ``ActorCritic.act``.

        Returns:
            The sampled action as a numpy array (flattened in the continuous case).
        """
        if self.has_continuous_action_space:
            with torch.no_grad():
                state = torch.FloatTensor(state).to(device)
                # act() returns (action, logprob); the value estimate comes
                # from the critic, it is not a third return value of act().
                action, action_logprob = self.policy_old.act(state, tau)
                state_val = self.policy_old.critic(state)

            # NOTE(review): no post-decision state is recorded in this branch,
            # while update() stacks buffer.post_states; continuous rollouts
            # therefore still cannot be consumed by update() as written.
            self.buffer.states.append(state)
            self.buffer.actions.append(action)
            self.buffer.logprobs.append(action_logprob)
            self.buffer.state_values.append(state_val)

            return action.detach().cpu().numpy().flatten()

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state, tau)

        # State layout used here: [inventory (n_items) | machine setup (n_machines) | ...].
        n_items = self.env.n_items
        setup_end = n_items + self.env.n_machines
        machine_setup, inventory_level, _ = self.get_post_state(
            action,
            state[n_items:setup_end].clone(),
            state[0:n_items].clone(),
        )

        # post_state is a clone of `state`, so it is already a float tensor
        # on `device`.
        post_state = state.clone()
        post_state[n_items:setup_end] = machine_setup.clone()
        post_state[0:n_items] = inventory_level.clone()

        self.buffer.states.append(state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(post_state)

        self.buffer.state_values.append(state_val)

        # .cpu() keeps this working when the policy lives on a CUDA device.
        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` clipped-PPO steps on the buffered rollout, then clear it."""
        # Monte Carlo estimate of returns, accumulated backwards in time and
        # restarted at every terminal step.
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()

        # Normalizing the rewards
        rewards = torch.tensor(returns, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)

        # calculate advantages
        advantages = rewards.detach() - old_state_values.detach()

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_post_states, old_actions, self.tau)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages.unsqueeze(1)

            # final loss of clipped objective PDPPO
            loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(state_values, rewards) - 0.012 * dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the old policy's weights to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policies."""
        # Read the checkpoint once, mapping every tensor to CPU storage.
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
|
code/Lake application/agents/PDPPO two critics.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Wed Mar 1 00:43:49 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import torch.nn.init as init
|
| 12 |
+
from torch.distributions import MultivariateNormal
|
| 13 |
+
from torch.distributions import Categorical
|
| 14 |
+
|
| 15 |
+
################################## set device ##################################
# Select the torch device once at import time; the classes below read the
# module-level name `device` when they create or move tensors.
print("============================================================================================")
# Default to the CPU and switch to the first CUDA device when one is available.
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    # Release cached GPU memory before any network is allocated.
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
print("============================================================================================")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
################################## PDPPO Policy ##################################
|
| 29 |
+
class RolloutBuffer:
    """Plain container for the transitions collected between two policy updates.

    Each attribute is a Python list that the agent appends to step by step;
    entries at the same index are meant to describe the same transition.
    """

    def __init__(self):
        self.actions = []
        self.states = []
        self.post_states = []
        self.logprobs = []
        self.rewards = []
        self.state_values = []
        self.state_values_post = []
        self.is_terminals = []

    def clear(self):
        """Empty every list in place.

        The list objects themselves are kept (not rebound), so any external
        reference to one of them observes the emptied list.
        """
        for storage in (
            self.actions,
            self.states,
            self.post_states,
            self.logprobs,
            self.rewards,
            self.state_values,
            self.state_values_post,
            self.is_terminals,
        ):
            del storage[:]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class ActorCritic(nn.Module):
    """Policy network with two value heads.

    ``critic`` scores the ordinary state and ``critic_post`` scores the
    post-decision state. The actor is either a Gaussian policy with a fixed
    diagonal covariance (continuous actions) or a set of categorical heads,
    one per entry of ``action_dim.nvec`` (discrete actions).

    Parameters
    ----------
    state_dim : int
        Size of the state vector fed to actor and critics.
    action_dim : int or object exposing ``nvec``
        Number of action components for the continuous policy; for the
        discrete policy an object whose ``nvec`` lists the number of choices
        per head (presumably a gym ``MultiDiscrete`` space - confirm with caller).
    has_continuous_action_space : bool
        Selects the Gaussian (True) or categorical (False) policy.
    action_std_init : float
        Initial standard deviation of the Gaussian policy.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        if has_continuous_action_space:
            # Fixed (non-learned) variance, one entry per action component.
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh()
            )
        else:
            # Two ReLU hidden layers followed by one logit per (head, choice).
            self.fc1 = nn.Linear(state_dim, 128)
            self.fc2 = nn.Linear(128, 128)
            self.actor = nn.Linear(128, self.action_dim.nvec.sum())

        # Value heads share an architecture but not their weights.
        self.critic = self._build_value_net(state_dim)
        self.critic_post = self._build_value_net(state_dim)

    @staticmethod
    def _build_value_net(state_dim):
        """Return a fresh 2-hidden-layer tanh MLP mapping a state to a scalar."""
        return nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def _discrete_probs(self, state):
        """Softmax (over the last axis) of the discrete actor's raw outputs."""
        hidden = nn.functional.relu(self.fc1(state))
        hidden = nn.functional.relu(self.fc2(hidden))
        return nn.functional.softmax(self.actor(hidden), dim=-1)

    def forward(self, state):
        # The module is only used through act() / evaluate().
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the Gaussian policy's variance; only warns for discrete policies."""
        if not self.has_continuous_action_space:
            rule = "--------------------------------------------------------------------------------------------"
            print(rule)
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print(rule)
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def act(self, state, tau):
        """Sample an action for ``state``.

        ``tau`` is accepted for interface compatibility and is not used.
        Returns the detached action and its detached log-probability.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            covariance = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(mean, covariance)
        else:
            # One row of probabilities per head; Categorical renormalises each row.
            head_count = len(self.action_dim.nvec)
            dist = Categorical(self._discrete_probs(state).view(head_count, -1))

        action = dist.sample()
        return action.detach(), dist.log_prob(action).detach()

    def evaluate(self, state, post_state, action, tau):
        """Score stored ``action`` values under the current policy.

        ``tau`` is accepted for interface compatibility and is not used.
        Returns ``(log_probs, state_values, post_state_values, entropy)``.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            variance = self.action_var.expand_as(mean)
            dist = MultivariateNormal(mean, torch.diag_embed(variance).to(device))

            # For Single Action Environments.
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            probs = self._discrete_probs(state)
            dist = Categorical(probs.view(state.shape[0], len(self.action_dim.nvec), -1))

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        return action_logprobs, self.critic(state), self.critic_post(post_state), dist_entropy
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
class PDPPO:
    """PPO agent that uses two critics: one on the state, one on the post-decision state.

    The advantage baseline and the value loss both use the element-wise
    minimum of the two critics' estimates.

    Parameters
    ----------
    state_dim, action_dim, has_continuous_action_space, action_std_init
        Forwarded to ``ActorCritic``.
    lr_actor, lr_critic : float
        Adam learning rates for the actor and for both critics.
    gamma : float
        Discount factor for the Monte Carlo returns.
    K_epochs : int
        Number of optimisation passes over the buffer per ``update`` call.
    eps_clip : float
        PPO clipping range for the probability ratio.
    env
        Environment; ``get_post_state`` reads ``n_machines``, ``setup_costs``,
        ``setup_loss`` and ``machine_production_matrix`` from it, and
        ``select_action`` reads ``n_items`` / ``n_machines``.
    tau
        Stored and passed through to the policy (the policy ignores it).
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.tau = tau
        self.env = env
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)

        # The discrete actor is fc1 -> fc2 -> actor. All three must be handed
        # to the optimizer, otherwise the hidden layers stay at their random
        # initial weights for the whole run.
        actor_params = list(self.policy.actor.parameters())
        for hidden_name in ('fc1', 'fc2'):
            hidden_layer = getattr(self.policy, hidden_name, None)
            if hidden_layer is not None:
                actor_params.extend(hidden_layer.parameters())

        self.optimizer = torch.optim.Adam([
            {'params': actor_params, 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic}
        ], weight_decay=0.001)

        # Frozen copy used for acting; refreshed at the end of every update().
        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the Gaussian policy's standard deviation on both policy copies."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action std by ``action_std_decay_rate``, never below ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply the deterministic part of ``action`` (setup change + production).

        ``machine_setup`` and ``inventory_level`` are modified in place, so
        callers pass copies. Action ``0`` means the machine is idle; action
        ``k > 0`` produces item ``k - 1``.

        Returns ``(machine_setup, inventory_level, setup_costs)``.
        """
        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        # if we are just changing the setup, we use the setup cost matrix with the corresponding position given by the actual setup and the new setup
        for m in range(self.env.n_machines):
            if action[m] != 0:  # if the machine is not idle
                # 1. IF NEEDED CHANGE SETUP
                if machine_setup[m] != action[m]:
                    setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
                    setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
                machine_setup[m] = action[m]
                # 2. PRODUCTION (reduced by whatever the setup change consumed)
                production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
                inventory_level[action[m] - 1] += production
            else:
                machine_setup[m] = 0
        # return the new machine_setup_inventory_level and the setup_cost
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state, tau):
        """Sample an action from the old policy and record the transition in the buffer.

        Returns the action as a NumPy array.
        """
        if self.has_continuous_action_space:
            with torch.no_grad():
                state = torch.FloatTensor(state).to(device)
                # act() returns (action, log_prob); the value estimate comes
                # from the critic, not from act().
                action, action_logprob = self.policy_old.act(state, tau)
                state_val = self.policy_old.critic(state)

            self.buffer.states.append(state)
            self.buffer.actions.append(action)
            self.buffer.logprobs.append(action_logprob)
            self.buffer.state_values.append(state_val)
            # NOTE(review): no post-decision state / value is recorded on this
            # branch, while update() stacks buffer.post_states and
            # buffer.state_values_post - confirm how continuous actions are
            # meant to be trained.

            return action.detach().cpu().numpy().flatten()

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state, tau)

        # State layout used here: [inventory (n_items) | machine setup (n_machines) | ...].
        n_items = self.env.n_items
        setup_end = n_items + self.env.n_machines
        machine_setup, inventory_level, setup_cost = self.get_post_state(
            action, state[n_items:setup_end].clone(), state[0:n_items].clone())

        post_state = state.clone()
        post_state[n_items:setup_end] = machine_setup.clone()
        post_state[0:n_items] = inventory_level.clone()
        # Already a tensor: just make sure of dtype/device instead of going
        # through the FloatTensor constructor, which rejects CUDA tensors.
        post_state = post_state.to(device=device, dtype=torch.float32)

        self.buffer.states.append(state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(state)
            state_val_post = self.policy_old.critic_post(post_state)

        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        # .cpu() so this also works when the policy lives on a GPU.
        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` clipped-PPO steps on the buffered rollout, then clear the buffer."""
        # Monte Carlo estimate of returns (walk backwards, reset at episode ends)
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            rewards.insert(0, discounted_reward)

        # Normalizing the rewards
        rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
        old_state_values_post = torch.squeeze(torch.stack(self.buffer.state_values_post, dim=0)).detach().to(device)

        # calculate advantages against the more pessimistic of the two critics
        advantages = rewards.detach() - torch.min(old_state_values.detach(), old_state_values_post.detach()).detach()

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, state_values_post, dist_entropy = self.policy.evaluate(old_states, old_post_states, old_actions, self.tau)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages.unsqueeze(1)

            # final loss of clipped objective PDPPO
            loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(torch.min(state_values, state_values_post.squeeze()), rewards) - 0.012 * dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the acting policy's weights to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policy copies."""
        # Read the file once; map_location keeps tensors on the CPU while
        # loading, and load_state_dict copies them into each module.
        weights = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(weights)
        self.policy.load_state_dict(weights)
|
code/Lake application/agents/PDPPO.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Wed Mar 1 00:43:49 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import torch.nn.init as init
|
| 12 |
+
from torch.distributions import MultivariateNormal
|
| 13 |
+
from torch.distributions import Categorical
|
| 14 |
+
|
| 15 |
+
################################## set device ##################################
|
| 16 |
+
# Choose where tensors and networks live for the rest of the module: the first
# CUDA GPU if torch can see one, else the CPU. The result is printed between
# two banner lines.
print("============================================================================================")
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # Free cached, currently unused GPU memory before any allocation is made here.
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
print("============================================================================================")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
################################## PDPPO Policy ##################################
|
| 29 |
+
class RolloutBuffer:
    """Per-step storage for one rollout, held as parallel Python lists."""

    # Names of the lists kept by the buffer, in creation order.
    _FIELDS = (
        "actions",
        "states",
        "post_states",
        "logprobs",
        "rewards",
        "state_values",
        "state_values_post",
        "is_terminals",
    )

    def __init__(self):
        # One independent empty list per field.
        for field in self._FIELDS:
            setattr(self, field, [])

    def clear(self):
        """Remove all stored entries while keeping the same list objects."""
        for field in self._FIELDS:
            getattr(self, field).clear()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class ActorCritic(nn.Module):
    """Policy network with a state critic and a post-decision-state critic.

    Parameters
    ----------
    state_dim : int
        Size of the state vector fed to the actor and both critics.
    action_dim : int
        Number of discrete actions, or number of action components for the
        continuous (Gaussian) policy.
    has_continuous_action_space : bool
        Selects the Gaussian (True) or categorical (False) policy.
    action_std_init : float
        Initial standard deviation of the Gaussian policy.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()

        self.has_continuous_action_space = has_continuous_action_space

        # actor
        if has_continuous_action_space:
            self.action_dim = action_dim
            # Fixed (non-learned) variance, one entry per action component.
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh()
            )
        else:
            # Outputs one raw score per action; softmax is applied by the caller.
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 128),
                nn.Tanh(),
                nn.Linear(128, 128),
                nn.Tanh(),
                nn.Linear(128, action_dim)
            )

        # critic (value of the state)
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

        # critic_post (value of the post-decision state)
        self.critic_post = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        # The module is only used through act() / evaluate().
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the Gaussian policy's variance; only warns for discrete policies."""
        if self.has_continuous_action_space:
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def _categorical(self, state):
        """Categorical distribution over actions from the softmaxed actor output."""
        logits = self.actor(state)
        return Categorical(nn.functional.softmax(logits, dim=-1))

    def act(self, state, tau):
        """Sample an action for ``state``.

        ``tau`` is accepted for interface compatibility and is not used.
        Returns the detached action and its detached log-probability.
        """
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(action_mean, cov_mat)
        else:
            dist = self._categorical(state)

        action = dist.sample()
        action_logprob = dist.log_prob(action)

        return action.detach(), action_logprob.detach()

    def evaluate(self, state, post_state, action, tau):
        """Score stored ``action`` values under the current policy.

        Uses the same distribution family as ``act`` so that the PPO
        probability ratio compares like with like. ``tau`` is accepted for
        interface compatibility and is not used.

        Returns ``(log_probs, state_values, post_state_values, entropy)``.
        """
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            action_var = self.action_var.expand_as(action_mean)
            dist = MultivariateNormal(action_mean, torch.diag_embed(action_var))
            # Stored actions may carry an extra singleton axis; line them up
            # with the means (one action vector per state).
            action_logprobs = dist.log_prob(action.reshape(action_mean.shape))
        else:
            dist = self._categorical(state)
            # Transposing before and after keeps the caller's action layout:
            # an (N, 1) batch of action indices yields (N, 1) log-probs.
            action_logprobs = dist.log_prob(action.T).T

        dist_entropy = dist.entropy()
        state_values = self.critic(state)
        state_values_post = self.critic_post(post_state)

        return action_logprobs, state_values, state_values_post, dist_entropy
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
class PDPPO:
    """PPO agent with an additional critic for the post-decision state.

    The advantage baseline and the value loss both use the element-wise
    minimum of the state critic and the post-decision-state critic.

    Parameters
    ----------
    state_dim, action_dim, has_continuous_action_space, action_std_init
        Forwarded to ``ActorCritic``.
    lr_actor, lr_critic : float
        Adam learning rates for the actor and for both critics.
    gamma : float
        Discount factor for the Monte Carlo returns.
    K_epochs : int
        Number of optimisation passes over the buffer per ``update`` call.
    eps_clip : float
        PPO clipping range for the probability ratio.
    env
        Environment; must provide ``get_post_decision_state(state_index, action)``.
    tau
        Stored and passed through to the policy (the policy ignores it).
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.tau = tau
        self.env = env
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic}
        ], weight_decay=0.0002)

        # Frozen copy used for acting; refreshed at the end of every update().
        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the Gaussian policy's standard deviation on both policy copies."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action std by ``action_std_decay_rate``, never below ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def select_action(self, state, tau):
        """Sample an action from the old policy and record the transition in the buffer.

        ``state`` is presumably a one-hot NumPy vector (its argmax is used as
        the state index for the environment) - confirm with the caller.
        Returns the action as a NumPy array.
        """
        state_int = state.copy()

        with torch.no_grad():
            state = torch.tensor(state).to(device)
            state = state.float()
            # Turn the 1-D state into a single-row batch.
            state = torch.unsqueeze(state, 1).T
            action, action_logprob = self.policy_old.act(state, tau)

        post_state = self.env.get_post_decision_state(np.argmax(state_int), action.clone())

        # Encode the post-decision state index as a one-hot row like `state`.
        binary_array = np.zeros(state.shape[1], dtype=int)
        binary_array[post_state] = 1

        post_state = torch.tensor(binary_array).to(device)
        post_state = post_state.float()
        post_state = torch.unsqueeze(post_state, 1).T

        self.buffer.states.append(state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(state)
            # Post-decision states belong to critic_post: that is the head
            # evaluate() uses (and update() trains) for them.
            state_val_post = self.policy_old.critic_post(post_state)

        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` clipped-PPO steps on the buffered rollout, then clear the buffer."""
        # Monte Carlo estimate of returns (walk backwards, reset at episode ends)
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            rewards.insert(0, discounted_reward)

        # Normalizing the rewards
        rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=1)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0).detach().to(device), 1)
        old_actions = torch.stack(self.buffer.actions, dim=0).detach().to(device)
        old_logprobs = torch.stack(self.buffer.logprobs, dim=0).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
        old_state_values_post = torch.squeeze(torch.stack(self.buffer.state_values_post, dim=0)).detach().to(device)

        # calculate advantages against the more pessimistic of the two critics
        advantages = rewards.detach() - torch.min(old_state_values.detach(), old_state_values_post.detach()).detach()

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, state_values_post, dist_entropy = self.policy.evaluate(old_states, old_post_states, old_actions, self.tau)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)
            state_values_post = torch.squeeze(state_values_post)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages.unsqueeze(1)

            # final loss of clipped objective PDPPO
            loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(torch.min(state_values, state_values_post), rewards) - 0.012 * dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the acting policy's weights to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policy copies."""
        # Read the file once; map_location keeps tensors on the CPU while
        # loading, and load_state_dict copies them into each module.
        weights = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(weights)
        self.policy.load_state_dict(weights)
|
code/Lake application/agents/PDPPOAgent two critics.py
ADDED
|
@@ -0,0 +1,394 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os # Provides a way of interacting with the file system
|
| 2 |
+
import sys
|
| 3 |
+
import glob # Helps find all the pathnames matching a specified pattern according to the rules used by the Unix shell
|
| 4 |
+
import time # Provides various time-related functions
|
| 5 |
+
from datetime import datetime # Module that supplies classes for working with dates and times
|
| 6 |
+
|
| 7 |
+
import numpy as np # A library for the Python programming language, adding support for large, multi-dimensional arrays and matrices
|
| 8 |
+
import gym # Provides a collection of test problems — environments — that you can use to work out your reinforcement learning algorithms
|
| 9 |
+
import torch # A machine learning framework that provides tensor computation (like NumPy) with strong acceleration on GPUs
|
| 10 |
+
import copy # Provides a module for shallow and deep copying operations
|
| 11 |
+
import matplotlib.pyplot as plt # A plotting library for the Python programming language and its numerical mathematics extension NumPy
|
| 12 |
+
import matplotlib.patches as mpatches # Provides a way of adding a colored patch to the plot, for example to create a legend
|
| 13 |
+
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
|
| 14 |
+
AGENTS_DIR = os.path.join(BASE_DIR,'agents')
|
| 15 |
+
sys.path.append(AGENTS_DIR)
|
| 16 |
+
from PDPPO import PDPPO
|
| 17 |
+
from envs import *
|
| 18 |
+
import copy
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class SimplePlantSB(SimplePlant):
    """``SimplePlant`` variant that declares explicit gym spaces.

    The action space is a ``MultiDiscrete`` with ``n_items + 1`` choices for
    each machine. Observations are either the dict returned by
    ``SimplePlant._next_observation`` (``dict_obs=True``) or a flat array
    holding the inventory levels followed by the machine setups (default).
    """

    def __init__(self, settings, stoch_model):
        """Build the environment and declare its action/observation spaces.

        Parameters
        ----------
        settings : mapping forwarded to ``SimplePlant``. The optional key
            ``'dict_obs'`` selects dict observations (defaults to ``False``).
        stoch_model : forwarded unchanged to ``SimplePlant``.
        """
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional; only a missing key falls back to False.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items + 1] * self.n_machines
        )

        if self.dict_obs:
            inventory_high = (
                np.ones(self.n_items)
                * (settings['max_inventory_level'][0] + 1)
                * self.n_items
            )
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items), high=inventory_high
                ),
                'machine_setup': gym.spaces.MultiDiscrete(
                    [self.n_items + 1] * self.n_machines
                ),
            })
        else:
            # Flat layout: [inventory levels..., machine setups...].
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items + self.n_machines),
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level),  # inventory bounds
                        np.ones(self.n_machines) * (self.n_items + 1),  # setup bounds
                    ]
                ),
                dtype=np.int32,
            )

    def step(self, action):
        """Execute one time step within the environment.

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : observation produced by ``_next_observation``
        reward : negated sum of every entry of the cost dict returned by
            ``_take_action``
        done : ``True`` once ``current_step`` equals ``self.T``
        dict : the cost dict itself, returned as the info object
        """
        # Remember the inventory as it was before the action is applied.
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(
            action, self.machine_setup, self.inventory_level, self.demand
        )
        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """Return the parent observation in the configured format.

        A dict observation is flattened to
        ``[inventory_level..., machine_setup...]`` unless ``dict_obs`` is set.

        Raises
        ------
        TypeError
            If ``dict_obs`` is set but the parent class returned a non-dict
            observation.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items entries
                        obs['machine_setup'],    # n_machines entries
                    )
                )
        elif self.dict_obs:
            # The original `raise('...')` raised a str, which itself fails with
            # a generic TypeError; keep the type but carry a useful message.
            raise TypeError(
                'dict_obs is True but the observation is not a dict; '
                'change dict_obs to False'
            )
        return obs
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class PDPPOAgent():
    """Wrap a ``PDPPO`` learner with training, logging and checkpointing.

    The agent rebuilds the given environment as a ``SimplePlantSB`` and keeps
    every hyperparameter as an instance attribute. Logs and checkpoints are
    written to ``<parent of this file's directory>/logs/<experiment>_PDPPO/``.
    """

    def __init__(self, env: SimplePlant, settings: dict):
        """Create the wrapped environment, hyperparameters and PDPPO learner.

        Parameters
        ----------
        env : environment whose ``settings`` and ``stoch_model`` are used to
            build the internal ``SimplePlantSB``.
        settings : must contain ``'model_name'``, ``'experiment_name'`` and
            ``'parallelization'``; ``'dict_obs'`` is optional (default False).
        """
        self.env = SimplePlantSB(env.settings, env.stoch_model)
        self.last_inventory = env.inventory_level
        self.model_name = settings['model_name']
        self.experiment_name = settings['experiment_name']
        self.parallelization = settings['parallelization']
        # 'dict_obs' is optional; only a missing key falls back to False.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False

        self.POSSIBLE_STATES = self.env.n_items + 1
        self.env.cost_to_reward = True
        self.epsilon = 0

        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # Use the logs folder that sits next to the agents package.
        self.LOG_DIR = os.path.join(project_root, 'logs')

        print("============================================================================================")

        ####### environment hyperparameters ######
        self.has_continuous_action_space = False  # continuous action space; else discrete

        self.max_ep_len = 1000  # max timesteps in one episode
        self.tau = 1
        self.tau_start = 1.0  # initial value of tau
        self.tau_end = 2.0  # final value of tau

        # Note: print/log frequencies should be larger than max_ep_len.
        self.print_freq = self.max_ep_len * 4  # print avg reward in the interval (in num timesteps)
        self.log_freq = self.max_ep_len * 4  # log avg reward in the interval (in num timesteps)
        self.save_model_freq = int(4999)  # save model frequency (in num timesteps)

        self.action_std = 0.6  # starting std for action distribution
        self.action_std_decay_rate = 0.05  # amount subtracted from action_std at each decay
        self.min_action_std = 0.1  # decay stops once action_std reaches this value
        self.action_std_decay_freq = int(2.5e5)  # action_std decay frequency (in num timesteps)

        ################ PDPPO hyperparameters ################
        self.update_timestep = self.max_ep_len * 4  # update policy every n timesteps
        self.K_epochs = 60  # update policy for K epochs in one PDPPO update

        self.eps_clip = 0.2  # clip parameter for PDPPO
        self.gamma = 0.99  # discount factor

        self.lr_actor = 0.00055  # learning rate for actor network
        self.lr_critic = 0.001  # learning rate for critic network

        self.random_seed = 0  # set random seed if required (0 = no random seed)
        # Change this to avoid overwriting weights in the same experiment folder.
        self.run_num_pretrained = 0

        print("training environment name : " + self.experiment_name + '_PDPPO')

        # state space dimension
        self.state_dim = self.env.observation_space.shape[0]

        # action space dimension (the whole space object when discrete)
        if self.has_continuous_action_space:
            self.action_dim = self.env.action_space.shape[0]
        else:
            self.action_dim = self.env.action_space

        self.pdppo_agent = PDPPO(
            self.state_dim, self.action_dim, self.lr_actor, self.lr_critic,
            self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env),
            self.has_continuous_action_space, self.tau, self.action_std,
        )

    ################################### Training ###################################
    def learn(self, n_episodes=100000):
        """Train ``self.pdppo_agent`` and write a CSV reward log.

        Parameters
        ----------
        n_episodes : despite the name, this is the timestep budget; training
            stops once the global timestep counter exceeds it.
        """
        self.max_training_timesteps = n_episodes

        env = self.env

        # Logs and checkpoints share one per-experiment folder.
        log_dir = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO/'
        os.makedirs(log_dir, exist_ok=True)

        # The run number is the count of files already in the folder, so logs
        # from earlier runs are not overwritten.
        run_num = len(next(os.walk(log_dir))[2])
        log_f_name = log_dir + '/PDPPO_' + self.experiment_name + "_log_" + str(run_num) + ".csv"

        print("current logging run number for " + self.experiment_name + " : ", run_num)
        print("logging at : " + log_f_name)

        checkpoint_path = log_dir + "PDPPO_{}_{}_{}.pth".format(
            self.experiment_name, self.random_seed, self.run_num_pretrained
        )
        print("save checkpoint path : " + checkpoint_path)

        self._print_hyperparameters()

        print("============================================================================================")

        # The original built a second, never-used PDPPO here (passing
        # action_std in the tau slot). Training has always used
        # self.pdppo_agent, so the old attribute is kept only as an alias.
        self.PDPPO_agent = self.pdppo_agent

        # track total training time
        start_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)

        print("============================================================================================")

        # The context manager closes the log even if training raises.
        with open(log_f_name, "w+") as log_f:
            log_f.write('episode,timestep,reward\n')
            self._run_training_loop(env, log_f, checkpoint_path, start_time)

        # print total training time
        print("============================================================================================")
        end_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)
        print("Finished training at (GMT) : ", end_time)
        print("Total training time : ", end_time - start_time)
        print("============================================================================================")

    def _print_hyperparameters(self):
        """Print the training configuration to stdout."""
        print("--------------------------------------------------------------------------------------------")
        print("max training timesteps : ", self.max_training_timesteps)
        print("max timesteps per episode : ", self.max_ep_len)
        print("model saving frequency : " + str(self.save_model_freq) + " timesteps")
        print("log frequency : " + str(self.log_freq) + " timesteps")
        print("printing average reward over episodes in last : " + str(self.print_freq) + " timesteps")
        print("--------------------------------------------------------------------------------------------")
        print("state space dimension : ", self.state_dim)
        print("action space dimension : ", self.action_dim)
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            print("Initializing a continuous action space policy")
            print("--------------------------------------------------------------------------------------------")
            print("starting std of action distribution : ", self.action_std)
            print("decay rate of std of action distribution : ", self.action_std_decay_rate)
            # Was `min_self.action_std`, an undefined name.
            print("minimum std of action distribution : ", self.min_action_std)
            print("decay frequency of std of action distribution : " + str(self.action_std_decay_freq) + " timesteps")
        else:
            print("Initializing a discrete action space policy")
        print("--------------------------------------------------------------------------------------------")
        print("PDPPO update frequency : " + str(self.update_timestep) + " timesteps")
        print("PDPPO K epochs : ", self.K_epochs)
        print("PDPPO epsilon clip : ", self.eps_clip)
        print("discount factor (self.gamma) : ", self.gamma)
        print("--------------------------------------------------------------------------------------------")
        print("optimizer learning rate actor : ", self.lr_actor)
        print("optimizer learning rate critic : ", self.lr_critic)
        if self.random_seed:
            print("--------------------------------------------------------------------------------------------")
            print("setting random seed to ", self.random_seed)

    def _annealed_tau(self, time_step, annealing_steps):
        """Return tau interpolated linearly from ``tau_start`` to ``tau_end``.

        The original expression used ``max(tau_end, ...)``, which pinned tau
        at ``tau_end`` from the first step; progress is clamped to 1 instead.
        """
        if annealing_steps <= 0:
            return self.tau_end
        progress = min(time_step / annealing_steps, 1.0)
        return self.tau_start + (self.tau_end - self.tau_start) * progress

    def _run_training_loop(self, env, log_f, checkpoint_path, start_time):
        """Run episodes until the timestep budget is spent, logging to ``log_f``."""
        # printing and logging accumulators
        print_running_reward = 0
        print_running_episodes = 0

        log_running_reward = 0
        log_running_episodes = 0

        time_step = 0
        i_episode = 0

        annealing_steps = self.max_training_timesteps  # total number of training steps

        while time_step <= self.max_training_timesteps:

            # tau is refreshed once per episode
            self.tau = self._annealed_tau(time_step, annealing_steps)

            state = env.reset()
            current_ep_reward = 0

            for _ in range(1, self.max_ep_len + 1):

                # select action with policy
                action = self.pdppo_agent.select_action(state, self.tau)
                state, reward, done, _info = env.step(action)

                # saving reward and is_terminals
                self.pdppo_agent.buffer.rewards.append(reward)
                self.pdppo_agent.buffer.is_terminals.append(done)

                time_step += 1
                current_ep_reward += reward

                # update PDPPO agent
                if time_step % self.update_timestep == 0:
                    self.pdppo_agent.update()

                # continuous action space only: decay the action std
                if self.has_continuous_action_space and time_step % self.action_std_decay_freq == 0:
                    # Was `decay_self.action_std(rate, self.action_std)`.
                    # NOTE(review): assumes PDPPO exposes
                    # decay_action_std(rate, min_std) - confirm in PDPPO.py.
                    self.pdppo_agent.decay_action_std(self.action_std_decay_rate, self.min_action_std)

                # log in logging file
                if time_step % self.log_freq == 0:
                    # average reward over the episodes finished since the last log
                    log_avg_reward = round(log_running_reward / log_running_episodes, 4)

                    log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
                    log_f.flush()

                    log_running_reward = 0
                    log_running_episodes = 0

                # printing average reward
                if time_step % self.print_freq == 0:
                    print_avg_reward = round(print_running_reward / print_running_episodes, 2)

                    print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))

                    print_running_reward = 0
                    print_running_episodes = 0

                # save model weights
                if time_step % self.save_model_freq == 0:
                    print("--------------------------------------------------------------------------------------------")
                    self.pdppo_agent.save(checkpoint_path)
                    print("Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time)
                    print("--------------------------------------------------------------------------------------------")

                # break; if the episode is over
                if done:
                    break

            print_running_reward += current_ep_reward
            print_running_episodes += 1

            log_running_reward += current_ep_reward
            log_running_episodes += 1

            i_episode += 1

    def get_action(self, state):
        """Return the policy's action for ``state`` at the current ``self.tau``.

        A dict state is flattened to ``[inventory_level..., machine_setup...]``
        unless ``dict_obs`` is set.

        Raises
        ------
        TypeError
            If ``dict_obs`` is set but ``state`` is not a dict.
        """
        if isinstance(state, dict):
            if not self.dict_obs:
                state = np.concatenate(
                    (
                        state['inventory_level'],  # n_items entries
                        state['machine_setup'],    # n_machines entries
                    )
                )
        elif self.dict_obs:
            # The original `raise('...')` raised a str, which itself fails with
            # a generic TypeError; keep the type but carry a useful message.
            raise TypeError(
                'dict_obs is True but the state is not a dict; '
                'change dict_obs to False'
            )
        return self.pdppo_agent.select_action(state, self.tau)

    def load_agent(self, path):
        """Load the checkpoint for this experiment into ``self.pdppo_agent``.

        NOTE(review): ``path`` is accepted but not used; the checkpoint
        location is derived from ``LOG_DIR``, ``experiment_name``,
        ``random_seed`` and ``run_num_pretrained``.
        """
        directory = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO' + '/'
        checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(
            self.experiment_name, self.random_seed, self.run_num_pretrained
        )
        print("loading network from : " + checkpoint_path)
        self.pdppo_agent.load(checkpoint_path)
|
code/Lake application/agents/PDPPOAgent.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os # Provides a way of interacting with the file system
|
| 2 |
+
import sys
|
| 3 |
+
import glob # Helps find all the pathnames matching a specified pattern according to the rules used by the Unix shell
|
| 4 |
+
import time # Provides various time-related functions
|
| 5 |
+
from datetime import datetime # Module that supplies classes for working with dates and times
|
| 6 |
+
|
| 7 |
+
import numpy as np # A library for the Python programming language, adding support for large, multi-dimensional arrays and matrices
|
| 8 |
+
import gym # Provides a collection of test problems — environments — that you can use to work out your reinforcement learning algorithms
|
| 9 |
+
import torch # A machine learning framework that provides tensor computation (like NumPy) with strong acceleration on GPUs
|
| 10 |
+
import copy # Provides a module for shallow and deep copying operations
|
| 11 |
+
import matplotlib.pyplot as plt # A plotting library for the Python programming language and its numerical mathematics extension NumPy
|
| 12 |
+
import matplotlib.patches as mpatches # Provides a way of adding a colored patch to the plot, for example to create a legend
|
| 13 |
+
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
|
| 14 |
+
AGENTS_DIR = os.path.join(BASE_DIR,'agents')
|
| 15 |
+
sys.path.append(AGENTS_DIR)
|
| 16 |
+
from agents.PDPPO import PDPPO
|
| 17 |
+
from envs import *
|
| 18 |
+
import copy
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class SimplePlantSB(SimplePlant):
    """``SimplePlant`` variant that declares explicit gym spaces.

    The action space is a ``MultiDiscrete`` with ``n_items + 1`` choices for
    each machine. Observations are either the dict returned by
    ``SimplePlant._next_observation`` (``dict_obs=True``) or a flat array
    holding the inventory levels followed by the machine setups (default).
    """

    def __init__(self, settings, stoch_model):
        """Build the environment and declare its action/observation spaces.

        Parameters
        ----------
        settings : mapping forwarded to ``SimplePlant``. The optional key
            ``'dict_obs'`` selects dict observations (defaults to ``False``).
        stoch_model : forwarded unchanged to ``SimplePlant``.
        """
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional; only a missing key falls back to False.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items + 1] * self.n_machines
        )

        if self.dict_obs:
            inventory_high = (
                np.ones(self.n_items)
                * (settings['max_inventory_level'][0] + 1)
                * self.n_items
            )
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items), high=inventory_high
                ),
                'machine_setup': gym.spaces.MultiDiscrete(
                    [self.n_items + 1] * self.n_machines
                ),
            })
        else:
            # Flat layout: [inventory levels..., machine setups...].
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items + self.n_machines),
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level),  # inventory bounds
                        np.ones(self.n_machines) * (self.n_items + 1),  # setup bounds
                    ]
                ),
                dtype=np.int32,
            )

    def step(self, action):
        """Execute one time step within the environment.

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : observation produced by ``_next_observation``
        reward : negated sum of every entry of the cost dict returned by
            ``_take_action``
        done : ``True`` once ``current_step`` equals ``self.T``
        dict : the cost dict itself, returned as the info object
        """
        # Remember the inventory as it was before the action is applied.
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(
            action, self.machine_setup, self.inventory_level, self.demand
        )
        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """Return the parent observation in the configured format.

        A dict observation is flattened to
        ``[inventory_level..., machine_setup...]`` unless ``dict_obs`` is set.

        Raises
        ------
        TypeError
            If ``dict_obs`` is set but the parent class returned a non-dict
            observation.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items entries
                        obs['machine_setup'],    # n_machines entries
                    )
                )
        elif self.dict_obs:
            # The original `raise('...')` raised a str, which itself fails with
            # a generic TypeError; keep the type but carry a useful message.
            raise TypeError(
                'dict_obs is True but the observation is not a dict; '
                'change dict_obs to False'
            )
        return obs
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class PDPPOAgent():
|
| 105 |
+
def __init__(self, env: SimplePlant, settings: dict):
|
| 106 |
+
self.env = env
|
| 107 |
+
|
| 108 |
+
self.model_name = settings['model_name']
|
| 109 |
+
self.experiment_name = settings['experiment_name']
|
| 110 |
+
self.parallelization = settings['parallelization']
|
| 111 |
+
try:self.dict_obs = settings['dict_obs']
|
| 112 |
+
except:self.dict_obs = False
|
| 113 |
+
|
| 114 |
+
self.POSSIBLE_STATES = self.env.observation_space.n
|
| 115 |
+
self.env.cost_to_reward = True
|
| 116 |
+
self.epsilon = 0
|
| 117 |
+
|
| 118 |
+
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 119 |
+
# Use the logs file in the root path of the main.
|
| 120 |
+
self.LOG_DIR = os.path.join(BASE_DIR,'logs')
|
| 121 |
+
|
| 122 |
+
print("============================================================================================")
|
| 123 |
+
|
| 124 |
+
####### initialize environment hyperparameters ######
|
| 125 |
+
|
| 126 |
+
self.has_continuous_action_space = False # continuous action space; else discrete
|
| 127 |
+
|
| 128 |
+
self.max_ep_len = 100 # max timesteps in one episode
|
| 129 |
+
self.tau = 1
|
| 130 |
+
self.tau_start = 1.0 # initial value of tau
|
| 131 |
+
self.tau_end = 2.0 # final value of tau
|
| 132 |
+
|
| 133 |
+
self.print_freq = self.max_ep_len * 4 # print avg reward in the interval (in num timesteps)
|
| 134 |
+
self.log_freq = self.max_ep_len * 4 # log avg reward in the interval (in num timesteps)
|
| 135 |
+
self.save_model_freq = int(4999) # save model frequency (in num timesteps)
|
| 136 |
+
|
| 137 |
+
self.action_std = 0.6 # starting std for action distribution (Multivariate Normal)
|
| 138 |
+
self.action_std_decay_rate = 0.05 # linearly decay self.action_std (self.action_std = self.action_std - self.action_std_decay_rate)
|
| 139 |
+
self.min_action_std = 0.1 # minimum self.action_std (stop decay after self.action_std <= min_self.action_std)
|
| 140 |
+
self.action_std_decay_freq = int(2.5e5) # self.action_std decay frequency (in num timesteps)
|
| 141 |
+
#####################################################
|
| 142 |
+
|
| 143 |
+
## Note : print/log frequencies should be > than self.max_ep_len
|
| 144 |
+
|
| 145 |
+
################ PDPPO hyperparameters ################
|
| 146 |
+
self.update_timestep = self.max_ep_len * 6 # update policy every n timesteps
|
| 147 |
+
self.K_epochs = 40 # update policy for K epochs in one PDPPO update
|
| 148 |
+
|
| 149 |
+
self.eps_clip = 0.21 # clip parameter for PDPPO
|
| 150 |
+
self.gamma = 0.991 # discount factor
|
| 151 |
+
|
| 152 |
+
self.lr_actor = 0.0004 # learning rate for actor network
|
| 153 |
+
self.lr_critic = 0.0012 # learning rate for critic network
|
| 154 |
+
|
| 155 |
+
self.random_seed = 0 # set random seed if required (0 = no random seed)
|
| 156 |
+
#####################################################
|
| 157 |
+
self.run_num_pretrained = 0 #### change this to prevent overwriting weights in same self.experiment_name folder
|
| 158 |
+
|
| 159 |
+
print("training environment name : " + self.experiment_name + '_PDPPO')
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
# state space dimension
|
| 164 |
+
self.state_dim = self.env.observation_space.n
|
| 165 |
+
|
| 166 |
+
# action space dimension
|
| 167 |
+
if self.has_continuous_action_space:
|
| 168 |
+
self.action_dim = self.env.action_space.n
|
| 169 |
+
else:
|
| 170 |
+
self.action_dim = self.env.action_space.n
|
| 171 |
+
|
| 172 |
+
self.pdppo_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space,self.tau, self.action_std)
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
################################### Training ###################################
|
| 176 |
+
def learn(self, n_episodes = 100000):
    """Run the PDPPO training loop, logging average rewards to a CSV file.

    Parameters
    ----------
    n_episodes : int
        Despite its name this is a *timestep* budget: it is stored in
        ``self.max_training_timesteps`` and the loop stops once the global
        timestep counter exceeds it.

    Side effects
    ------------
    * creates ``<LOG_DIR>/<experiment_name>_PDPPO/`` if needed;
    * writes ``PDPPO_<experiment_name>_log_<run_num>.csv`` in that folder;
    * saves the agent checkpoint to the same folder every
      ``self.save_model_freq`` timesteps;
    * updates ``self.tau`` and prints progress to stdout.
    """
    separator = "--------------------------------------------------------------------------------------------"
    banner = "============================================================================================"

    def one_hot(index):
        # Discrete observation index -> one-hot vector of length state_dim.
        encoded = np.zeros(self.state_dim, dtype=int)
        encoded[index] = 1
        return encoded

    ###################### logging ######################

    self.max_training_timesteps = n_episodes  # break training loop if timesteps > self.max_training_timesteps

    env = self.env

    #### log files for multiple runs are NOT overwritten
    log_dir = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO/'
    os.makedirs(log_dir, exist_ok=True)  # also creates LOG_DIR itself

    #### get number of files in log directory
    # NOTE: checkpoints are saved in this same folder, so ``run_num`` counts
    # every file in it (CSV logs and .pth checkpoints alike).
    run_num = len(next(os.walk(log_dir))[2])

    #### create new log file for each run
    log_f_name = log_dir + '/PDPPO_' + self.experiment_name + "_log_" + str(run_num) + ".csv"

    print("current logging run number for " + self.experiment_name + " : ", run_num)
    print("logging at : " + log_f_name)
    #####################################################

    ################### checkpointing ###################
    directory = self.LOG_DIR + '/' + self.experiment_name + '_PDPPO' + '/'
    os.makedirs(directory, exist_ok=True)

    checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
    print("save checkpoint path : " + checkpoint_path)
    #####################################################

    ############# print all hyperparameters #############
    print(separator)
    print("max training timesteps : ", self.max_training_timesteps)
    print("max timesteps per episode : ", self.max_ep_len)
    print("model saving frequency : " + str(self.save_model_freq) + " timesteps")
    print("log frequency : " + str(self.log_freq) + " timesteps")
    print("printing average reward over episodes in last : " + str(self.print_freq) + " timesteps")
    print(separator)
    print("state space dimension : ", self.state_dim)
    print("action space dimension : ", self.action_dim)
    print(separator)
    if self.has_continuous_action_space:
        print("Initializing a continuous action space policy")
        print(separator)
        print("starting std of action distribution : ", self.action_std)
        print("decay rate of std of action distribution : ", self.action_std_decay_rate)
        # Was ``min_self.action_std`` (NameError left by a search-and-replace).
        print("minimum std of action distribution : ", self.min_action_std)
        print("decay frequency of std of action distribution : " + str(self.action_std_decay_freq) + " timesteps")
    else:
        print("Initializing a discrete action space policy")
    print(separator)
    print("PDPPO update frequency : " + str(self.update_timestep) + " timesteps")
    print("PDPPO K epochs : ", self.K_epochs)
    print("PDPPO epsilon clip : ", self.eps_clip)
    print("discount factor (self.gamma) : ", self.gamma)
    print(separator)
    print("optimizer learning rate actor : ", self.lr_actor)
    print("optimizer learning rate critic : ", self.lr_critic)
    if self.random_seed:
        print(separator)
        print("setting random seed to ", self.random_seed)
    #####################################################

    print(banner)

    ################# training procedure ################

    # NOTE(review): this second agent is never used below (training runs on
    # ``self.pdppo_agent``) and, unlike the constructor call, it is built
    # without ``self.tau``. Kept to preserve behaviour; it looks like a
    # leftover that can probably be deleted - confirm.
    self.PDPPO_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space, self.action_std)

    # track total training time
    start_time = datetime.now().replace(microsecond=0)
    print("Started training at (GMT) : ", start_time)

    print(banner)

    # printing and logging variables
    print_running_reward = 0
    print_running_episodes = 0

    log_running_reward = 0
    log_running_episodes = 0

    time_step = 0
    i_episode = 0

    # tau is annealed linearly over the whole timestep budget
    annealing_steps = self.max_training_timesteps
    anneal_rate = (self.tau_end - self.tau_start) / annealing_steps if annealing_steps else 0.0

    # ``with`` guarantees the log file is closed even if training raises.
    with open(log_f_name, "w+") as log_f:
        log_f.write('episode,timestep,reward\n')

        # training loop
        while time_step <= self.max_training_timesteps:

            # NOTE(review): ``max`` clamps correctly only when tau decays
            # (tau_end < tau_start). If tau is meant to increase, this
            # expression always evaluates to tau_end - confirm intent.
            self.tau = max(self.tau_end, self.tau_start + anneal_rate * time_step)

            state = one_hot(env.reset())
            current_ep_reward = 0

            for _ in range(self.max_ep_len):

                # select action with policy
                action = self.pdppo_agent.select_action(state, self.tau)
                state, reward, done, _ = env.step(action.item())
                state = one_hot(state)

                # saving reward and is_terminals
                self.pdppo_agent.buffer.rewards.append(reward)
                self.pdppo_agent.buffer.is_terminals.append(done)

                time_step += 1
                current_ep_reward += reward

                # update PDPPO agent
                if time_step % self.update_timestep == 0:
                    self.pdppo_agent.update()

                # if continuous action space; then decay action std of output action distribution
                if self.has_continuous_action_space and time_step % self.action_std_decay_freq == 0:
                    # Was ``decay_self.action_std(...)`` (AttributeError).
                    # NOTE(review): method name assumed to be
                    # ``decay_action_std(decay_rate, min_std)`` - confirm
                    # against the PDPPO class.
                    self.pdppo_agent.decay_action_std(self.action_std_decay_rate, self.min_action_std)

                # log in logging file
                if time_step % self.log_freq == 0:
                    # max(..., 1): no episode may have finished yet
                    log_avg_reward = round(log_running_reward / max(log_running_episodes, 1), 4)

                    log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
                    log_f.flush()

                    log_running_reward = 0
                    log_running_episodes = 0

                # printing average reward
                if time_step % self.print_freq == 0:
                    print_avg_reward = round(print_running_reward / max(print_running_episodes, 1), 2)

                    print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))

                    print_running_reward = 0
                    print_running_episodes = 0

                # save model weights
                if time_step % self.save_model_freq == 0:
                    print(separator)
                    self.pdppo_agent.save(checkpoint_path)
                    print("PDPPO Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time)
                    print(separator)

                # break; if the episode is over
                if done:
                    break

            print_running_reward += current_ep_reward
            print_running_episodes += 1

            log_running_reward += current_ep_reward
            log_running_episodes += 1

            i_episode += 1

    # print total training time
    print(banner)
    end_time = datetime.now().replace(microsecond=0)
    print("Started training at (GMT) : ", start_time)
    print("Finished training at (GMT) : ", end_time)
    print("Total training time : ", end_time - start_time)
    print(banner)
|
| 381 |
+
|
| 382 |
+
def get_action(self, state):
    """Return the agent's action for ``state``.

    A dict observation is flattened to
    ``concat(state['inventory_level'], state['machine_setup'])`` when
    ``self.dict_obs`` is False; otherwise the observation is forwarded
    unchanged to ``self.pdppo_agent.select_action(state, self.tau)``.

    Raises
    ------
    TypeError
        If ``self.dict_obs`` is True but ``state`` is not a dict.
    """
    if isinstance(state, dict):
        if not self.dict_obs:
            state = np.concatenate(
                (
                    state['inventory_level'],  # n_items size
                    state['machine_setup'],  # n_machine size
                )
            )
    elif self.dict_obs:
        # The original ``raise('...')`` tried to raise a plain string, which
        # itself fails with "exceptions must derive from BaseException".
        # Raising TypeError keeps the exception type callers already saw
        # while actually delivering the message.
        raise TypeError('Change dict_obs to False')
    return self.pdppo_agent.select_action(state, self.tau)
|
| 395 |
+
|
| 396 |
+
def load_agent(self,path):
    """Restore the wrapped PDPPO agent from its default checkpoint file.

    The checkpoint location is rebuilt from ``self.LOG_DIR``,
    ``self.experiment_name``, ``self.random_seed`` and
    ``self.run_num_pretrained``.

    NOTE(review): ``path`` is accepted but never used - the file is always
    looked up at the location derived above.
    """
    folder = '/'.join((self.LOG_DIR, self.experiment_name + '_PDPPO', ''))
    file_name = "PDPPO_{}_{}_{}.pth".format(
        self.experiment_name, self.random_seed, self.run_num_pretrained
    )
    checkpoint_path = folder + file_name
    print("loading network from : " + checkpoint_path)
    self.pdppo_agent.load(checkpoint_path)
|
code/Lake application/agents/PDPPO_two_actors.py
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Wed Mar 1 00:43:49 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
from torch.distributions import MultivariateNormal
|
| 12 |
+
from torch.distributions import Categorical
|
| 13 |
+
|
| 14 |
+
################################## set device ##################################
|
| 15 |
+
# Pick the compute device once at import time: the first CUDA GPU when one is
# available, otherwise the CPU. The choice is announced on stdout.
print("============================================================================================")
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
################################## PDPPO Policy ##################################
|
| 28 |
+
class RolloutBuffer:
    """Plain container of per-timestep lists collected during a rollout.

    Every name in ``_FIELDS`` becomes an instance attribute holding a list;
    callers append to these lists directly.
    """

    # Names of the list attributes, in creation order.
    _FIELDS = (
        "actions", "actions_pre", "actions_post",
        "states", "pre_states", "post_states",
        "logprobs", "logprobs_pre", "logprobs_post",
        "rewards", "rewards_pre", "rewards_post",
        "state_values", "state_values_post",
        "is_terminals",
    )

    def __init__(self):
        for field_name in self._FIELDS:
            setattr(self, field_name, [])

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for field_name in self._FIELDS:
            getattr(self, field_name).clear()
|
| 62 |
+
|
| 63 |
+
class ActorCritic(nn.Module):
    """Multi-discrete policy with three actors and two critics.

    * ``actor`` reads the full state, ``actor_pre`` the pre-decision state and
      ``actor_post`` the post-decision state. Each emits
      ``action_dim.nvec.sum()`` logits that are split into
      ``len(action_dim.nvec)`` categorical heads.
    * ``critic_pre`` / ``critic_post`` map the pre-/post-decision state to a
      scalar value.

    ``action_dim`` must expose ``nvec`` (one entry per head); reshaping the
    logits into heads requires all entries of ``nvec`` to be equal.

    NOTE(review): the actor stacks have no activation between their Linear
    layers, so each actor is an affine map of its input. The architecture is
    left unchanged here - confirm whether that is intended.
    """

    def __init__(self, state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()

        # Previously never stored, so set_action_std() raised AttributeError.
        self.has_continuous_action_space = has_continuous_action_space

        # actor - multidiscrete
        self.action_dim = action_dim
        n_logits = int(self.action_dim.nvec.sum())

        self.actor = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Linear(128, 128),
            nn.Linear(128, n_logits)
        )

        self.actor_pre = nn.Sequential(
            nn.Linear(state_dim_pre, 128),
            nn.Linear(128, 128),
            nn.Linear(128, n_logits)
        )
        self.actor_post = nn.Sequential(
            nn.Linear(state_dim_post, 128),
            nn.Linear(128, 128),
            nn.Linear(128, n_logits)
        )

        # critics
        self.critic_pre = nn.Sequential(
            nn.Linear(state_dim_pre, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

        self.critic_post = nn.Sequential(
            nn.Linear(state_dim_post, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        if self.has_continuous_action_space:
            # NOTE(review): the actors here are categorical and ``action_dim``
            # is a multi-discrete space object, so this continuous branch
            # looks inherited from another variant - confirm before use.
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def _head_distribution(self, logits, batch_shape=()):
        """Turn flat actor logits into one Categorical per action head."""
        action_probs = nn.functional.softmax(logits, dim=-1)
        # Categorical renormalises each row, so every head is a proper
        # distribution even though the softmax ran over all logits at once.
        return Categorical(action_probs.view(*batch_shape, len(self.action_dim.nvec), -1))

    def _sample(self, actor, state):
        """Sample one action per head from ``actor(state)``; results are detached."""
        dist = self._head_distribution(actor(state))
        action = dist.sample()
        return action.detach(), dist.log_prob(action).detach()

    def act(self, state):
        """Sample an action (and its log-probabilities) from the full-state actor.

        The state is fed straight into ``self.actor``; the former call went
        through ``self.fc1`` / ``self.fc2``, which are not defined on this
        class and raised AttributeError.
        """
        return self._sample(self.actor, state)

    def act_pre(self, pre_state):
        """Sample an action from the pre-decision actor."""
        return self._sample(self.actor_pre, pre_state)

    def act_post(self, post_state):
        """Sample an action from the post-decision actor."""
        return self._sample(self.actor_post, post_state)

    def evaluate(self, state, pre_state, post_state, action, action_pre=None, action_post=None):
        """Score a batch of stored transitions under the current parameters.

        Inputs are batched along dimension 0. With only ``action`` given the
        result is ``(action_logprobs, state_values_pre, state_values_post,
        dist_entropy)``. When ``action_pre`` and ``action_post`` are both
        supplied, the pre/post actors are scored too and the result is
        ``(logprobs, logprobs_pre, logprobs_post, state_values_pre,
        state_values_post, entropy, entropy_pre, entropy_post)``.

        Raises
        ------
        ValueError
            If exactly one of ``action_pre`` / ``action_post`` is given.
        """
        batch_shape = (state.shape[0],)

        dist = self._head_distribution(self.actor(state), batch_shape)
        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values_pre = self.critic_pre(pre_state)
        state_values_post = self.critic_post(post_state)

        if action_pre is None and action_post is None:
            return action_logprobs, state_values_pre,state_values_post, dist_entropy
        if action_pre is None or action_post is None:
            raise ValueError("action_pre and action_post must be given together")

        dist_pre = self._head_distribution(self.actor_pre(pre_state), batch_shape)
        dist_post = self._head_distribution(self.actor_post(post_state), batch_shape)
        return (
            action_logprobs,
            dist_pre.log_prob(action_pre),
            dist_post.log_prob(action_post),
            state_values_pre,
            state_values_post,
            dist_entropy,
            dist_pre.entropy(),
            dist_post.entropy(),
        )
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
class PDPPO:
    """Post-decision PPO agent with pre-/post-decision critics.

    The environment must expose ``n_machines``, ``n_items``, ``setup_costs``,
    ``setup_loss`` and ``machine_production_matrix``. The flat state is laid
    out as ``[inventory (n_items), machine setup (n_machines), ...]``, as
    implied by the slicing in ``select_action``.

    The policy network is expected to provide ``act``, ``act_pre``,
    ``act_post``, ``critic_pre``, ``critic_post`` and an
    ``evaluate(states, pre_states, post_states, actions, actions_pre,
    actions_post)`` returning eight values (see ``update``).

    NOTE(review): ``update`` consumes ``buffer.rewards_pre`` and
    ``buffer.rewards_post``; nothing in this class fills them, so the caller
    must append one entry per step alongside ``buffer.rewards`` - confirm.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, env, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.env = env

        self.reward_old_pre = -np.inf
        self.reward_old_post = -np.inf

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        state_dim_pre = self.env.n_machines
        state_dim_post = self.env.n_items

        self.policy = ActorCritic(state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)
        # ``critic_pre`` replaces the former ``self.policy.critic``, an
        # attribute ActorCritic never defines.
        # NOTE(review): actor_pre / actor_post are not optimised because the
        # loss in update() has no term for them - confirm intent.
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
            {'params': self.policy.critic_pre.parameters(), 'lr': lr_critic*10},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic*1}
        ])

        self.policy_old = ActorCritic(state_dim,state_dim_pre,state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply ``action`` deterministically to obtain the post-decision state.

        ``machine_setup`` and ``inventory_level`` are modified IN PLACE and
        returned together with the per-machine setup costs. Action ``0``
        idles a machine; action ``k > 0`` sets the machine up for item
        ``k - 1`` and produces it, minus the setup loss when the setup changed.
        """
        n_machines = self.env.n_machines
        setup_loss = np.zeros(n_machines, dtype=int)
        setup_costs = np.zeros(n_machines)
        for m in range(n_machines):
            # int() makes the index valid whether action holds tensors,
            # numpy scalars or plain ints.
            item = int(action[m])
            if item == 0:  # the machine is idle
                machine_setup[m] = 0
                continue
            # 1. IF NEEDED CHANGE SETUP
            if machine_setup[m] != item:
                setup_costs[m] = self.env.setup_costs[m][item - 1]
                setup_loss[m] = self.env.setup_loss[m][item - 1]
            machine_setup[m] = item
            # 2. PRODUCTION
            production = self.env.machine_production_matrix[m][item - 1] - setup_loss[m]
            inventory_level[item - 1] += production
        # return the new machine_setup, inventory_level and the setup costs
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state):
        """Sample an action with the old policy and record the step in the buffer."""
        n_items = self.env.n_items
        n_machines = self.env.n_machines

        state = torch.FloatTensor(state).to(device)
        pre_state = state[n_items:n_items + n_machines].clone()

        with torch.no_grad():
            action, action_logprob = self.policy_old.act(state)

            # get_post_state mutates its arguments. Passing clones keeps the
            # stored ``state`` identical to what the actor saw; the former
            # code passed views, so the buffered state was silently
            # overwritten with the post-decision values.
            _, post_state, _ = self.get_post_state(action, pre_state.clone(), state[:n_items].clone())

            action_pre, action_logprob_pre = self.policy_old.act_pre(pre_state)
            action_post, action_logprob_post = self.policy_old.act_post(post_state)

            state_val = self.policy_old.critic_pre(pre_state).detach()
            state_val_post = self.policy_old.critic_post(post_state).detach()

        self.buffer.states.append(state)
        self.buffer.pre_states.append(pre_state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.actions_pre.append(action_pre)
        self.buffer.actions_post.append(action_post)
        self.buffer.logprobs.append(action_logprob)
        # These two used to be appended to ``logprobs`` as well, tripling its
        # length and leaving the dedicated lists empty.
        self.buffer.logprobs_pre.append(action_logprob_pre)
        self.buffer.logprobs_post.append(action_logprob_post)
        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        if self.has_continuous_action_space:
            return action.detach().cpu().numpy().flatten()

        # .cpu() is required before .numpy() when running on CUDA
        return action.cpu().numpy()

    def _discounted_returns(self, rewards):
        """Monte Carlo discounted returns, reset at every terminal step."""
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()  # O(n) instead of repeated insert(0, ...)
        return torch.tensor(returns, dtype=torch.float32).to(device)

    @staticmethod
    def _standardize(values):
        """Shift to zero mean and scale to unit standard deviation."""
        return (values - values.mean()) / (values.std() + 1e-7)

    @staticmethod
    def _stack(items):
        """Stack a list of per-step tensors into one detached batch tensor."""
        return torch.squeeze(torch.stack(items, dim=0)).detach().to(device)

    def update(self):
        """Run ``K_epochs`` of clipped-surrogate optimisation, then clear the buffer."""
        # Standardised returns. The former ``rewards / (-rewards).max()``
        # pre-scaling was removed: standardisation already cancels any
        # positive scale, while for non-negative rewards that divisor is zero
        # or negative, giving NaNs or sign-flipped returns. The pre/post
        # paths never applied it.
        rewards = self._standardize(self._discounted_returns(self.buffer.rewards))
        rewards_pre = self._standardize(self._discounted_returns(self.buffer.rewards_pre))
        rewards_post = self._standardize(self._discounted_returns(self.buffer.rewards_post))

        # convert lists to tensors (each from its OWN buffer list; the
        # pre/post variants were previously all read from actions/logprobs)
        old_states = self._stack(self.buffer.states)
        old_pre_states = self._stack(self.buffer.pre_states)
        old_post_states = self._stack(self.buffer.post_states)
        old_actions = self._stack(self.buffer.actions)
        old_actions_pre = self._stack(self.buffer.actions_pre)
        old_actions_post = self._stack(self.buffer.actions_post)
        old_logprobs = self._stack(self.buffer.logprobs)
        old_state_values = self._stack(self.buffer.state_values)
        old_state_values_post = self._stack(self.buffer.state_values_post)

        # calculate advantages
        advantages = rewards.detach() - old_state_values_post.detach() - old_state_values.detach()

        loss = None

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            # NOTE(review): the pre/post log-probs and entropies are returned
            # but not used in the loss, so those actors are never trained.
            (logprobs, _logprobs_pre, _logprobs_post,
             state_values, state_values_post,
             dist_entropy, _entropy_pre, _entropy_post) = self.policy.evaluate(
                old_states, old_pre_states, old_post_states,
                old_actions, old_actions_pre, old_actions_post)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs)

            # Finding Surrogate Loss
            surr1 = ratios * advantages.unsqueeze(1)
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages.unsqueeze(1)
            surr = -torch.min(surr1, surr2)

            # Regress each critic onto its own return. The former term
            # compared two detached old-value tensors, so no gradient ever
            # reached the critics.
            # NOTE(review): targets chosen to match the pre/post advantage
            # definitions (return minus the matching critic) - confirm.
            value_loss = (self.MseLoss(state_values.squeeze(-1), rewards_pre)
                          + self.MseLoss(state_values_post.squeeze(-1), rewards_post))

            loss = surr + 0.5 * value_loss - 0.01*dist_entropy

            # Optimization - gradient backpropagation. The graph is rebuilt
            # every epoch, so retain_graph is not needed.
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        if loss is not None:
            print('Last Loss {}'.format(loss.sum().item()))

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the old policy's weights to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policy copies."""
        # Read the file once; tensors are mapped to CPU storage on load.
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
|
code/Lake application/agents/PDPPO_two_critics_two_actors.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Wed Mar 1 00:43:49 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
from torch.distributions import MultivariateNormal
|
| 12 |
+
from torch.distributions import Categorical
|
| 13 |
+
|
| 14 |
+
################################## set device ##################################
|
| 15 |
+
print("============================================================================================")
# Choose the compute device once, at import time: the first CUDA GPU when one
# is available, the CPU otherwise. Every tensor in this module is moved to it.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
################################## PDPPO Policy ##################################
|
| 28 |
+
class RolloutBuffer:
    """Plain-list storage for everything recorded between two policy updates.

    Each attribute named in ``_FIELDS`` is an independent Python list that the
    agent and the training loop append to.
    """

    # Names of the per-step lists, in the order they are created.
    _FIELDS = (
        'actions',
        'states',
        'pre_states',
        'post_states',
        'logprobs',
        'rewards',
        'rewards_pre',
        'rewards_post',
        'state_values',
        'state_values_post',
        'is_terminals',
    )

    def __init__(self):
        for field in self._FIELDS:
            setattr(self, field, [])

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for field in self._FIELDS:
            getattr(self, field).clear()
|
| 54 |
+
|
| 55 |
+
class ActorCritic(nn.Module):
    """Actor network together with two value heads, ``critic`` and ``critic_post``.

    Continuous action space: the actor is a tanh MLP whose output is the mean
    of a Gaussian with the fixed diagonal variance ``action_var``.

    Otherwise ``action_dim`` must expose ``nvec``: the actor emits
    ``nvec.sum()`` logits, a softmax is taken over all of them, and the result
    is reshaped into ``len(nvec)`` rows that parameterize one ``Categorical``
    per row.

    ``state_dim_pre`` and ``state_dim_post`` are accepted but not used here;
    both value heads take ``state_dim`` inputs.
    """

    def __init__(self, state_dim, state_dim_pre, state_dim_post, action_dim, has_continuous_action_space, action_std_init):
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        if has_continuous_action_space:
            variance = action_std_init * action_std_init
            self.action_var = torch.full((action_dim,), variance).to(device)
            self.actor = nn.Sequential(
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh(),
            )
        else:
            # Multi-discrete actor: two hidden layers followed by the logit layer.
            self.fc1 = nn.Linear(state_dim, 128)
            self.fc2 = nn.Linear(128, 128)
            self.actor = nn.Linear(128, self.action_dim.nvec.sum())

        self.critic = self._value_head(state_dim)
        self.critic_post = self._value_head(state_dim)

    @staticmethod
    def _value_head(input_dim):
        """Return a 128-128-1 tanh MLP mapping a state to a scalar value."""
        return nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1),
        )

    def _discrete_probs(self, state):
        """Softmax (over the last dimension) of the discrete actor's logits."""
        first = nn.functional.relu(self.fc1(state))
        second = nn.functional.relu(self.fc2(first))
        return nn.functional.softmax(self.actor(second), dim=-1)

    def forward(self, state):
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the Gaussian variance; only prints a warning for discrete policies."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def act(self, state):
        """Sample an action for a single state.

        Returns the detached sampled action and its detached log-probability.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            covariance = torch.diag(self.action_var).unsqueeze(dim=0)
            policy_dist = MultivariateNormal(mean, covariance)
        else:
            rows = len(self.action_dim.nvec)
            policy_dist = Categorical(self._discrete_probs(state).view(rows, -1))

        sampled = policy_dist.sample()
        return sampled.detach(), policy_dist.log_prob(sampled).detach()

    def evaluate(self, state, pre_state, post_state, action):
        """Score stored actions under the current parameters.

        Returns ``(action_logprobs, state_values, state_values_post,
        dist_entropy)`` where ``state_values`` is ``critic(pre_state)`` and
        ``state_values_post`` is ``critic_post(post_state)``.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            variance = self.action_var.expand_as(mean)
            covariance = torch.diag_embed(variance).to(device)
            policy_dist = MultivariateNormal(mean, covariance)

            # Single-action environments: give the action a trailing axis.
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            rows = len(self.action_dim.nvec)
            probs = self._discrete_probs(state)
            policy_dist = Categorical(probs.view(state.shape[0], rows, -1))

        action_logprobs = policy_dist.log_prob(action)
        dist_entropy = policy_dist.entropy()
        state_values = self.critic(pre_state)
        state_values_post = self.critic_post(post_state)

        return action_logprobs, state_values, state_values_post, dist_entropy
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class PDPPO:
    """PPO agent with two critics: one scores the pre-decision state, the other
    the post-decision state produced by ``get_post_state``.

    ``env`` must expose ``n_machines``, ``n_items``, ``setup_costs``,
    ``setup_loss`` and ``machine_production_matrix``.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, env, action_std_init=0.6):
        """Create the current and old policies, the optimizer and an empty buffer."""
        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.env = env

        self.reward_old_pre = -np.inf
        self.reward_old_post = -np.inf

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        state_dim_pre = self.env.n_machines
        state_dim_post = self.env.n_items

        self.policy = ActorCritic(state_dim, state_dim_pre, state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)

        # The discrete actor is fc1 -> fc2 -> actor. All three layers must be
        # optimized; otherwise the hidden layers stay at their random init.
        actor_params = list(self.policy.actor.parameters())
        if not has_continuous_action_space:
            actor_params = (
                list(self.policy.fc1.parameters())
                + list(self.policy.fc2.parameters())
                + actor_params
            )

        self.optimizer = torch.optim.Adam([
            {'params': actor_params, 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic*10},
            {'params': self.policy.critic_post.parameters(), 'lr': lr_critic*1}
        ])

        self.policy_old = ActorCritic(state_dim, state_dim_pre, state_dim_post, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the Gaussian std on both policies (continuous action spaces only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action std by ``action_std_decay_rate``, floored at ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    @staticmethod
    def _private_copy(values):
        """Return a copy of ``values`` that shares no memory with the input."""
        if torch.is_tensor(values):
            return values.clone()
        return np.array(values, copy=True)

    def get_post_state(self, action, machine_setup, inventory_level):
        """Apply ``action`` deterministically and return the post-decision state.

        For every machine ``m``: action 0 idles the machine (setup becomes 0);
        any other action switches the setup when it differs from the current
        one (recording the setup cost and the setup loss) and adds the
        machine's production, minus the setup loss, to the produced item's
        inventory.

        The inputs are NOT modified: callers pass slices of the state vector,
        and editing those views in place would overwrite the pre-decision
        state they still need.

        Returns ``(machine_setup, inventory_level, setup_costs)`` where the
        first two are updated copies of the inputs.
        """
        machine_setup = self._private_copy(machine_setup)
        inventory_level = self._private_copy(inventory_level)

        setup_loss = np.zeros(self.env.n_machines, dtype=int)
        setup_costs = np.zeros(self.env.n_machines)
        for m in range(self.env.n_machines):
            if action[m] != 0:  # the machine is not idle
                # 1. Change the setup if needed.
                if machine_setup[m] != action[m]:
                    setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
                    setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
                machine_setup[m] = action[m]
                # 2. Production.
                production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
                inventory_level[action[m] - 1] += production
            else:
                machine_setup[m] = 0
        return machine_setup, inventory_level, setup_costs

    def select_action(self, state):
        """Sample an action with the old policy and record the step in the buffer.

        Stores the state, the pre- and post-decision states, the action, its
        log-probability and both critics' value estimates. Returns the action
        as a numpy array.
        """
        n_items = self.env.n_items
        setup_end = n_items + self.env.n_machines

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state)

        # Clone the slices so `state` keeps its pre-decision values.
        machine_setup, inventory_level, setup_cost = self.get_post_state(
            action,
            state[n_items:setup_end].clone(),
            state[0:n_items].clone(),
        )

        post_state = state.clone()
        post_state[n_items:setup_end] = machine_setup
        post_state[0:n_items] = inventory_level

        pre_state = state.clone()

        self.buffer.states.append(state)
        self.buffer.pre_states.append(pre_state)
        self.buffer.post_states.append(post_state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        with torch.no_grad():
            state_val = self.policy_old.critic(pre_state).detach()
            state_val_post = self.policy_old.critic_post(post_state).detach()

        self.buffer.state_values.append(state_val)
        self.buffer.state_values_post.append(state_val_post)

        if self.has_continuous_action_space:
            return action.detach().cpu().numpy().flatten()

        # .cpu() is required before .numpy() when the policy lives on a GPU.
        return action.cpu().numpy()

    def _discounted_returns(self, rewards, is_terminals):
        """Monte Carlo discounted returns; the running sum restarts at terminals."""
        returns = []
        running = 0
        for reward, is_terminal in zip(reversed(rewards), reversed(is_terminals)):
            if is_terminal:
                running = 0
            running = reward + (self.gamma * running)
            returns.append(running)
        returns.reverse()
        return returns

    def update(self):
        """Run ``K_epochs`` clipped-PPO steps on the buffered rollout, then clear it.

        The advantage is the (un-normalized) discounted return minus the sum
        of the two stored value estimates, and the critics are trained so that
        ``critic(pre_state) + critic_post(post_state)`` matches that return.

        NOTE: ``buffer.rewards_pre`` / ``buffer.rewards_post`` are not part of
        the loss, so no returns are computed from them here.
        """
        rewards = torch.tensor(
            self._discounted_returns(self.buffer.rewards, self.buffer.is_terminals),
            dtype=torch.float32,
        ).to(device)

        # convert lists to tensors
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_pre_states = torch.squeeze(torch.stack(self.buffer.pre_states, dim=0)).detach().to(device)
        old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
        old_state_values_post = torch.squeeze(torch.stack(self.buffer.state_values_post, dim=0)).detach().to(device)

        advantages = rewards - (old_state_values.detach() + old_state_values_post.detach())

        loss = None

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, state_values_post, dist_entropy = self.policy.evaluate(old_states, old_pre_states, old_post_states, old_actions)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs)

            # Multi-discrete log-probs carry one column per action component,
            # so the per-step advantage needs a trailing axis; 1-D log-probs
            # (continuous case) must not get one or the product broadcasts to
            # (N, N).
            step_advantages = advantages.unsqueeze(1) if ratios.dim() > 1 else advantages

            # Finding Surrogate Loss
            surr1 = ratios * step_advantages
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * step_advantages
            surr = -torch.min(surr1, surr2)

            # The critics output (N, 1); drop the last axis so the prediction
            # lines up element-wise with the (N,) returns instead of being
            # broadcast to (N, N) inside MSELoss.
            predicted = (state_values_post + state_values).squeeze(-1)

            loss = surr + 0.5 * self.MseLoss(predicted, rewards) - 0.01*dist_entropy

            # Optimization - gradient backpropagation. The graph is rebuilt by
            # evaluate() every epoch, so it does not have to be retained.
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        if loss is not None:
            print('Last Loss {}'.format(loss.sum().item()))

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the old policy's weights to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policies (mapped to CPU storage)."""
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
|
code/Lake application/agents/PDPPO_v0.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import copy
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
import torch.optim as optim
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from torch.distributions import Categorical
|
| 9 |
+
from envs import *
|
| 10 |
+
import gym
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class SimplePlantSB(SimplePlant):
    """``SimplePlant`` wrapper that declares gym action/observation spaces.

    The action space is ``MultiDiscrete`` with ``n_items + 1`` choices per
    machine. When ``settings['dict_obs']`` is falsy or absent, observations
    are flattened into one array: inventory levels followed by machine setups.
    """

    def __init__(self, settings, stoch_model):
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional; a missing key means flat observations.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items+1] * self.n_machines
        )

        if self.dict_obs:
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(low = np.zeros(self.n_items),high = np.ones(self.n_items)*(settings['max_inventory_level'][0]+1)*self.n_items),
                'machine_setup': gym.spaces.MultiDiscrete([self.n_items+1] * self.n_machines)
            })
        else:
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items+self.n_machines),
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level),  # high for the inventory level
                        np.ones(self.n_machines) * (self.n_items+1),  # high for the machine setups
                    ]),
                dtype=np.int32
            )

    def step(self, action):
        """
        Step method: Execute one time step within the environment

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : observation built by _next_observation
        reward : negative sum of all entries of the cost dict returned by _take_action
        done : True once current_step equals self.T
        dict : the cost dict itself (self.total_cost)

        """
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(action, self.machine_setup, self.inventory_level, self.demand)

        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """
        Return the parent's next observation, flattened to a single array
        (inventory levels, then machine setups) unless dict observations
        were requested.

        Raises
        ------
        ValueError : when dict observations were requested but the parent
            returned a non-dict observation.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items size
                        obs['machine_setup'],  # n_machine size
                    )
                )
        elif self.dict_obs:
            # Raising a bare string is itself a TypeError in Python 3; raise a
            # real exception carrying the intended message.
            raise ValueError('Change dict_obst to False')
        return obs
|
| 93 |
+
|
| 94 |
+
# Define the policy network
|
| 95 |
+
class Policy(nn.Module):
    """Policy network: one shared hidden layer and one softmax head per machine.

    ``output_shape`` is ``(n_choices, n_heads)``: ``n_heads`` linear heads are
    created, each producing a distribution over ``n_choices`` options.
    """

    def __init__(self, input_size, output_shape):
        super(Policy, self).__init__()
        n_choices, n_heads = output_shape[0], output_shape[1]
        self.fc1 = nn.Linear(input_size, 128)
        # One independent head per machine. The loop variable is not needed
        # (the original used a subscript into a temporary list as the target).
        self.fc_list = nn.ModuleList([nn.Linear(128, n_choices) for _ in range(n_heads)])

    def forward(self, x):
        """Return a list with one ``(batch, n_choices)`` probability tensor per head.

        ``x`` must be 2-D because the softmax is taken over ``dim=1``.
        """
        x = F.relu(self.fc1(x)).requires_grad_()
        outputs = [F.softmax(fc(x), dim=1) for fc in self.fc_list]
        return outputs
|
| 105 |
+
|
| 106 |
+
# Define the value network for deterministic components
|
| 107 |
+
class Value(nn.Module):
    """Value network for the deterministic components: Linear(128) -> ReLU -> Linear."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        """Map ``x`` to ``output_size`` unbounded value estimates."""
        hidden = F.relu(self.fc1(x)).requires_grad_()
        return self.fc2(hidden)
|
| 117 |
+
|
| 118 |
+
# Define the value network for stochastic components
|
| 119 |
+
class ValueStochastic(nn.Module):
    """Value network for the stochastic components.

    Same layout as ``Value`` except that the output goes through a softmax
    over ``dim=1``, so every output row is non-negative and sums to one.
    NOTE(review): a softmax bounds the predicted values to [0, 1] - confirm
    this is intended for a value estimate.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        """Return softmax-normalized outputs for a 2-D input ``x``."""
        hidden = F.relu(self.fc1(x)).requires_grad_()
        scores = self.fc2(hidden)
        return F.softmax(scores, dim=1)
|
| 129 |
+
|
| 130 |
+
# Define the PPO agent
|
| 131 |
+
class PDPPO:
|
| 132 |
+
def __init__(self, env: SimplePlant, settings: dict):
    """Wrap ``env`` in a ``SimplePlantSB`` and build the networks and optimizers.

    ``settings`` must contain ``'experiment_name'``; ``'dict_obs'`` is
    optional and defaults to ``False``.
    """
    self.env = SimplePlantSB(env.settings, env.stoch_model)
    self.last_inventory = env.inventory_level
    self.experiment_name = settings['experiment_name']
    # 'dict_obs' is optional; only a missing key falls back to the default.
    try:
        self.dict_obs = settings['dict_obs']
    except KeyError:
        self.dict_obs = False

    self.POSSIBLE_STATES = self.env.n_items + 1
    self.env.cost_to_reward = True
    self.epsilon = 0

    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Use the logs file in the root path of the main.
    self.LOG_DIR = os.path.join(BASE_DIR, 'logs')

    # NOTE(review): the network sizes are only defined for flat observations;
    # with dict observations the constructor fails below - confirm whether
    # dict observations are meant to be supported by this agent.
    if not self.dict_obs:
        input_size = self.env.observation_space.shape[0]
        output_size_policy = (self.env.n_items+1, self.env.action_space.shape[0])  # we add 1 for the idle state
        output_size_value = self.env.action_space.shape[0]
    self.policy = Policy(input_size, output_size_policy)
    self.value = Value(input_size, output_size_value)
    self.value_post = ValueStochastic(input_size, output_size_value)
    self.optimizer_policy = optim.Adam(self.policy.parameters(), lr=1e-3)
    self.optimizer_value = optim.Adam(self.value.parameters(), lr=1e-3)
    self.optimizer_value_post = optim.Adam(self.value_post.parameters(), lr=1e-3)
    self.eps_clip = 0.2
    self.gamma = 0.99
    self.lmbda = 0.95
|
| 162 |
+
|
| 163 |
+
def get_post_state(self, action, machine_setup, inventory_level):
    """Apply ``action`` deterministically and return the post-decision state.

    For every machine ``m``: action 0 idles the machine (setup becomes 0);
    any other action switches the setup when it differs from the current one
    (recording the setup cost and the setup loss) and adds the machine's
    production, minus the setup loss, to the produced item's inventory.

    The inputs are NOT modified. Callers pass slices of the state vector, and
    slices are views: editing them in place would overwrite the pre-decision
    state the caller still stores afterwards.

    Returns ``(machine_setup, inventory_level, setup_costs)`` where the first
    two are updated copies of the inputs.
    """
    # Work on private copies (tensor or array) instead of the caller's views.
    if torch.is_tensor(machine_setup):
        machine_setup = machine_setup.clone()
    else:
        machine_setup = np.array(machine_setup, copy=True)
    if torch.is_tensor(inventory_level):
        inventory_level = inventory_level.clone()
    else:
        inventory_level = np.array(inventory_level, copy=True)

    setup_loss = np.zeros(self.env.n_machines, dtype=int)
    setup_costs = np.zeros(self.env.n_machines)
    for m in range(self.env.n_machines):
        if action[m] != 0:  # the machine is not idle
            # 1. Change the setup if needed.
            if machine_setup[m] != action[m]:
                setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
                setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
            machine_setup[m] = action[m]
            # 2. Production.
            production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
            inventory_level[action[m] - 1] += production
        else:
            machine_setup[m] = 0
    return machine_setup, inventory_level, setup_costs
|
| 181 |
+
|
| 182 |
+
def get_action(self, state):
    """Sample an action for one flat observation.

    Returns ``(action, log_prob, probs, value, value_post)``: the sampled
    action, its log-probability, the stacked per-machine probabilities,
    ``self.value`` at the observed state and ``self.value_post`` at the
    post-decision state obtained from ``get_post_state``.
    """
    n_items = self.env.n_items
    setup_end = n_items + self.env.n_machines

    state = torch.from_numpy(state).float().unsqueeze(0)
    probs = self.policy(state)
    probs_concat = torch.stack(probs, dim=1)
    m = Categorical(probs_concat)
    action = m.sample()
    value = self.value(state)

    # Build the post-decision state explicitly. Previously it only existed
    # because get_post_state edited `state` in place through numpy views, which
    # also silently changed a tensor already used in the autograd graph above.
    machine_setup, inventory_level, setup_cost = self.get_post_state(
        action.numpy()[0],
        state[0][n_items:setup_end].clone().numpy(),
        state[0][0:n_items].clone().numpy(),
    )
    post_state = state.clone()
    post_state[0, n_items:setup_end] = torch.as_tensor(machine_setup, dtype=post_state.dtype)
    post_state[0, 0:n_items] = torch.as_tensor(inventory_level, dtype=post_state.dtype)
    value_post = self.value_post(post_state)

    return action, m.log_prob(action), probs_concat, value, value_post
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def update(self, rewards, rewards_pre_state, rewards_post_state, states, post_states, actions, probs, next_states):
    """Train both value networks and then the policy on one collected episode.

    All arguments are per-step sequences of equal length: ``states``,
    ``post_states`` and ``next_states`` hold numpy arrays, ``actions`` and
    ``probs`` hold the tensors returned by ``get_action``. ``rewards`` is
    accepted but not read; only the pre-/post-state rewards are used.
    """
    # Update deterministic value function: one-step TD regression of
    # value(state) towards reward_pre + gamma * value(next_state).
    for epoch in range(10):
        for i in range(len(actions)):
            state = torch.from_numpy(states[i]).float().unsqueeze(0)
            value = self.value(state)
            next_state = torch.from_numpy(next_states[i]).float().unsqueeze(0)
            next_value = self.value(next_state)
            # NOTE(review): next_value is not detached, so gradients also flow
            # through the target - confirm this is intended.
            target = rewards_pre_state[i] + self.gamma * next_value
            advantage = target - value
            loss = advantage.pow(2).mean()
            self.optimizer_value.zero_grad()
            loss.backward()
            self.optimizer_value.step()

    # Update stochastic value function: regress value_post(state) towards
    # reward_post + gamma * value_post(post_state).
    for epoch in range(10):
        for i in range(len(actions)):
            state = torch.from_numpy(states[i]).float().unsqueeze(0)
            value = self.value_post(state)
            post_state = torch.from_numpy(post_states[i]).float().unsqueeze(0)
            value_post = self.value_post(post_state)
            target = rewards_post_state[i] + self.gamma * value_post
            advantage = target - value
            loss = advantage.pow(2).mean()
            self.optimizer_value_post.zero_grad()
            loss.backward()
            self.optimizer_value_post.step()

    # Update policy network
    states = torch.from_numpy(np.vstack(states)).float()
    actions = torch.cat(actions).unsqueeze(1)
    old_probs = torch.cat(probs)
    # NOTE(review): if each stored action is (1, n_machines), `actions` is now
    # (N, 1, n_machines) and this gather reads every machine's probability from
    # row 0 of dim 1 (the first machine's distribution); an index of shape
    # (N, n_machines, 1) would select per machine - TODO confirm shapes.
    # NOTE(review): old_probs still carries the autograd graph created in
    # get_action (it is not detached) - confirm this is intended.
    old_probs = torch.gather(old_probs.clone(),2, actions)

    policy_epochs = 10
    for epoch in range(policy_epochs):
        probs = self.policy(states)
        probs = torch.stack(probs, dim=1).clone()
        m = Categorical(probs)
        action = m.sample()  # sampled but not used below
        probs = torch.gather(probs, 2, actions)
        kl_div = (old_probs * (torch.log(old_probs) - torch.log(probs))).sum()  # computed but not used below

        for state,post_state, action, old_prob, prob, next_state, reward_pre_state, reward_post_state in zip(states,post_states, actions, old_probs, probs, next_states,rewards_pre_state,rewards_post_state):
            state = state.unsqueeze(0)
            next_state = torch.from_numpy(next_state).unsqueeze(0).float()
            post_state = torch.from_numpy(post_state).unsqueeze(0).float()
            action = action.unsqueeze(0)
            old_prob = old_prob.unsqueeze(0)
            prob = prob.unsqueeze(0)
            value = self.value(state)
            value_post = self.value_post(post_state)
            # One-step advantages from each value network.
            advantage = reward_pre_state + self.gamma * self.value(next_state) - self.value(state)
            advantage_post = reward_post_state + self.gamma * self.value_post(post_state) - self.value_post(state)

            # Clipped PPO surrogate on the pre-state advantage; m.entropy()
            # is the entropy of the whole batch distribution built above.
            ratio = (prob / old_prob)
            surr1 = ratio * advantage
            surr2 = torch.clamp(ratio, 1 - self.eps_clip, 1 + self.eps_clip) * advantage
            policy_loss = -torch.min(surr1, surr2) - 0.01 * m.entropy()

            # Same surrogate on the post-state advantage, with the same ratio.
            ratio_post = ratio
            surr1_post = ratio_post * advantage_post
            surr2_post = torch.clamp(ratio_post, 1 - self.eps_clip, 1 + self.eps_clip) * advantage_post
            policy_loss_post = -torch.min(surr1_post, surr2_post) - 0.01 * m.entropy()

            # NOTE(review): the objective squares the surrogate losses and adds
            # the squared value outputs; only optimizer_policy is stepped here.
            # `probs` is computed once per epoch, so the graph is retained and
            # reused after the policy weights were changed in place by step()
            # - verify this runs for more than one sample and is intended.
            self.optimizer_policy.zero_grad()
            (policy_loss.pow(2).mean() + policy_loss_post.pow(2).mean() + 0.5 * value.pow(2).mean() + 0.5 * value_post.pow(2).mean()).backward(retain_graph=True)
            self.optimizer_policy.step()
|
| 264 |
+
|
| 265 |
+
def learn(self, n_episodes=1000, save_interval=100):
    """Train the agent, running one update per finished episode.

    For every step the method stores the pre-decision state, the
    post-decision state (the current state with its inventory and machine
    setup entries replaced by the values returned from
    ``self.get_post_state``), the action, its probability and the two
    reward components, then hands the whole episode to ``self.update``.

    :param n_episodes: number of episodes to roll out
    :param save_interval: a checkpoint named ``policy_<episode>.pt`` is
        written whenever ``episode % save_interval == 0``
    """
    # NOTE(review): the nesting of the two save calls was reconstructed from
    # a listing without indentation -- confirm it matches the intended flow.
    for episode in range(n_episodes):
        state = self.env.reset()
        rewards = []
        rewards_pre_state = []
        rewards_post_state = []
        states = []
        next_states = []
        actions = []
        probs = []
        post_states = []
        done = False
        while not done:
            action, _log_prob, prob, _value, _value_post = self.get_action(state)
            # Convert once; the same array feeds the env and the post-state model.
            action_np = action[0].detach().numpy()
            next_state, reward, done, _info = self.env.step(action_np)

            # State layout used here: first n_items entries hold the
            # inventory, the following n_machines entries the machine setup.
            n_items = self.env.n_items
            inventory_slice = slice(0, n_items)
            setup_slice = slice(n_items, n_items + self.env.n_machines)
            machine_setup, inventory_level, setup_cost = self.get_post_state(
                action_np, state[setup_slice], state[inventory_slice]
            )
            post_state = state.copy()
            post_state[setup_slice] = machine_setup
            post_state[inventory_slice] = inventory_level
            post_states.append(post_state)

            rewards.append(reward)
            # Split the cost: holding/lost-sales cost is attributed to the
            # pre-decision state, setup cost to the post-decision state.
            rewards_pre_state.append(
                -(self.env.total_cost['holding_costs'] + self.env.total_cost['lost_sales'])
            )
            rewards_post_state.append(-setup_cost.sum())
            states.append(state)
            next_states.append(next_state)
            actions.append(action)
            probs.append(prob)

            state = next_state

        # The loop above only exits once the episode is done.
        self.update(rewards, rewards_pre_state, rewards_post_state, states, post_states, actions, probs, next_states)
        print('Episode:', episode, 'Reward:', sum(rewards))
        if episode % save_interval == 0:
            self.save(f'policy_{episode}.pt')
    self.save(self.LOG_DIR)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def save(self, filepath):
    """Write the three networks and their optimizers to one checkpoint.

    :param filepath: destination handed straight to ``torch.save``
    """
    checkpoint = {}
    # Network weights first, optimizer states afterwards.
    checkpoint['policy_state_dict'] = self.policy.state_dict()
    checkpoint['value_state_dict'] = self.value.state_dict()
    checkpoint['value_post_state_dict'] = self.value_post.state_dict()
    checkpoint['optimizer_policy_state_dict'] = self.optimizer_policy.state_dict()
    checkpoint['optimizer_value_state_dict'] = self.optimizer_value.state_dict()
    checkpoint['optimizer_value_post_state_dict'] = self.optimizer_value_post.state_dict()
    torch.save(checkpoint, filepath)
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def load(self, filepath):
    """Restore the networks and optimizers from a checkpoint written by ``save``.

    :param filepath: source handed to ``torch.load``
    :raises KeyError: if the checkpoint lacks one of the expected entries
    """
    # Deserialize onto the CPU so a checkpoint saved on a CUDA machine can be
    # opened on a CPU-only host; ``load_state_dict`` then copies the values
    # onto whatever device the existing parameters live on.
    checkpoint = torch.load(filepath, map_location='cpu')
    self.policy.load_state_dict(checkpoint['policy_state_dict'])
    self.value.load_state_dict(checkpoint['value_state_dict'])
    self.value_post.load_state_dict(checkpoint['value_post_state_dict'])
    self.optimizer_policy.load_state_dict(checkpoint['optimizer_policy_state_dict'])
    self.optimizer_value.load_state_dict(checkpoint['optimizer_value_state_dict'])
    self.optimizer_value_post.load_state_dict(checkpoint['optimizer_value_post_state_dict'])
|
| 327 |
+
|
| 328 |
+
|
code/Lake application/agents/PPO.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Wed Mar 1 00:43:49 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
from torch.distributions import MultivariateNormal
|
| 11 |
+
from torch.distributions import Categorical
|
| 12 |
+
|
| 13 |
+
################################## set device ##################################
|
| 14 |
+
print("============================================================================================")
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")
print("============================================================================================")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
################################## PPO Policy ##################################
|
| 27 |
+
class RolloutBuffer:
    """Plain list-based storage for the transitions of one rollout."""

    def __init__(self):
        self.actions, self.states, self.logprobs = [], [], []
        self.rewards, self.state_values, self.is_terminals = [], [], []

    def clear(self):
        """Empty every list in place, keeping the list objects themselves."""
        for storage in (
            self.actions,
            self.states,
            self.logprobs,
            self.rewards,
            self.state_values,
            self.is_terminals,
        ):
            storage.clear()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class ActorCritic(nn.Module):
    """Actor and critic MLPs with categorical action sampling.

    The actor maps a state to ``action_dim`` outputs that ``act`` and
    ``evaluate`` turn into a categorical distribution via softmax; the
    critic maps a state to a single value.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space
        self.action_dim = action_dim

        # The actor is created before the critic so parameter initialisation
        # consumes the random stream in the same order as before.
        if has_continuous_action_space:
            variance = action_std_init * action_std_init
            self.action_var = torch.full((action_dim,), variance).to(device)
            actor_layers = [
                nn.Linear(state_dim, 64),
                nn.Tanh(),
                nn.Linear(64, 64),
                nn.Tanh(),
                nn.Linear(64, action_dim),
                nn.Tanh(),
            ]
        else:
            actor_layers = [
                nn.Linear(state_dim, 128),
                nn.Tanh(),
                nn.Linear(128, 128),
                nn.Tanh(),
                nn.Linear(128, action_dim),
            ]
        self.actor = nn.Sequential(*actor_layers)

        self.critic = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 128),
            nn.Tanh(),
            nn.Linear(128, 1),
        )

    def forward(self, state):
        """Not supported; use ``act`` or ``evaluate`` instead."""
        raise NotImplementedError

    def set_action_std(self, new_action_std):
        """Reset the stored action variance (continuous action spaces only)."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return
        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def _distribution(self, state):
        """Build the categorical distribution given by the actor's softmaxed output."""
        return Categorical(nn.functional.softmax(self.actor(state), dim=-1))

    def act(self, state):
        """Sample an action; return it with its log-probability and the state value.

        All three returned tensors are detached from the autograd graph.
        """
        policy = self._distribution(state)
        sampled = policy.sample()
        sampled_logprob = policy.log_prob(sampled)
        value = self.critic(state)
        return sampled.detach(), sampled_logprob.detach(), value.detach()

    def evaluate(self, state, action):
        """Score given actions under the current policy.

        :return: tuple ``(action_logprobs, state_values, dist_entropy)``
        """
        policy = self._distribution(state)
        # The double transpose keeps the log-probabilities in the same
        # layout as the ``action`` tensor that was passed in.
        log_probabilities = policy.log_prob(action.T).T
        entropy = policy.entropy()
        values = self.critic(state)
        return log_probabilities, values, entropy
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class PPO:
    """Clipped-surrogate PPO trainer around an ``ActorCritic`` policy.

    Transitions are collected with ``select_action`` (which fills
    ``self.buffer`` with states, actions, log-probabilities and values; the
    caller is expected to append rewards and terminal flags) and consumed by
    ``update``.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std_init=0.6):
        """Create the trainable policy, its frozen copy and the optimizer.

        :param state_dim: size of the state vector fed to actor and critic
        :param action_dim: number of actor outputs
        :param lr_actor: Adam learning rate for the actor parameters
        :param lr_critic: Adam learning rate for the critic parameters
        :param gamma: discount factor used for the Monte Carlo returns
        :param K_epochs: gradient steps taken per call to ``update``
        :param eps_clip: clipping range of the probability ratio
        :param has_continuous_action_space: forwarded to ``ActorCritic``
        :param action_std_init: initial action standard deviation
        """
        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
        ])

        # Snapshot of the policy used for acting; refreshed after each update.
        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action standard deviation on both policies (continuous only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action standard deviation by a fixed step, floored at a minimum.

        :param action_std_decay_rate: amount subtracted from the current value
        :param min_action_std: lower bound applied after the subtraction
        """
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if self.action_std <= min_action_std:
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PPO::decay_action_std() on discrete action space policy")
        print("--------------------------------------------------------------------------------------------")

    def select_action(self, state):
        """Sample an action with the frozen policy and record the step.

        :param state: observation convertible with ``torch.tensor``
        :return: the sampled action as a numpy array
        """
        with torch.no_grad():
            state = torch.tensor(state).to(device)
            state = state.float()
            # Adds a leading batch axis for a 1-D observation.
            state = torch.unsqueeze(state, 1).T
            action, action_logprob, state_val = self.policy_old.act(state)

        self.buffer.states.append(state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)
        self.buffer.state_values.append(state_val)

        return action.cpu().numpy()

    def update(self):
        """Run ``K_epochs`` clipped-PPO gradient steps on the buffered rollout.

        Afterwards the frozen policy is synchronised with the trained one and
        the buffer is cleared.
        """
        # Monte Carlo estimate of returns, accumulated from the last step
        # backwards and reversed once at the end (avoids repeated front
        # insertion into a list).
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()

        # Normalizing the rewards. The sample standard deviation of a single
        # value is NaN, which would poison every parameter, so a one-step
        # buffer is left unnormalized.
        rewards = torch.tensor(returns, dtype=torch.float32).to(device)
        if rewards.numel() > 1:
            rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=1)).detach().to(device)
        old_actions = torch.stack(self.buffer.actions, dim=0).detach().to(device)
        old_logprobs = torch.stack(self.buffer.logprobs, dim=0).detach().to(device)
        old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)

        # calculate advantages (column layout to line up with the log-probs)
        advantages = (rewards.detach() - old_state_values.detach()).unsqueeze(1)

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages

            # Final clipped-objective loss. Each term is reduced to a scalar
            # before being combined: adding the column-shaped surrogate to
            # the row-shaped entropy would otherwise broadcast to an N x N
            # tensor whose mean is this very same value.
            loss = (
                -torch.min(surr1, surr2).mean()
                + 0.5 * self.MseLoss(state_values, rewards)
                - 0.012 * dist_entropy.mean()
            )

            # take gradient step
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()

    def save(self, checkpoint_path):
        """Write the frozen policy's weights to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)

    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policies."""
        # Read the file once; ``load_state_dict`` copies the values, so the
        # same dictionary can safely feed both networks.
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)
|
code/Lake application/agents/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .PPOAgent import PPOAgent
|
| 2 |
+
from .PDPPOAgent import PDPPOAgent
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
__all__ = [
|
| 6 |
+
"PPOAgent",
|
| 7 |
+
"PDPPOAgent"
|
| 8 |
+
]
|
code/Lake application/envs/frozen_lake.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
from contextlib import closing
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
from io import StringIO
|
| 6 |
+
|
| 7 |
+
from gym import utils
|
| 8 |
+
from gym.envs.toy_text import discrete
|
| 9 |
+
|
| 10 |
+
LEFT = 0
|
| 11 |
+
DOWN = 1
|
| 12 |
+
RIGHT = 2
|
| 13 |
+
UP = 3
|
| 14 |
+
|
| 15 |
+
MAPS = {
|
| 16 |
+
"4x4": ["SFFF", "FHFH", "FFFH", "HFFG"],
|
| 17 |
+
"8x8": [
|
| 18 |
+
"SFFFFFFF",
|
| 19 |
+
"FFFFFFFF",
|
| 20 |
+
"FFFHFFFF",
|
| 21 |
+
"FFFFFHFF",
|
| 22 |
+
"FFFHFFFF",
|
| 23 |
+
"FHHFFFHF",
|
| 24 |
+
"FHFFHFHF",
|
| 25 |
+
"FFFHFFFG",
|
| 26 |
+
],
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def generate_random_map(size=8, p=0.8):
    """Generates a random valid map (one that has a path from start to goal)

    :param size: size of each side of the grid (at least 2)
    :param p: probability that a tile is frozen; values above 1 are
        treated as 1
    :return: list of ``size`` strings of length ``size`` made of the tiles
        ``S``, ``F``, ``H`` and ``G``
    :raises ValueError: if ``size`` is below 2 or ``p`` is not positive,
        because no valid map can ever be drawn in those cases
    """
    # Without these checks the rejection loop below never terminates: a
    # 1x1 grid has its start overwritten by the goal, and p <= 0 leaves the
    # start surrounded by holes.
    if size < 2:
        raise ValueError("size must be at least 2")
    if p <= 0:
        raise ValueError("p must be greater than 0")
    p = min(1, p)

    # DFS to check that it's a valid path.
    def is_valid(res):
        frontier, discovered = [(0, 0)], set()
        while frontier:
            r, c = frontier.pop()
            if (r, c) in discovered:
                continue
            discovered.add((r, c))
            for x, y in ((1, 0), (0, 1), (-1, 0), (0, -1)):
                r_new = r + x
                c_new = c + y
                if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
                    continue
                if res[r_new][c_new] == "G":
                    return True
                if res[r_new][c_new] != "H":
                    frontier.append((r_new, c_new))
        return False

    # Rejection sampling: redraw until start and goal are connected.
    while True:
        res = np.random.choice(["F", "H"], (size, size), p=[p, 1 - p])
        res[0][0] = "S"
        res[-1][-1] = "G"
        if is_valid(res):
            return ["".join(x) for x in res]
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class FrozenLakeEnv(discrete.DiscreteEnv):
    """
    Winter is here. You and your friends were tossing around a frisbee at the
    park when you made a wild throw that left the frisbee out in the middle of
    the lake. The water is mostly frozen, but there are a few holes where the
    ice has melted. If you step into one of those holes, you'll fall into the
    freezing water. At this time, there's an international frisbee shortage, so
    it's absolutely imperative that you navigate across the lake and retrieve
    the disc. However, the ice is slippery, so you won't always move in the
    direction you intend.
    The surface is described using a grid like the following

        SFFF
        FHFH
        FFFH
        HFFG

    S : starting point, safe
    F : frozen surface, safe
    H : hole, fall to your doom
    G : goal, where the frisbee is located

    The episode ends when you reach the goal or fall in a hole.

    Rewards in this variant: entering the goal yields 1, entering a hole
    yields 0, and entering any other tile yields ``1 / (1 + d)`` where ``d``
    is the Manhattan distance from that tile to the goal.

    Slippery dynamics in this variant: the chosen move is applied first. If
    it ends on a goal or hole the transition is final. Otherwise the agent
    stays on the reached tile with probability ``1 - base_slip_prob`` (0.7)
    and, with total probability ``base_slip_prob`` (0.3), slides one more
    step in a direction drawn from that tile's own direction probabilities.
    """

    metadata = {"render.modes": ["human", "ansi"]}

    def __init__(self, desc=None, map_name="4x4", is_slippery=True):
        """Build the transition table and initialise the discrete environment.

        :param desc: map rows; when ``None`` the map comes from ``map_name``
        :param map_name: key into ``MAPS``; ``None`` (with ``desc`` also
            ``None``) requests a random map
        :param is_slippery: enables the two-stage slip model described on
            the class
        :raises ValueError: if the map has no goal tile ``G``
        """
        if desc is None and map_name is None:
            desc = generate_random_map()
        elif desc is None:
            desc = MAPS[map_name]
        self.desc = desc = np.asarray(desc, dtype="c")
        self.nrow, self.ncol = nrow, ncol = desc.shape
        self.reward_range = (0, 1)

        nA = 4
        nS = nrow * ncol

        isd = np.array(desc == b"S").astype("float64").ravel()
        isd /= isd.sum()

        P = {s: {a: [] for a in range(nA)} for s in range(nS)}

        def to_s(row, col):
            return row * ncol + col

        def to_row_col(s):
            return divmod(s, ncol)

        def inc(row, col, a):
            # Moves are clamped at the border, so walking into a wall keeps
            # the agent where it is.
            if a == LEFT:
                col = max(col - 1, 0)
            elif a == DOWN:
                row = min(row + 1, nrow - 1)
            elif a == RIGHT:
                col = min(col + 1, ncol - 1)
            elif a == UP:
                row = max(row - 1, 0)
            return (row, col)

        # First goal tile in row-major order. Fail early with a clear
        # message instead of an unpacking error while the table is built.
        goal_cells = np.argwhere(desc == b"G")
        if len(goal_cells) == 0:
            raise ValueError("the map must contain a goal tile 'G'")
        goal_row, goal_col = (int(v) for v in goal_cells[0])

        def proximity_reward(current_row, current_col):
            distance = abs(goal_row - current_row) + abs(goal_col - current_col)
            return 1.0 / (1.0 + distance)

        def update_probability_matrix(row, col, a):
            newrow, newcol = inc(row, col, a)
            newstate = to_s(newrow, newcol)
            newletter = desc[newrow, newcol]
            terminated = bytes(newletter) in b"GH"
            reward = float(newletter == b"G")
            if not terminated:
                reward = proximity_reward(newrow, newcol)
            return newstate, reward, terminated

        # NOTE(review): this reseeds NumPy's *global* generator on every
        # construction, which also affects unrelated np.random users;
        # kept as is because callers may rely on it -- confirm.
        np.random.seed(42)  # Set a seed for reproducibility
        # One probability vector over the four directions per tile.
        tile_probabilities = np.random.dirichlet(np.ones(4), size=(nrow, ncol))

        base_slip_prob = 0.3

        # Earlier slip models (uniform 1/3 over the neighbouring directions,
        # a uniform second move, and an unscaled per-tile second move) were
        # replaced by the scaled per-tile second move implemented below.
        for row in range(nrow):
            for col in range(ncol):
                s = to_s(row, col)
                letter = desc[row, col]
                for a in range(4):
                    li = P[s][a]
                    if letter in b"GH":
                        # Terminal tiles are absorbing.
                        li.append((1.0, s, 0, True))
                    elif is_slippery:
                        # First, the agent moves in the desired direction
                        newstate, reward, terminated = update_probability_matrix(row, col, a)
                        if terminated:
                            li.append((1.0, newstate, reward, terminated))
                        else:
                            # After the first move, slippery condition causes an additional movement
                            row2, col2 = to_row_col(newstate)
                            for b, prob in enumerate(tile_probabilities[row2, col2]):
                                li.append(
                                    (base_slip_prob * prob, *update_probability_matrix(row2, col2, b))
                                )
                            # Add the remaining probability for staying at the newstate
                            li.append((1.0 - base_slip_prob, newstate, reward, False))
                    else:
                        li.append((1.0, *update_probability_matrix(row, col, a)))

        super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)

    def get_post_decision_state(self, s, a):
        """Return the state reached by applying ``a`` to ``s`` with no slip.

        :param s: flat state index (``row * ncol + col``)
        :param a: one of ``LEFT``, ``DOWN``, ``RIGHT``, ``UP``
        :return: flat index of the tile the move lands on, clamped at the
            grid border
        """
        row = s // self.ncol
        col = s % self.ncol
        if a == LEFT:
            col = max(col - 1, 0)
        elif a == DOWN:
            row = min(row + 1, self.nrow - 1)
        elif a == RIGHT:
            col = min(col + 1, self.ncol - 1)
        elif a == UP:
            row = max(row - 1, 0)
        return row * self.ncol + col

    def render(self, mode="human"):
        """Draw the grid with the agent's tile highlighted.

        :param mode: ``"human"`` writes to stdout; ``"ansi"`` writes to an
            in-memory buffer
        :return: the rendered text for any mode other than ``"human"``,
            otherwise ``None``
        """
        outfile = StringIO() if mode == "ansi" else sys.stdout

        row, col = self.s // self.ncol, self.s % self.ncol
        desc = self.desc.tolist()
        desc = [[c.decode("utf-8") for c in line] for line in desc]
        desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True)
        if self.lastaction is not None:
            outfile.write(
                " ({})\n".format(["Left", "Down", "Right", "Up"][self.lastaction])
            )
        else:
            outfile.write("\n")
        outfile.write("\n".join("".join(line) for line in desc) + "\n")

        if mode != "human":
            with closing(outfile):
                return outfile.getvalue()
|
code/Lake application/experiments.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import os
|
| 4 |
+
import gc
|
| 5 |
+
import sys
|
| 6 |
+
import json
|
| 7 |
+
import random
|
| 8 |
+
import gym
|
| 9 |
+
|
| 10 |
+
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
|
| 11 |
+
AGENTS_DIR = os.path.join(BASE_DIR,'agents')
|
| 12 |
+
sys.path.append(AGENTS_DIR)
|
| 13 |
+
|
| 14 |
+
from agents.PPO import PPO
|
| 15 |
+
from agents.PDPPO import PDPPO
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
from agents import *
|
| 19 |
+
from agents import StochasticProgrammingAgent, AdpAgentHD3
|
| 20 |
+
from agents import StableBaselineAgent, MultiAgentRL, EnsembleAgent, PerfectInfoAgent,PSOagent,AdpAgentHD, PPOAgent
|
| 21 |
+
from test_functions import *
|
| 22 |
+
from scenarioManager.stochasticDemandModel import StochasticDemandModel
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
#'15items_5machines_i100','25items_10machines'
|
| 26 |
+
|
| 27 |
+
if __name__ == '__main__':
    # Train a PPO agent and a PDPPO agent on FrozenLake, then reload the
    # stored "best" checkpoint of each. The whole procedure is repeated
    # five times.
    for run_idx in range(0, 5):

        experiment_name = 'frozen_lake'

        # Setting the seeds (re-applied with the same values on every run).
        np.random.seed(1)
        random.seed(10)

        from gym.envs.toy_text.frozen_lake import generate_random_map

        # Model setup: slippery FrozenLake on a randomly generated 8x8 map.
        env = gym.make(
            'FrozenLake-v1',
            desc=generate_random_map(size=8),
            is_slippery=True,
        )

        # Settings shared by both agents. 'parallelization' is already False
        # here, so no separate re-assignment is needed afterwards.
        setting_sol_method = {
            'discount_rate': 0.99,
            'experiment_name': experiment_name,
            'parallelization': False,
            'model_name': 'PPO',
            'branching_factors': [4, 2, 2],
            'dict_obs': False,  # To be employed if dictionary observations are necessary
        }

        # Parameters for the RL:
        training_epochs_RL = 200000

        # Number of test executions (number of complete environment
        # iterations). Only the currently disabled evaluation step uses it.
        nreps = 100

        # NOTE(review): '__file__' is a quoted string literal, so this
        # resolves to the current working directory rather than to the
        # directory containing this script -- confirm that is intended.
        logs_root = os.path.join(
            os.path.dirname(os.path.abspath('__file__')),
            'logs',
        )

        #######################################################################
        # PPO
        #######################################################################
        base_model_name = 'PPO'
        ppo_agent = PPOAgent(env, setting_sol_method)
        ppo_agent.learn(n_episodes=training_epochs_RL)  # Each ep with 200 steps

        # Load the best agent before appending it to the test list.
        BEST_MODEL_DIR = os.path.join(
            logs_root,
            f'best_{base_model_name}_{experiment_name}',
            'best_model',
        )
        ppo_agent.load_agent(BEST_MODEL_DIR)  # For training purposes

        #######################################################################
        # Post-decision PPO
        #######################################################################
        base_model_name = 'PDPPO'
        pdppo_agent = PDPPOAgent(env, setting_sol_method)
        pdppo_agent.learn(n_episodes=training_epochs_RL)  # Each ep with 200 steps

        # Load the best agent before appending it to the test list.
        BEST_MODEL_DIR = os.path.join(
            logs_root,
            f'best_{base_model_name}_{experiment_name}',
            'best_model',
        )
        pdppo_agent.load_agent(BEST_MODEL_DIR)  # For training purposes

        #######################################################################
        # TESTING
        #######################################################################
        # NOTE: the evaluation step (a test_agents(...) call over a list of
        # agents, followed by printing the per-model costs) is disabled.

        # Drop the environment and force a collection before the next run.
        del env
        gc.collect()
|
code/Lake application/generate_tables.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
|
| 7 |
+
# Folder (relative to the current working directory) that holds the per-run
# CSV training logs; used below to build the log file paths.
main_folder = 'logs/results_2'
|
| 8 |
+
|
| 9 |
+
def get_max_rewards():
    """Summarise the best logged reward of every method over five runs.

    For each (environment, method) pair the logs
    ``logs/results_2/{method}_{env}_log_{run}.csv`` (run = 1..5) are read,
    the maximum of the ``reward`` column is taken per run, and the mean and
    (population) standard deviation of those five maxima are reported.

    Returns:
        pandas.DataFrame with the columns ``Environment``, ``Method``,
        ``Max Reward`` and ``Max Reward Standard Deviation``; one row per
        (environment, method) pair.

    Raises:
        FileNotFoundError: if one of the expected log files is missing.
        KeyError: if a log file has no ``reward`` column.
    """
    experiment_names = ['frozen_lake']
    methods = ['PDPPO', 'PPO']
    columns = ['Environment', 'Method', 'Max Reward', 'Max Reward Standard Deviation']
    main_folder = 'logs/results_2'

    # Rows are gathered in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append was removed in pandas 2.0, and it copied the
    # whole frame on every call anyway.
    rows = []
    for experiment_name in experiment_names:
        for method in methods:
            env_name = experiment_name
            max_rewards = [
                pd.read_csv(
                    f'{main_folder}/{method}_{env_name}_log_{run_num}.csv'
                )['reward'].max()
                for run_num in range(1, 6)
            ]
            rows.append({
                'Environment': env_name,
                'Method': method,
                'Max Reward': np.mean(max_rewards),
                'Max Reward Standard Deviation': np.std(max_rewards),
            })

    return pd.DataFrame(rows, columns=columns)
|
| 28 |
+
|
| 29 |
+
def get_first_rewards():
    """Summarise the reward logged at timestep 50000 for every method.

    For each (environment, method) pair the logs
    ``{main_folder}/{method}_{env}_log_{run}.csv`` (run = 1..5) are read and
    the ``reward`` values of the rows whose ``timestep`` equals 50000 are
    pooled over the runs. The mean and (population) standard deviation of
    the pooled values are reported.

    Returns:
        pandas.DataFrame with the columns ``Environment``, ``Method``,
        ``First Reward`` and ``First Reward Standard Deviation``; one row per
        (environment, method) pair. Both statistics are NaN when no run has
        a row at timestep 50000.

    Raises:
        FileNotFoundError: if one of the expected log files is missing.
        KeyError: if a log file lacks the ``timestep`` or ``reward`` column.
    """
    experiment_names = ['frozen_lake']
    methods = ['PDPPO', 'PPO']
    columns = ['Environment', 'Method', 'First Reward', 'First Reward Standard Deviation']

    # Collect plain dicts and build the frame once; DataFrame.append was
    # removed in pandas 2.0.
    rows = []
    for experiment_name in experiment_names:
        for method in methods:
            env_name = experiment_name
            first_rewards = []
            for run_num in range(1, 6):
                log_f_name = f'{main_folder}/{method}_{env_name}_log_{run_num}.csv'
                data = pd.read_csv(log_f_name)
                reward_50000 = data[data['timestep'] == 50000]['reward'].values
                first_rewards.append(reward_50000)

            # Each entry is an array that may be empty (no row at that
            # timestep). Concatenating flattens them into one 1-D array, so a
            # run without a match no longer yields a ragged list that numpy
            # cannot reduce.
            pooled = np.concatenate(first_rewards)
            if pooled.size:
                mean_first_reward = np.mean(pooled)
                std_first_reward = np.std(pooled)
            else:
                mean_first_reward = np.nan
                std_first_reward = np.nan

            rows.append({
                'Environment': env_name,
                'Method': method,
                'First Reward': mean_first_reward,
                'First Reward Standard Deviation': std_first_reward,
            })

    return pd.DataFrame(rows, columns=columns)
|
| 48 |
+
|
| 49 |
+
def get_steps_reward_threshold():
    """Report how many timesteps each method needs to reach a reward threshold.

    For each (environment, method) pair the logs
    ``logs/results_1/{method}_{env}_log_{run}.csv`` (run = 1..5) are read and
    the ``timestep`` of the first row whose ``reward`` is at least the
    environment's threshold is recorded. Runs that never reach the threshold
    are left out of the statistics.

    Returns:
        pandas.DataFrame with the columns ``Environment``, ``Method``,
        ``Steps`` and ``Steps Standard Deviation``; one row per
        (environment, method) pair. Both statistics are NaN when no run
        reaches the threshold.

    Raises:
        FileNotFoundError: if one of the expected log files is missing.
        KeyError: if a log file lacks the ``timestep`` or ``reward`` column.
    """
    experiment_names = ['frozen_lake']
    methods = ['PDPPO', 'PPO']
    reward_thresholds = [10]  # one threshold per entry of experiment_names
    columns = ['Environment', 'Method', 'Steps', 'Steps Standard Deviation']

    # Collect plain dicts and build the frame once; DataFrame.append was
    # removed in pandas 2.0.
    rows = []
    for experiment_name, reward_threshold_value in zip(experiment_names, reward_thresholds):
        for method in methods:
            env_name = experiment_name
            reward_steps = []
            for run_num in range(1, 6):
                # NOTE(review): this reads from 'logs/results_1' while the
                # module-level main_folder points at 'logs/results_2' --
                # confirm the different folder is intentional.
                log_f_name = f'logs/results_1/{method}_{env_name}_log_{run_num}.csv'
                data = pd.read_csv(log_f_name)
                reached = data.loc[data['reward'] >= reward_threshold_value, 'timestep']
                # A run that never reaches the threshold has no first match;
                # skip it instead of failing on .iloc[0].
                if not reached.empty:
                    reward_steps.append(reached.iloc[0])

            mean_reward_steps = np.mean(reward_steps) if reward_steps else np.nan
            std_reward_steps = np.std(reward_steps) if reward_steps else np.nan
            rows.append({
                'Environment': env_name,
                'Method': method,
                'Steps': mean_reward_steps,
                'Steps Standard Deviation': std_reward_steps,
            })

    return pd.DataFrame(rows, columns=columns)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
if __name__ == '__main__':
    # Build the three per-method summary tables, join them on the
    # (Environment, Method) key pair and print the combined table.
    join_keys = ['Environment', 'Method']

    max_rewards_df = get_max_rewards()
    first_rewards_df = get_first_rewards()
    steps_rewards_df = get_steps_reward_threshold()

    final_results = (
        max_rewards_df
        .merge(first_rewards_df, on=join_keys)
        .merge(steps_rewards_df, on=join_keys)
    )
    print(final_results)
|
code/Lake application/logs/.gitkeep
ADDED
|
File without changes
|
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_0_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c9e2a71c5d15566400c1716b2450770bb0b1e02e290ba4d6a0e6d866e97a1a
|
| 3 |
+
size 307271
|
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_1.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
22,400,1.2526
|
| 3 |
+
48,800,1.1762
|
| 4 |
+
66,1200,1.698
|
| 5 |
+
86,1600,1.5699
|
| 6 |
+
100,2000,2.2196
|
| 7 |
+
114,2400,2.2243
|
| 8 |
+
130,2800,1.9494
|
| 9 |
+
144,3200,2.1269
|
| 10 |
+
161,3600,1.8416
|
| 11 |
+
173,4000,2.4108
|
| 12 |
+
186,4400,2.3831
|
| 13 |
+
195,4800,3.7798
|
| 14 |
+
204,5200,3.3905
|
| 15 |
+
212,5600,3.7893
|
| 16 |
+
221,6000,2.6417
|
| 17 |
+
230,6400,3.9088
|
| 18 |
+
245,6800,2.3282
|
| 19 |
+
256,7200,2.7405
|
| 20 |
+
263,7600,4.6528
|
| 21 |
+
273,8000,3.0146
|
| 22 |
+
281,8400,3.2375
|
| 23 |
+
291,8800,3.553
|
| 24 |
+
297,9200,4.7911
|
| 25 |
+
307,9600,3.4636
|
| 26 |
+
314,10000,4.4748
|
| 27 |
+
320,10400,4.6664
|
| 28 |
+
329,10800,3.4853
|
| 29 |
+
336,11200,4.7085
|
| 30 |
+
341,11600,6.6859
|
| 31 |
+
349,12000,3.6321
|
| 32 |
+
355,12400,5.3428
|
| 33 |
+
362,12800,4.1236
|
| 34 |
+
367,13200,6.1173
|
| 35 |
+
373,13600,5.3324
|
| 36 |
+
377,14000,7.2656
|
| 37 |
+
381,14400,7.7223
|
| 38 |
+
386,14800,5.9847
|
| 39 |
+
390,15200,7.6658
|
| 40 |
+
395,15600,5.8334
|
| 41 |
+
400,16000,7.1939
|
| 42 |
+
404,16400,7.1478
|
| 43 |
+
408,16800,7.5988
|
| 44 |
+
414,17200,6.1732
|
| 45 |
+
419,17600,6.1007
|
| 46 |
+
423,18000,7.2673
|
| 47 |
+
427,18400,7.41
|
| 48 |
+
432,18800,7.0193
|
| 49 |
+
438,19200,5.3807
|
| 50 |
+
443,19600,5.9541
|
| 51 |
+
447,20000,7.6451
|
| 52 |
+
452,20400,7.0583
|
| 53 |
+
457,20800,7.3468
|
| 54 |
+
461,21200,7.6243
|
| 55 |
+
466,21600,6.6661
|
| 56 |
+
473,22000,4.9625
|
| 57 |
+
479,22400,4.5857
|
| 58 |
+
484,22800,6.9212
|
| 59 |
+
488,23200,6.1686
|
| 60 |
+
493,23600,7.897
|
| 61 |
+
497,24000,7.9243
|
| 62 |
+
501,24400,8.608
|
| 63 |
+
506,24800,6.6952
|
| 64 |
+
511,25200,6.2059
|
| 65 |
+
516,25600,7.1384
|
| 66 |
+
522,26000,5.7323
|
| 67 |
+
527,26400,5.9145
|
| 68 |
+
534,26800,5.6198
|
| 69 |
+
538,27200,7.6158
|
| 70 |
+
542,27600,7.7017
|
| 71 |
+
546,28000,7.8191
|
| 72 |
+
551,28400,8.2935
|
| 73 |
+
556,28800,7.3137
|
| 74 |
+
562,29200,5.8557
|
| 75 |
+
570,29600,4.3915
|
| 76 |
+
576,30000,4.9182
|
| 77 |
+
581,30400,6.3954
|
| 78 |
+
585,30800,7.2091
|
| 79 |
+
590,31200,9.0975
|
| 80 |
+
594,31600,5.9126
|
| 81 |
+
600,32000,5.1805
|
| 82 |
+
606,32400,6.9075
|
| 83 |
+
611,32800,6.2059
|
| 84 |
+
616,33200,4.6794
|
| 85 |
+
621,33600,7.0498
|
| 86 |
+
626,34000,7.4649
|
| 87 |
+
631,34400,6.6183
|
| 88 |
+
635,34800,8.8603
|
| 89 |
+
639,35200,6.275
|
| 90 |
+
643,35600,9.1126
|
| 91 |
+
648,36000,6.9308
|
| 92 |
+
653,36400,6.7373
|
| 93 |
+
657,36800,7.6857
|
| 94 |
+
662,37200,7.836
|
| 95 |
+
666,37600,6.3725
|
| 96 |
+
671,38000,6.2922
|
| 97 |
+
676,38400,6.6979
|
| 98 |
+
680,38800,7.6388
|
| 99 |
+
684,39200,7.0079
|
| 100 |
+
688,39600,7.5892
|
| 101 |
+
692,40000,7.7355
|
| 102 |
+
697,40400,6.5238
|
| 103 |
+
702,40800,6.181
|
| 104 |
+
706,41200,6.5753
|
| 105 |
+
711,41600,6.499
|
| 106 |
+
716,42000,6.1642
|
| 107 |
+
721,42400,6.3709
|
| 108 |
+
725,42800,6.5148
|
| 109 |
+
730,43200,6.1071
|
| 110 |
+
734,43600,7.2231
|
| 111 |
+
739,44000,6.9884
|
| 112 |
+
743,44400,7.6752
|
| 113 |
+
747,44800,7.645
|
| 114 |
+
751,45200,7.6253
|
| 115 |
+
756,45600,6.9888
|
| 116 |
+
760,46000,6.551
|
| 117 |
+
764,46400,8.8765
|
| 118 |
+
768,46800,7.644
|
| 119 |
+
772,47200,7.7078
|
| 120 |
+
776,47600,7.7402
|
| 121 |
+
780,48000,7.7096
|
| 122 |
+
786,48400,6.4542
|
| 123 |
+
790,48800,6.8511
|
| 124 |
+
795,49200,6.7727
|
| 125 |
+
799,49600,7.6417
|
| 126 |
+
803,50000,7.6993
|
| 127 |
+
807,50400,7.7043
|
| 128 |
+
812,50800,6.8261
|
| 129 |
+
816,51200,6.92
|
| 130 |
+
820,51600,7.9919
|
| 131 |
+
825,52000,6.4103
|
| 132 |
+
830,52400,5.7618
|
| 133 |
+
834,52800,7.0424
|
| 134 |
+
838,53200,7.6928
|
| 135 |
+
842,53600,7.7354
|
| 136 |
+
847,54000,7.3411
|
| 137 |
+
852,54400,6.3078
|
| 138 |
+
856,54800,7.4612
|
| 139 |
+
860,55200,6.6696
|
| 140 |
+
865,55600,6.7569
|
| 141 |
+
869,56000,6.9279
|
| 142 |
+
874,56400,6.5996
|
| 143 |
+
878,56800,7.7271
|
| 144 |
+
882,57200,7.6932
|
| 145 |
+
886,57600,7.5902
|
| 146 |
+
890,58000,7.1683
|
| 147 |
+
897,58400,4.5812
|
| 148 |
+
903,58800,6.1682
|
| 149 |
+
908,59200,6.1325
|
| 150 |
+
912,59600,7.8386
|
| 151 |
+
916,60000,6.3943
|
| 152 |
+
921,60400,6.8943
|
| 153 |
+
925,60800,7.8821
|
| 154 |
+
932,61200,4.9096
|
| 155 |
+
937,61600,6.3275
|
| 156 |
+
941,62000,8.5893
|
| 157 |
+
945,62400,7.9071
|
| 158 |
+
949,62800,7.8808
|
| 159 |
+
953,63200,7.8324
|
| 160 |
+
957,63600,8.5609
|
| 161 |
+
961,64000,8.4952
|
| 162 |
+
965,64400,7.8793
|
| 163 |
+
971,64800,5.9811
|
| 164 |
+
975,65200,6.3148
|
| 165 |
+
979,65600,8.3047
|
| 166 |
+
985,66000,6.6049
|
| 167 |
+
991,66400,5.5465
|
| 168 |
+
995,66800,6.7472
|
| 169 |
+
1000,67200,6.688
|
| 170 |
+
1004,67600,8.5059
|
| 171 |
+
1008,68000,8.0416
|
| 172 |
+
1012,68400,9.3594
|
| 173 |
+
1017,68800,7.135
|
| 174 |
+
1022,69200,7.7882
|
| 175 |
+
1028,69600,5.1304
|
| 176 |
+
1032,70000,7.9267
|
| 177 |
+
1036,70400,7.9253
|
| 178 |
+
1040,70800,7.522
|
| 179 |
+
1045,71200,6.7979
|
| 180 |
+
1050,71600,7.0769
|
| 181 |
+
1055,72000,7.1814
|
| 182 |
+
1059,72400,7.9156
|
| 183 |
+
1063,72800,6.9876
|
| 184 |
+
1069,73200,5.6939
|
| 185 |
+
1073,73600,7.603
|
| 186 |
+
1078,74000,6.9544
|
| 187 |
+
1085,74400,5.1872
|
| 188 |
+
1089,74800,8.0712
|
| 189 |
+
1094,75200,5.4866
|
| 190 |
+
1099,75600,6.856
|
| 191 |
+
1104,76000,6.6695
|
| 192 |
+
1111,76400,5.2366
|
| 193 |
+
1115,76800,8.231
|
| 194 |
+
1120,77200,6.3017
|
| 195 |
+
1125,77600,6.1984
|
| 196 |
+
1129,78000,8.0981
|
| 197 |
+
1134,78400,5.6874
|
| 198 |
+
1139,78800,6.7436
|
| 199 |
+
1144,79200,6.7781
|
| 200 |
+
1148,79600,8.035
|
| 201 |
+
1153,80000,7.0241
|
| 202 |
+
1157,80400,8.1216
|
| 203 |
+
1161,80800,7.7847
|
| 204 |
+
1165,81200,7.0458
|
| 205 |
+
1172,81600,4.9159
|
| 206 |
+
1176,82000,8.8252
|
| 207 |
+
1180,82400,8.1435
|
| 208 |
+
1184,82800,8.0794
|
| 209 |
+
1188,83200,8.4439
|
| 210 |
+
1194,83600,6.5179
|
| 211 |
+
1198,84000,9.5129
|
| 212 |
+
1202,84400,9.5982
|
| 213 |
+
1206,84800,8.1605
|
| 214 |
+
1213,85200,5.1735
|
| 215 |
+
1218,85600,6.3955
|
| 216 |
+
1222,86000,6.5836
|
| 217 |
+
1227,86400,7.8827
|
| 218 |
+
1231,86800,9.0583
|
| 219 |
+
1236,87200,7.4464
|
| 220 |
+
1242,87600,6.9449
|
| 221 |
+
1247,88000,7.6785
|
| 222 |
+
1251,88400,7.7551
|
| 223 |
+
1255,88800,9.1958
|
| 224 |
+
1260,89200,7.8329
|
| 225 |
+
1265,89600,9.6998
|
| 226 |
+
1269,90000,12.2941
|
| 227 |
+
1274,90400,7.8743
|
| 228 |
+
1281,90800,6.7261
|
| 229 |
+
1285,91200,10.3406
|
| 230 |
+
1289,91600,10.3629
|
| 231 |
+
1294,92000,12.8416
|
| 232 |
+
1298,92400,11.8637
|
| 233 |
+
1302,92800,10.95
|
| 234 |
+
1307,93200,12.6815
|
| 235 |
+
1312,93600,10.2359
|
| 236 |
+
1316,94000,14.5616
|
| 237 |
+
1320,94400,12.2057
|
| 238 |
+
1325,94800,10.3496
|
| 239 |
+
1331,95200,10.0749
|
| 240 |
+
1336,95600,12.5332
|
| 241 |
+
1341,96000,7.9488
|
| 242 |
+
1347,96400,11.6734
|
| 243 |
+
1353,96800,8.6258
|
| 244 |
+
1359,97200,8.6982
|
| 245 |
+
1363,97600,16.8782
|
| 246 |
+
1369,98000,13.076
|
| 247 |
+
1374,98400,12.6862
|
| 248 |
+
1380,98800,13.0069
|
| 249 |
+
1385,99200,13.2064
|
| 250 |
+
1394,99600,7.5732
|
| 251 |
+
1399,100000,13.571
|
| 252 |
+
1406,100400,11.8885
|
| 253 |
+
1411,100800,14.0567
|
| 254 |
+
1416,101200,14.4459
|
| 255 |
+
1422,101600,14.7572
|
| 256 |
+
1430,102000,9.0038
|
| 257 |
+
1436,102400,11.7543
|
| 258 |
+
1443,102800,12.3206
|
| 259 |
+
1448,103200,13.6686
|
| 260 |
+
1454,103600,14.6563
|
| 261 |
+
1461,104000,10.5416
|
| 262 |
+
1467,104400,11.8187
|
| 263 |
+
1473,104800,12.6766
|
| 264 |
+
1478,105200,13.234
|
| 265 |
+
1487,105600,10.4377
|
| 266 |
+
1494,106000,10.4682
|
| 267 |
+
1499,106400,14.2136
|
| 268 |
+
1505,106800,13.3214
|
| 269 |
+
1512,107200,9.9131
|
| 270 |
+
1518,107600,11.3795
|
| 271 |
+
1524,108000,13.6525
|
| 272 |
+
1531,108400,10.5166
|
| 273 |
+
1536,108800,15.6816
|
| 274 |
+
1541,109200,13.5686
|
| 275 |
+
1546,109600,15.8877
|
| 276 |
+
1552,110000,10.798
|
| 277 |
+
1556,110400,15.505
|
| 278 |
+
1564,110800,8.7457
|
| 279 |
+
1571,111200,9.8442
|
| 280 |
+
1576,111600,16.783
|
| 281 |
+
1581,112000,16.6507
|
| 282 |
+
1587,112400,12.5058
|
| 283 |
+
1597,112800,7.0879
|
| 284 |
+
1604,113200,9.4795
|
| 285 |
+
1612,113600,8.8247
|
| 286 |
+
1619,114000,9.8475
|
| 287 |
+
1626,114400,9.1637
|
| 288 |
+
1631,114800,14.0166
|
| 289 |
+
1638,115200,8.0027
|
| 290 |
+
1642,115600,17.5209
|
| 291 |
+
1650,116000,7.7073
|
| 292 |
+
1654,116400,20.1086
|
| 293 |
+
1661,116800,7.9623
|
| 294 |
+
1668,117200,12.2066
|
| 295 |
+
1675,117600,8.7988
|
| 296 |
+
1681,118000,14.56
|
| 297 |
+
1686,118400,16.3382
|
| 298 |
+
1691,118800,12.1992
|
| 299 |
+
1699,119200,10.4266
|
| 300 |
+
1703,119600,19.3562
|
| 301 |
+
1708,120000,11.3536
|
| 302 |
+
1712,120400,20.2356
|
| 303 |
+
1716,120800,19.5323
|
| 304 |
+
1721,121200,17.4551
|
| 305 |
+
1728,121600,10.1801
|
| 306 |
+
1732,122000,16.947
|
| 307 |
+
1739,122400,9.827
|
| 308 |
+
1745,122800,13.3383
|
| 309 |
+
1749,123200,19.4348
|
| 310 |
+
1755,123600,12.4996
|
| 311 |
+
1761,124000,13.8883
|
| 312 |
+
1765,124400,20.16
|
| 313 |
+
1769,124800,19.6822
|
| 314 |
+
1776,125200,10.0647
|
| 315 |
+
1780,125600,18.9827
|
| 316 |
+
1785,126000,10.8816
|
| 317 |
+
1791,126400,12.6382
|
| 318 |
+
1797,126800,11.2938
|
| 319 |
+
1806,127200,8.8997
|
| 320 |
+
1811,127600,15.7514
|
| 321 |
+
1817,128000,9.8611
|
| 322 |
+
1825,128400,10.0803
|
| 323 |
+
1833,128800,9.5121
|
| 324 |
+
1841,129200,7.1428
|
| 325 |
+
1850,129600,8.5931
|
| 326 |
+
1858,130000,9.1178
|
| 327 |
+
1866,130400,10.6061
|
| 328 |
+
1871,130800,15.7645
|
| 329 |
+
1875,131200,19.0111
|
| 330 |
+
1881,131600,10.6172
|
| 331 |
+
1885,132000,20.6998
|
| 332 |
+
1889,132400,20.2484
|
| 333 |
+
1894,132800,16.4684
|
| 334 |
+
1900,133200,13.5329
|
| 335 |
+
1904,133600,20.9186
|
| 336 |
+
1910,134000,15.0977
|
| 337 |
+
1915,134400,14.7358
|
| 338 |
+
1920,134800,15.5285
|
| 339 |
+
1928,135200,8.3647
|
| 340 |
+
1934,135600,11.9479
|
| 341 |
+
1940,136000,14.4666
|
| 342 |
+
1947,136400,9.6302
|
| 343 |
+
1953,136800,13.6634
|
| 344 |
+
1958,137200,14.0396
|
| 345 |
+
1966,137600,9.1948
|
| 346 |
+
1973,138000,9.3788
|
| 347 |
+
1982,138400,9.0964
|
| 348 |
+
1991,138800,7.105
|
| 349 |
+
1998,139200,11.6642
|
| 350 |
+
2005,139600,8.2024
|
| 351 |
+
2014,140000,7.7126
|
| 352 |
+
2021,140400,11.0422
|
| 353 |
+
2027,140800,11.2108
|
| 354 |
+
2032,141200,15.9589
|
| 355 |
+
2039,141600,8.9692
|
| 356 |
+
2046,142000,12.0016
|
| 357 |
+
2052,142400,13.3516
|
| 358 |
+
2056,142800,19.3885
|
| 359 |
+
2061,143200,14.0276
|
| 360 |
+
2066,143600,14.164
|
| 361 |
+
2073,144000,11.3607
|
| 362 |
+
2082,144400,6.5908
|
| 363 |
+
2089,144800,10.36
|
| 364 |
+
2094,145200,14.2824
|
| 365 |
+
2105,145600,4.9849
|
| 366 |
+
2111,146000,11.1523
|
| 367 |
+
2116,146400,12.4602
|
| 368 |
+
2123,146800,10.201
|
| 369 |
+
2129,147200,11.5534
|
| 370 |
+
2138,147600,5.9679
|
| 371 |
+
2142,148000,16.4718
|
| 372 |
+
2147,148400,17.5833
|
| 373 |
+
2152,148800,16.768
|
| 374 |
+
2156,149200,19.1331
|
| 375 |
+
2162,149600,12.3424
|
| 376 |
+
2166,150000,18.0845
|
| 377 |
+
2170,150400,18.9557
|
| 378 |
+
2175,150800,14.0846
|
| 379 |
+
2179,151200,20.1474
|
| 380 |
+
2184,151600,15.3493
|
| 381 |
+
2188,152000,18.1315
|
| 382 |
+
2193,152400,15.8044
|
| 383 |
+
2197,152800,17.7751
|
| 384 |
+
2203,153200,11.6984
|
| 385 |
+
2209,153600,13.0837
|
| 386 |
+
2213,154000,15.855
|
| 387 |
+
2218,154400,16.8814
|
| 388 |
+
2223,154800,16.0311
|
| 389 |
+
2227,155200,15.3192
|
| 390 |
+
2234,155600,11.457
|
| 391 |
+
2239,156000,16.1934
|
| 392 |
+
2246,156400,11.9881
|
| 393 |
+
2252,156800,9.2926
|
| 394 |
+
2257,157200,16.73
|
| 395 |
+
2261,157600,14.6868
|
| 396 |
+
2268,158000,7.9747
|
| 397 |
+
2273,158400,10.7901
|
| 398 |
+
2277,158800,18.3051
|
| 399 |
+
2283,159200,14.0301
|
| 400 |
+
2288,159600,15.5108
|
| 401 |
+
2294,160000,10.0966
|
| 402 |
+
2300,160400,8.0898
|
| 403 |
+
2306,160800,8.0323
|
| 404 |
+
2315,161200,5.836
|
| 405 |
+
2321,161600,10.9128
|
| 406 |
+
2329,162000,5.4973
|
| 407 |
+
2334,162400,11.0091
|
| 408 |
+
2340,162800,8.3576
|
| 409 |
+
2349,163200,6.2928
|
| 410 |
+
2354,163600,13.008
|
| 411 |
+
2358,164000,15.7582
|
| 412 |
+
2364,164400,10.6657
|
| 413 |
+
2372,164800,7.3352
|
| 414 |
+
2378,165200,7.9416
|
| 415 |
+
2385,165600,9.7033
|
| 416 |
+
2390,166000,14.8003
|
| 417 |
+
2395,166400,13.5949
|
| 418 |
+
2399,166800,16.5582
|
| 419 |
+
2405,167200,13.1281
|
| 420 |
+
2410,167600,13.6583
|
| 421 |
+
2415,168000,13.3859
|
| 422 |
+
2422,168400,12.7336
|
| 423 |
+
2427,168800,13.4928
|
| 424 |
+
2432,169200,14.222
|
| 425 |
+
2438,169600,12.2052
|
| 426 |
+
2442,170000,17.538
|
| 427 |
+
2449,170400,13.4975
|
| 428 |
+
2454,170800,13.4316
|
| 429 |
+
2460,171200,14.3904
|
| 430 |
+
2466,171600,14.1914
|
| 431 |
+
2472,172000,10.7212
|
| 432 |
+
2478,172400,13.2043
|
| 433 |
+
2484,172800,12.3505
|
| 434 |
+
2488,173200,18.4707
|
| 435 |
+
2493,173600,13.7594
|
| 436 |
+
2498,174000,15.2434
|
| 437 |
+
2503,174400,17.6234
|
| 438 |
+
2509,174800,11.5482
|
| 439 |
+
2513,175200,18.5919
|
| 440 |
+
2518,175600,18.2289
|
| 441 |
+
2525,176000,10.1992
|
| 442 |
+
2529,176400,19.2187
|
| 443 |
+
2533,176800,20.1763
|
| 444 |
+
2538,177200,15.8706
|
| 445 |
+
2542,177600,20.1437
|
| 446 |
+
2547,178000,14.6606
|
| 447 |
+
2552,178400,16.5006
|
| 448 |
+
2557,178800,17.2947
|
| 449 |
+
2561,179200,20.4127
|
| 450 |
+
2566,179600,16.8026
|
| 451 |
+
2571,180000,16.4537
|
| 452 |
+
2575,180400,19.0096
|
| 453 |
+
2580,180800,12.4961
|
| 454 |
+
2586,181200,13.702
|
| 455 |
+
2590,181600,19.4121
|
| 456 |
+
2595,182000,19.1734
|
| 457 |
+
2599,182400,15.6252
|
| 458 |
+
2604,182800,17.4543
|
| 459 |
+
2608,183200,18.4527
|
| 460 |
+
2613,183600,17.0079
|
| 461 |
+
2617,184000,15.8574
|
| 462 |
+
2623,184400,14.6486
|
| 463 |
+
2627,184800,16.7431
|
| 464 |
+
2632,185200,16.3204
|
| 465 |
+
2636,185600,18.1816
|
| 466 |
+
2640,186000,19.3138
|
| 467 |
+
2646,186400,13.6693
|
| 468 |
+
2652,186800,12.1288
|
| 469 |
+
2656,187200,19.5385
|
| 470 |
+
2662,187600,12.7194
|
| 471 |
+
2666,188000,18.8978
|
| 472 |
+
2671,188400,17.7266
|
| 473 |
+
2675,188800,20.3572
|
| 474 |
+
2680,189200,16.1169
|
| 475 |
+
2685,189600,14.712
|
| 476 |
+
2692,190000,11.1334
|
| 477 |
+
2696,190400,17.4639
|
| 478 |
+
2701,190800,16.8716
|
| 479 |
+
2705,191200,19.8286
|
| 480 |
+
2710,191600,18.7337
|
| 481 |
+
2714,192000,19.8553
|
| 482 |
+
2719,192400,14.2238
|
| 483 |
+
2725,192800,12.8373
|
| 484 |
+
2735,193200,7.0151
|
| 485 |
+
2739,193600,13.3644
|
| 486 |
+
2745,194000,13.3385
|
| 487 |
+
2753,194400,8.739
|
| 488 |
+
2758,194800,13.8917
|
| 489 |
+
2764,195200,10.3893
|
| 490 |
+
2773,195600,4.8033
|
| 491 |
+
2784,196000,5.5534
|
| 492 |
+
2792,196400,7.2659
|
| 493 |
+
2796,196800,13.9818
|
| 494 |
+
2802,197200,13.2211
|
| 495 |
+
2807,197600,16.0152
|
| 496 |
+
2814,198000,9.9562
|
| 497 |
+
2823,198400,6.7525
|
| 498 |
+
2828,198800,14.4238
|
| 499 |
+
2833,199200,10.7252
|
| 500 |
+
2838,199600,13.7153
|
| 501 |
+
2842,200000,19.2825
|
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_2.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
25,400,1.2001
|
| 3 |
+
50,800,1.1974
|
| 4 |
+
67,1200,1.7766
|
| 5 |
+
87,1600,1.4873
|
| 6 |
+
104,2000,1.7077
|
| 7 |
+
114,2400,2.5908
|
| 8 |
+
134,2800,1.8992
|
| 9 |
+
145,3200,2.8642
|
| 10 |
+
154,3600,2.9615
|
| 11 |
+
163,4000,3.7629
|
| 12 |
+
170,4400,4.1044
|
| 13 |
+
180,4800,3.4985
|
| 14 |
+
187,5200,4.3872
|
| 15 |
+
192,5600,5.6852
|
| 16 |
+
198,6000,5.7852
|
| 17 |
+
205,6400,4.5497
|
| 18 |
+
210,6800,5.1203
|
| 19 |
+
215,7200,6.3622
|
| 20 |
+
222,7600,4.9101
|
| 21 |
+
230,8000,3.7216
|
| 22 |
+
237,8400,3.9412
|
| 23 |
+
243,8800,5.1797
|
| 24 |
+
250,9200,4.9764
|
| 25 |
+
255,9600,5.0597
|
| 26 |
+
260,10000,6.6622
|
| 27 |
+
266,10400,5.4034
|
| 28 |
+
274,10800,3.7803
|
| 29 |
+
279,11200,4.8601
|
| 30 |
+
284,11600,6.8704
|
| 31 |
+
289,12000,5.884
|
| 32 |
+
298,12400,3.1085
|
| 33 |
+
306,12800,3.7402
|
| 34 |
+
317,13200,2.4474
|
| 35 |
+
325,13600,4.697
|
| 36 |
+
333,14000,3.7155
|
| 37 |
+
340,14400,4.1133
|
| 38 |
+
345,14800,6.3342
|
| 39 |
+
354,15200,3.4713
|
| 40 |
+
363,15600,2.973
|
| 41 |
+
369,16000,5.3844
|
| 42 |
+
376,16400,3.9942
|
| 43 |
+
384,16800,4.3917
|
| 44 |
+
390,17200,3.9032
|
| 45 |
+
396,17600,5.5522
|
| 46 |
+
402,18000,5.0959
|
| 47 |
+
410,18400,4.0555
|
| 48 |
+
419,18800,3.8592
|
| 49 |
+
424,19200,5.5717
|
| 50 |
+
430,19600,5.4533
|
| 51 |
+
436,20000,4.4112
|
| 52 |
+
442,20400,5.2485
|
| 53 |
+
453,20800,3.6337
|
| 54 |
+
458,21200,5.7439
|
| 55 |
+
464,21600,5.8554
|
| 56 |
+
470,22000,6.1356
|
| 57 |
+
478,22400,4.1016
|
| 58 |
+
483,22800,6.9287
|
| 59 |
+
487,23200,7.9029
|
| 60 |
+
492,23600,7.2121
|
| 61 |
+
496,24000,6.8316
|
| 62 |
+
501,24400,7.3779
|
| 63 |
+
506,24800,7.0025
|
| 64 |
+
511,25200,7.3224
|
| 65 |
+
516,25600,6.0262
|
| 66 |
+
521,26000,7.3493
|
| 67 |
+
525,26400,8.5734
|
| 68 |
+
530,26800,7.505
|
| 69 |
+
536,27200,5.6146
|
| 70 |
+
541,27600,7.32
|
| 71 |
+
547,28000,5.8791
|
| 72 |
+
551,28400,8.3691
|
| 73 |
+
555,28800,8.5242
|
| 74 |
+
560,29200,6.8016
|
| 75 |
+
564,29600,6.4966
|
| 76 |
+
570,30000,6.5884
|
| 77 |
+
574,30400,7.9429
|
| 78 |
+
579,30800,8.2591
|
| 79 |
+
586,31200,5.1781
|
| 80 |
+
590,31600,6.725
|
| 81 |
+
595,32000,7.3339
|
| 82 |
+
602,32400,5.5153
|
| 83 |
+
606,32800,7.294
|
| 84 |
+
611,33200,7.861
|
| 85 |
+
617,33600,5.8614
|
| 86 |
+
622,34000,6.2895
|
| 87 |
+
626,34400,7.641
|
| 88 |
+
631,34800,7.1765
|
| 89 |
+
635,35200,8.3742
|
| 90 |
+
639,35600,8.5644
|
| 91 |
+
644,36000,7.5061
|
| 92 |
+
648,36400,7.137
|
| 93 |
+
654,36800,6.317
|
| 94 |
+
659,37200,7.5171
|
| 95 |
+
663,37600,8.4671
|
| 96 |
+
667,38000,8.8055
|
| 97 |
+
672,38400,7.2286
|
| 98 |
+
677,38800,7.0709
|
| 99 |
+
682,39200,6.0232
|
| 100 |
+
687,39600,7.7556
|
| 101 |
+
691,40000,7.7135
|
| 102 |
+
695,40400,8.3451
|
| 103 |
+
701,40800,6.0775
|
| 104 |
+
705,41200,8.3363
|
| 105 |
+
709,41600,8.6071
|
| 106 |
+
715,42000,5.3694
|
| 107 |
+
720,42400,7.6011
|
| 108 |
+
724,42800,8.7117
|
| 109 |
+
729,43200,7.0684
|
| 110 |
+
735,43600,5.433
|
| 111 |
+
739,44000,8.6411
|
| 112 |
+
743,44400,8.5924
|
| 113 |
+
747,44800,8.9168
|
| 114 |
+
752,45200,7.2905
|
| 115 |
+
756,45600,8.9831
|
| 116 |
+
761,46000,7.4007
|
| 117 |
+
766,46400,6.664
|
| 118 |
+
770,46800,7.3928
|
| 119 |
+
777,47200,5.7024
|
| 120 |
+
783,47600,5.151
|
| 121 |
+
788,48000,7.2108
|
| 122 |
+
793,48400,7.2763
|
| 123 |
+
799,48800,7.0493
|
| 124 |
+
803,49200,8.5485
|
| 125 |
+
808,49600,7.5421
|
| 126 |
+
812,50000,9.5257
|
| 127 |
+
816,50400,9.1144
|
| 128 |
+
822,50800,5.6572
|
| 129 |
+
828,51200,7.8776
|
| 130 |
+
832,51600,10.0686
|
| 131 |
+
838,52000,6.2858
|
| 132 |
+
842,52400,8.7905
|
| 133 |
+
847,52800,6.9064
|
| 134 |
+
852,53200,8.591
|
| 135 |
+
857,53600,7.9326
|
| 136 |
+
861,54000,10.7555
|
| 137 |
+
865,54400,11.0084
|
| 138 |
+
870,54800,8.5343
|
| 139 |
+
874,55200,10.2597
|
| 140 |
+
878,55600,11.0706
|
| 141 |
+
883,56000,9.7567
|
| 142 |
+
887,56400,11.0066
|
| 143 |
+
891,56800,11.0205
|
| 144 |
+
897,57200,8.6028
|
| 145 |
+
902,57600,7.8706
|
| 146 |
+
906,58000,11.6827
|
| 147 |
+
911,58400,11.5678
|
| 148 |
+
916,58800,7.3302
|
| 149 |
+
920,59200,11.8686
|
| 150 |
+
925,59600,9.6659
|
| 151 |
+
929,60000,11.8218
|
| 152 |
+
933,60400,11.753
|
| 153 |
+
938,60800,10.8838
|
| 154 |
+
942,61200,11.9606
|
| 155 |
+
946,61600,11.9891
|
| 156 |
+
950,62000,10.7408
|
| 157 |
+
955,62400,9.9649
|
| 158 |
+
959,62800,10.1192
|
| 159 |
+
963,63200,11.9243
|
| 160 |
+
967,63600,12.0996
|
| 161 |
+
971,64000,12.2641
|
| 162 |
+
975,64400,12.2119
|
| 163 |
+
980,64800,9.6896
|
| 164 |
+
984,65200,11.985
|
| 165 |
+
988,65600,12.0389
|
| 166 |
+
993,66000,9.7261
|
| 167 |
+
998,66400,10.0918
|
| 168 |
+
1002,66800,11.9409
|
| 169 |
+
1007,67200,9.7976
|
| 170 |
+
1011,67600,11.9562
|
| 171 |
+
1015,68000,11.9393
|
| 172 |
+
1019,68400,11.9489
|
| 173 |
+
1023,68800,10.6665
|
| 174 |
+
1029,69200,8.6481
|
| 175 |
+
1035,69600,8.2744
|
| 176 |
+
1040,70000,9.8761
|
| 177 |
+
1044,70400,12.7648
|
| 178 |
+
1048,70800,12.0824
|
| 179 |
+
1053,71200,9.9615
|
| 180 |
+
1059,71600,7.5596
|
| 181 |
+
1063,72000,12.066
|
| 182 |
+
1068,72400,10.3717
|
| 183 |
+
1072,72800,11.9771
|
| 184 |
+
1076,73200,11.9456
|
| 185 |
+
1080,73600,9.1042
|
| 186 |
+
1084,74000,12.076
|
| 187 |
+
1090,74400,9.4681
|
| 188 |
+
1094,74800,11.529
|
| 189 |
+
1098,75200,12.0008
|
| 190 |
+
1103,75600,10.1566
|
| 191 |
+
1108,76000,9.751
|
| 192 |
+
1112,76400,12.3104
|
| 193 |
+
1116,76800,9.2405
|
| 194 |
+
1120,77200,12.0348
|
| 195 |
+
1125,77600,10.0069
|
| 196 |
+
1130,78000,10.0386
|
| 197 |
+
1136,78400,10.4839
|
| 198 |
+
1142,78800,7.6819
|
| 199 |
+
1149,79200,5.9398
|
| 200 |
+
1156,79600,7.5437
|
| 201 |
+
1163,80000,7.4114
|
| 202 |
+
1167,80400,9.315
|
| 203 |
+
1172,80800,11.2425
|
| 204 |
+
1176,81200,10.7758
|
| 205 |
+
1180,81600,12.0153
|
| 206 |
+
1185,82000,10.7721
|
| 207 |
+
1190,82400,9.8986
|
| 208 |
+
1194,82800,11.9053
|
| 209 |
+
1198,83200,11.9558
|
| 210 |
+
1203,83600,9.3269
|
| 211 |
+
1207,84000,12.2099
|
| 212 |
+
1213,84400,8.4628
|
| 213 |
+
1217,84800,10.7038
|
| 214 |
+
1221,85200,12.2837
|
| 215 |
+
1226,85600,9.9915
|
| 216 |
+
1231,86000,9.9481
|
| 217 |
+
1236,86400,10.1545
|
| 218 |
+
1240,86800,12.2327
|
| 219 |
+
1244,87200,12.075
|
| 220 |
+
1248,87600,12.01
|
| 221 |
+
1252,88000,10.5855
|
| 222 |
+
1256,88400,12.134
|
| 223 |
+
1260,88800,12.3757
|
| 224 |
+
1264,89200,11.9803
|
| 225 |
+
1269,89600,9.3804
|
| 226 |
+
1274,90000,9.8415
|
| 227 |
+
1278,90400,11.9713
|
| 228 |
+
1284,90800,8.3152
|
| 229 |
+
1289,91200,8.8216
|
| 230 |
+
1293,91600,12.3776
|
| 231 |
+
1299,92000,8.471
|
| 232 |
+
1303,92400,12.5027
|
| 233 |
+
1307,92800,12.517
|
| 234 |
+
1313,93200,8.4122
|
| 235 |
+
1317,93600,13.1839
|
| 236 |
+
1322,94000,10.4515
|
| 237 |
+
1326,94400,11.1728
|
| 238 |
+
1330,94800,13.0548
|
| 239 |
+
1334,95200,12.0536
|
| 240 |
+
1338,95600,12.4526
|
| 241 |
+
1344,96000,9.0872
|
| 242 |
+
1351,96400,7.9055
|
| 243 |
+
1355,96800,11.2378
|
| 244 |
+
1360,97200,11.1121
|
| 245 |
+
1364,97600,12.7524
|
| 246 |
+
1368,98000,12.6111
|
| 247 |
+
1373,98400,11.2153
|
| 248 |
+
1378,98800,8.2927
|
| 249 |
+
1382,99200,12.5971
|
| 250 |
+
1388,99600,9.538
|
| 251 |
+
1392,100000,12.5941
|
| 252 |
+
1397,100400,10.2156
|
| 253 |
+
1401,100800,12.5003
|
| 254 |
+
1406,101200,9.1204
|
| 255 |
+
1411,101600,11.3426
|
| 256 |
+
1418,102000,6.7031
|
| 257 |
+
1422,102400,13.0065
|
| 258 |
+
1427,102800,10.4644
|
| 259 |
+
1431,103200,12.6849
|
| 260 |
+
1435,103600,12.3948
|
| 261 |
+
1441,104000,9.7853
|
| 262 |
+
1446,104400,9.4532
|
| 263 |
+
1450,104800,10.5596
|
| 264 |
+
1456,105200,8.6362
|
| 265 |
+
1461,105600,11.1625
|
| 266 |
+
1465,106000,9.7383
|
| 267 |
+
1471,106400,9.5542
|
| 268 |
+
1476,106800,10.1016
|
| 269 |
+
1480,107200,11.3806
|
| 270 |
+
1484,107600,12.8012
|
| 271 |
+
1488,108000,12.754
|
| 272 |
+
1493,108400,9.2045
|
| 273 |
+
1497,108800,12.9866
|
| 274 |
+
1504,109200,7.3446
|
| 275 |
+
1509,109600,11.8225
|
| 276 |
+
1514,110000,8.9346
|
| 277 |
+
1518,110400,12.7821
|
| 278 |
+
1523,110800,11.2293
|
| 279 |
+
1527,111200,12.976
|
| 280 |
+
1532,111600,8.6775
|
| 281 |
+
1536,112000,12.7469
|
| 282 |
+
1540,112400,12.7591
|
| 283 |
+
1545,112800,10.3778
|
| 284 |
+
1550,113200,10.3744
|
| 285 |
+
1554,113600,12.7611
|
| 286 |
+
1558,114000,11.8172
|
| 287 |
+
1562,114400,12.8452
|
| 288 |
+
1566,114800,12.6974
|
| 289 |
+
1570,115200,12.7894
|
| 290 |
+
1575,115600,10.9595
|
| 291 |
+
1579,116000,13.0989
|
| 292 |
+
1583,116400,12.9278
|
| 293 |
+
1589,116800,9.0682
|
| 294 |
+
1593,117200,12.9978
|
| 295 |
+
1597,117600,10.7087
|
| 296 |
+
1601,118000,13.5158
|
| 297 |
+
1605,118400,13.435
|
| 298 |
+
1610,118800,11.5973
|
| 299 |
+
1614,119200,13.0533
|
| 300 |
+
1618,119600,13.0017
|
| 301 |
+
1623,120000,10.8166
|
| 302 |
+
1629,120400,9.0293
|
| 303 |
+
1634,120800,9.2539
|
| 304 |
+
1639,121200,10.6134
|
| 305 |
+
1644,121600,11.5216
|
| 306 |
+
1648,122000,13.3206
|
| 307 |
+
1652,122400,12.207
|
| 308 |
+
1657,122800,11.3809
|
| 309 |
+
1661,123200,10.1333
|
| 310 |
+
1665,123600,13.4154
|
| 311 |
+
1669,124000,13.7245
|
| 312 |
+
1673,124400,13.2766
|
| 313 |
+
1677,124800,13.6751
|
| 314 |
+
1681,125200,13.3002
|
| 315 |
+
1686,125600,11.3154
|
| 316 |
+
1690,126000,14.442
|
| 317 |
+
1697,126400,9.1752
|
| 318 |
+
1701,126800,12.6002
|
| 319 |
+
1710,127200,8.7327
|
| 320 |
+
1715,127600,11.4596
|
| 321 |
+
1722,128000,9.0605
|
| 322 |
+
1730,128400,6.6071
|
| 323 |
+
1738,128800,7.8556
|
| 324 |
+
1743,129200,8.6705
|
| 325 |
+
1748,129600,13.2619
|
| 326 |
+
1757,130000,8.0688
|
| 327 |
+
1762,130400,11.255
|
| 328 |
+
1769,130800,10.1434
|
| 329 |
+
1776,131200,7.9637
|
| 330 |
+
1780,131600,16.234
|
| 331 |
+
1784,132000,16.009
|
| 332 |
+
1790,132400,11.5696
|
| 333 |
+
1795,132800,13.03
|
| 334 |
+
1801,133200,11.5952
|
| 335 |
+
1807,133600,10.1351
|
| 336 |
+
1811,134000,14.54
|
| 337 |
+
1823,134400,4.607
|
| 338 |
+
1829,134800,12.6576
|
| 339 |
+
1833,135200,16.8332
|
| 340 |
+
1838,135600,11.8201
|
| 341 |
+
1842,136000,17.1005
|
| 342 |
+
1846,136400,16.8673
|
| 343 |
+
1851,136800,15.3565
|
| 344 |
+
1857,137200,8.8723
|
| 345 |
+
1862,137600,13.1234
|
| 346 |
+
1868,138000,12.9135
|
| 347 |
+
1876,138400,6.6299
|
| 348 |
+
1883,138800,8.6404
|
| 349 |
+
1892,139200,8.0842
|
| 350 |
+
1896,139600,16.6064
|
| 351 |
+
1900,140000,12.9285
|
| 352 |
+
1912,140400,5.249
|
| 353 |
+
1918,140800,9.7863
|
| 354 |
+
1926,141200,7.8766
|
| 355 |
+
1932,141600,7.9322
|
| 356 |
+
1942,142000,5.3181
|
| 357 |
+
1947,142400,12.7024
|
| 358 |
+
1956,142800,8.2081
|
| 359 |
+
1968,143200,2.9574
|
| 360 |
+
1975,143600,6.7944
|
| 361 |
+
1981,144000,11.6649
|
| 362 |
+
1994,144400,5.4107
|
| 363 |
+
2003,144800,6.3419
|
| 364 |
+
2012,145200,6.9728
|
| 365 |
+
2023,145600,5.0183
|
| 366 |
+
2031,146000,7.8319
|
| 367 |
+
2041,146400,4.6116
|
| 368 |
+
2054,146800,4.337
|
| 369 |
+
2063,147200,5.3691
|
| 370 |
+
2069,147600,12.9849
|
| 371 |
+
2078,148000,8.5405
|
| 372 |
+
2091,148400,4.9827
|
| 373 |
+
2108,148800,3.026
|
| 374 |
+
2123,149200,3.3827
|
| 375 |
+
2132,149600,6.8501
|
| 376 |
+
2141,150000,7.5638
|
| 377 |
+
2152,150400,5.2852
|
| 378 |
+
2162,150800,7.5752
|
| 379 |
+
2170,151200,7.9508
|
| 380 |
+
2177,151600,10.2038
|
| 381 |
+
2191,152000,3.0956
|
| 382 |
+
2200,152400,7.7211
|
| 383 |
+
2208,152800,6.9913
|
| 384 |
+
2215,153200,10.9929
|
| 385 |
+
2219,153600,15.1937
|
| 386 |
+
2226,154000,12.1173
|
| 387 |
+
2235,154400,6.3827
|
| 388 |
+
2244,154800,9.904
|
| 389 |
+
2251,155200,10.4535
|
| 390 |
+
2257,155600,11.1928
|
| 391 |
+
2262,156000,11.8628
|
| 392 |
+
2269,156400,11.1129
|
| 393 |
+
2275,156800,10.8419
|
| 394 |
+
2282,157200,10.6974
|
| 395 |
+
2288,157600,12.6491
|
| 396 |
+
2293,158000,17.1227
|
| 397 |
+
2297,158400,15.1925
|
| 398 |
+
2302,158800,15.8225
|
| 399 |
+
2310,159200,10.5351
|
| 400 |
+
2314,159600,20.5124
|
| 401 |
+
2318,160000,20.7472
|
| 402 |
+
2323,160400,16.1619
|
| 403 |
+
2328,160800,17.0157
|
| 404 |
+
2332,161200,19.5865
|
| 405 |
+
2336,161600,20.3359
|
| 406 |
+
2340,162000,15.7826
|
| 407 |
+
2344,162400,20.7786
|
| 408 |
+
2349,162800,19.027
|
| 409 |
+
2353,163200,16.8306
|
| 410 |
+
2358,163600,15.0345
|
| 411 |
+
2364,164000,13.7065
|
| 412 |
+
2370,164400,15.8193
|
| 413 |
+
2375,164800,15.9792
|
| 414 |
+
2379,165200,16.8467
|
| 415 |
+
2384,165600,17.832
|
| 416 |
+
2388,166000,20.4626
|
| 417 |
+
2393,166400,14.8119
|
| 418 |
+
2399,166800,12.4114
|
| 419 |
+
2403,167200,20.9186
|
| 420 |
+
2408,167600,13.3934
|
| 421 |
+
2412,168000,20.5788
|
| 422 |
+
2418,168400,16.2933
|
| 423 |
+
2422,168800,18.8223
|
| 424 |
+
2427,169200,19.3578
|
| 425 |
+
2432,169600,13.3396
|
| 426 |
+
2437,170000,17.3548
|
| 427 |
+
2443,170400,16.2848
|
| 428 |
+
2448,170800,18.1538
|
| 429 |
+
2453,171200,16.7561
|
| 430 |
+
2457,171600,17.7607
|
| 431 |
+
2463,172000,15.1953
|
| 432 |
+
2469,172400,13.134
|
| 433 |
+
2476,172800,12.5457
|
| 434 |
+
2482,173200,14.6165
|
| 435 |
+
2487,173600,18.5189
|
| 436 |
+
2492,174000,19.1314
|
| 437 |
+
2496,174400,17.353
|
| 438 |
+
2500,174800,22.18
|
| 439 |
+
2506,175200,15.5115
|
| 440 |
+
2511,175600,19.8744
|
| 441 |
+
2515,176000,18.1736
|
| 442 |
+
2519,176400,21.4187
|
| 443 |
+
2525,176800,17.5231
|
| 444 |
+
2531,177200,12.1789
|
| 445 |
+
2536,177600,17.784
|
| 446 |
+
2540,178000,20.6193
|
| 447 |
+
2545,178400,17.4618
|
| 448 |
+
2549,178800,21.499
|
| 449 |
+
2553,179200,17.4586
|
| 450 |
+
2557,179600,20.6227
|
| 451 |
+
2561,180000,20.1806
|
| 452 |
+
2567,180400,13.3524
|
| 453 |
+
2573,180800,14.6734
|
| 454 |
+
2577,181200,21.1076
|
| 455 |
+
2582,181600,17.1151
|
| 456 |
+
2587,182000,12.7979
|
| 457 |
+
2592,182400,15.9493
|
| 458 |
+
2599,182800,14.3687
|
| 459 |
+
2604,183200,15.514
|
| 460 |
+
2610,183600,12.1476
|
| 461 |
+
2616,184000,13.7643
|
| 462 |
+
2622,184400,14.6858
|
| 463 |
+
2627,184800,13.8197
|
| 464 |
+
2631,185200,20.2988
|
| 465 |
+
2637,185600,13.8708
|
| 466 |
+
2641,186000,21.6494
|
| 467 |
+
2646,186400,15.6542
|
| 468 |
+
2653,186800,11.9435
|
| 469 |
+
2659,187200,13.8883
|
| 470 |
+
2666,187600,12.5814
|
| 471 |
+
2671,188000,17.3771
|
| 472 |
+
2678,188400,11.9801
|
| 473 |
+
2683,188800,13.1353
|
| 474 |
+
2692,189200,10.337
|
| 475 |
+
2698,189600,11.3343
|
| 476 |
+
2705,190000,12.8742
|
| 477 |
+
2711,190400,14.6771
|
| 478 |
+
2717,190800,11.9189
|
| 479 |
+
2726,191200,8.7059
|
| 480 |
+
2732,191600,13.1508
|
| 481 |
+
2739,192000,9.7473
|
| 482 |
+
2743,192400,19.009
|
| 483 |
+
2748,192800,16.8758
|
| 484 |
+
2754,193200,15.7392
|
| 485 |
+
2758,193600,20.0703
|
| 486 |
+
2764,194000,12.4249
|
| 487 |
+
2768,194400,21.7073
|
| 488 |
+
2773,194800,18.1804
|
| 489 |
+
2779,195200,15.5854
|
| 490 |
+
2785,195600,12.3091
|
| 491 |
+
2790,196000,15.0442
|
| 492 |
+
2794,196400,20.9159
|
| 493 |
+
2802,196800,11.6643
|
| 494 |
+
2811,197200,7.4276
|
| 495 |
+
2819,197600,8.6076
|
| 496 |
+
2826,198000,10.6653
|
| 497 |
+
2831,198400,16.6902
|
| 498 |
+
2837,198800,13.4796
|
| 499 |
+
2843,199200,14.7899
|
| 500 |
+
2848,199600,14.6657
|
| 501 |
+
2855,200000,11.2761
|
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_3.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
20,400,1.4511
|
| 3 |
+
42,800,1.4159
|
| 4 |
+
60,1200,1.7077
|
| 5 |
+
73,1600,2.1135
|
| 6 |
+
84,2000,2.7468
|
| 7 |
+
105,2400,1.7674
|
| 8 |
+
113,2800,3.7814
|
| 9 |
+
122,3200,3.7896
|
| 10 |
+
134,3600,2.7067
|
| 11 |
+
146,4000,2.5524
|
| 12 |
+
152,4400,6.5864
|
| 13 |
+
157,4800,6.5195
|
| 14 |
+
162,5200,6.1349
|
| 15 |
+
168,5600,5.317
|
| 16 |
+
177,6000,3.6724
|
| 17 |
+
183,6400,6.1446
|
| 18 |
+
189,6800,5.5608
|
| 19 |
+
195,7200,6.1726
|
| 20 |
+
199,7600,6.344
|
| 21 |
+
205,8000,5.8716
|
| 22 |
+
213,8400,4.0247
|
| 23 |
+
217,8800,6.9902
|
| 24 |
+
224,9200,6.2118
|
| 25 |
+
229,9600,6.7983
|
| 26 |
+
236,10000,6.4537
|
| 27 |
+
241,10400,9.274
|
| 28 |
+
246,10800,7.4492
|
| 29 |
+
253,11200,6.7536
|
| 30 |
+
257,11600,10.0378
|
| 31 |
+
264,12000,5.3493
|
| 32 |
+
270,12400,6.8639
|
| 33 |
+
274,12800,9.7271
|
| 34 |
+
278,13200,10.6744
|
| 35 |
+
284,13600,7.8632
|
| 36 |
+
290,14000,8.3459
|
| 37 |
+
296,14400,6.5538
|
| 38 |
+
301,14800,9.4489
|
| 39 |
+
307,15200,8.4332
|
| 40 |
+
313,15600,7.6024
|
| 41 |
+
318,16000,9.6646
|
| 42 |
+
323,16400,7.5682
|
| 43 |
+
327,16800,12.2827
|
| 44 |
+
332,17200,11.2367
|
| 45 |
+
336,17600,10.6158
|
| 46 |
+
341,18000,8.7318
|
| 47 |
+
347,18400,8.9077
|
| 48 |
+
352,18800,10.0014
|
| 49 |
+
357,19200,9.9727
|
| 50 |
+
362,19600,8.4138
|
| 51 |
+
368,20000,8.7518
|
| 52 |
+
372,20400,12.3645
|
| 53 |
+
376,20800,10.8224
|
| 54 |
+
381,21200,11.2952
|
| 55 |
+
387,21600,7.5035
|
| 56 |
+
392,22000,10.1362
|
| 57 |
+
398,22400,8.0769
|
| 58 |
+
402,22800,11.4559
|
| 59 |
+
408,23200,10.9315
|
| 60 |
+
412,23600,12.5869
|
| 61 |
+
416,24000,9.7607
|
| 62 |
+
421,24400,10.3972
|
| 63 |
+
427,24800,9.1654
|
| 64 |
+
434,25200,6.5726
|
| 65 |
+
440,25600,7.9518
|
| 66 |
+
445,26000,12.163
|
| 67 |
+
449,26400,12.5389
|
| 68 |
+
453,26800,12.4047
|
| 69 |
+
457,27200,12.8434
|
| 70 |
+
461,27600,12.508
|
| 71 |
+
465,28000,12.7977
|
| 72 |
+
469,28400,12.7722
|
| 73 |
+
475,28800,7.5211
|
| 74 |
+
481,29200,8.8252
|
| 75 |
+
485,29600,11.5555
|
| 76 |
+
489,30000,11.477
|
| 77 |
+
493,30400,12.9982
|
| 78 |
+
497,30800,13.1206
|
| 79 |
+
504,31200,9.1732
|
| 80 |
+
509,31600,9.6877
|
| 81 |
+
515,32000,8.65
|
| 82 |
+
522,32400,6.6807
|
| 83 |
+
529,32800,7.7346
|
| 84 |
+
534,33200,7.8476
|
| 85 |
+
541,33600,7.9549
|
| 86 |
+
548,34000,6.9946
|
| 87 |
+
555,34400,5.7617
|
| 88 |
+
562,34800,7.4759
|
| 89 |
+
567,35200,9.4369
|
| 90 |
+
574,35600,8.9111
|
| 91 |
+
581,36000,6.5248
|
| 92 |
+
586,36400,10.6508
|
| 93 |
+
594,36800,4.8766
|
| 94 |
+
606,37200,5.0255
|
| 95 |
+
611,37600,8.7737
|
| 96 |
+
619,38000,7.297
|
| 97 |
+
625,38400,8.7064
|
| 98 |
+
631,38800,7.6392
|
| 99 |
+
637,39200,8.1329
|
| 100 |
+
643,39600,8.6996
|
| 101 |
+
648,40000,12.1593
|
| 102 |
+
654,40400,8.0097
|
| 103 |
+
658,40800,13.7355
|
| 104 |
+
663,41200,10.8767
|
| 105 |
+
667,41600,13.4623
|
| 106 |
+
671,42000,13.7128
|
| 107 |
+
677,42400,7.9521
|
| 108 |
+
685,42800,7.0389
|
| 109 |
+
693,43200,7.1228
|
| 110 |
+
699,43600,7.1906
|
| 111 |
+
704,44000,10.2499
|
| 112 |
+
711,44400,8.8439
|
| 113 |
+
715,44800,10.723
|
| 114 |
+
722,45200,8.0561
|
| 115 |
+
726,45600,12.8837
|
| 116 |
+
730,46000,11.6005
|
| 117 |
+
735,46400,10.5382
|
| 118 |
+
740,46800,10.9579
|
| 119 |
+
744,47200,12.8439
|
| 120 |
+
749,47600,8.5832
|
| 121 |
+
755,48000,10.2625
|
| 122 |
+
759,48400,11.0394
|
| 123 |
+
764,48800,10.5021
|
| 124 |
+
771,49200,7.5662
|
| 125 |
+
775,49600,11.8596
|
| 126 |
+
779,50000,10.9197
|
| 127 |
+
785,50400,9.6613
|
| 128 |
+
791,50800,7.1842
|
| 129 |
+
797,51200,8.9205
|
| 130 |
+
810,51600,3.7799
|
| 131 |
+
814,52000,11.7011
|
| 132 |
+
818,52400,13.1067
|
| 133 |
+
825,52800,6.8794
|
| 134 |
+
837,53200,4.1435
|
| 135 |
+
844,53600,7.8758
|
| 136 |
+
850,54000,6.8512
|
| 137 |
+
857,54400,7.279
|
| 138 |
+
863,54800,8.8461
|
| 139 |
+
870,55200,5.1664
|
| 140 |
+
876,55600,8.291
|
| 141 |
+
882,56000,7.4186
|
| 142 |
+
886,56400,12.7099
|
| 143 |
+
895,56800,5.8991
|
| 144 |
+
900,57200,10.3267
|
| 145 |
+
905,57600,10.4081
|
| 146 |
+
910,58000,8.2201
|
| 147 |
+
917,58400,7.2555
|
| 148 |
+
923,58800,8.6902
|
| 149 |
+
929,59200,8.4311
|
| 150 |
+
935,59600,9.3899
|
| 151 |
+
940,60000,9.48
|
| 152 |
+
947,60400,8.8701
|
| 153 |
+
952,60800,10.6712
|
| 154 |
+
957,61200,12.1303
|
| 155 |
+
963,61600,10.0041
|
| 156 |
+
967,62000,11.8506
|
| 157 |
+
975,62400,7.9768
|
| 158 |
+
981,62800,10.121
|
| 159 |
+
985,63200,12.7484
|
| 160 |
+
992,63600,8.6506
|
| 161 |
+
996,64000,12.0166
|
| 162 |
+
1003,64400,8.6989
|
| 163 |
+
1007,64800,13.5841
|
| 164 |
+
1012,65200,11.8948
|
| 165 |
+
1020,65600,7.5362
|
| 166 |
+
1024,66000,14.5183
|
| 167 |
+
1031,66400,8.4667
|
| 168 |
+
1039,66800,6.099
|
| 169 |
+
1044,67200,9.9433
|
| 170 |
+
1051,67600,10.2187
|
| 171 |
+
1056,68000,12.4386
|
| 172 |
+
1065,68400,4.846
|
| 173 |
+
1071,68800,12.9973
|
| 174 |
+
1076,69200,11.6068
|
| 175 |
+
1084,69600,9.6249
|
| 176 |
+
1090,70000,8.821
|
| 177 |
+
1098,70400,8.9721
|
| 178 |
+
1104,70800,10.8627
|
| 179 |
+
1110,71200,12.9207
|
| 180 |
+
1117,71600,9.6223
|
| 181 |
+
1124,72000,9.5217
|
| 182 |
+
1129,72400,12.7009
|
| 183 |
+
1137,72800,7.1291
|
| 184 |
+
1142,73200,15.8915
|
| 185 |
+
1147,73600,12.773
|
| 186 |
+
1152,74000,14.0152
|
| 187 |
+
1158,74400,12.8197
|
| 188 |
+
1165,74800,7.6715
|
| 189 |
+
1171,75200,11.6273
|
| 190 |
+
1176,75600,14.7481
|
| 191 |
+
1182,76000,10.9798
|
| 192 |
+
1186,76400,17.6803
|
| 193 |
+
1193,76800,10.2361
|
| 194 |
+
1198,77200,11.135
|
| 195 |
+
1203,77600,13.5392
|
| 196 |
+
1207,78000,16.3684
|
| 197 |
+
1213,78400,12.9063
|
| 198 |
+
1218,78800,11.9918
|
| 199 |
+
1226,79200,8.0133
|
| 200 |
+
1231,79600,13.8717
|
| 201 |
+
1236,80000,13.592
|
| 202 |
+
1240,80400,16.6494
|
| 203 |
+
1247,80800,9.0394
|
| 204 |
+
1253,81200,11.6721
|
| 205 |
+
1257,81600,12.4117
|
| 206 |
+
1263,82000,13.1805
|
| 207 |
+
1268,82400,12.8848
|
| 208 |
+
1273,82800,13.0017
|
| 209 |
+
1279,83200,11.932
|
| 210 |
+
1286,83600,10.3167
|
| 211 |
+
1292,84000,11.0747
|
| 212 |
+
1298,84400,12.7847
|
| 213 |
+
1302,84800,16.4195
|
| 214 |
+
1307,85200,14.5389
|
| 215 |
+
1313,85600,11.3515
|
| 216 |
+
1317,86000,19.0045
|
| 217 |
+
1324,86400,12.204
|
| 218 |
+
1328,86800,18.172
|
| 219 |
+
1334,87200,11.4799
|
| 220 |
+
1339,87600,15.2817
|
| 221 |
+
1346,88000,12.8543
|
| 222 |
+
1351,88400,15.2124
|
| 223 |
+
1355,88800,20.012
|
| 224 |
+
1360,89200,15.6753
|
| 225 |
+
1364,89600,18.9953
|
| 226 |
+
1369,90000,14.7316
|
| 227 |
+
1373,90400,18.3781
|
| 228 |
+
1379,90800,12.0495
|
| 229 |
+
1383,91200,19.1038
|
| 230 |
+
1388,91600,15.5228
|
| 231 |
+
1394,92000,13.1508
|
| 232 |
+
1399,92400,13.739
|
| 233 |
+
1404,92800,15.4669
|
| 234 |
+
1409,93200,12.2052
|
| 235 |
+
1413,93600,18.4534
|
| 236 |
+
1417,94000,18.4736
|
| 237 |
+
1421,94400,19.451
|
| 238 |
+
1426,94800,17.0253
|
| 239 |
+
1431,95200,13.6257
|
| 240 |
+
1436,95600,15.849
|
| 241 |
+
1447,96000,6.1302
|
| 242 |
+
1452,96400,13.5995
|
| 243 |
+
1460,96800,10.4263
|
| 244 |
+
1466,97200,12.8175
|
| 245 |
+
1470,97600,19.8985
|
| 246 |
+
1474,98000,19.4499
|
| 247 |
+
1480,98400,12.2035
|
| 248 |
+
1485,98800,15.5921
|
| 249 |
+
1493,99200,9.945
|
| 250 |
+
1500,99600,8.3822
|
| 251 |
+
1504,100000,20.1108
|
| 252 |
+
1511,100400,13.2678
|
| 253 |
+
1517,100800,13.3653
|
| 254 |
+
1521,101200,20.5694
|
| 255 |
+
1526,101600,12.9576
|
| 256 |
+
1531,102000,18.6283
|
| 257 |
+
1537,102400,13.6185
|
| 258 |
+
1542,102800,13.5109
|
| 259 |
+
1547,103200,14.7459
|
| 260 |
+
1555,103600,11.7803
|
| 261 |
+
1562,104000,8.6873
|
| 262 |
+
1572,104400,8.425
|
| 263 |
+
1577,104800,12.4127
|
| 264 |
+
1583,105200,11.9189
|
| 265 |
+
1590,105600,12.8694
|
| 266 |
+
1597,106000,10.6233
|
| 267 |
+
1604,106400,8.3289
|
| 268 |
+
1613,106800,9.3008
|
| 269 |
+
1622,107200,7.9757
|
| 270 |
+
1627,107600,15.1024
|
| 271 |
+
1633,108000,12.5267
|
| 272 |
+
1641,108400,10.7986
|
| 273 |
+
1646,108800,10.9844
|
| 274 |
+
1650,109200,20.3921
|
| 275 |
+
1657,109600,12.9882
|
| 276 |
+
1664,110000,8.8107
|
| 277 |
+
1671,110400,13.2145
|
| 278 |
+
1677,110800,13.7578
|
| 279 |
+
1683,111200,11.6441
|
| 280 |
+
1691,111600,10.6352
|
| 281 |
+
1698,112000,9.8465
|
| 282 |
+
1704,112400,13.4316
|
| 283 |
+
1708,112800,19.3443
|
| 284 |
+
1715,113200,11.1754
|
| 285 |
+
1722,113600,10.6371
|
| 286 |
+
1728,114000,9.1292
|
| 287 |
+
1734,114400,14.4382
|
| 288 |
+
1741,114800,13.7379
|
| 289 |
+
1745,115200,20.8923
|
| 290 |
+
1749,115600,16.3165
|
| 291 |
+
1755,116000,15.3684
|
| 292 |
+
1762,116400,9.6267
|
| 293 |
+
1766,116800,17.2582
|
| 294 |
+
1772,117200,14.4907
|
| 295 |
+
1777,117600,16.4743
|
| 296 |
+
1783,118000,13.0177
|
| 297 |
+
1792,118400,7.393
|
| 298 |
+
1799,118800,10.4686
|
| 299 |
+
1804,119200,17.1223
|
| 300 |
+
1814,119600,7.6813
|
| 301 |
+
1820,120000,10.9129
|
| 302 |
+
1826,120400,15.8259
|
| 303 |
+
1830,120800,20.2207
|
| 304 |
+
1837,121200,9.1836
|
| 305 |
+
1843,121600,14.5043
|
| 306 |
+
1848,122000,12.8295
|
| 307 |
+
1853,122400,18.3265
|
| 308 |
+
1858,122800,16.1205
|
| 309 |
+
1863,123200,19.6436
|
| 310 |
+
1869,123600,11.276
|
| 311 |
+
1873,124000,20.749
|
| 312 |
+
1882,124400,9.1372
|
| 313 |
+
1888,124800,13.6346
|
| 314 |
+
1893,125200,17.135
|
| 315 |
+
1899,125600,10.655
|
| 316 |
+
1904,126000,20.1275
|
| 317 |
+
1908,126400,19.847
|
| 318 |
+
1913,126800,15.7564
|
| 319 |
+
1918,127200,14.3012
|
| 320 |
+
1923,127600,15.9916
|
| 321 |
+
1928,128000,15.8541
|
| 322 |
+
1932,128400,20.358
|
| 323 |
+
1937,128800,16.4342
|
| 324 |
+
1941,129200,21.0778
|
| 325 |
+
1946,129600,15.8157
|
| 326 |
+
1952,130000,14.1684
|
| 327 |
+
1956,130400,17.5727
|
| 328 |
+
1962,130800,13.636
|
| 329 |
+
1967,131200,16.7871
|
| 330 |
+
1971,131600,19.9122
|
| 331 |
+
1976,132000,17.2468
|
| 332 |
+
1984,132400,10.4792
|
| 333 |
+
1989,132800,15.3806
|
| 334 |
+
1994,133200,15.9361
|
| 335 |
+
1998,133600,20.595
|
| 336 |
+
2003,134000,14.5683
|
| 337 |
+
2010,134400,12.9443
|
| 338 |
+
2017,134800,11.1499
|
| 339 |
+
2021,135200,16.4114
|
| 340 |
+
2028,135600,11.15
|
| 341 |
+
2033,136000,16.1641
|
| 342 |
+
2039,136400,11.8905
|
| 343 |
+
2045,136800,12.094
|
| 344 |
+
2051,137200,13.1055
|
| 345 |
+
2057,137600,12.3794
|
| 346 |
+
2062,138000,16.7795
|
| 347 |
+
2066,138400,15.2186
|
| 348 |
+
2071,138800,17.5386
|
| 349 |
+
2075,139200,19.7387
|
| 350 |
+
2080,139600,17.7345
|
| 351 |
+
2087,140000,9.286
|
| 352 |
+
2091,140400,20.1843
|
| 353 |
+
2095,140800,20.6579
|
| 354 |
+
2099,141200,20.4451
|
| 355 |
+
2105,141600,12.451
|
| 356 |
+
2111,142000,13.8034
|
| 357 |
+
2116,142400,16.789
|
| 358 |
+
2121,142800,17.3337
|
| 359 |
+
2127,143200,12.512
|
| 360 |
+
2132,143600,18.0873
|
| 361 |
+
2137,144000,16.5148
|
| 362 |
+
2143,144400,14.2245
|
| 363 |
+
2150,144800,11.4969
|
| 364 |
+
2158,145200,6.4741
|
| 365 |
+
2167,145600,10.4108
|
| 366 |
+
2173,146000,9.8208
|
| 367 |
+
2180,146400,14.0351
|
| 368 |
+
2186,146800,11.3705
|
| 369 |
+
2191,147200,17.1942
|
| 370 |
+
2196,147600,16.5483
|
| 371 |
+
2201,148000,17.2101
|
| 372 |
+
2206,148400,15.8254
|
| 373 |
+
2211,148800,16.944
|
| 374 |
+
2216,149200,13.2711
|
| 375 |
+
2222,149600,16.0584
|
| 376 |
+
2227,150000,15.4412
|
| 377 |
+
2231,150400,20.0904
|
| 378 |
+
2235,150800,18.9388
|
| 379 |
+
2241,151200,10.2277
|
| 380 |
+
2248,151600,9.7938
|
| 381 |
+
2257,152000,6.8651
|
| 382 |
+
2265,152400,10.0151
|
| 383 |
+
2269,152800,16.5873
|
| 384 |
+
2275,153200,13.0192
|
| 385 |
+
2280,153600,16.4787
|
| 386 |
+
2284,154000,19.9945
|
| 387 |
+
2288,154400,21.1409
|
| 388 |
+
2293,154800,17.4874
|
| 389 |
+
2298,155200,16.9422
|
| 390 |
+
2302,155600,16.5356
|
| 391 |
+
2307,156000,17.7096
|
| 392 |
+
2314,156400,11.4627
|
| 393 |
+
2318,156800,20.033
|
| 394 |
+
2323,157200,14.9603
|
| 395 |
+
2327,157600,19.5649
|
| 396 |
+
2334,158000,10.3648
|
| 397 |
+
2339,158400,17.1039
|
| 398 |
+
2343,158800,21.143
|
| 399 |
+
2348,159200,17.2194
|
| 400 |
+
2352,159600,21.0071
|
| 401 |
+
2356,160000,21.8045
|
| 402 |
+
2361,160400,17.6473
|
| 403 |
+
2367,160800,15.5177
|
| 404 |
+
2371,161200,20.4818
|
| 405 |
+
2376,161600,17.1564
|
| 406 |
+
2381,162000,14.4102
|
| 407 |
+
2386,162400,18.4027
|
| 408 |
+
2390,162800,22.3808
|
| 409 |
+
2395,163200,14.4186
|
| 410 |
+
2400,163600,13.4343
|
| 411 |
+
2404,164000,19.605
|
| 412 |
+
2409,164400,14.7853
|
| 413 |
+
2415,164800,11.0992
|
| 414 |
+
2421,165200,12.6908
|
| 415 |
+
2426,165600,17.289
|
| 416 |
+
2431,166000,17.1976
|
| 417 |
+
2435,166400,21.6014
|
| 418 |
+
2441,166800,12.793
|
| 419 |
+
2445,167200,14.232
|
| 420 |
+
2450,167600,15.6217
|
| 421 |
+
2457,168000,14.2949
|
| 422 |
+
2461,168400,16.1677
|
| 423 |
+
2465,168800,21.0584
|
| 424 |
+
2470,169200,19.4555
|
| 425 |
+
2474,169600,16.2403
|
| 426 |
+
2481,170000,10.8878
|
| 427 |
+
2491,170400,4.966
|
| 428 |
+
2497,170800,11.4854
|
| 429 |
+
2509,171200,4.8386
|
| 430 |
+
2517,171600,9.0965
|
| 431 |
+
2532,172000,4.4732
|
| 432 |
+
2539,172400,10.3322
|
| 433 |
+
2548,172800,6.617
|
| 434 |
+
2559,173200,6.6052
|
| 435 |
+
2566,173600,8.1639
|
| 436 |
+
2571,174000,14.7295
|
| 437 |
+
2575,174400,19.1853
|
| 438 |
+
2581,174800,11.4953
|
| 439 |
+
2586,175200,15.2959
|
| 440 |
+
2591,175600,17.3706
|
| 441 |
+
2597,176000,16.6239
|
| 442 |
+
2602,176400,16.1638
|
| 443 |
+
2608,176800,13.7811
|
| 444 |
+
2614,177200,11.7461
|
| 445 |
+
2624,177600,8.2969
|
| 446 |
+
2629,178000,16.7226
|
| 447 |
+
2636,178400,10.9358
|
| 448 |
+
2642,178800,16.5572
|
| 449 |
+
2649,179200,10.4538
|
| 450 |
+
2655,179600,12.1145
|
| 451 |
+
2660,180000,17.3922
|
| 452 |
+
2669,180400,10.0921
|
| 453 |
+
2675,180800,14.2412
|
| 454 |
+
2680,181200,16.8869
|
| 455 |
+
2684,181600,17.4712
|
| 456 |
+
2689,182000,19.3122
|
| 457 |
+
2693,182400,22.4468
|
| 458 |
+
2699,182800,15.3699
|
| 459 |
+
2704,183200,18.0721
|
| 460 |
+
2710,183600,13.5026
|
| 461 |
+
2717,184000,12.2414
|
| 462 |
+
2721,184400,17.901
|
| 463 |
+
2726,184800,16.2374
|
| 464 |
+
2731,185200,17.4196
|
| 465 |
+
2736,185600,18.1275
|
| 466 |
+
2740,186000,20.9471
|
| 467 |
+
2745,186400,20.6423
|
| 468 |
+
2750,186800,18.2358
|
| 469 |
+
2755,187200,17.1191
|
| 470 |
+
2760,187600,17.9405
|
| 471 |
+
2766,188000,12.8474
|
| 472 |
+
2770,188400,19.6427
|
| 473 |
+
2776,188800,15.7317
|
| 474 |
+
2780,189200,19.7856
|
| 475 |
+
2787,189600,10.9979
|
| 476 |
+
2792,190000,18.7782
|
| 477 |
+
2798,190400,13.124
|
| 478 |
+
2803,190800,15.1497
|
| 479 |
+
2808,191200,15.8551
|
| 480 |
+
2818,191600,6.6157
|
| 481 |
+
2824,192000,15.3035
|
| 482 |
+
2831,192400,11.4718
|
| 483 |
+
2835,192800,22.8164
|
| 484 |
+
2841,193200,16.0213
|
| 485 |
+
2846,193600,17.6302
|
| 486 |
+
2851,194000,14.8716
|
| 487 |
+
2855,194400,20.6271
|
| 488 |
+
2862,194800,14.8134
|
| 489 |
+
2868,195200,10.8108
|
| 490 |
+
2872,195600,22.52
|
| 491 |
+
2879,196000,13.9515
|
| 492 |
+
2885,196400,12.6253
|
| 493 |
+
2893,196800,8.5209
|
| 494 |
+
2900,197200,11.9898
|
| 495 |
+
2906,197600,13.4315
|
| 496 |
+
2911,198000,18.833
|
| 497 |
+
2918,198400,11.5714
|
| 498 |
+
2923,198800,15.1536
|
| 499 |
+
2931,199200,9.8851
|
| 500 |
+
2937,199600,12.3961
|
| 501 |
+
2943,200000,10.9256
|
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_4.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
27,400,1.0785
|
| 3 |
+
45,800,1.6504
|
| 4 |
+
65,1200,1.4004
|
| 5 |
+
75,1600,2.9704
|
| 6 |
+
92,2000,1.7748
|
| 7 |
+
101,2400,3.44
|
| 8 |
+
112,2800,2.7486
|
| 9 |
+
118,3200,4.5942
|
| 10 |
+
124,3600,5.5212
|
| 11 |
+
132,4000,3.7269
|
| 12 |
+
139,4400,4.3077
|
| 13 |
+
146,4800,4.8111
|
| 14 |
+
151,5200,5.5228
|
| 15 |
+
157,5600,6.1189
|
| 16 |
+
163,6000,5.2543
|
| 17 |
+
168,6400,5.2306
|
| 18 |
+
175,6800,5.3845
|
| 19 |
+
182,7200,4.3935
|
| 20 |
+
187,7600,5.7774
|
| 21 |
+
192,8000,6.1537
|
| 22 |
+
197,8400,5.7906
|
| 23 |
+
202,8800,7.0905
|
| 24 |
+
209,9200,4.5122
|
| 25 |
+
213,9600,6.5891
|
| 26 |
+
218,10000,7.7331
|
| 27 |
+
223,10400,6.6175
|
| 28 |
+
227,10800,10.1737
|
| 29 |
+
231,11200,8.0792
|
| 30 |
+
238,11600,5.0695
|
| 31 |
+
243,12000,6.8807
|
| 32 |
+
247,12400,6.8492
|
| 33 |
+
252,12800,7.4324
|
| 34 |
+
256,13200,7.9897
|
| 35 |
+
263,13600,4.5812
|
| 36 |
+
270,14000,4.8277
|
| 37 |
+
275,14400,6.4227
|
| 38 |
+
281,14800,7.0743
|
| 39 |
+
287,15200,5.8331
|
| 40 |
+
292,15600,5.6509
|
| 41 |
+
297,16000,7.8764
|
| 42 |
+
303,16400,4.8805
|
| 43 |
+
308,16800,6.1394
|
| 44 |
+
312,17200,7.3848
|
| 45 |
+
319,17600,5.4064
|
| 46 |
+
326,18000,4.2903
|
| 47 |
+
333,18400,4.4865
|
| 48 |
+
338,18800,6.5078
|
| 49 |
+
344,19200,5.3164
|
| 50 |
+
348,19600,6.5957
|
| 51 |
+
352,20000,8.4949
|
| 52 |
+
357,20400,8.0414
|
| 53 |
+
361,20800,8.259
|
| 54 |
+
366,21200,8.7431
|
| 55 |
+
371,21600,8.1425
|
| 56 |
+
375,22000,7.6031
|
| 57 |
+
379,22400,10.6469
|
| 58 |
+
384,22800,7.4268
|
| 59 |
+
389,23200,7.8948
|
| 60 |
+
393,23600,9.7095
|
| 61 |
+
399,24000,7.9959
|
| 62 |
+
403,24400,12.6126
|
| 63 |
+
407,24800,10.4783
|
| 64 |
+
414,25200,8.8362
|
| 65 |
+
418,25600,12.1951
|
| 66 |
+
422,26000,12.6242
|
| 67 |
+
426,26400,13.1493
|
| 68 |
+
430,26800,11.1338
|
| 69 |
+
434,27200,11.6175
|
| 70 |
+
440,27600,7.7618
|
| 71 |
+
445,28000,10.8874
|
| 72 |
+
451,28400,9.7606
|
| 73 |
+
456,28800,11.5253
|
| 74 |
+
462,29200,6.7856
|
| 75 |
+
467,29600,11.7964
|
| 76 |
+
472,30000,8.8168
|
| 77 |
+
476,30400,13.8556
|
| 78 |
+
481,30800,11.2407
|
| 79 |
+
485,31200,13.1211
|
| 80 |
+
491,31600,10.3695
|
| 81 |
+
496,32000,8.6598
|
| 82 |
+
500,32400,13.3756
|
| 83 |
+
506,32800,9.0459
|
| 84 |
+
511,33200,10.4668
|
| 85 |
+
516,33600,9.0482
|
| 86 |
+
522,34000,9.8797
|
| 87 |
+
526,34400,13.0444
|
| 88 |
+
532,34800,7.3031
|
| 89 |
+
536,35200,12.3843
|
| 90 |
+
541,35600,11.6548
|
| 91 |
+
545,36000,12.1808
|
| 92 |
+
550,36400,8.937
|
| 93 |
+
554,36800,13.32
|
| 94 |
+
559,37200,10.2187
|
| 95 |
+
563,37600,11.1425
|
| 96 |
+
569,38000,10.4837
|
| 97 |
+
573,38400,9.4084
|
| 98 |
+
579,38800,9.5927
|
| 99 |
+
585,39200,8.1512
|
| 100 |
+
591,39600,7.4306
|
| 101 |
+
598,40000,7.4929
|
| 102 |
+
603,40400,12.0014
|
| 103 |
+
609,40800,6.8877
|
| 104 |
+
615,41200,8.7736
|
| 105 |
+
622,41600,6.575
|
| 106 |
+
632,42000,3.9331
|
| 107 |
+
638,42400,9.7754
|
| 108 |
+
644,42800,8.4288
|
| 109 |
+
649,43200,9.7733
|
| 110 |
+
656,43600,6.2225
|
| 111 |
+
666,44000,4.1808
|
| 112 |
+
672,44400,6.0964
|
| 113 |
+
685,44800,3.3983
|
| 114 |
+
694,45200,5.6596
|
| 115 |
+
702,45600,6.6803
|
| 116 |
+
708,46000,7.2517
|
| 117 |
+
716,46400,5.9717
|
| 118 |
+
729,46800,3.6724
|
| 119 |
+
740,47200,4.6841
|
| 120 |
+
744,47600,12.0424
|
| 121 |
+
751,48000,8.4528
|
| 122 |
+
757,48400,7.1028
|
| 123 |
+
761,48800,12.6503
|
| 124 |
+
770,49200,4.9012
|
| 125 |
+
775,49600,9.6465
|
| 126 |
+
782,50000,7.875
|
| 127 |
+
787,50400,12.0186
|
| 128 |
+
793,50800,9.2714
|
| 129 |
+
798,51200,9.0638
|
| 130 |
+
804,51600,9.1859
|
| 131 |
+
808,52000,13.3962
|
| 132 |
+
814,52400,9.1845
|
| 133 |
+
819,52800,8.6704
|
| 134 |
+
823,53200,12.922
|
| 135 |
+
827,53600,13.1999
|
| 136 |
+
832,54000,10.8399
|
| 137 |
+
837,54400,12.6242
|
| 138 |
+
842,54800,11.2661
|
| 139 |
+
847,55200,10.5994
|
| 140 |
+
853,55600,8.1275
|
| 141 |
+
858,56000,12.1206
|
| 142 |
+
866,56400,6.3821
|
| 143 |
+
871,56800,8.825
|
| 144 |
+
875,57200,13.9679
|
| 145 |
+
880,57600,13.0994
|
| 146 |
+
884,58000,14.0452
|
| 147 |
+
888,58400,10.9111
|
| 148 |
+
892,58800,13.7891
|
| 149 |
+
897,59200,12.1729
|
| 150 |
+
902,59600,12.7809
|
| 151 |
+
907,60000,10.9993
|
| 152 |
+
914,60400,6.609
|
| 153 |
+
922,60800,6.1909
|
| 154 |
+
927,61200,10.7844
|
| 155 |
+
934,61600,7.0903
|
| 156 |
+
945,62000,4.2601
|
| 157 |
+
953,62400,5.6534
|
| 158 |
+
958,62800,10.075
|
| 159 |
+
963,63200,10.4977
|
| 160 |
+
967,63600,11.9044
|
| 161 |
+
971,64000,13.6212
|
| 162 |
+
975,64400,13.086
|
| 163 |
+
980,64800,12.0809
|
| 164 |
+
987,65200,8.3579
|
| 165 |
+
993,65600,8.2868
|
| 166 |
+
997,66000,14.3792
|
| 167 |
+
1001,66400,11.228
|
| 168 |
+
1008,66800,8.2525
|
| 169 |
+
1017,67200,6.1501
|
| 170 |
+
1022,67600,9.1965
|
| 171 |
+
1027,68000,11.0591
|
| 172 |
+
1032,68400,11.1621
|
| 173 |
+
1038,68800,10.1783
|
| 174 |
+
1042,69200,10.0313
|
| 175 |
+
1052,69600,5.234
|
| 176 |
+
1060,70000,7.2449
|
| 177 |
+
1064,70400,11.1294
|
| 178 |
+
1070,70800,7.2192
|
| 179 |
+
1075,71200,10.5247
|
| 180 |
+
1082,71600,8.6408
|
| 181 |
+
1087,72000,12.3906
|
| 182 |
+
1091,72400,10.4689
|
| 183 |
+
1095,72800,14.3503
|
| 184 |
+
1100,73200,13.3414
|
| 185 |
+
1104,73600,14.6343
|
| 186 |
+
1108,74000,12.0234
|
| 187 |
+
1113,74400,10.4043
|
| 188 |
+
1118,74800,13.1958
|
| 189 |
+
1122,75200,14.1113
|
| 190 |
+
1128,75600,8.9121
|
| 191 |
+
1133,76000,9.4241
|
| 192 |
+
1138,76400,12.1937
|
| 193 |
+
1142,76800,10.8917
|
| 194 |
+
1148,77200,9.6075
|
| 195 |
+
1152,77600,12.5465
|
| 196 |
+
1157,78000,12.2911
|
| 197 |
+
1161,78400,14.3991
|
| 198 |
+
1166,78800,11.7178
|
| 199 |
+
1170,79200,14.2131
|
| 200 |
+
1176,79600,9.4352
|
| 201 |
+
1182,80000,9.8783
|
| 202 |
+
1187,80400,8.9671
|
| 203 |
+
1192,80800,11.4625
|
| 204 |
+
1197,81200,13.3021
|
| 205 |
+
1201,81600,14.9308
|
| 206 |
+
1205,82000,11.3369
|
| 207 |
+
1212,82400,9.6094
|
| 208 |
+
1217,82800,11.86
|
| 209 |
+
1221,83200,12.7279
|
| 210 |
+
1227,83600,10.7913
|
| 211 |
+
1232,84000,11.0473
|
| 212 |
+
1236,84400,14.2463
|
| 213 |
+
1242,84800,9.8461
|
| 214 |
+
1246,85200,14.7624
|
| 215 |
+
1252,85600,10.4951
|
| 216 |
+
1259,86000,8.9123
|
| 217 |
+
1265,86400,10.1258
|
| 218 |
+
1271,86800,9.0527
|
| 219 |
+
1276,87200,12.5009
|
| 220 |
+
1280,87600,14.9112
|
| 221 |
+
1286,88000,9.7683
|
| 222 |
+
1291,88400,12.2766
|
| 223 |
+
1295,88800,14.6523
|
| 224 |
+
1303,89200,8.0044
|
| 225 |
+
1307,89600,14.4251
|
| 226 |
+
1311,90000,14.7435
|
| 227 |
+
1316,90400,13.5846
|
| 228 |
+
1322,90800,11.1363
|
| 229 |
+
1326,91200,13.1161
|
| 230 |
+
1330,91600,14.8782
|
| 231 |
+
1335,92000,12.9898
|
| 232 |
+
1341,92400,10.5745
|
| 233 |
+
1349,92800,7.7201
|
| 234 |
+
1355,93200,9.7362
|
| 235 |
+
1362,93600,9.0283
|
| 236 |
+
1371,94000,5.595
|
| 237 |
+
1376,94400,9.584
|
| 238 |
+
1384,94800,7.7162
|
| 239 |
+
1389,95200,9.5471
|
| 240 |
+
1395,95600,11.8725
|
| 241 |
+
1400,96000,11.4859
|
| 242 |
+
1406,96400,10.0133
|
| 243 |
+
1411,96800,10.0898
|
| 244 |
+
1419,97200,7.1929
|
| 245 |
+
1425,97600,10.2308
|
| 246 |
+
1431,98000,10.8156
|
| 247 |
+
1436,98400,11.1904
|
| 248 |
+
1441,98800,13.0126
|
| 249 |
+
1448,99200,9.5278
|
| 250 |
+
1456,99600,7.7219
|
| 251 |
+
1460,100000,11.838
|
| 252 |
+
1467,100400,10.3963
|
| 253 |
+
1471,100800,15.1954
|
| 254 |
+
1477,101200,9.7102
|
| 255 |
+
1483,101600,10.8302
|
| 256 |
+
1487,102000,13.6986
|
| 257 |
+
1491,102400,17.6513
|
| 258 |
+
1497,102800,11.7696
|
| 259 |
+
1502,103200,14.6363
|
| 260 |
+
1506,103600,12.3268
|
| 261 |
+
1510,104000,15.2753
|
| 262 |
+
1515,104400,13.8737
|
| 263 |
+
1520,104800,9.0522
|
| 264 |
+
1524,105200,15.7886
|
| 265 |
+
1529,105600,14.429
|
| 266 |
+
1534,106000,12.0745
|
| 267 |
+
1539,106400,13.0085
|
| 268 |
+
1543,106800,16.5044
|
| 269 |
+
1547,107200,12.8213
|
| 270 |
+
1552,107600,10.883
|
| 271 |
+
1557,108000,12.4299
|
| 272 |
+
1561,108400,12.7274
|
| 273 |
+
1565,108800,15.701
|
| 274 |
+
1571,109200,11.4077
|
| 275 |
+
1576,109600,13.4852
|
| 276 |
+
1580,110000,15.1326
|
| 277 |
+
1585,110400,12.6496
|
| 278 |
+
1592,110800,11.1331
|
| 279 |
+
1598,111200,10.001
|
| 280 |
+
1604,111600,9.8463
|
| 281 |
+
1610,112000,9.5269
|
| 282 |
+
1615,112400,12.4523
|
| 283 |
+
1623,112800,8.4184
|
| 284 |
+
1627,113200,14.1469
|
| 285 |
+
1634,113600,7.9029
|
| 286 |
+
1640,114000,11.989
|
| 287 |
+
1646,114400,10.0607
|
| 288 |
+
1654,114800,7.935
|
| 289 |
+
1658,115200,15.8146
|
| 290 |
+
1662,115600,16.4529
|
| 291 |
+
1666,116000,16.031
|
| 292 |
+
1670,116400,16.961
|
| 293 |
+
1674,116800,16.9752
|
| 294 |
+
1680,117200,11.4042
|
| 295 |
+
1687,117600,10.9484
|
| 296 |
+
1693,118000,11.3393
|
| 297 |
+
1697,118400,19.1626
|
| 298 |
+
1702,118800,16.0651
|
| 299 |
+
1707,119200,12.5378
|
| 300 |
+
1713,119600,14.3033
|
| 301 |
+
1720,120000,11.5588
|
| 302 |
+
1724,120400,15.8731
|
| 303 |
+
1729,120800,16.0878
|
| 304 |
+
1734,121200,16.3894
|
| 305 |
+
1739,121600,14.4359
|
| 306 |
+
1745,122000,14.7653
|
| 307 |
+
1752,122400,10.6896
|
| 308 |
+
1756,122800,17.8908
|
| 309 |
+
1762,123200,14.3452
|
| 310 |
+
1768,123600,13.2548
|
| 311 |
+
1772,124000,19.6647
|
| 312 |
+
1776,124400,19.1847
|
| 313 |
+
1780,124800,16.8379
|
| 314 |
+
1786,125200,14.3642
|
| 315 |
+
1790,125600,16.4554
|
| 316 |
+
1794,126000,19.7687
|
| 317 |
+
1801,126400,10.1183
|
| 318 |
+
1806,126800,18.1265
|
| 319 |
+
1812,127200,11.8105
|
| 320 |
+
1818,127600,11.0402
|
| 321 |
+
1823,128000,16.0082
|
| 322 |
+
1830,128400,9.7205
|
| 323 |
+
1836,128800,10.7887
|
| 324 |
+
1841,129200,12.7927
|
| 325 |
+
1845,129600,17.2088
|
| 326 |
+
1852,130000,11.4728
|
| 327 |
+
1857,130400,13.019
|
| 328 |
+
1861,130800,18.7369
|
| 329 |
+
1868,131200,11.284
|
| 330 |
+
1873,131600,13.2389
|
| 331 |
+
1878,132000,15.4627
|
| 332 |
+
1882,132400,19.8252
|
| 333 |
+
1889,132800,10.8438
|
| 334 |
+
1894,133200,12.0874
|
| 335 |
+
1901,133600,11.009
|
| 336 |
+
1906,134000,13.7517
|
| 337 |
+
1915,134400,8.6863
|
| 338 |
+
1921,134800,10.9831
|
| 339 |
+
1928,135200,11.1524
|
| 340 |
+
1934,135600,11.4495
|
| 341 |
+
1941,136000,9.4189
|
| 342 |
+
1945,136400,18.9608
|
| 343 |
+
1952,136800,9.5868
|
| 344 |
+
1958,137200,10.5218
|
| 345 |
+
1964,137600,13.0402
|
| 346 |
+
1972,138000,7.7796
|
| 347 |
+
1980,138400,7.3364
|
| 348 |
+
1986,138800,13.7623
|
| 349 |
+
1991,139200,16.2419
|
| 350 |
+
1996,139600,11.4699
|
| 351 |
+
2003,140000,10.983
|
| 352 |
+
2012,140400,7.5045
|
| 353 |
+
2016,140800,20.0741
|
| 354 |
+
2022,141200,12.6223
|
| 355 |
+
2027,141600,15.3017
|
| 356 |
+
2033,142000,12.7772
|
| 357 |
+
2037,142400,20.7311
|
| 358 |
+
2041,142800,18.8427
|
| 359 |
+
2046,143200,19.43
|
| 360 |
+
2051,143600,15.0906
|
| 361 |
+
2056,144000,13.0377
|
| 362 |
+
2065,144400,7.2051
|
| 363 |
+
2071,144800,13.2424
|
| 364 |
+
2078,145200,11.0955
|
| 365 |
+
2088,145600,5.3895
|
| 366 |
+
2093,146000,12.318
|
| 367 |
+
2100,146400,12.8436
|
| 368 |
+
2105,146800,14.4873
|
| 369 |
+
2109,147200,19.8853
|
| 370 |
+
2117,147600,10.7438
|
| 371 |
+
2123,148000,13.8923
|
| 372 |
+
2128,148400,15.6226
|
| 373 |
+
2132,148800,18.0464
|
| 374 |
+
2137,149200,16.3704
|
| 375 |
+
2144,149600,12.576
|
| 376 |
+
2148,150000,20.3183
|
| 377 |
+
2156,150400,9.807
|
| 378 |
+
2161,150800,12.9068
|
| 379 |
+
2165,151200,21.0426
|
| 380 |
+
2171,151600,15.371
|
| 381 |
+
2175,152000,17.7154
|
| 382 |
+
2180,152400,17.2565
|
| 383 |
+
2185,152800,18.8765
|
| 384 |
+
2190,153200,17.2862
|
| 385 |
+
2194,153600,19.4167
|
| 386 |
+
2200,154000,13.2546
|
| 387 |
+
2205,154400,15.271
|
| 388 |
+
2211,154800,14.3329
|
| 389 |
+
2215,155200,21.1205
|
| 390 |
+
2221,155600,12.1206
|
| 391 |
+
2227,156000,14.4072
|
| 392 |
+
2232,156400,16.5247
|
| 393 |
+
2236,156800,21.231
|
| 394 |
+
2240,157200,20.8302
|
| 395 |
+
2244,157600,19.4892
|
| 396 |
+
2248,158000,20.936
|
| 397 |
+
2252,158400,21.2178
|
| 398 |
+
2256,158800,21.1056
|
| 399 |
+
2261,159200,16.8762
|
| 400 |
+
2267,159600,14.0483
|
| 401 |
+
2272,160000,13.3591
|
| 402 |
+
2279,160400,12.6142
|
| 403 |
+
2285,160800,9.0069
|
| 404 |
+
2291,161200,13.7843
|
| 405 |
+
2300,161600,7.8473
|
| 406 |
+
2307,162000,9.9078
|
| 407 |
+
2312,162400,15.3595
|
| 408 |
+
2317,162800,14.6767
|
| 409 |
+
2324,163200,10.7115
|
| 410 |
+
2331,163600,9.728
|
| 411 |
+
2336,164000,14.8431
|
| 412 |
+
2341,164400,15.2026
|
| 413 |
+
2346,164800,12.322
|
| 414 |
+
2351,165200,11.1342
|
| 415 |
+
2356,165600,14.9371
|
| 416 |
+
2361,166000,13.2367
|
| 417 |
+
2365,166400,20.3463
|
| 418 |
+
2371,166800,10.1723
|
| 419 |
+
2377,167200,10.4504
|
| 420 |
+
2381,167600,18.0134
|
| 421 |
+
2388,168000,10.336
|
| 422 |
+
2395,168400,11.1223
|
| 423 |
+
2403,168800,6.6491
|
| 424 |
+
2410,169200,9.6499
|
| 425 |
+
2418,169600,7.0802
|
| 426 |
+
2423,170000,10.1417
|
| 427 |
+
2431,170400,6.2054
|
| 428 |
+
2440,170800,7.8601
|
| 429 |
+
2449,171200,7.7198
|
| 430 |
+
2454,171600,14.5012
|
| 431 |
+
2459,172000,15.7605
|
| 432 |
+
2467,172400,7.7054
|
| 433 |
+
2473,172800,12.5347
|
| 434 |
+
2478,173200,15.5908
|
| 435 |
+
2486,173600,5.7416
|
| 436 |
+
2494,174000,9.3572
|
| 437 |
+
2501,174400,9.0669
|
| 438 |
+
2505,174800,11.2773
|
| 439 |
+
2512,175200,6.6277
|
| 440 |
+
2518,175600,4.9397
|
| 441 |
+
2524,176000,6.4387
|
| 442 |
+
2529,176400,9.8694
|
| 443 |
+
2534,176800,8.5331
|
| 444 |
+
2538,177200,8.5411
|
| 445 |
+
2545,177600,6.2453
|
| 446 |
+
2549,178000,11.7878
|
| 447 |
+
2554,178400,10.175
|
| 448 |
+
2558,178800,12.4336
|
| 449 |
+
2565,179200,6.5908
|
| 450 |
+
2570,179600,6.219
|
| 451 |
+
2577,180000,6.0009
|
| 452 |
+
2582,180400,5.8638
|
| 453 |
+
2588,180800,6.9441
|
| 454 |
+
2592,181200,6.8218
|
| 455 |
+
2598,181600,9.0203
|
| 456 |
+
2604,182000,7.8072
|
| 457 |
+
2609,182400,6.4259
|
| 458 |
+
2616,182800,6.3331
|
| 459 |
+
2622,183200,9.3925
|
| 460 |
+
2631,183600,6.0891
|
| 461 |
+
2639,184000,5.3467
|
| 462 |
+
2645,184400,5.1621
|
| 463 |
+
2652,184800,7.0998
|
| 464 |
+
2656,185200,8.041
|
| 465 |
+
2663,185600,4.7481
|
| 466 |
+
2670,186000,5.0002
|
| 467 |
+
2677,186400,4.4118
|
| 468 |
+
2682,186800,5.4232
|
| 469 |
+
2689,187200,7.2963
|
| 470 |
+
2693,187600,6.6073
|
| 471 |
+
2699,188000,8.326
|
| 472 |
+
2707,188400,5.5025
|
| 473 |
+
2713,188800,5.5795
|
| 474 |
+
2721,189200,3.796
|
| 475 |
+
2728,189600,5.6303
|
| 476 |
+
2734,190000,6.2013
|
| 477 |
+
2740,190400,6.0202
|
| 478 |
+
2746,190800,4.2373
|
| 479 |
+
2751,191200,8.5135
|
| 480 |
+
2758,191600,3.9532
|
| 481 |
+
2763,192000,7.6742
|
| 482 |
+
2770,192400,4.637
|
| 483 |
+
2776,192800,4.4964
|
| 484 |
+
2781,193200,8.2152
|
| 485 |
+
2789,193600,4.0498
|
| 486 |
+
2793,194000,7.944
|
| 487 |
+
2798,194400,6.7629
|
| 488 |
+
2802,194800,6.5532
|
| 489 |
+
2807,195200,7.4252
|
| 490 |
+
2811,195600,8.0144
|
| 491 |
+
2815,196000,7.9792
|
| 492 |
+
2819,196400,7.9971
|
| 493 |
+
2824,196800,5.5573
|
| 494 |
+
2828,197200,7.3163
|
| 495 |
+
2833,197600,8.0791
|
| 496 |
+
2838,198000,5.7746
|
| 497 |
+
2843,198400,7.1306
|
| 498 |
+
2848,198800,7.198
|
| 499 |
+
2852,199200,8.5707
|
| 500 |
+
2858,199600,5.5788
|
| 501 |
+
2862,200000,8.7377
|
code/Lake application/logs/frozen_lake_PDPPO/PDPPO_frozen_lake_log_5.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
22,400,1.2943
|
| 3 |
+
41,800,1.6015
|
| 4 |
+
55,1200,1.9737
|
| 5 |
+
64,1600,3.5487
|
| 6 |
+
73,2000,3.146
|
| 7 |
+
82,2400,3.2991
|
| 8 |
+
92,2800,2.7673
|
| 9 |
+
100,3200,3.8523
|
| 10 |
+
107,3600,3.7979
|
| 11 |
+
115,4000,4.256
|
| 12 |
+
119,4400,6.4793
|
| 13 |
+
124,4800,5.5414
|
| 14 |
+
128,5200,7.3476
|
| 15 |
+
133,5600,5.8006
|
| 16 |
+
138,6000,6.1344
|
| 17 |
+
144,6400,5.8326
|
| 18 |
+
150,6800,4.8366
|
| 19 |
+
154,7200,6.7648
|
| 20 |
+
159,7600,6.5947
|
| 21 |
+
163,8000,7.3957
|
| 22 |
+
167,8400,7.3241
|
| 23 |
+
172,8800,5.9961
|
| 24 |
+
177,9200,6.0296
|
| 25 |
+
182,9600,7.2685
|
| 26 |
+
187,10000,6.637
|
| 27 |
+
192,10400,5.999
|
| 28 |
+
196,10800,6.4386
|
| 29 |
+
201,11200,6.6646
|
| 30 |
+
207,11600,5.4069
|
| 31 |
+
212,12000,6.6252
|
| 32 |
+
216,12400,6.4882
|
| 33 |
+
223,12800,5.5298
|
| 34 |
+
228,13200,6.2035
|
| 35 |
+
233,13600,5.9536
|
| 36 |
+
237,14000,6.9952
|
| 37 |
+
244,14400,4.9771
|
| 38 |
+
249,14800,6.1157
|
| 39 |
+
253,15200,7.8858
|
| 40 |
+
257,15600,7.8068
|
| 41 |
+
261,16000,7.9837
|
| 42 |
+
266,16400,6.6484
|
| 43 |
+
270,16800,8.0041
|
| 44 |
+
274,17200,6.6656
|
| 45 |
+
278,17600,8.0284
|
| 46 |
+
283,18000,7.3533
|
| 47 |
+
290,18400,4.5723
|
| 48 |
+
295,18800,6.8589
|
| 49 |
+
299,19200,8.1765
|
| 50 |
+
303,19600,8.1
|
| 51 |
+
307,20000,8.1549
|
| 52 |
+
312,20400,5.818
|
| 53 |
+
318,20800,6.0381
|
| 54 |
+
323,21200,5.8448
|
| 55 |
+
327,21600,8.4165
|
| 56 |
+
332,22000,6.3011
|
| 57 |
+
336,22400,8.2937
|
| 58 |
+
343,22800,5.503
|
| 59 |
+
347,23200,8.5968
|
| 60 |
+
353,23600,5.1292
|
| 61 |
+
358,24000,7.4338
|
| 62 |
+
363,24400,6.6991
|
| 63 |
+
368,24800,6.5642
|
| 64 |
+
373,25200,6.8225
|
| 65 |
+
377,25600,8.3731
|
| 66 |
+
382,26000,7.4055
|
| 67 |
+
387,26400,7.1132
|
| 68 |
+
393,26800,5.6948
|
| 69 |
+
399,27200,5.2845
|
| 70 |
+
404,27600,5.9669
|
| 71 |
+
408,28000,8.3363
|
| 72 |
+
415,28400,5.6672
|
| 73 |
+
420,28800,5.4611
|
| 74 |
+
425,29200,7.714
|
| 75 |
+
429,29600,7.5512
|
| 76 |
+
434,30000,7.2347
|
| 77 |
+
439,30400,6.5507
|
| 78 |
+
443,30800,8.4242
|
| 79 |
+
449,31200,6.0604
|
| 80 |
+
453,31600,8.2476
|
| 81 |
+
458,32000,8.0065
|
| 82 |
+
462,32400,8.2966
|
| 83 |
+
467,32800,6.7805
|
| 84 |
+
471,33200,9.484
|
| 85 |
+
475,33600,8.7649
|
| 86 |
+
481,34000,8.2474
|
| 87 |
+
485,34400,9.5795
|
| 88 |
+
490,34800,8.6045
|
| 89 |
+
496,35200,7.4124
|
| 90 |
+
501,35600,7.6157
|
| 91 |
+
505,36000,10.9111
|
| 92 |
+
510,36400,8.8556
|
| 93 |
+
514,36800,9.9951
|
| 94 |
+
519,37200,11.3025
|
| 95 |
+
523,37600,11.5043
|
| 96 |
+
527,38000,11.7302
|
| 97 |
+
532,38400,10.3442
|
| 98 |
+
536,38800,12.0948
|
| 99 |
+
541,39200,10.2846
|
| 100 |
+
545,39600,12.2089
|
| 101 |
+
549,40000,11.8314
|
| 102 |
+
554,40400,9.8942
|
| 103 |
+
559,40800,10.0485
|
| 104 |
+
563,41200,12.4629
|
| 105 |
+
567,41600,12.2332
|
| 106 |
+
571,42000,11.1847
|
| 107 |
+
575,42400,12.4524
|
| 108 |
+
580,42800,10.2353
|
| 109 |
+
587,43200,6.303
|
| 110 |
+
591,43600,12.3469
|
| 111 |
+
596,44000,11.1024
|
| 112 |
+
600,44400,12.4106
|
| 113 |
+
604,44800,11.5728
|
| 114 |
+
608,45200,10.9579
|
| 115 |
+
614,45600,9.6399
|
| 116 |
+
618,46000,11.3217
|
| 117 |
+
622,46400,12.529
|
| 118 |
+
628,46800,9.0714
|
| 119 |
+
633,47200,9.9013
|
| 120 |
+
638,47600,10.0751
|
| 121 |
+
643,48000,10.2499
|
| 122 |
+
647,48400,12.5708
|
| 123 |
+
652,48800,8.7569
|
| 124 |
+
656,49200,12.3604
|
| 125 |
+
660,49600,12.5165
|
| 126 |
+
664,50000,12.3671
|
| 127 |
+
668,50400,12.618
|
| 128 |
+
672,50800,12.6543
|
| 129 |
+
677,51200,10.6911
|
| 130 |
+
681,51600,11.8317
|
| 131 |
+
685,52000,12.6955
|
| 132 |
+
690,52400,10.1157
|
| 133 |
+
695,52800,10.0116
|
| 134 |
+
700,53200,10.2901
|
| 135 |
+
705,53600,8.1824
|
| 136 |
+
710,54000,9.8285
|
| 137 |
+
716,54400,8.8717
|
| 138 |
+
721,54800,9.9854
|
| 139 |
+
725,55200,12.9736
|
| 140 |
+
731,55600,8.6973
|
| 141 |
+
737,56000,8.4719
|
| 142 |
+
742,56400,10.3744
|
| 143 |
+
747,56800,8.9466
|
| 144 |
+
752,57200,10.6086
|
| 145 |
+
758,57600,10.0045
|
| 146 |
+
763,58000,9.8052
|
| 147 |
+
768,58400,11.2697
|
| 148 |
+
774,58800,7.2144
|
| 149 |
+
780,59200,9.6594
|
| 150 |
+
784,59600,12.3888
|
| 151 |
+
789,60000,10.4215
|
| 152 |
+
795,60400,8.8573
|
| 153 |
+
800,60800,10.8871
|
| 154 |
+
807,61200,5.8992
|
| 155 |
+
812,61600,10.12
|
| 156 |
+
817,62000,10.8824
|
| 157 |
+
821,62400,13.1221
|
| 158 |
+
826,62800,10.7535
|
| 159 |
+
830,63200,13.1014
|
| 160 |
+
835,63600,10.7345
|
| 161 |
+
839,64000,13.082
|
| 162 |
+
843,64400,13.2581
|
| 163 |
+
847,64800,13.4413
|
| 164 |
+
851,65200,13.5097
|
| 165 |
+
855,65600,13.3641
|
| 166 |
+
859,66000,13.3583
|
| 167 |
+
863,66400,13.1701
|
| 168 |
+
868,66800,11.1134
|
| 169 |
+
872,67200,13.6245
|
| 170 |
+
880,67600,5.9886
|
| 171 |
+
886,68000,10.2475
|
| 172 |
+
890,68400,14.6202
|
| 173 |
+
894,68800,10.0685
|
| 174 |
+
900,69200,11.0053
|
| 175 |
+
904,69600,13.6849
|
| 176 |
+
909,70000,9.6346
|
| 177 |
+
913,70400,13.5021
|
| 178 |
+
917,70800,13.1414
|
| 179 |
+
922,71200,10.8646
|
| 180 |
+
926,71600,13.4908
|
| 181 |
+
932,72000,9.1396
|
| 182 |
+
936,72400,11.6489
|
| 183 |
+
940,72800,13.2407
|
| 184 |
+
944,73200,13.0342
|
| 185 |
+
948,73600,13.5329
|
| 186 |
+
952,74000,13.4403
|
| 187 |
+
956,74400,13.7446
|
| 188 |
+
960,74800,13.1605
|
| 189 |
+
964,75200,13.3433
|
| 190 |
+
969,75600,10.9952
|
| 191 |
+
974,76000,11.5741
|
| 192 |
+
980,76400,9.3918
|
| 193 |
+
985,76800,11.7496
|
| 194 |
+
992,77200,8.3268
|
| 195 |
+
997,77600,10.0094
|
| 196 |
+
1002,78000,10.6563
|
| 197 |
+
1007,78400,11.5348
|
| 198 |
+
1011,78800,15.119
|
| 199 |
+
1017,79200,12.0912
|
| 200 |
+
1023,79600,8.9712
|
| 201 |
+
1027,80000,11.6409
|
| 202 |
+
1031,80400,15.629
|
| 203 |
+
1036,80800,13.3766
|
| 204 |
+
1040,81200,15.2536
|
| 205 |
+
1045,81600,12.2289
|
| 206 |
+
1049,82000,15.4839
|
| 207 |
+
1055,82400,9.1355
|
| 208 |
+
1060,82800,12.202
|
| 209 |
+
1064,83200,14.748
|
| 210 |
+
1068,83600,14.5001
|
| 211 |
+
1072,84000,15.0432
|
| 212 |
+
1080,84400,7.0194
|
| 213 |
+
1085,84800,10.1806
|
| 214 |
+
1090,85200,11.795
|
| 215 |
+
1095,85600,12.7322
|
| 216 |
+
1100,86000,10.2449
|
| 217 |
+
1109,86400,6.4535
|
| 218 |
+
1117,86800,6.893
|
| 219 |
+
1123,87200,8.8297
|
| 220 |
+
1129,87600,9.9537
|
| 221 |
+
1134,88000,9.4762
|
| 222 |
+
1138,88400,15.7652
|
| 223 |
+
1144,88800,12.4334
|
| 224 |
+
1153,89200,6.025
|
| 225 |
+
1157,89600,14.0408
|
| 226 |
+
1162,90000,12.8277
|
| 227 |
+
1167,90400,12.4492
|
| 228 |
+
1173,90800,8.5448
|
| 229 |
+
1180,91200,9.9864
|
| 230 |
+
1184,91600,15.1804
|
| 231 |
+
1190,92000,8.7061
|
| 232 |
+
1196,92400,11.9629
|
| 233 |
+
1202,92800,9.8801
|
| 234 |
+
1207,93200,10.6953
|
| 235 |
+
1211,93600,16.5854
|
| 236 |
+
1221,94000,6.0867
|
| 237 |
+
1229,94400,9.2746
|
| 238 |
+
1234,94800,12.5134
|
| 239 |
+
1240,95200,8.38
|
| 240 |
+
1245,95600,14.4995
|
| 241 |
+
1249,96000,16.0927
|
| 242 |
+
1255,96400,8.763
|
| 243 |
+
1261,96800,11.1095
|
| 244 |
+
1265,97200,15.5608
|
| 245 |
+
1271,97600,12.1043
|
| 246 |
+
1278,98000,9.2772
|
| 247 |
+
1283,98400,10.4808
|
| 248 |
+
1291,98800,9.4265
|
| 249 |
+
1296,99200,13.4881
|
| 250 |
+
1303,99600,8.7073
|
| 251 |
+
1311,100000,7.7693
|
| 252 |
+
1323,100400,5.2588
|
| 253 |
+
1328,100800,12.0091
|
| 254 |
+
1338,101200,6.3879
|
| 255 |
+
1346,101600,9.2148
|
| 256 |
+
1354,102000,6.5712
|
| 257 |
+
1359,102400,15.4022
|
| 258 |
+
1363,102800,17.8751
|
| 259 |
+
1372,103200,5.6517
|
| 260 |
+
1379,103600,11.7892
|
| 261 |
+
1383,104000,13.7202
|
| 262 |
+
1390,104400,12.0935
|
| 263 |
+
1398,104800,8.3031
|
| 264 |
+
1403,105200,11.4273
|
| 265 |
+
1409,105600,13.6581
|
| 266 |
+
1414,106000,12.8389
|
| 267 |
+
1421,106400,10.5737
|
| 268 |
+
1426,106800,14.0672
|
| 269 |
+
1431,107200,15.1905
|
| 270 |
+
1435,107600,16.499
|
| 271 |
+
1441,108000,14.9027
|
| 272 |
+
1447,108400,12.2743
|
| 273 |
+
1453,108800,12.5351
|
| 274 |
+
1458,109200,13.3324
|
| 275 |
+
1464,109600,10.1987
|
| 276 |
+
1470,110000,14.6681
|
| 277 |
+
1480,110400,6.3981
|
| 278 |
+
1484,110800,17.443
|
| 279 |
+
1489,111200,11.7544
|
| 280 |
+
1494,111600,12.3627
|
| 281 |
+
1500,112000,14.2433
|
| 282 |
+
1507,112400,9.9183
|
| 283 |
+
1514,112800,10.1194
|
| 284 |
+
1520,113200,11.6173
|
| 285 |
+
1526,113600,12.6407
|
| 286 |
+
1530,114000,14.9395
|
| 287 |
+
1537,114400,10.6247
|
| 288 |
+
1546,114800,8.2285
|
| 289 |
+
1555,115200,7.4419
|
| 290 |
+
1565,115600,6.9423
|
| 291 |
+
1571,116000,11.8016
|
| 292 |
+
1576,116400,13.4006
|
| 293 |
+
1582,116800,12.1714
|
| 294 |
+
1589,117200,9.5481
|
| 295 |
+
1594,117600,17.1002
|
| 296 |
+
1599,118000,13.3729
|
| 297 |
+
1605,118400,14.4748
|
| 298 |
+
1610,118800,15.9086
|
| 299 |
+
1616,119200,13.515
|
| 300 |
+
1621,119600,13.6216
|
| 301 |
+
1627,120000,13.3638
|
| 302 |
+
1631,120400,15.8724
|
| 303 |
+
1637,120800,13.1813
|
| 304 |
+
1642,121200,16.7492
|
| 305 |
+
1647,121600,16.2201
|
| 306 |
+
1652,122000,14.2471
|
| 307 |
+
1658,122400,11.9562
|
| 308 |
+
1663,122800,14.3429
|
| 309 |
+
1670,123200,8.2829
|
| 310 |
+
1675,123600,12.7689
|
| 311 |
+
1679,124000,17.9555
|
| 312 |
+
1686,124400,10.3992
|
| 313 |
+
1693,124800,9.3774
|
| 314 |
+
1700,125200,10.5229
|
| 315 |
+
1705,125600,13.095
|
| 316 |
+
1709,126000,16.827
|
| 317 |
+
1716,126400,11.5151
|
| 318 |
+
1720,126800,15.0954
|
| 319 |
+
1725,127200,17.1223
|
| 320 |
+
1729,127600,19.2025
|
| 321 |
+
1734,128000,16.3688
|
| 322 |
+
1740,128400,13.0476
|
| 323 |
+
1744,128800,17.5761
|
| 324 |
+
1751,129200,9.9195
|
| 325 |
+
1758,129600,12.3492
|
| 326 |
+
1763,130000,12.9946
|
| 327 |
+
1768,130400,15.0853
|
| 328 |
+
1774,130800,12.7493
|
| 329 |
+
1780,131200,13.7049
|
| 330 |
+
1784,131600,16.4027
|
| 331 |
+
1791,132000,10.02
|
| 332 |
+
1796,132400,13.2953
|
| 333 |
+
1802,132800,13.2571
|
| 334 |
+
1807,133200,16.9227
|
| 335 |
+
1816,133600,7.5362
|
| 336 |
+
1823,134000,9.0337
|
| 337 |
+
1831,134400,8.2112
|
| 338 |
+
1839,134800,9.5222
|
| 339 |
+
1845,135200,10.5068
|
| 340 |
+
1851,135600,15.2168
|
| 341 |
+
1860,136000,7.9646
|
| 342 |
+
1866,136400,9.9186
|
| 343 |
+
1872,136800,11.7983
|
| 344 |
+
1878,137200,15.5265
|
| 345 |
+
1884,137600,11.4403
|
| 346 |
+
1889,138000,14.1125
|
| 347 |
+
1893,138400,18.7814
|
| 348 |
+
1898,138800,15.9716
|
| 349 |
+
1907,139200,6.6898
|
| 350 |
+
1916,139600,8.6765
|
| 351 |
+
1926,140000,6.0023
|
| 352 |
+
1930,140400,18.1008
|
| 353 |
+
1938,140800,8.4123
|
| 354 |
+
1946,141200,8.4666
|
| 355 |
+
1953,141600,9.8084
|
| 356 |
+
1958,142000,12.6225
|
| 357 |
+
1962,142400,16.6253
|
| 358 |
+
1969,142800,11.879
|
| 359 |
+
1979,143200,5.891
|
| 360 |
+
1983,143600,14.6341
|
| 361 |
+
1991,144000,10.6154
|
| 362 |
+
1995,144400,19.3371
|
| 363 |
+
2000,144800,14.6999
|
| 364 |
+
2005,145200,17.3212
|
| 365 |
+
2010,145600,13.511
|
| 366 |
+
2019,146000,8.9176
|
| 367 |
+
2025,146400,11.9234
|
| 368 |
+
2029,146800,18.7508
|
| 369 |
+
2035,147200,11.3652
|
| 370 |
+
2042,147600,10.7875
|
| 371 |
+
2047,148000,17.2345
|
| 372 |
+
2052,148400,13.469
|
| 373 |
+
2057,148800,15.0187
|
| 374 |
+
2062,149200,18.2483
|
| 375 |
+
2067,149600,15.1589
|
| 376 |
+
2072,150000,13.8506
|
| 377 |
+
2078,150400,13.5154
|
| 378 |
+
2083,150800,17.4931
|
| 379 |
+
2088,151200,13.0434
|
| 380 |
+
2094,151600,12.4448
|
| 381 |
+
2098,152000,16.2641
|
| 382 |
+
2103,152400,18.5631
|
| 383 |
+
2109,152800,13.4721
|
| 384 |
+
2114,153200,17.5024
|
| 385 |
+
2118,153600,17.2629
|
| 386 |
+
2123,154000,16.0443
|
| 387 |
+
2130,154400,12.4068
|
| 388 |
+
2134,154800,19.7433
|
| 389 |
+
2141,155200,12.1725
|
| 390 |
+
2145,155600,17.4482
|
| 391 |
+
2151,156000,14.9725
|
| 392 |
+
2158,156400,8.3286
|
| 393 |
+
2163,156800,15.3569
|
| 394 |
+
2168,157200,16.8249
|
| 395 |
+
2176,157600,10.7159
|
| 396 |
+
2181,158000,13.5756
|
| 397 |
+
2186,158400,16.8722
|
| 398 |
+
2191,158800,16.1566
|
| 399 |
+
2196,159200,16.4402
|
| 400 |
+
2200,159600,19.2901
|
| 401 |
+
2206,160000,13.408
|
| 402 |
+
2216,160400,6.5277
|
| 403 |
+
2221,160800,16.5398
|
| 404 |
+
2225,161200,17.9313
|
| 405 |
+
2229,161600,16.0893
|
| 406 |
+
2234,162000,13.5297
|
| 407 |
+
2239,162400,18.5255
|
| 408 |
+
2244,162800,15.412
|
| 409 |
+
2249,163200,16.7656
|
| 410 |
+
2254,163600,12.2883
|
| 411 |
+
2260,164000,12.8931
|
| 412 |
+
2268,164400,9.994
|
| 413 |
+
2273,164800,14.903
|
| 414 |
+
2278,165200,14.9247
|
| 415 |
+
2284,165600,13.8873
|
| 416 |
+
2288,166000,15.893
|
| 417 |
+
2294,166400,16.0733
|
| 418 |
+
2298,166800,20.3023
|
| 419 |
+
2306,167200,8.823
|
| 420 |
+
2311,167600,14.7959
|
| 421 |
+
2316,168000,15.5506
|
| 422 |
+
2320,168400,18.9412
|
| 423 |
+
2328,168800,9.9269
|
| 424 |
+
2337,169200,7.8662
|
| 425 |
+
2342,169600,13.8098
|
| 426 |
+
2347,170000,16.0699
|
| 427 |
+
2352,170400,14.2326
|
| 428 |
+
2357,170800,14.3457
|
| 429 |
+
2365,171200,7.755
|
| 430 |
+
2372,171600,11.1969
|
| 431 |
+
2377,172000,17.7461
|
| 432 |
+
2382,172400,14.5072
|
| 433 |
+
2387,172800,15.6355
|
| 434 |
+
2392,173200,16.9889
|
| 435 |
+
2397,173600,16.7953
|
| 436 |
+
2402,174000,14.2381
|
| 437 |
+
2407,174400,15.2855
|
| 438 |
+
2414,174800,9.9614
|
| 439 |
+
2422,175200,6.716
|
| 440 |
+
2429,175600,9.1914
|
| 441 |
+
2433,176000,15.9262
|
| 442 |
+
2439,176400,13.2029
|
| 443 |
+
2444,176800,12.0649
|
| 444 |
+
2448,177200,11.2086
|
| 445 |
+
2453,177600,17.596
|
| 446 |
+
2458,178000,10.4265
|
| 447 |
+
2464,178400,12.7507
|
| 448 |
+
2470,178800,10.5018
|
| 449 |
+
2477,179200,9.6174
|
| 450 |
+
2484,179600,12.3463
|
| 451 |
+
2493,180000,7.463
|
| 452 |
+
2502,180400,7.0977
|
| 453 |
+
2511,180800,6.566
|
| 454 |
+
2516,181200,12.4002
|
| 455 |
+
2522,181600,12.8125
|
| 456 |
+
2526,182000,16.8705
|
| 457 |
+
2532,182400,15.6621
|
| 458 |
+
2537,182800,15.5005
|
| 459 |
+
2545,183200,7.1256
|
| 460 |
+
2549,183600,19.091
|
| 461 |
+
2553,184000,20.7269
|
| 462 |
+
2558,184400,17.8736
|
| 463 |
+
2562,184800,16.5145
|
| 464 |
+
2568,185200,12.29
|
| 465 |
+
2574,185600,11.165
|
| 466 |
+
2580,186000,14.862
|
| 467 |
+
2585,186400,12.9498
|
| 468 |
+
2590,186800,16.6379
|
| 469 |
+
2597,187200,12.0999
|
| 470 |
+
2601,187600,19.986
|
| 471 |
+
2605,188000,21.4165
|
| 472 |
+
2610,188400,12.3688
|
| 473 |
+
2614,188800,17.9821
|
| 474 |
+
2618,189200,20.5196
|
| 475 |
+
2623,189600,19.5269
|
| 476 |
+
2629,190000,11.2376
|
| 477 |
+
2633,190400,20.1738
|
| 478 |
+
2637,190800,20.5599
|
| 479 |
+
2642,191200,15.8952
|
| 480 |
+
2649,191600,12.2397
|
| 481 |
+
2654,192000,16.6287
|
| 482 |
+
2659,192400,14.966
|
| 483 |
+
2665,192800,13.1073
|
| 484 |
+
2671,193200,11.1274
|
| 485 |
+
2676,193600,15.6753
|
| 486 |
+
2681,194000,15.8626
|
| 487 |
+
2688,194400,9.1965
|
| 488 |
+
2693,194800,15.0967
|
| 489 |
+
2697,195200,19.8348
|
| 490 |
+
2701,195600,14.855
|
| 491 |
+
2706,196000,13.0618
|
| 492 |
+
2715,196400,9.1275
|
| 493 |
+
2721,196800,7.1574
|
| 494 |
+
2726,197200,16.5246
|
| 495 |
+
2732,197600,10.026
|
| 496 |
+
2739,198000,11.8092
|
| 497 |
+
2745,198400,10.2368
|
| 498 |
+
2751,198800,11.7544
|
| 499 |
+
2757,199200,11.6034
|
| 500 |
+
2764,199600,9.3743
|
| 501 |
+
2769,200000,15.3278
|
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_0_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8370e4fdc218fd2e709261132b6cb059f988bd8753b44228b5191c345b8cc2ed
|
| 3 |
+
size 205481
|
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_1.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
15,400,1.9345
|
| 3 |
+
32,800,1.8784
|
| 4 |
+
42,1200,3.0732
|
| 5 |
+
52,1600,2.9835
|
| 6 |
+
65,2000,2.3838
|
| 7 |
+
69,2400,7.2159
|
| 8 |
+
75,2800,4.758
|
| 9 |
+
80,3200,5.6732
|
| 10 |
+
85,3600,7.076
|
| 11 |
+
89,4000,5.77
|
| 12 |
+
95,4400,5.4889
|
| 13 |
+
101,4800,4.8132
|
| 14 |
+
106,5200,7.4812
|
| 15 |
+
115,5600,2.816
|
| 16 |
+
123,6000,4.0168
|
| 17 |
+
129,6400,5.4723
|
| 18 |
+
137,6800,3.675
|
| 19 |
+
142,7200,5.8091
|
| 20 |
+
147,7600,5.6523
|
| 21 |
+
151,8000,7.3976
|
| 22 |
+
158,8400,4.279
|
| 23 |
+
164,8800,4.7596
|
| 24 |
+
168,9200,7.4893
|
| 25 |
+
173,9600,6.204
|
| 26 |
+
177,10000,7.3342
|
| 27 |
+
183,10400,4.9965
|
| 28 |
+
188,10800,6.2773
|
| 29 |
+
193,11200,6.1197
|
| 30 |
+
197,11600,7.2967
|
| 31 |
+
201,12000,7.4756
|
| 32 |
+
205,12400,7.1791
|
| 33 |
+
209,12800,7.3157
|
| 34 |
+
213,13200,7.0592
|
| 35 |
+
218,13600,5.8954
|
| 36 |
+
222,14000,7.1992
|
| 37 |
+
227,14400,6.5493
|
| 38 |
+
232,14800,5.9315
|
| 39 |
+
237,15200,7.0655
|
| 40 |
+
243,15600,5.2935
|
| 41 |
+
251,16000,3.6833
|
| 42 |
+
258,16400,4.511
|
| 43 |
+
267,16800,4.1031
|
| 44 |
+
271,17200,7.4993
|
| 45 |
+
275,17600,9.0943
|
| 46 |
+
279,18000,7.6642
|
| 47 |
+
285,18400,6.6167
|
| 48 |
+
291,18800,5.4259
|
| 49 |
+
295,19200,7.9226
|
| 50 |
+
299,19600,8.2753
|
| 51 |
+
304,20000,6.8015
|
| 52 |
+
308,20400,7.8775
|
| 53 |
+
315,20800,4.5316
|
| 54 |
+
321,21200,5.4028
|
| 55 |
+
326,21600,6.3314
|
| 56 |
+
330,22000,8.1528
|
| 57 |
+
334,22400,7.4973
|
| 58 |
+
339,22800,6.6922
|
| 59 |
+
343,23200,7.7072
|
| 60 |
+
347,23600,7.2187
|
| 61 |
+
351,24000,7.7859
|
| 62 |
+
356,24400,7.0575
|
| 63 |
+
360,24800,7.2281
|
| 64 |
+
366,25200,5.8797
|
| 65 |
+
372,25600,4.8101
|
| 66 |
+
378,26000,5.0378
|
| 67 |
+
384,26400,5.6277
|
| 68 |
+
389,26800,5.2954
|
| 69 |
+
400,27200,2.9769
|
| 70 |
+
406,27600,4.6418
|
| 71 |
+
416,28000,3.4303
|
| 72 |
+
423,28400,4.0932
|
| 73 |
+
430,28800,4.505
|
| 74 |
+
435,29200,5.5546
|
| 75 |
+
439,29600,10.5906
|
| 76 |
+
445,30000,6.1133
|
| 77 |
+
451,30400,5.9823
|
| 78 |
+
455,30800,8.4017
|
| 79 |
+
462,31200,5.0732
|
| 80 |
+
468,31600,5.7932
|
| 81 |
+
474,32000,6.708
|
| 82 |
+
479,32400,6.8853
|
| 83 |
+
485,32800,9.0958
|
| 84 |
+
490,33200,6.4707
|
| 85 |
+
495,33600,7.4192
|
| 86 |
+
501,34000,6.0651
|
| 87 |
+
506,34400,6.7035
|
| 88 |
+
511,34800,6.8316
|
| 89 |
+
518,35200,5.9218
|
| 90 |
+
524,35600,7.2932
|
| 91 |
+
528,36000,8.8676
|
| 92 |
+
534,36400,8.8016
|
| 93 |
+
538,36800,8.7465
|
| 94 |
+
544,37200,8.41
|
| 95 |
+
549,37600,8.3048
|
| 96 |
+
555,38000,8.0676
|
| 97 |
+
561,38400,6.0523
|
| 98 |
+
566,38800,8.8973
|
| 99 |
+
571,39200,8.7871
|
| 100 |
+
577,39600,6.6108
|
| 101 |
+
582,40000,10.0771
|
| 102 |
+
587,40400,7.6992
|
| 103 |
+
594,40800,5.8859
|
| 104 |
+
602,41200,5.2757
|
| 105 |
+
607,41600,7.4604
|
| 106 |
+
614,42000,6.3297
|
| 107 |
+
624,42400,4.3
|
| 108 |
+
630,42800,7.3462
|
| 109 |
+
636,43200,7.7832
|
| 110 |
+
642,43600,7.2278
|
| 111 |
+
649,44000,7.0903
|
| 112 |
+
656,44400,7.2864
|
| 113 |
+
660,44800,11.4337
|
| 114 |
+
664,45200,9.4219
|
| 115 |
+
669,45600,10.1337
|
| 116 |
+
674,46000,10.8808
|
| 117 |
+
679,46400,9.5832
|
| 118 |
+
686,46800,6.8856
|
| 119 |
+
692,47200,7.9853
|
| 120 |
+
699,47600,5.4185
|
| 121 |
+
705,48000,8.51
|
| 122 |
+
711,48400,8.7795
|
| 123 |
+
717,48800,8.1386
|
| 124 |
+
721,49200,12.1496
|
| 125 |
+
727,49600,9.671
|
| 126 |
+
734,50000,4.7004
|
| 127 |
+
742,50400,5.4595
|
| 128 |
+
751,50800,7.1566
|
| 129 |
+
756,51200,11.0405
|
| 130 |
+
763,51600,7.8217
|
| 131 |
+
769,52000,6.9623
|
| 132 |
+
774,52400,9.7564
|
| 133 |
+
780,52800,8.2737
|
| 134 |
+
786,53200,7.7905
|
| 135 |
+
792,53600,8.2016
|
| 136 |
+
798,54000,11.1113
|
| 137 |
+
803,54400,9.71
|
| 138 |
+
811,54800,6.3473
|
| 139 |
+
816,55200,10.3967
|
| 140 |
+
822,55600,8.5624
|
| 141 |
+
829,56000,7.9157
|
| 142 |
+
836,56400,7.3681
|
| 143 |
+
843,56800,7.9999
|
| 144 |
+
848,57200,12.6498
|
| 145 |
+
855,57600,6.7851
|
| 146 |
+
864,58000,5.926
|
| 147 |
+
871,58400,8.3211
|
| 148 |
+
877,58800,8.4122
|
| 149 |
+
885,59200,5.5055
|
| 150 |
+
892,59600,4.8152
|
| 151 |
+
896,60000,10.2298
|
| 152 |
+
903,60400,8.8526
|
| 153 |
+
908,60800,10.4385
|
| 154 |
+
914,61200,6.8265
|
| 155 |
+
920,61600,7.4231
|
| 156 |
+
926,62000,7.7046
|
| 157 |
+
932,62400,8.744
|
| 158 |
+
939,62800,6.815
|
| 159 |
+
944,63200,8.9794
|
| 160 |
+
950,63600,9.0955
|
| 161 |
+
960,64000,3.9011
|
| 162 |
+
966,64400,9.1169
|
| 163 |
+
970,64800,9.8831
|
| 164 |
+
975,65200,10.7317
|
| 165 |
+
983,65600,5.231
|
| 166 |
+
988,66000,9.8312
|
| 167 |
+
993,66400,8.1545
|
| 168 |
+
999,66800,8.3283
|
| 169 |
+
1008,67200,3.3866
|
| 170 |
+
1014,67600,6.2135
|
| 171 |
+
1018,68000,8.4834
|
| 172 |
+
1025,68400,4.2607
|
| 173 |
+
1031,68800,7.755
|
| 174 |
+
1035,69200,6.7764
|
| 175 |
+
1043,69600,4.8484
|
| 176 |
+
1049,70000,6.8767
|
| 177 |
+
1055,70400,7.2876
|
| 178 |
+
1061,70800,6.5337
|
| 179 |
+
1066,71200,8.7095
|
| 180 |
+
1071,71600,8.5315
|
| 181 |
+
1076,72000,8.1106
|
| 182 |
+
1082,72400,6.5303
|
| 183 |
+
1088,72800,5.1858
|
| 184 |
+
1092,73200,8.2166
|
| 185 |
+
1097,73600,7.7642
|
| 186 |
+
1103,74000,5.7689
|
| 187 |
+
1109,74400,6.5443
|
| 188 |
+
1113,74800,6.0629
|
| 189 |
+
1119,75200,6.9554
|
| 190 |
+
1124,75600,6.5107
|
| 191 |
+
1131,76000,5.8779
|
| 192 |
+
1136,76400,7.0196
|
| 193 |
+
1141,76800,8.221
|
| 194 |
+
1146,77200,7.0804
|
| 195 |
+
1150,77600,7.0824
|
| 196 |
+
1158,78000,4.9257
|
| 197 |
+
1165,78400,4.3861
|
| 198 |
+
1171,78800,5.412
|
| 199 |
+
1178,79200,5.8524
|
| 200 |
+
1186,79600,4.6331
|
| 201 |
+
1193,80000,5.5782
|
| 202 |
+
1199,80400,7.2403
|
| 203 |
+
1204,80800,8.5925
|
| 204 |
+
1208,81200,9.0404
|
| 205 |
+
1212,81600,10.5095
|
| 206 |
+
1220,82000,7.0118
|
| 207 |
+
1225,82400,8.1882
|
| 208 |
+
1229,82800,10.863
|
| 209 |
+
1233,83200,10.7497
|
| 210 |
+
1239,83600,6.4536
|
| 211 |
+
1243,84000,11.2907
|
| 212 |
+
1247,84400,11.783
|
| 213 |
+
1252,84800,11.0893
|
| 214 |
+
1257,85200,9.8707
|
| 215 |
+
1261,85600,13.0076
|
| 216 |
+
1265,86000,12.8858
|
| 217 |
+
1269,86400,13.1408
|
| 218 |
+
1273,86800,11.8421
|
| 219 |
+
1277,87200,9.9549
|
| 220 |
+
1284,87600,6.7063
|
| 221 |
+
1288,88000,12.4549
|
| 222 |
+
1292,88400,13.2602
|
| 223 |
+
1299,88800,7.2956
|
| 224 |
+
1303,89200,15.6022
|
| 225 |
+
1308,89600,12.0928
|
| 226 |
+
1317,90000,3.9969
|
| 227 |
+
1323,90400,6.6326
|
| 228 |
+
1331,90800,6.7157
|
| 229 |
+
1337,91200,6.7272
|
| 230 |
+
1346,91600,4.2204
|
| 231 |
+
1351,92000,10.5966
|
| 232 |
+
1357,92400,6.7511
|
| 233 |
+
1364,92800,6.8195
|
| 234 |
+
1371,93200,6.92
|
| 235 |
+
1381,93600,4.6135
|
| 236 |
+
1387,94000,9.0048
|
| 237 |
+
1395,94400,4.8047
|
| 238 |
+
1401,94800,9.5174
|
| 239 |
+
1407,95200,11.6457
|
| 240 |
+
1414,95600,6.9109
|
| 241 |
+
1421,96000,5.7336
|
| 242 |
+
1430,96400,5.3074
|
| 243 |
+
1434,96800,9.2165
|
| 244 |
+
1439,97200,9.5083
|
| 245 |
+
1447,97600,5.2046
|
| 246 |
+
1456,98000,4.3764
|
| 247 |
+
1462,98400,5.923
|
| 248 |
+
1467,98800,8.1644
|
| 249 |
+
1473,99200,6.7967
|
| 250 |
+
1480,99600,7.7647
|
| 251 |
+
1484,100000,12.2281
|
| 252 |
+
1488,100400,9.1569
|
| 253 |
+
1494,100800,8.54
|
| 254 |
+
1499,101200,9.4563
|
| 255 |
+
1503,101600,9.5965
|
| 256 |
+
1509,102000,8.6554
|
| 257 |
+
1514,102400,11.0492
|
| 258 |
+
1518,102800,10.4916
|
| 259 |
+
1525,103200,5.7317
|
| 260 |
+
1534,103600,5.4161
|
| 261 |
+
1539,104000,9.7729
|
| 262 |
+
1546,104400,6.9356
|
| 263 |
+
1552,104800,7.0423
|
| 264 |
+
1556,105200,12.5092
|
| 265 |
+
1563,105600,8.7948
|
| 266 |
+
1567,106000,12.3567
|
| 267 |
+
1573,106400,7.8177
|
| 268 |
+
1577,106800,12.8039
|
| 269 |
+
1584,107200,6.8714
|
| 270 |
+
1589,107600,8.3869
|
| 271 |
+
1593,108000,12.6683
|
| 272 |
+
1601,108400,6.5902
|
| 273 |
+
1607,108800,8.628
|
| 274 |
+
1615,109200,5.4762
|
| 275 |
+
1621,109600,9.3018
|
| 276 |
+
1627,110000,7.4751
|
| 277 |
+
1640,110400,4.139
|
| 278 |
+
1647,110800,7.7327
|
| 279 |
+
1652,111200,10.3567
|
| 280 |
+
1662,111600,4.9881
|
| 281 |
+
1668,112000,8.8983
|
| 282 |
+
1673,112400,9.1326
|
| 283 |
+
1683,112800,3.9693
|
| 284 |
+
1693,113200,4.1639
|
| 285 |
+
1699,113600,8.137
|
| 286 |
+
1705,114000,8.6208
|
| 287 |
+
1710,114400,9.1387
|
| 288 |
+
1717,114800,6.4969
|
| 289 |
+
1724,115200,5.8328
|
| 290 |
+
1729,115600,9.6428
|
| 291 |
+
1736,116000,7.7885
|
| 292 |
+
1742,116400,6.1858
|
| 293 |
+
1751,116800,5.3759
|
| 294 |
+
1760,117200,4.9088
|
| 295 |
+
1764,117600,12.8121
|
| 296 |
+
1773,118000,5.528
|
| 297 |
+
1781,118400,4.8728
|
| 298 |
+
1786,118800,9.4107
|
| 299 |
+
1794,119200,5.6412
|
| 300 |
+
1800,119600,8.3779
|
| 301 |
+
1804,120000,14.4553
|
| 302 |
+
1814,120400,4.32
|
| 303 |
+
1819,120800,12.7509
|
| 304 |
+
1828,121200,6.4023
|
| 305 |
+
1833,121600,15.743
|
| 306 |
+
1841,122000,6.9661
|
| 307 |
+
1847,122400,11.3894
|
| 308 |
+
1852,122800,12.2086
|
| 309 |
+
1857,123200,16.0304
|
| 310 |
+
1862,123600,16.1933
|
| 311 |
+
1870,124000,8.6231
|
| 312 |
+
1875,124400,13.6078
|
| 313 |
+
1880,124800,14.3245
|
| 314 |
+
1889,125200,5.8726
|
| 315 |
+
1896,125600,9.655
|
| 316 |
+
1904,126000,6.397
|
| 317 |
+
1912,126400,5.4703
|
| 318 |
+
1917,126800,13.7367
|
| 319 |
+
1923,127200,10.1322
|
| 320 |
+
1928,127600,12.9031
|
| 321 |
+
1932,128000,11.6047
|
| 322 |
+
1941,128400,8.1528
|
| 323 |
+
1948,128800,10.0696
|
| 324 |
+
1953,129200,12.8557
|
| 325 |
+
1964,129600,6.1629
|
| 326 |
+
1974,130000,4.7264
|
| 327 |
+
1983,130400,5.0263
|
| 328 |
+
1989,130800,13.0645
|
| 329 |
+
2000,131200,4.9296
|
| 330 |
+
2006,131600,9.139
|
| 331 |
+
2016,132000,6.4189
|
| 332 |
+
2025,132400,4.758
|
| 333 |
+
2033,132800,7.2135
|
| 334 |
+
2038,133200,8.544
|
| 335 |
+
2045,133600,9.7969
|
| 336 |
+
2051,134000,7.3074
|
| 337 |
+
2063,134400,4.5675
|
| 338 |
+
2072,134800,4.937
|
| 339 |
+
2080,135200,6.392
|
| 340 |
+
2088,135600,4.6451
|
| 341 |
+
2093,136000,9.6252
|
| 342 |
+
2100,136400,5.984
|
| 343 |
+
2106,136800,7.3098
|
| 344 |
+
2114,137200,6.8205
|
| 345 |
+
2122,137600,5.1513
|
| 346 |
+
2130,138000,6.8115
|
| 347 |
+
2140,138400,4.8287
|
| 348 |
+
2149,138800,3.6011
|
| 349 |
+
2157,139200,5.5075
|
| 350 |
+
2163,139600,9.1481
|
| 351 |
+
2171,140000,6.788
|
| 352 |
+
2178,140400,7.9736
|
| 353 |
+
2186,140800,8.5882
|
| 354 |
+
2191,141200,9.2546
|
| 355 |
+
2196,141600,15.4158
|
| 356 |
+
2206,142000,7.1935
|
| 357 |
+
2211,142400,13.2408
|
| 358 |
+
2223,142800,4.9066
|
| 359 |
+
2235,143200,4.5763
|
| 360 |
+
2241,143600,10.3808
|
| 361 |
+
2246,144000,10.8564
|
| 362 |
+
2254,144400,7.7899
|
| 363 |
+
2261,144800,8.8404
|
| 364 |
+
2268,145200,8.8255
|
| 365 |
+
2276,145600,7.4552
|
| 366 |
+
2282,146000,11.216
|
| 367 |
+
2289,146400,10.299
|
| 368 |
+
2295,146800,9.2089
|
| 369 |
+
2302,147200,8.5813
|
| 370 |
+
2311,147600,5.5125
|
| 371 |
+
2318,148000,7.7001
|
| 372 |
+
2323,148400,11.1793
|
| 373 |
+
2333,148800,3.7442
|
| 374 |
+
2340,149200,10.413
|
| 375 |
+
2345,149600,11.7132
|
| 376 |
+
2350,150000,9.5357
|
| 377 |
+
2357,150400,8.7914
|
| 378 |
+
2363,150800,8.3319
|
| 379 |
+
2370,151200,7.1484
|
| 380 |
+
2376,151600,5.6768
|
| 381 |
+
2385,152000,4.1424
|
| 382 |
+
2390,152400,6.528
|
| 383 |
+
2394,152800,7.7268
|
| 384 |
+
2399,153200,8.4871
|
| 385 |
+
2406,153600,6.8809
|
| 386 |
+
2411,154000,8.3506
|
| 387 |
+
2415,154400,11.0307
|
| 388 |
+
2419,154800,10.397
|
| 389 |
+
2427,155200,5.1522
|
| 390 |
+
2434,155600,5.8036
|
| 391 |
+
2439,156000,10.0986
|
| 392 |
+
2445,156400,7.7494
|
| 393 |
+
2452,156800,8.7844
|
| 394 |
+
2458,157200,9.1953
|
| 395 |
+
2464,157600,9.7125
|
| 396 |
+
2469,158000,14.3923
|
| 397 |
+
2475,158400,10.5696
|
| 398 |
+
2479,158800,19.5742
|
| 399 |
+
2488,159200,6.9904
|
| 400 |
+
2494,159600,12.7756
|
| 401 |
+
2499,160000,12.9402
|
| 402 |
+
2506,160400,11.8509
|
| 403 |
+
2510,160800,14.1441
|
| 404 |
+
2522,161200,5.8916
|
| 405 |
+
2530,161600,8.0032
|
| 406 |
+
2536,162000,11.8227
|
| 407 |
+
2540,162400,14.6518
|
| 408 |
+
2548,162800,9.0002
|
| 409 |
+
2555,163200,5.4754
|
| 410 |
+
2560,163600,15.7747
|
| 411 |
+
2568,164000,9.1886
|
| 412 |
+
2572,164400,18.4005
|
| 413 |
+
2580,164800,6.8589
|
| 414 |
+
2586,165200,13.1998
|
| 415 |
+
2591,165600,12.0732
|
| 416 |
+
2599,166000,7.623
|
| 417 |
+
2608,166400,6.2975
|
| 418 |
+
2614,166800,10.325
|
| 419 |
+
2622,167200,7.3189
|
| 420 |
+
2628,167600,11.0537
|
| 421 |
+
2636,168000,8.6657
|
| 422 |
+
2642,168400,13.2533
|
| 423 |
+
2650,168800,5.9973
|
| 424 |
+
2660,169200,7.9659
|
| 425 |
+
2665,169600,14.8961
|
| 426 |
+
2673,170000,9.1592
|
| 427 |
+
2680,170400,12.4329
|
| 428 |
+
2687,170800,12.6719
|
| 429 |
+
2694,171200,11.0606
|
| 430 |
+
2702,171600,10.5086
|
| 431 |
+
2709,172000,10.1179
|
| 432 |
+
2716,172400,9.1879
|
| 433 |
+
2723,172800,11.2291
|
| 434 |
+
2728,173200,12.3325
|
| 435 |
+
2734,173600,12.1718
|
| 436 |
+
2741,174000,10.1937
|
| 437 |
+
2748,174400,9.3497
|
| 438 |
+
2753,174800,10.5969
|
| 439 |
+
2761,175200,10.1798
|
| 440 |
+
2767,175600,10.9461
|
| 441 |
+
2772,176000,13.9387
|
| 442 |
+
2777,176400,14.2562
|
| 443 |
+
2783,176800,8.5326
|
| 444 |
+
2789,177200,9.8383
|
| 445 |
+
2794,177600,12.2766
|
| 446 |
+
2800,178000,14.2094
|
| 447 |
+
2804,178400,15.156
|
| 448 |
+
2811,178800,6.4663
|
| 449 |
+
2816,179200,15.0295
|
| 450 |
+
2823,179600,10.1727
|
| 451 |
+
2828,180000,11.4667
|
| 452 |
+
2834,180400,11.0373
|
| 453 |
+
2840,180800,11.0221
|
| 454 |
+
2844,181200,18.4976
|
| 455 |
+
2850,181600,9.7416
|
| 456 |
+
2857,182000,8.77
|
| 457 |
+
2863,182400,7.9617
|
| 458 |
+
2867,182800,21.1088
|
| 459 |
+
2872,183200,15.0862
|
| 460 |
+
2880,183600,9.2334
|
| 461 |
+
2885,184000,15.0575
|
| 462 |
+
2891,184400,8.7998
|
| 463 |
+
2898,184800,9.4749
|
| 464 |
+
2903,185200,15.2583
|
| 465 |
+
2908,185600,15.9073
|
| 466 |
+
2913,186000,18.1103
|
| 467 |
+
2920,186400,10.0962
|
| 468 |
+
2925,186800,14.1606
|
| 469 |
+
2931,187200,13.2483
|
| 470 |
+
2936,187600,9.6115
|
| 471 |
+
2942,188000,10.5395
|
| 472 |
+
2948,188400,13.0603
|
| 473 |
+
2952,188800,15.9296
|
| 474 |
+
2958,189200,11.0247
|
| 475 |
+
2964,189600,13.2235
|
| 476 |
+
2973,190000,6.3575
|
| 477 |
+
2978,190400,11.5839
|
| 478 |
+
2983,190800,13.9557
|
| 479 |
+
2989,191200,9.2667
|
| 480 |
+
2995,191600,10.0113
|
| 481 |
+
3000,192000,10.4833
|
| 482 |
+
3006,192400,11.3706
|
| 483 |
+
3012,192800,10.4763
|
| 484 |
+
3018,193200,10.3391
|
| 485 |
+
3022,193600,16.8413
|
| 486 |
+
3030,194000,9.8189
|
| 487 |
+
3042,194400,4.0255
|
| 488 |
+
3049,194800,8.3469
|
| 489 |
+
3058,195200,6.7786
|
| 490 |
+
3066,195600,9.455
|
| 491 |
+
3072,196000,9.361
|
| 492 |
+
3078,196400,11.942
|
| 493 |
+
3086,196800,8.1976
|
| 494 |
+
3093,197200,8.8319
|
| 495 |
+
3098,197600,10.2572
|
| 496 |
+
3105,198000,10.4746
|
| 497 |
+
3111,198400,9.7314
|
| 498 |
+
3117,198800,13.439
|
| 499 |
+
3121,199200,15.2623
|
| 500 |
+
3127,199600,15.3384
|
| 501 |
+
3137,200000,8.1123
|
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_2.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
19,400,1.6097
|
| 3 |
+
38,800,1.6416
|
| 4 |
+
61,1200,1.4003
|
| 5 |
+
73,1600,2.5901
|
| 6 |
+
81,2000,3.7322
|
| 7 |
+
90,2400,3.1764
|
| 8 |
+
95,2800,6.9871
|
| 9 |
+
102,3200,4.2802
|
| 10 |
+
109,3600,4.7436
|
| 11 |
+
116,4000,4.2818
|
| 12 |
+
120,4400,7.5307
|
| 13 |
+
124,4800,6.3049
|
| 14 |
+
132,5200,4.3085
|
| 15 |
+
137,5600,6.2239
|
| 16 |
+
141,6000,7.0954
|
| 17 |
+
145,6400,6.7039
|
| 18 |
+
150,6800,6.2469
|
| 19 |
+
155,7200,6.36
|
| 20 |
+
160,7600,6.1549
|
| 21 |
+
164,8000,6.9545
|
| 22 |
+
169,8400,7.0148
|
| 23 |
+
175,8800,4.7251
|
| 24 |
+
179,9200,7.9361
|
| 25 |
+
184,9600,6.5351
|
| 26 |
+
189,10000,6.2774
|
| 27 |
+
195,10400,5.7017
|
| 28 |
+
199,10800,7.825
|
| 29 |
+
203,11200,6.8724
|
| 30 |
+
207,11600,7.7336
|
| 31 |
+
211,12000,7.5364
|
| 32 |
+
215,12400,7.8172
|
| 33 |
+
219,12800,7.9367
|
| 34 |
+
223,13200,7.7311
|
| 35 |
+
228,13600,7.2277
|
| 36 |
+
232,14000,7.0049
|
| 37 |
+
238,14400,5.1928
|
| 38 |
+
242,14800,7.6765
|
| 39 |
+
246,15200,7.4842
|
| 40 |
+
250,15600,7.5239
|
| 41 |
+
254,16000,7.1122
|
| 42 |
+
260,16400,5.5361
|
| 43 |
+
265,16800,7.1787
|
| 44 |
+
272,17200,4.4245
|
| 45 |
+
276,17600,6.2406
|
| 46 |
+
280,18000,7.7256
|
| 47 |
+
286,18400,5.6842
|
| 48 |
+
291,18800,6.646
|
| 49 |
+
297,19200,5.5573
|
| 50 |
+
302,19600,4.9828
|
| 51 |
+
308,20000,5.9812
|
| 52 |
+
313,20400,6.4346
|
| 53 |
+
320,20800,4.4117
|
| 54 |
+
324,21200,6.6082
|
| 55 |
+
330,21600,6.2485
|
| 56 |
+
334,22000,7.8137
|
| 57 |
+
339,22400,5.5175
|
| 58 |
+
343,22800,7.8811
|
| 59 |
+
347,23200,7.1971
|
| 60 |
+
353,23600,5.196
|
| 61 |
+
358,24000,6.0231
|
| 62 |
+
364,24400,6.6306
|
| 63 |
+
369,24800,5.7511
|
| 64 |
+
374,25200,6.1309
|
| 65 |
+
378,25600,7.6626
|
| 66 |
+
382,26000,6.8129
|
| 67 |
+
387,26400,6.4101
|
| 68 |
+
391,26800,8.1621
|
| 69 |
+
395,27200,7.6024
|
| 70 |
+
399,27600,7.6139
|
| 71 |
+
404,28000,7.434
|
| 72 |
+
409,28400,7.5108
|
| 73 |
+
413,28800,10.0048
|
| 74 |
+
417,29200,8.9736
|
| 75 |
+
423,29600,5.5721
|
| 76 |
+
428,30000,5.6607
|
| 77 |
+
433,30400,7.3198
|
| 78 |
+
437,30800,7.5887
|
| 79 |
+
441,31200,7.1714
|
| 80 |
+
446,31600,5.5443
|
| 81 |
+
452,32000,6.0095
|
| 82 |
+
456,32400,6.5919
|
| 83 |
+
461,32800,6.9332
|
| 84 |
+
465,33200,5.9742
|
| 85 |
+
472,33600,4.6549
|
| 86 |
+
476,34000,6.7065
|
| 87 |
+
481,34400,6.6351
|
| 88 |
+
485,34800,7.3781
|
| 89 |
+
491,35200,5.3901
|
| 90 |
+
495,35600,7.481
|
| 91 |
+
499,36000,7.3904
|
| 92 |
+
504,36400,5.5203
|
| 93 |
+
508,36800,6.6256
|
| 94 |
+
513,37200,6.2715
|
| 95 |
+
519,37600,5.4467
|
| 96 |
+
523,38000,7.5914
|
| 97 |
+
528,38400,5.8271
|
| 98 |
+
535,38800,4.7497
|
| 99 |
+
539,39200,6.705
|
| 100 |
+
543,39600,7.2193
|
| 101 |
+
547,40000,7.6591
|
| 102 |
+
553,40400,5.4713
|
| 103 |
+
559,40800,5.6278
|
| 104 |
+
563,41200,7.0377
|
| 105 |
+
567,41600,6.5975
|
| 106 |
+
572,42000,6.4589
|
| 107 |
+
577,42400,6.4962
|
| 108 |
+
581,42800,7.5891
|
| 109 |
+
585,43200,7.5598
|
| 110 |
+
593,43600,3.9839
|
| 111 |
+
597,44000,6.7188
|
| 112 |
+
601,44400,6.9495
|
| 113 |
+
606,44800,7.4883
|
| 114 |
+
613,45200,3.7567
|
| 115 |
+
617,45600,7.7218
|
| 116 |
+
622,46000,6.69
|
| 117 |
+
626,46400,7.9262
|
| 118 |
+
630,46800,6.3034
|
| 119 |
+
635,47200,6.0545
|
| 120 |
+
639,47600,7.4195
|
| 121 |
+
644,48000,7.13
|
| 122 |
+
648,48400,7.0398
|
| 123 |
+
652,48800,7.6392
|
| 124 |
+
660,49200,3.7038
|
| 125 |
+
667,49600,4.3578
|
| 126 |
+
672,50000,5.9194
|
| 127 |
+
678,50400,4.8665
|
| 128 |
+
683,50800,6.3008
|
| 129 |
+
691,51200,4.5975
|
| 130 |
+
697,51600,5.1888
|
| 131 |
+
702,52000,5.7734
|
| 132 |
+
707,52400,7.4679
|
| 133 |
+
715,52800,3.3284
|
| 134 |
+
721,53200,6.0641
|
| 135 |
+
727,53600,6.0441
|
| 136 |
+
731,54000,7.6702
|
| 137 |
+
736,54400,7.4219
|
| 138 |
+
740,54800,6.222
|
| 139 |
+
746,55200,6.4839
|
| 140 |
+
751,55600,5.6146
|
| 141 |
+
755,56000,7.5972
|
| 142 |
+
761,56400,5.612
|
| 143 |
+
765,56800,7.8148
|
| 144 |
+
770,57200,5.7253
|
| 145 |
+
774,57600,7.9334
|
| 146 |
+
778,58000,8.3276
|
| 147 |
+
783,58400,6.8913
|
| 148 |
+
787,58800,7.2828
|
| 149 |
+
793,59200,6.9596
|
| 150 |
+
797,59600,8.2329
|
| 151 |
+
803,60000,6.4227
|
| 152 |
+
808,60400,6.7123
|
| 153 |
+
812,60800,7.9976
|
| 154 |
+
819,61200,5.5859
|
| 155 |
+
826,61600,4.4553
|
| 156 |
+
832,62000,6.7451
|
| 157 |
+
837,62400,6.241
|
| 158 |
+
843,62800,6.4673
|
| 159 |
+
848,63200,6.9543
|
| 160 |
+
855,63600,5.0913
|
| 161 |
+
861,64000,7.2159
|
| 162 |
+
865,64400,7.9432
|
| 163 |
+
870,64800,7.8044
|
| 164 |
+
875,65200,7.2243
|
| 165 |
+
880,65600,7.7488
|
| 166 |
+
886,66000,6.3843
|
| 167 |
+
890,66400,9.9507
|
| 168 |
+
894,66800,9.063
|
| 169 |
+
899,67200,7.5112
|
| 170 |
+
903,67600,7.6428
|
| 171 |
+
908,68000,8.2787
|
| 172 |
+
912,68400,8.5889
|
| 173 |
+
918,68800,5.723
|
| 174 |
+
922,69200,8.2975
|
| 175 |
+
926,69600,7.3824
|
| 176 |
+
931,70000,8.2068
|
| 177 |
+
937,70400,7.7218
|
| 178 |
+
941,70800,8.7229
|
| 179 |
+
945,71200,10.2961
|
| 180 |
+
950,71600,7.6121
|
| 181 |
+
955,72000,7.8974
|
| 182 |
+
960,72400,7.8978
|
| 183 |
+
967,72800,5.6801
|
| 184 |
+
979,73200,2.1174
|
| 185 |
+
990,73600,3.7935
|
| 186 |
+
995,74000,10.1194
|
| 187 |
+
1000,74400,6.8977
|
| 188 |
+
1006,74800,7.6098
|
| 189 |
+
1011,75200,7.2811
|
| 190 |
+
1017,75600,7.2474
|
| 191 |
+
1023,76000,8.5673
|
| 192 |
+
1029,76400,6.6197
|
| 193 |
+
1035,76800,8.3262
|
| 194 |
+
1041,77200,7.9771
|
| 195 |
+
1047,77600,7.1542
|
| 196 |
+
1051,78000,11.2335
|
| 197 |
+
1056,78400,8.8322
|
| 198 |
+
1062,78800,7.5779
|
| 199 |
+
1066,79200,13.4042
|
| 200 |
+
1071,79600,10.4175
|
| 201 |
+
1076,80000,13.3333
|
| 202 |
+
1080,80400,12.734
|
| 203 |
+
1086,80800,11.1026
|
| 204 |
+
1090,81200,14.7759
|
| 205 |
+
1096,81600,8.6412
|
| 206 |
+
1104,82000,6.3892
|
| 207 |
+
1110,82400,9.8357
|
| 208 |
+
1118,82800,7.1741
|
| 209 |
+
1125,83200,7.8912
|
| 210 |
+
1131,83600,8.455
|
| 211 |
+
1135,84000,14.1645
|
| 212 |
+
1140,84400,9.4645
|
| 213 |
+
1146,84800,10.1664
|
| 214 |
+
1153,85200,5.8022
|
| 215 |
+
1160,85600,6.215
|
| 216 |
+
1168,86000,4.1062
|
| 217 |
+
1175,86400,6.1067
|
| 218 |
+
1180,86800,6.3874
|
| 219 |
+
1187,87200,6.3452
|
| 220 |
+
1192,87600,6.9666
|
| 221 |
+
1198,88000,5.1382
|
| 222 |
+
1203,88400,6.1001
|
| 223 |
+
1212,88800,4.8099
|
| 224 |
+
1218,89200,7.4769
|
| 225 |
+
1226,89600,7.5724
|
| 226 |
+
1231,90000,10.0412
|
| 227 |
+
1238,90400,8.6996
|
| 228 |
+
1242,90800,13.2195
|
| 229 |
+
1247,91200,11.4526
|
| 230 |
+
1252,91600,12.0908
|
| 231 |
+
1260,92000,6.7688
|
| 232 |
+
1265,92400,10.449
|
| 233 |
+
1270,92800,10.2558
|
| 234 |
+
1277,93200,5.5992
|
| 235 |
+
1284,93600,9.3672
|
| 236 |
+
1292,94000,7.979
|
| 237 |
+
1302,94400,6.4251
|
| 238 |
+
1308,94800,8.6478
|
| 239 |
+
1314,95200,12.1168
|
| 240 |
+
1322,95600,6.8864
|
| 241 |
+
1333,96000,5.5147
|
| 242 |
+
1339,96400,9.0327
|
| 243 |
+
1347,96800,6.0256
|
| 244 |
+
1356,97200,4.7894
|
| 245 |
+
1365,97600,5.5282
|
| 246 |
+
1371,98000,7.7578
|
| 247 |
+
1380,98400,8.3785
|
| 248 |
+
1389,98800,5.239
|
| 249 |
+
1395,99200,8.3681
|
| 250 |
+
1400,99600,9.7949
|
| 251 |
+
1406,100000,9.5216
|
| 252 |
+
1414,100400,5.3299
|
| 253 |
+
1420,100800,7.8871
|
| 254 |
+
1428,101200,6.0441
|
| 255 |
+
1434,101600,6.6909
|
| 256 |
+
1444,102000,5.0341
|
| 257 |
+
1450,102400,7.7649
|
| 258 |
+
1461,102800,4.1021
|
| 259 |
+
1472,103200,2.9832
|
| 260 |
+
1482,103600,4.4576
|
| 261 |
+
1489,104000,7.3442
|
| 262 |
+
1497,104400,5.6498
|
| 263 |
+
1505,104800,6.4131
|
| 264 |
+
1511,105200,10.5803
|
| 265 |
+
1516,105600,11.3073
|
| 266 |
+
1520,106000,13.2675
|
| 267 |
+
1527,106400,10.6688
|
| 268 |
+
1535,106800,5.8265
|
| 269 |
+
1540,107200,10.7056
|
| 270 |
+
1545,107600,7.6742
|
| 271 |
+
1553,108000,5.7264
|
| 272 |
+
1560,108400,6.6821
|
| 273 |
+
1567,108800,5.4216
|
| 274 |
+
1573,109200,8.4766
|
| 275 |
+
1578,109600,8.6739
|
| 276 |
+
1582,110000,11.4666
|
| 277 |
+
1586,110400,9.0832
|
| 278 |
+
1594,110800,6.2276
|
| 279 |
+
1599,111200,8.6866
|
| 280 |
+
1606,111600,6.2615
|
| 281 |
+
1614,112000,5.6982
|
| 282 |
+
1621,112400,5.8051
|
| 283 |
+
1626,112800,7.93
|
| 284 |
+
1631,113200,10.595
|
| 285 |
+
1636,113600,7.6407
|
| 286 |
+
1640,114000,11.1847
|
| 287 |
+
1647,114400,5.6078
|
| 288 |
+
1651,114800,9.1446
|
| 289 |
+
1658,115200,6.8432
|
| 290 |
+
1662,115600,12.9911
|
| 291 |
+
1671,116000,4.613
|
| 292 |
+
1676,116400,11.0305
|
| 293 |
+
1681,116800,8.694
|
| 294 |
+
1688,117200,6.11
|
| 295 |
+
1698,117600,6.4954
|
| 296 |
+
1703,118000,9.7062
|
| 297 |
+
1712,118400,5.8668
|
| 298 |
+
1717,118800,7.5547
|
| 299 |
+
1724,119200,8.3224
|
| 300 |
+
1729,119600,8.329
|
| 301 |
+
1737,120000,7.1094
|
| 302 |
+
1742,120400,9.2663
|
| 303 |
+
1747,120800,9.6127
|
| 304 |
+
1757,121200,5.6769
|
| 305 |
+
1762,121600,8.5658
|
| 306 |
+
1770,122000,7.4468
|
| 307 |
+
1776,122400,8.0238
|
| 308 |
+
1782,122800,8.0927
|
| 309 |
+
1790,123200,5.6296
|
| 310 |
+
1798,123600,6.9246
|
| 311 |
+
1806,124000,6.5561
|
| 312 |
+
1813,124400,7.4058
|
| 313 |
+
1821,124800,6.9491
|
| 314 |
+
1833,125200,4.4584
|
| 315 |
+
1839,125600,8.2732
|
| 316 |
+
1848,126000,6.3892
|
| 317 |
+
1853,126400,10.4589
|
| 318 |
+
1858,126800,12.1972
|
| 319 |
+
1867,127200,5.6719
|
| 320 |
+
1874,127600,7.9428
|
| 321 |
+
1880,128000,7.5333
|
| 322 |
+
1885,128400,12.1847
|
| 323 |
+
1889,128800,11.591
|
| 324 |
+
1895,129200,9.8666
|
| 325 |
+
1902,129600,9.6165
|
| 326 |
+
1910,130000,8.469
|
| 327 |
+
1917,130400,5.6391
|
| 328 |
+
1924,130800,8.5008
|
| 329 |
+
1931,131200,9.7077
|
| 330 |
+
1935,131600,13.5229
|
| 331 |
+
1939,132000,16.4664
|
| 332 |
+
1944,132400,13.0046
|
| 333 |
+
1949,132800,8.4371
|
| 334 |
+
1955,133200,8.8647
|
| 335 |
+
1959,133600,14.3521
|
| 336 |
+
1963,134000,11.7871
|
| 337 |
+
1968,134400,10.4688
|
| 338 |
+
1974,134800,9.3431
|
| 339 |
+
1979,135200,8.226
|
| 340 |
+
1984,135600,10.8513
|
| 341 |
+
1988,136000,10.6682
|
| 342 |
+
1998,136400,4.4273
|
| 343 |
+
2003,136800,12.778
|
| 344 |
+
2011,137200,8.0067
|
| 345 |
+
2017,137600,9.4886
|
| 346 |
+
2027,138000,5.6532
|
| 347 |
+
2033,138400,7.9827
|
| 348 |
+
2039,138800,5.9282
|
| 349 |
+
2045,139200,9.6567
|
| 350 |
+
2053,139600,7.7935
|
| 351 |
+
2059,140000,5.9489
|
| 352 |
+
2064,140400,6.9939
|
| 353 |
+
2070,140800,7.3466
|
| 354 |
+
2075,141200,6.8939
|
| 355 |
+
2079,141600,9.1796
|
| 356 |
+
2085,142000,9.0719
|
| 357 |
+
2089,142400,13.9325
|
| 358 |
+
2094,142800,11.0623
|
| 359 |
+
2099,143200,6.7284
|
| 360 |
+
2107,143600,5.6728
|
| 361 |
+
2115,144000,6.321
|
| 362 |
+
2119,144400,10.3033
|
| 363 |
+
2124,144800,8.3466
|
| 364 |
+
2130,145200,6.2657
|
| 365 |
+
2136,145600,5.7428
|
| 366 |
+
2142,146000,7.4074
|
| 367 |
+
2147,146400,7.8209
|
| 368 |
+
2153,146800,7.3415
|
| 369 |
+
2161,147200,5.2917
|
| 370 |
+
2166,147600,12.5109
|
| 371 |
+
2174,148000,5.0395
|
| 372 |
+
2179,148400,10.3555
|
| 373 |
+
2187,148800,5.349
|
| 374 |
+
2192,149200,10.0253
|
| 375 |
+
2198,149600,7.0379
|
| 376 |
+
2205,150000,7.5125
|
| 377 |
+
2211,150400,7.8038
|
| 378 |
+
2217,150800,7.8167
|
| 379 |
+
2221,151200,11.7039
|
| 380 |
+
2229,151600,5.489
|
| 381 |
+
2236,152000,6.0868
|
| 382 |
+
2243,152400,6.2941
|
| 383 |
+
2249,152800,6.6384
|
| 384 |
+
2254,153200,8.3705
|
| 385 |
+
2259,153600,8.5642
|
| 386 |
+
2265,154000,6.8584
|
| 387 |
+
2272,154400,7.3834
|
| 388 |
+
2278,154800,8.2766
|
| 389 |
+
2286,155200,5.1656
|
| 390 |
+
2290,155600,12.9118
|
| 391 |
+
2294,156000,11.8071
|
| 392 |
+
2299,156400,8.9169
|
| 393 |
+
2303,156800,8.9791
|
| 394 |
+
2308,157200,9.3741
|
| 395 |
+
2312,157600,11.1361
|
| 396 |
+
2316,158000,11.7926
|
| 397 |
+
2320,158400,11.7151
|
| 398 |
+
2326,158800,8.0207
|
| 399 |
+
2333,159200,6.0691
|
| 400 |
+
2338,159600,9.577
|
| 401 |
+
2344,160000,6.4232
|
| 402 |
+
2350,160400,8.8049
|
| 403 |
+
2354,160800,9.1235
|
| 404 |
+
2359,161200,8.3861
|
| 405 |
+
2365,161600,7.0742
|
| 406 |
+
2369,162000,7.6221
|
| 407 |
+
2373,162400,7.9897
|
| 408 |
+
2378,162800,5.8857
|
| 409 |
+
2384,163200,7.0723
|
| 410 |
+
2389,163600,6.654
|
| 411 |
+
2395,164000,7.1041
|
| 412 |
+
2399,164400,8.4728
|
| 413 |
+
2407,164800,4.6772
|
| 414 |
+
2411,165200,7.5167
|
| 415 |
+
2417,165600,7.8597
|
| 416 |
+
2422,166000,9.3692
|
| 417 |
+
2427,166400,8.0704
|
| 418 |
+
2431,166800,10.7773
|
| 419 |
+
2435,167200,9.905
|
| 420 |
+
2440,167600,8.5513
|
| 421 |
+
2444,168000,8.9629
|
| 422 |
+
2450,168400,7.6352
|
| 423 |
+
2457,168800,6.9678
|
| 424 |
+
2463,169200,7.899
|
| 425 |
+
2469,169600,7.6206
|
| 426 |
+
2474,170000,8.0358
|
| 427 |
+
2481,170400,7.7953
|
| 428 |
+
2486,170800,10.2047
|
| 429 |
+
2492,171200,12.0048
|
| 430 |
+
2500,171600,5.3765
|
| 431 |
+
2509,172000,5.1863
|
| 432 |
+
2513,172400,12.2984
|
| 433 |
+
2517,172800,11.7797
|
| 434 |
+
2524,173200,7.875
|
| 435 |
+
2534,173600,3.9569
|
| 436 |
+
2539,174000,11.8781
|
| 437 |
+
2544,174400,10.3608
|
| 438 |
+
2550,174800,8.7275
|
| 439 |
+
2556,175200,8.4987
|
| 440 |
+
2561,175600,10.7181
|
| 441 |
+
2566,176000,8.2619
|
| 442 |
+
2571,176400,8.8454
|
| 443 |
+
2575,176800,8.7559
|
| 444 |
+
2580,177200,8.4039
|
| 445 |
+
2584,177600,11.9876
|
| 446 |
+
2592,178000,6.4634
|
| 447 |
+
2597,178400,7.8467
|
| 448 |
+
2602,178800,6.8881
|
| 449 |
+
2607,179200,7.1522
|
| 450 |
+
2612,179600,7.6053
|
| 451 |
+
2620,180000,5.2739
|
| 452 |
+
2628,180400,4.7562
|
| 453 |
+
2636,180800,3.7301
|
| 454 |
+
2641,181200,10.6831
|
| 455 |
+
2645,181600,8.9943
|
| 456 |
+
2652,182000,5.277
|
| 457 |
+
2661,182400,4.275
|
| 458 |
+
2671,182800,3.453
|
| 459 |
+
2676,183200,8.5581
|
| 460 |
+
2681,183600,7.2929
|
| 461 |
+
2686,184000,12.0382
|
| 462 |
+
2690,184400,12.133
|
| 463 |
+
2694,184800,12.2167
|
| 464 |
+
2701,185200,5.3062
|
| 465 |
+
2706,185600,10.3605
|
| 466 |
+
2714,186000,5.9351
|
| 467 |
+
2718,186400,9.3307
|
| 468 |
+
2723,186800,9.5625
|
| 469 |
+
2730,187200,7.1449
|
| 470 |
+
2735,187600,7.7182
|
| 471 |
+
2741,188000,6.4756
|
| 472 |
+
2745,188400,8.736
|
| 473 |
+
2751,188800,8.123
|
| 474 |
+
2757,189200,8.8964
|
| 475 |
+
2761,189600,10.7888
|
| 476 |
+
2765,190000,12.0779
|
| 477 |
+
2769,190400,10.5991
|
| 478 |
+
2775,190800,8.0868
|
| 479 |
+
2780,191200,8.917
|
| 480 |
+
2785,191600,9.9389
|
| 481 |
+
2789,192000,11.5622
|
| 482 |
+
2795,192400,8.386
|
| 483 |
+
2800,192800,8.3721
|
| 484 |
+
2805,193200,8.8166
|
| 485 |
+
2811,193600,8.1763
|
| 486 |
+
2817,194000,8.3255
|
| 487 |
+
2823,194400,8.8563
|
| 488 |
+
2827,194800,12.6773
|
| 489 |
+
2833,195200,7.9132
|
| 490 |
+
2837,195600,10.4681
|
| 491 |
+
2844,196000,8.2131
|
| 492 |
+
2849,196400,9.2795
|
| 493 |
+
2855,196800,7.094
|
| 494 |
+
2860,197200,10.5194
|
| 495 |
+
2864,197600,12.9269
|
| 496 |
+
2870,198000,7.9327
|
| 497 |
+
2876,198400,7.223
|
| 498 |
+
2881,198800,10.8269
|
| 499 |
+
2885,199200,12.3889
|
| 500 |
+
2892,199600,6.5769
|
| 501 |
+
2897,200000,11.076
|
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_3.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
15,400,1.8261
|
| 3 |
+
32,800,1.7685
|
| 4 |
+
45,1200,2.4142
|
| 5 |
+
51,1600,4.6704
|
| 6 |
+
61,2000,3.1899
|
| 7 |
+
67,2400,5.3735
|
| 8 |
+
75,2800,4.1154
|
| 9 |
+
82,3200,3.7725
|
| 10 |
+
92,3600,3.7704
|
| 11 |
+
103,4000,2.6905
|
| 12 |
+
110,4400,4.0276
|
| 13 |
+
117,4800,5.8064
|
| 14 |
+
124,5200,4.4713
|
| 15 |
+
132,5600,4.0093
|
| 16 |
+
137,6000,5.9495
|
| 17 |
+
142,6400,5.8014
|
| 18 |
+
148,6800,5.8551
|
| 19 |
+
155,7200,4.6169
|
| 20 |
+
162,7600,4.2494
|
| 21 |
+
170,8000,3.9722
|
| 22 |
+
177,8400,3.9192
|
| 23 |
+
186,8800,3.8894
|
| 24 |
+
192,9200,5.2858
|
| 25 |
+
196,9600,6.1068
|
| 26 |
+
201,10000,7.4978
|
| 27 |
+
206,10400,6.5813
|
| 28 |
+
210,10800,7.0333
|
| 29 |
+
216,11200,5.7853
|
| 30 |
+
220,11600,6.9548
|
| 31 |
+
226,12000,6.9295
|
| 32 |
+
231,12400,8.6606
|
| 33 |
+
235,12800,8.1602
|
| 34 |
+
241,13200,5.5479
|
| 35 |
+
248,13600,5.2991
|
| 36 |
+
253,14000,5.5222
|
| 37 |
+
259,14400,6.6097
|
| 38 |
+
266,14800,5.7681
|
| 39 |
+
272,15200,5.1424
|
| 40 |
+
281,15600,3.9703
|
| 41 |
+
289,16000,5.446
|
| 42 |
+
296,16400,5.6469
|
| 43 |
+
301,16800,8.1023
|
| 44 |
+
309,17200,5.4118
|
| 45 |
+
313,17600,10.0884
|
| 46 |
+
318,18000,7.85
|
| 47 |
+
324,18400,7.1867
|
| 48 |
+
334,18800,4.1973
|
| 49 |
+
341,19200,6.5775
|
| 50 |
+
346,19600,8.8721
|
| 51 |
+
353,20000,8.3356
|
| 52 |
+
358,20400,8.4092
|
| 53 |
+
364,20800,7.0598
|
| 54 |
+
369,21200,9.7722
|
| 55 |
+
375,21600,8.7407
|
| 56 |
+
381,22000,7.1701
|
| 57 |
+
387,22400,6.8198
|
| 58 |
+
391,22800,11.2731
|
| 59 |
+
399,23200,6.9213
|
| 60 |
+
404,23600,7.6675
|
| 61 |
+
408,24000,8.5927
|
| 62 |
+
412,24400,9.5454
|
| 63 |
+
416,24800,11.0606
|
| 64 |
+
421,25200,11.0094
|
| 65 |
+
426,25600,9.2321
|
| 66 |
+
431,26000,7.2778
|
| 67 |
+
436,26400,10.7602
|
| 68 |
+
440,26800,10.2041
|
| 69 |
+
444,27200,10.7586
|
| 70 |
+
448,27600,8.5868
|
| 71 |
+
452,28000,9.4227
|
| 72 |
+
458,28400,8.901
|
| 73 |
+
462,28800,9.059
|
| 74 |
+
467,29200,9.2612
|
| 75 |
+
472,29600,10.802
|
| 76 |
+
478,30000,7.4041
|
| 77 |
+
483,30400,8.8905
|
| 78 |
+
489,30800,6.3011
|
| 79 |
+
493,31200,12.0364
|
| 80 |
+
497,31600,11.5981
|
| 81 |
+
501,32000,11.5024
|
| 82 |
+
505,32400,11.9104
|
| 83 |
+
509,32800,11.9212
|
| 84 |
+
513,33200,12.0185
|
| 85 |
+
518,33600,9.4035
|
| 86 |
+
522,34000,11.5176
|
| 87 |
+
526,34400,11.0239
|
| 88 |
+
534,34800,6.1402
|
| 89 |
+
538,35200,9.7348
|
| 90 |
+
546,35600,6.35
|
| 91 |
+
551,36000,7.8883
|
| 92 |
+
558,36400,7.7455
|
| 93 |
+
563,36800,7.1019
|
| 94 |
+
571,37200,6.6505
|
| 95 |
+
576,37600,7.5348
|
| 96 |
+
581,38000,12.1861
|
| 97 |
+
586,38400,9.1601
|
| 98 |
+
591,38800,8.1292
|
| 99 |
+
596,39200,7.3226
|
| 100 |
+
602,39600,9.3
|
| 101 |
+
608,40000,10.1455
|
| 102 |
+
614,40400,7.404
|
| 103 |
+
620,40800,9.5543
|
| 104 |
+
627,41200,8.0328
|
| 105 |
+
636,41600,5.051
|
| 106 |
+
648,42000,4.3144
|
| 107 |
+
654,42400,8.7103
|
| 108 |
+
661,42800,8.5619
|
| 109 |
+
666,43200,9.0912
|
| 110 |
+
671,43600,12.1562
|
| 111 |
+
679,44000,6.8929
|
| 112 |
+
683,44400,12.4673
|
| 113 |
+
690,44800,7.4547
|
| 114 |
+
700,45200,6.1627
|
| 115 |
+
708,45600,5.2344
|
| 116 |
+
712,46000,14.522
|
| 117 |
+
718,46400,9.7264
|
| 118 |
+
724,46800,9.4083
|
| 119 |
+
731,47200,7.3673
|
| 120 |
+
735,47600,10.918
|
| 121 |
+
741,48000,9.7135
|
| 122 |
+
746,48400,11.6226
|
| 123 |
+
753,48800,6.5335
|
| 124 |
+
760,49200,6.1922
|
| 125 |
+
765,49600,11.59
|
| 126 |
+
772,50000,7.6406
|
| 127 |
+
779,50400,7.3931
|
| 128 |
+
785,50800,8.8649
|
| 129 |
+
790,51200,13.0236
|
| 130 |
+
796,51600,9.1355
|
| 131 |
+
802,52000,9.2798
|
| 132 |
+
812,52400,4.6073
|
| 133 |
+
818,52800,8.5625
|
| 134 |
+
823,53200,8.0732
|
| 135 |
+
829,53600,8.2494
|
| 136 |
+
837,54000,5.0721
|
| 137 |
+
849,54400,3.926
|
| 138 |
+
857,54800,5.9843
|
| 139 |
+
866,55200,5.4496
|
| 140 |
+
872,55600,9.6436
|
| 141 |
+
877,56000,9.8259
|
| 142 |
+
882,56400,12.0831
|
| 143 |
+
886,56800,11.8707
|
| 144 |
+
892,57200,9.3723
|
| 145 |
+
897,57600,8.75
|
| 146 |
+
902,58000,9.1673
|
| 147 |
+
908,58400,8.2213
|
| 148 |
+
919,58800,3.6353
|
| 149 |
+
929,59200,3.9628
|
| 150 |
+
935,59600,6.8984
|
| 151 |
+
942,60000,6.928
|
| 152 |
+
948,60400,8.007
|
| 153 |
+
954,60800,7.1696
|
| 154 |
+
962,61200,6.8068
|
| 155 |
+
970,61600,5.7813
|
| 156 |
+
979,62000,6.7075
|
| 157 |
+
990,62400,4.5979
|
| 158 |
+
995,62800,11.0131
|
| 159 |
+
1001,63200,9.9881
|
| 160 |
+
1007,63600,8.771
|
| 161 |
+
1013,64000,6.4708
|
| 162 |
+
1020,64400,8.4602
|
| 163 |
+
1024,64800,12.5658
|
| 164 |
+
1029,65200,12.6734
|
| 165 |
+
1033,65600,13.9195
|
| 166 |
+
1037,66000,10.7454
|
| 167 |
+
1043,66400,10.9443
|
| 168 |
+
1048,66800,10.9429
|
| 169 |
+
1053,67200,9.4126
|
| 170 |
+
1059,67600,8.414
|
| 171 |
+
1066,68000,6.8977
|
| 172 |
+
1071,68400,8.4342
|
| 173 |
+
1077,68800,6.9781
|
| 174 |
+
1081,69200,9.3134
|
| 175 |
+
1087,69600,8.1705
|
| 176 |
+
1091,70000,8.8618
|
| 177 |
+
1096,70400,10.7669
|
| 178 |
+
1100,70800,10.662
|
| 179 |
+
1104,71200,9.404
|
| 180 |
+
1108,71600,10.9212
|
| 181 |
+
1114,72000,7.7906
|
| 182 |
+
1120,72400,6.9977
|
| 183 |
+
1124,72800,10.0251
|
| 184 |
+
1130,73200,8.4128
|
| 185 |
+
1134,73600,9.9691
|
| 186 |
+
1138,74000,11.4181
|
| 187 |
+
1143,74400,8.6228
|
| 188 |
+
1152,74800,5.3614
|
| 189 |
+
1157,75200,8.1655
|
| 190 |
+
1164,75600,7.1774
|
| 191 |
+
1171,76000,5.9159
|
| 192 |
+
1180,76400,4.0023
|
| 193 |
+
1189,76800,4.7476
|
| 194 |
+
1197,77200,5.5766
|
| 195 |
+
1202,77600,8.0878
|
| 196 |
+
1209,78000,6.6897
|
| 197 |
+
1213,78400,13.7633
|
| 198 |
+
1221,78800,6.482
|
| 199 |
+
1230,79200,6.0141
|
| 200 |
+
1234,79600,12.2026
|
| 201 |
+
1242,80000,5.6537
|
| 202 |
+
1251,80400,4.3695
|
| 203 |
+
1259,80800,7.4921
|
| 204 |
+
1264,81200,9.8077
|
| 205 |
+
1269,81600,10.9606
|
| 206 |
+
1275,82000,9.6273
|
| 207 |
+
1280,82400,12.195
|
| 208 |
+
1287,82800,7.4125
|
| 209 |
+
1292,83200,12.1273
|
| 210 |
+
1296,83600,13.0822
|
| 211 |
+
1303,84000,7.0237
|
| 212 |
+
1308,84400,11.6651
|
| 213 |
+
1313,84800,9.4606
|
| 214 |
+
1318,85200,12.5532
|
| 215 |
+
1324,85600,9.9701
|
| 216 |
+
1329,86000,11.7337
|
| 217 |
+
1333,86400,14.7119
|
| 218 |
+
1339,86800,9.0294
|
| 219 |
+
1345,87200,7.5184
|
| 220 |
+
1353,87600,7.3597
|
| 221 |
+
1359,88000,10.1495
|
| 222 |
+
1365,88400,8.7107
|
| 223 |
+
1375,88800,4.0411
|
| 224 |
+
1383,89200,5.6476
|
| 225 |
+
1389,89600,6.5996
|
| 226 |
+
1397,90000,8.0321
|
| 227 |
+
1404,90400,8.9973
|
| 228 |
+
1410,90800,8.9065
|
| 229 |
+
1417,91200,5.712
|
| 230 |
+
1427,91600,4.6027
|
| 231 |
+
1436,92000,5.296
|
| 232 |
+
1441,92400,7.6209
|
| 233 |
+
1449,92800,7.4688
|
| 234 |
+
1456,93200,5.0893
|
| 235 |
+
1464,93600,6.9208
|
| 236 |
+
1472,94000,7.4646
|
| 237 |
+
1479,94400,8.2095
|
| 238 |
+
1484,94800,9.7461
|
| 239 |
+
1490,95200,8.781
|
| 240 |
+
1494,95600,8.3202
|
| 241 |
+
1499,96000,9.0412
|
| 242 |
+
1506,96400,6.9789
|
| 243 |
+
1511,96800,7.9199
|
| 244 |
+
1517,97200,10.6694
|
| 245 |
+
1524,97600,6.481
|
| 246 |
+
1532,98000,6.7254
|
| 247 |
+
1541,98400,6.2726
|
| 248 |
+
1547,98800,7.5851
|
| 249 |
+
1555,99200,5.3696
|
| 250 |
+
1560,99600,10.4877
|
| 251 |
+
1567,100000,8.8012
|
| 252 |
+
1577,100400,5.5168
|
| 253 |
+
1584,100800,10.155
|
| 254 |
+
1588,101200,8.5114
|
| 255 |
+
1595,101600,6.3359
|
| 256 |
+
1602,102000,6.6452
|
| 257 |
+
1608,102400,6.8052
|
| 258 |
+
1612,102800,8.0109
|
| 259 |
+
1617,103200,5.8693
|
| 260 |
+
1621,103600,8.5857
|
| 261 |
+
1626,104000,9.6799
|
| 262 |
+
1634,104400,5.6426
|
| 263 |
+
1639,104800,6.5151
|
| 264 |
+
1644,105200,7.8849
|
| 265 |
+
1648,105600,8.9338
|
| 266 |
+
1654,106000,5.6678
|
| 267 |
+
1659,106400,6.3244
|
| 268 |
+
1665,106800,5.6218
|
| 269 |
+
1669,107200,9.5578
|
| 270 |
+
1676,107600,5.3338
|
| 271 |
+
1686,108000,4.6969
|
| 272 |
+
1693,108400,5.4701
|
| 273 |
+
1698,108800,10.4455
|
| 274 |
+
1704,109200,8.6891
|
| 275 |
+
1710,109600,10.0138
|
| 276 |
+
1717,110000,7.5448
|
| 277 |
+
1726,110400,7.8171
|
| 278 |
+
1731,110800,10.4446
|
| 279 |
+
1739,111200,8.1385
|
| 280 |
+
1748,111600,6.0488
|
| 281 |
+
1756,112000,7.1346
|
| 282 |
+
1761,112400,9.6157
|
| 283 |
+
1767,112800,9.1395
|
| 284 |
+
1774,113200,8.8882
|
| 285 |
+
1782,113600,8.0369
|
| 286 |
+
1789,114000,9.7018
|
| 287 |
+
1795,114400,8.5519
|
| 288 |
+
1802,114800,12.4735
|
| 289 |
+
1810,115200,6.0752
|
| 290 |
+
1815,115600,11.7469
|
| 291 |
+
1824,116000,5.4505
|
| 292 |
+
1829,116400,9.3351
|
| 293 |
+
1835,116800,10.7987
|
| 294 |
+
1840,117200,15.1342
|
| 295 |
+
1846,117600,14.5398
|
| 296 |
+
1853,118000,10.6334
|
| 297 |
+
1861,118400,11.3101
|
| 298 |
+
1866,118800,14.8907
|
| 299 |
+
1873,119200,9.6076
|
| 300 |
+
1882,119600,7.7126
|
| 301 |
+
1893,120000,4.7907
|
| 302 |
+
1901,120400,6.0066
|
| 303 |
+
1906,120800,15.4955
|
| 304 |
+
1911,121200,13.3978
|
| 305 |
+
1919,121600,9.6642
|
| 306 |
+
1924,122000,15.7393
|
| 307 |
+
1928,122400,18.9361
|
| 308 |
+
1932,122800,19.4331
|
| 309 |
+
1940,123200,8.6073
|
| 310 |
+
1947,123600,10.161
|
| 311 |
+
1954,124000,8.8061
|
| 312 |
+
1959,124400,13.5384
|
| 313 |
+
1966,124800,9.9289
|
| 314 |
+
1981,125200,4.1506
|
| 315 |
+
1991,125600,8.5539
|
| 316 |
+
1995,126000,20.6911
|
| 317 |
+
2002,126400,11.9305
|
| 318 |
+
2007,126800,16.2045
|
| 319 |
+
2016,127200,6.5353
|
| 320 |
+
2021,127600,17.3839
|
| 321 |
+
2027,128000,13.1571
|
| 322 |
+
2035,128400,11.3393
|
| 323 |
+
2039,128800,22.3028
|
| 324 |
+
2047,129200,12.8433
|
| 325 |
+
2055,129600,12.7583
|
| 326 |
+
2066,130000,6.9837
|
| 327 |
+
2071,130400,18.0303
|
| 328 |
+
2076,130800,18.2896
|
| 329 |
+
2084,131200,8.7988
|
| 330 |
+
2093,131600,8.7713
|
| 331 |
+
2098,132000,11.0877
|
| 332 |
+
2109,132400,7.8183
|
| 333 |
+
2115,132800,12.4679
|
| 334 |
+
2124,133200,10.5047
|
| 335 |
+
2137,133600,6.17
|
| 336 |
+
2142,134000,20.3565
|
| 337 |
+
2148,134400,10.4538
|
| 338 |
+
2154,134800,12.7331
|
| 339 |
+
2161,135200,12.5367
|
| 340 |
+
2171,135600,5.7754
|
| 341 |
+
2178,136000,13.3435
|
| 342 |
+
2182,136400,13.3376
|
| 343 |
+
2195,136800,5.0278
|
| 344 |
+
2203,137200,8.039
|
| 345 |
+
2215,137600,5.0622
|
| 346 |
+
2225,138000,7.6281
|
| 347 |
+
2232,138400,12.4199
|
| 348 |
+
2243,138800,5.7324
|
| 349 |
+
2249,139200,14.5818
|
| 350 |
+
2255,139600,14.0929
|
| 351 |
+
2262,140000,13.6329
|
| 352 |
+
2267,140400,18.3515
|
| 353 |
+
2272,140800,18.0695
|
| 354 |
+
2280,141200,12.0349
|
| 355 |
+
2287,141600,13.6652
|
| 356 |
+
2296,142000,9.2929
|
| 357 |
+
2305,142400,10.1985
|
| 358 |
+
2312,142800,12.7522
|
| 359 |
+
2323,143200,7.2459
|
| 360 |
+
2331,143600,8.9751
|
| 361 |
+
2338,144000,11.4881
|
| 362 |
+
2344,144400,15.2227
|
| 363 |
+
2351,144800,12.8927
|
| 364 |
+
2358,145200,10.6543
|
| 365 |
+
2362,145600,22.496
|
| 366 |
+
2368,146000,13.9616
|
| 367 |
+
2373,146400,18.1932
|
| 368 |
+
2378,146800,16.1787
|
| 369 |
+
2382,147200,21.2142
|
| 370 |
+
2386,147600,22.1002
|
| 371 |
+
2396,148000,8.9528
|
| 372 |
+
2401,148400,15.8869
|
| 373 |
+
2408,148800,13.7149
|
| 374 |
+
2413,149200,13.7033
|
| 375 |
+
2419,149600,17.2193
|
| 376 |
+
2425,150000,11.3894
|
| 377 |
+
2432,150400,13.8544
|
| 378 |
+
2437,150800,17.5939
|
| 379 |
+
2444,151200,12.8075
|
| 380 |
+
2449,151600,12.1515
|
| 381 |
+
2457,152000,10.4033
|
| 382 |
+
2465,152400,11.4859
|
| 383 |
+
2470,152800,14.4762
|
| 384 |
+
2477,153200,12.3627
|
| 385 |
+
2483,153600,14.8347
|
| 386 |
+
2488,154000,18.2382
|
| 387 |
+
2497,154400,9.2311
|
| 388 |
+
2501,154800,19.7235
|
| 389 |
+
2509,155200,13.3697
|
| 390 |
+
2515,155600,11.9598
|
| 391 |
+
2525,156000,7.2526
|
| 392 |
+
2534,156400,9.3025
|
| 393 |
+
2545,156800,8.9835
|
| 394 |
+
2551,157200,12.1765
|
| 395 |
+
2558,157600,14.0303
|
| 396 |
+
2564,158000,13.4739
|
| 397 |
+
2573,158400,9.8322
|
| 398 |
+
2578,158800,19.6338
|
| 399 |
+
2584,159200,15.7125
|
| 400 |
+
2588,159600,17.0086
|
| 401 |
+
2594,160000,14.7127
|
| 402 |
+
2598,160400,23.1588
|
| 403 |
+
2607,160800,11.0373
|
| 404 |
+
2615,161200,9.348
|
| 405 |
+
2619,161600,21.6514
|
| 406 |
+
2624,162000,12.9316
|
| 407 |
+
2631,162400,12.1088
|
| 408 |
+
2636,162800,20.0918
|
| 409 |
+
2640,163200,18.6887
|
| 410 |
+
2644,163600,19.3577
|
| 411 |
+
2653,164000,8.5057
|
| 412 |
+
2662,164400,8.0083
|
| 413 |
+
2668,164800,15.0007
|
| 414 |
+
2676,165200,8.8861
|
| 415 |
+
2682,165600,15.3621
|
| 416 |
+
2689,166000,13.6995
|
| 417 |
+
2696,166400,10.5381
|
| 418 |
+
2701,166800,19.4263
|
| 419 |
+
2708,167200,12.0695
|
| 420 |
+
2713,167600,11.9025
|
| 421 |
+
2719,168000,10.0897
|
| 422 |
+
2725,168400,15.0383
|
| 423 |
+
2731,168800,14.8992
|
| 424 |
+
2735,169200,14.9242
|
| 425 |
+
2739,169600,20.4302
|
| 426 |
+
2745,170000,15.2987
|
| 427 |
+
2750,170400,16.7812
|
| 428 |
+
2754,170800,16.0345
|
| 429 |
+
2759,171200,16.4285
|
| 430 |
+
2768,171600,9.1208
|
| 431 |
+
2780,172000,5.583
|
| 432 |
+
2787,172400,8.2014
|
| 433 |
+
2793,172800,10.1961
|
| 434 |
+
2798,173200,17.1725
|
| 435 |
+
2806,173600,9.115
|
| 436 |
+
2814,174000,9.2754
|
| 437 |
+
2821,174400,10.8946
|
| 438 |
+
2827,174800,11.5879
|
| 439 |
+
2834,175200,10.3869
|
| 440 |
+
2840,175600,13.8918
|
| 441 |
+
2845,176000,12.5769
|
| 442 |
+
2854,176400,10.322
|
| 443 |
+
2863,176800,6.8967
|
| 444 |
+
2869,177200,17.4846
|
| 445 |
+
2874,177600,19.6151
|
| 446 |
+
2881,178000,12.9361
|
| 447 |
+
2886,178400,18.2368
|
| 448 |
+
2892,178800,12.8876
|
| 449 |
+
2898,179200,12.3181
|
| 450 |
+
2903,179600,17.6907
|
| 451 |
+
2908,180000,15.7174
|
| 452 |
+
2915,180400,11.7662
|
| 453 |
+
2920,180800,17.438
|
| 454 |
+
2925,181200,14.2649
|
| 455 |
+
2931,181600,12.5882
|
| 456 |
+
2936,182000,17.2888
|
| 457 |
+
2942,182400,15.7864
|
| 458 |
+
2946,182800,19.7236
|
| 459 |
+
2952,183200,14.7757
|
| 460 |
+
2957,183600,13.2554
|
| 461 |
+
2962,184000,16.9161
|
| 462 |
+
2966,184400,19.4477
|
| 463 |
+
2971,184800,14.806
|
| 464 |
+
2976,185200,14.7174
|
| 465 |
+
2981,185600,14.6584
|
| 466 |
+
2985,186000,13.1555
|
| 467 |
+
2993,186400,8.2998
|
| 468 |
+
2999,186800,10.4079
|
| 469 |
+
3004,187200,14.5865
|
| 470 |
+
3011,187600,9.0036
|
| 471 |
+
3015,188000,13.7298
|
| 472 |
+
3022,188400,8.8899
|
| 473 |
+
3026,188800,15.7034
|
| 474 |
+
3032,189200,11.7676
|
| 475 |
+
3036,189600,17.0897
|
| 476 |
+
3044,190000,10.1182
|
| 477 |
+
3049,190400,13.9028
|
| 478 |
+
3054,190800,16.7113
|
| 479 |
+
3059,191200,16.4022
|
| 480 |
+
3064,191600,18.3592
|
| 481 |
+
3069,192000,17.6439
|
| 482 |
+
3074,192400,15.5535
|
| 483 |
+
3079,192800,15.6137
|
| 484 |
+
3085,193200,14.7975
|
| 485 |
+
3091,193600,11.9988
|
| 486 |
+
3099,194000,10.8644
|
| 487 |
+
3106,194400,14.6474
|
| 488 |
+
3113,194800,12.833
|
| 489 |
+
3117,195200,22.6677
|
| 490 |
+
3121,195600,21.1913
|
| 491 |
+
3126,196000,17.2308
|
| 492 |
+
3131,196400,14.7737
|
| 493 |
+
3140,196800,6.5658
|
| 494 |
+
3147,197200,8.1251
|
| 495 |
+
3155,197600,8.3248
|
| 496 |
+
3162,198000,10.0096
|
| 497 |
+
3168,198400,9.34
|
| 498 |
+
3175,198800,9.7054
|
| 499 |
+
3181,199200,9.9748
|
| 500 |
+
3186,199600,11.6185
|
| 501 |
+
3191,200000,10.6864
|
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_4.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
30,400,0.8897
|
| 3 |
+
47,800,1.6985
|
| 4 |
+
70,1200,1.5344
|
| 5 |
+
81,1600,2.8837
|
| 6 |
+
93,2000,2.7853
|
| 7 |
+
99,2400,4.9228
|
| 8 |
+
105,2800,4.8879
|
| 9 |
+
109,3200,6.7205
|
| 10 |
+
116,3600,5.1037
|
| 11 |
+
121,4000,6.8188
|
| 12 |
+
126,4400,5.7358
|
| 13 |
+
130,4800,7.5711
|
| 14 |
+
136,5200,5.2419
|
| 15 |
+
141,5600,5.8543
|
| 16 |
+
147,6000,5.2331
|
| 17 |
+
152,6400,4.9293
|
| 18 |
+
161,6800,4.0985
|
| 19 |
+
166,7200,5.2082
|
| 20 |
+
172,7600,5.3103
|
| 21 |
+
177,8000,6.0557
|
| 22 |
+
181,8400,6.9039
|
| 23 |
+
188,8800,4.5342
|
| 24 |
+
192,9200,9.1478
|
| 25 |
+
199,9600,5.3605
|
| 26 |
+
206,10000,5.1273
|
| 27 |
+
211,10400,7.8887
|
| 28 |
+
216,10800,6.2693
|
| 29 |
+
220,11200,7.3964
|
| 30 |
+
224,11600,7.1054
|
| 31 |
+
230,12000,5.4118
|
| 32 |
+
234,12400,7.4287
|
| 33 |
+
238,12800,7.4478
|
| 34 |
+
245,13200,4.4117
|
| 35 |
+
249,13600,7.2105
|
| 36 |
+
253,14000,6.7015
|
| 37 |
+
257,14400,7.4679
|
| 38 |
+
262,14800,6.5292
|
| 39 |
+
267,15200,6.025
|
| 40 |
+
271,15600,7.6356
|
| 41 |
+
275,16000,7.6501
|
| 42 |
+
280,16400,7.0316
|
| 43 |
+
284,16800,8.6843
|
| 44 |
+
289,17200,6.7495
|
| 45 |
+
294,17600,6.023
|
| 46 |
+
299,18000,6.415
|
| 47 |
+
305,18400,5.4163
|
| 48 |
+
311,18800,5.4882
|
| 49 |
+
316,19200,6.2097
|
| 50 |
+
320,19600,8.0329
|
| 51 |
+
324,20000,7.5882
|
| 52 |
+
329,20400,7.3095
|
| 53 |
+
333,20800,7.5687
|
| 54 |
+
339,21200,6.6201
|
| 55 |
+
343,21600,9.5695
|
| 56 |
+
349,22000,6.358
|
| 57 |
+
354,22400,7.729
|
| 58 |
+
362,22800,3.8919
|
| 59 |
+
367,23200,6.2263
|
| 60 |
+
372,23600,8.9296
|
| 61 |
+
377,24000,8.9107
|
| 62 |
+
383,24400,6.8109
|
| 63 |
+
391,24800,4.3733
|
| 64 |
+
397,25200,7.8942
|
| 65 |
+
402,25600,8.1473
|
| 66 |
+
408,26000,6.342
|
| 67 |
+
414,26400,6.6643
|
| 68 |
+
419,26800,8.5145
|
| 69 |
+
423,27200,9.8487
|
| 70 |
+
427,27600,8.3884
|
| 71 |
+
432,28000,8.1417
|
| 72 |
+
438,28400,6.5363
|
| 73 |
+
442,28800,8.3084
|
| 74 |
+
447,29200,7.5203
|
| 75 |
+
451,29600,8.0109
|
| 76 |
+
455,30000,7.1375
|
| 77 |
+
459,30400,6.972
|
| 78 |
+
464,30800,7.2792
|
| 79 |
+
468,31200,8.3772
|
| 80 |
+
472,31600,8.6912
|
| 81 |
+
476,32000,7.6424
|
| 82 |
+
480,32400,8.1047
|
| 83 |
+
484,32800,6.6454
|
| 84 |
+
489,33200,7.4736
|
| 85 |
+
493,33600,8.1904
|
| 86 |
+
497,34000,7.1456
|
| 87 |
+
503,34400,6.2541
|
| 88 |
+
507,34800,7.7885
|
| 89 |
+
511,35200,7.3507
|
| 90 |
+
515,35600,8.0471
|
| 91 |
+
520,36000,8.5436
|
| 92 |
+
524,36400,6.8725
|
| 93 |
+
529,36800,8.4028
|
| 94 |
+
535,37200,5.2433
|
| 95 |
+
542,37600,4.7139
|
| 96 |
+
546,38000,7.3213
|
| 97 |
+
555,38400,3.8831
|
| 98 |
+
561,38800,5.6601
|
| 99 |
+
568,39200,4.7948
|
| 100 |
+
576,39600,4.6981
|
| 101 |
+
584,40000,4.3181
|
| 102 |
+
589,40400,7.5472
|
| 103 |
+
593,40800,9.3392
|
| 104 |
+
602,41200,3.9924
|
| 105 |
+
609,41600,7.1339
|
| 106 |
+
615,42000,6.7132
|
| 107 |
+
620,42400,6.7015
|
| 108 |
+
628,42800,5.4925
|
| 109 |
+
636,43200,3.7468
|
| 110 |
+
644,43600,4.3569
|
| 111 |
+
651,44000,5.8671
|
| 112 |
+
655,44400,8.3115
|
| 113 |
+
660,44800,9.1009
|
| 114 |
+
665,45200,7.2625
|
| 115 |
+
672,45600,5.378
|
| 116 |
+
678,46000,5.686
|
| 117 |
+
684,46400,5.8378
|
| 118 |
+
688,46800,7.674
|
| 119 |
+
693,47200,7.7574
|
| 120 |
+
697,47600,9.4904
|
| 121 |
+
706,48000,4.0155
|
| 122 |
+
712,48400,9.193
|
| 123 |
+
718,48800,6.0672
|
| 124 |
+
723,49200,8.703
|
| 125 |
+
729,49600,6.4219
|
| 126 |
+
737,50000,5.2146
|
| 127 |
+
742,50400,7.6968
|
| 128 |
+
747,50800,9.559
|
| 129 |
+
753,51200,6.7111
|
| 130 |
+
758,51600,9.7201
|
| 131 |
+
764,52000,7.5954
|
| 132 |
+
770,52400,8.0675
|
| 133 |
+
775,52800,7.1163
|
| 134 |
+
782,53200,5.3886
|
| 135 |
+
786,53600,10.9581
|
| 136 |
+
791,54000,9.5825
|
| 137 |
+
800,54400,4.9313
|
| 138 |
+
808,54800,3.2748
|
| 139 |
+
813,55200,9.4975
|
| 140 |
+
819,55600,8.5919
|
| 141 |
+
828,56000,4.0659
|
| 142 |
+
834,56400,6.4677
|
| 143 |
+
839,56800,8.6157
|
| 144 |
+
847,57200,7.6231
|
| 145 |
+
854,57600,6.1867
|
| 146 |
+
864,58000,5.138
|
| 147 |
+
875,58400,4.1107
|
| 148 |
+
884,58800,4.6541
|
| 149 |
+
890,59200,8.6775
|
| 150 |
+
898,59600,4.5193
|
| 151 |
+
903,60000,10.8015
|
| 152 |
+
909,60400,7.2792
|
| 153 |
+
916,60800,6.9898
|
| 154 |
+
920,61200,9.2429
|
| 155 |
+
926,61600,7.8279
|
| 156 |
+
930,62000,9.559
|
| 157 |
+
938,62400,6.2201
|
| 158 |
+
942,62800,12.4695
|
| 159 |
+
949,63200,6.0011
|
| 160 |
+
955,63600,7.5678
|
| 161 |
+
960,64000,8.5841
|
| 162 |
+
965,64400,8.8059
|
| 163 |
+
969,64800,9.559
|
| 164 |
+
974,65200,8.137
|
| 165 |
+
979,65600,6.2258
|
| 166 |
+
985,66000,6.0418
|
| 167 |
+
990,66400,7.6972
|
| 168 |
+
994,66800,10.6031
|
| 169 |
+
999,67200,6.4527
|
| 170 |
+
1004,67600,7.6003
|
| 171 |
+
1009,68000,8.036
|
| 172 |
+
1014,68400,11.559
|
| 173 |
+
1018,68800,9.9028
|
| 174 |
+
1024,69200,8.8209
|
| 175 |
+
1030,69600,6.8682
|
| 176 |
+
1034,70000,9.1513
|
| 177 |
+
1039,70400,8.8808
|
| 178 |
+
1045,70800,6.2892
|
| 179 |
+
1052,71200,6.6137
|
| 180 |
+
1056,71600,9.1258
|
| 181 |
+
1061,72000,8.2712
|
| 182 |
+
1069,72400,5.1346
|
| 183 |
+
1073,72800,9.3301
|
| 184 |
+
1079,73200,8.6006
|
| 185 |
+
1083,73600,9.7199
|
| 186 |
+
1088,74000,8.5393
|
| 187 |
+
1093,74400,9.4136
|
| 188 |
+
1098,74800,9.2309
|
| 189 |
+
1104,75200,9.9483
|
| 190 |
+
1111,75600,6.868
|
| 191 |
+
1117,76000,7.3642
|
| 192 |
+
1123,76400,8.7512
|
| 193 |
+
1128,76800,7.7363
|
| 194 |
+
1133,77200,11.2048
|
| 195 |
+
1138,77600,8.7672
|
| 196 |
+
1142,78000,11.2022
|
| 197 |
+
1148,78400,8.6267
|
| 198 |
+
1156,78800,5.7085
|
| 199 |
+
1165,79200,5.2502
|
| 200 |
+
1171,79600,9.1847
|
| 201 |
+
1181,80000,3.8257
|
| 202 |
+
1187,80400,9.0944
|
| 203 |
+
1193,80800,7.8396
|
| 204 |
+
1204,81200,4.6099
|
| 205 |
+
1214,81600,4.2423
|
| 206 |
+
1223,82000,4.2463
|
| 207 |
+
1231,82400,6.0866
|
| 208 |
+
1236,82800,9.3631
|
| 209 |
+
1244,83200,6.0139
|
| 210 |
+
1250,83600,9.3664
|
| 211 |
+
1258,84000,5.5445
|
| 212 |
+
1263,84400,9.2419
|
| 213 |
+
1270,84800,6.5126
|
| 214 |
+
1277,85200,5.3284
|
| 215 |
+
1284,85600,4.7442
|
| 216 |
+
1289,86000,6.6238
|
| 217 |
+
1295,86400,5.9787
|
| 218 |
+
1300,86800,6.273
|
| 219 |
+
1304,87200,7.7552
|
| 220 |
+
1310,87600,6.0625
|
| 221 |
+
1315,88000,6.2442
|
| 222 |
+
1320,88400,7.2193
|
| 223 |
+
1326,88800,5.8607
|
| 224 |
+
1333,89200,5.8177
|
| 225 |
+
1338,89600,5.6992
|
| 226 |
+
1344,90000,6.309
|
| 227 |
+
1350,90400,7.4904
|
| 228 |
+
1357,90800,5.3341
|
| 229 |
+
1362,91200,9.1276
|
| 230 |
+
1370,91600,5.6335
|
| 231 |
+
1377,92000,5.5404
|
| 232 |
+
1382,92400,10.4014
|
| 233 |
+
1387,92800,8.972
|
| 234 |
+
1393,93200,7.6199
|
| 235 |
+
1400,93600,7.0028
|
| 236 |
+
1408,94000,6.7953
|
| 237 |
+
1417,94400,4.607
|
| 238 |
+
1425,94800,6.7686
|
| 239 |
+
1431,95200,6.3672
|
| 240 |
+
1437,95600,7.3133
|
| 241 |
+
1442,96000,5.5286
|
| 242 |
+
1449,96400,8.1326
|
| 243 |
+
1454,96800,6.6459
|
| 244 |
+
1459,97200,9.6138
|
| 245 |
+
1465,97600,8.3167
|
| 246 |
+
1470,98000,11.7781
|
| 247 |
+
1474,98400,17.2643
|
| 248 |
+
1481,98800,9.2478
|
| 249 |
+
1490,99200,8.6222
|
| 250 |
+
1497,99600,10.0905
|
| 251 |
+
1504,100000,7.0319
|
| 252 |
+
1510,100400,11.7434
|
| 253 |
+
1515,100800,12.7016
|
| 254 |
+
1519,101200,14.5775
|
| 255 |
+
1525,101600,7.4347
|
| 256 |
+
1531,102000,17.0148
|
| 257 |
+
1535,102400,21.5497
|
| 258 |
+
1541,102800,13.3595
|
| 259 |
+
1546,103200,20.2214
|
| 260 |
+
1553,103600,12.2627
|
| 261 |
+
1562,104000,10.1528
|
| 262 |
+
1572,104400,7.0324
|
| 263 |
+
1577,104800,13.988
|
| 264 |
+
1587,105200,9.7205
|
| 265 |
+
1591,105600,22.2021
|
| 266 |
+
1599,106000,12.8697
|
| 267 |
+
1611,106400,6.7423
|
| 268 |
+
1618,106800,9.1311
|
| 269 |
+
1624,107200,13.5325
|
| 270 |
+
1630,107600,15.3574
|
| 271 |
+
1637,108000,11.4605
|
| 272 |
+
1647,108400,8.1593
|
| 273 |
+
1657,108800,7.3692
|
| 274 |
+
1664,109200,12.4204
|
| 275 |
+
1671,109600,16.0635
|
| 276 |
+
1677,110000,11.0747
|
| 277 |
+
1687,110400,10.0776
|
| 278 |
+
1695,110800,9.713
|
| 279 |
+
1704,111200,6.6402
|
| 280 |
+
1709,111600,16.4947
|
| 281 |
+
1714,112000,12.0573
|
| 282 |
+
1720,112400,12.4928
|
| 283 |
+
1726,112800,16.9818
|
| 284 |
+
1731,113200,16.4082
|
| 285 |
+
1735,113600,19.4684
|
| 286 |
+
1741,114000,17.2942
|
| 287 |
+
1746,114400,17.3803
|
| 288 |
+
1752,114800,14.3429
|
| 289 |
+
1759,115200,15.4686
|
| 290 |
+
1764,115600,18.3797
|
| 291 |
+
1771,116000,10.6607
|
| 292 |
+
1778,116400,12.9278
|
| 293 |
+
1783,116800,21.2477
|
| 294 |
+
1789,117200,12.1737
|
| 295 |
+
1795,117600,15.1248
|
| 296 |
+
1801,118000,11.4594
|
| 297 |
+
1808,118400,11.8572
|
| 298 |
+
1816,118800,8.6953
|
| 299 |
+
1822,119200,12.9991
|
| 300 |
+
1830,119600,8.346
|
| 301 |
+
1835,120000,16.5775
|
| 302 |
+
1840,120400,18.7012
|
| 303 |
+
1845,120800,18.5211
|
| 304 |
+
1852,121200,12.3151
|
| 305 |
+
1860,121600,11.191
|
| 306 |
+
1868,122000,10.6305
|
| 307 |
+
1875,122400,12.1361
|
| 308 |
+
1883,122800,12.0561
|
| 309 |
+
1887,123200,21.1206
|
| 310 |
+
1895,123600,10.3102
|
| 311 |
+
1901,124000,15.5468
|
| 312 |
+
1905,124400,21.1214
|
| 313 |
+
1913,124800,12.598
|
| 314 |
+
1919,125200,8.6702
|
| 315 |
+
1923,125600,19.5976
|
| 316 |
+
1928,126000,17.347
|
| 317 |
+
1936,126400,12.0519
|
| 318 |
+
1944,126800,6.2953
|
| 319 |
+
1949,127200,13.6435
|
| 320 |
+
1956,127600,9.3424
|
| 321 |
+
1960,128000,22.6692
|
| 322 |
+
1966,128400,12.2863
|
| 323 |
+
1973,128800,15.4013
|
| 324 |
+
1978,129200,17.9858
|
| 325 |
+
1988,129600,7.2154
|
| 326 |
+
1996,130000,10.964
|
| 327 |
+
2004,130400,10.9658
|
| 328 |
+
2009,130800,16.1921
|
| 329 |
+
2015,131200,19.8994
|
| 330 |
+
2020,131600,12.5598
|
| 331 |
+
2026,132000,18.5603
|
| 332 |
+
2034,132400,8.9442
|
| 333 |
+
2039,132800,15.7247
|
| 334 |
+
2044,133200,19.6043
|
| 335 |
+
2048,133600,22.708
|
| 336 |
+
2055,134000,12.1769
|
| 337 |
+
2059,134400,30.2886
|
| 338 |
+
2064,134800,19.3976
|
| 339 |
+
2069,135200,24.011
|
| 340 |
+
2075,135600,22.3232
|
| 341 |
+
2079,136000,22.4054
|
| 342 |
+
2087,136400,14.8207
|
| 343 |
+
2095,136800,14.1154
|
| 344 |
+
2102,137200,13.3378
|
| 345 |
+
2106,137600,22.9892
|
| 346 |
+
2112,138000,19.1975
|
| 347 |
+
2119,138400,16.2562
|
| 348 |
+
2125,138800,16.5325
|
| 349 |
+
2134,139200,9.7804
|
| 350 |
+
2143,139600,12.9261
|
| 351 |
+
2149,140000,15.1729
|
| 352 |
+
2157,140400,11.4505
|
| 353 |
+
2163,140800,16.225
|
| 354 |
+
2168,141200,15.0464
|
| 355 |
+
2175,141600,12.2286
|
| 356 |
+
2181,142000,14.5324
|
| 357 |
+
2187,142400,17.9193
|
| 358 |
+
2192,142800,21.9792
|
| 359 |
+
2202,143200,7.5693
|
| 360 |
+
2214,143600,7.0395
|
| 361 |
+
2219,144000,20.2988
|
| 362 |
+
2230,144400,8.1503
|
| 363 |
+
2237,144800,12.8959
|
| 364 |
+
2246,145200,11.8272
|
| 365 |
+
2254,145600,15.8534
|
| 366 |
+
2259,146000,20.5079
|
| 367 |
+
2266,146400,10.7379
|
| 368 |
+
2271,146800,21.599
|
| 369 |
+
2279,147200,12.679
|
| 370 |
+
2284,147600,14.8514
|
| 371 |
+
2291,148000,8.6118
|
| 372 |
+
2297,148400,7.5502
|
| 373 |
+
2306,148800,5.1645
|
| 374 |
+
2313,149200,10.6152
|
| 375 |
+
2319,149600,15.1497
|
| 376 |
+
2324,150000,13.6594
|
| 377 |
+
2331,150400,10.1251
|
| 378 |
+
2337,150800,10.9294
|
| 379 |
+
2345,151200,5.0712
|
| 380 |
+
2350,151600,13.3293
|
| 381 |
+
2359,152000,6.394
|
| 382 |
+
2370,152400,4.9969
|
| 383 |
+
2379,152800,7.9595
|
| 384 |
+
2391,153200,4.0272
|
| 385 |
+
2398,153600,7.3762
|
| 386 |
+
2407,154000,7.3333
|
| 387 |
+
2413,154400,10.8586
|
| 388 |
+
2423,154800,9.7345
|
| 389 |
+
2432,155200,7.9822
|
| 390 |
+
2439,155600,10.3486
|
| 391 |
+
2450,156000,6.3284
|
| 392 |
+
2458,156400,9.4372
|
| 393 |
+
2473,156800,3.132
|
| 394 |
+
2481,157200,10.3754
|
| 395 |
+
2487,157600,10.447
|
| 396 |
+
2493,158000,15.9101
|
| 397 |
+
2503,158400,8.3842
|
| 398 |
+
2510,158800,15.4866
|
| 399 |
+
2518,159200,11.682
|
| 400 |
+
2526,159600,11.0361
|
| 401 |
+
2537,160000,8.7871
|
| 402 |
+
2545,160400,11.1971
|
| 403 |
+
2555,160800,4.4022
|
| 404 |
+
2562,161200,13.1779
|
| 405 |
+
2568,161600,12.9045
|
| 406 |
+
2579,162000,8.0949
|
| 407 |
+
2587,162400,11.4998
|
| 408 |
+
2597,162800,7.2315
|
| 409 |
+
2604,163200,14.4484
|
| 410 |
+
2609,163600,15.2563
|
| 411 |
+
2622,164000,7.1052
|
| 412 |
+
2631,164400,8.0708
|
| 413 |
+
2636,164800,15.3412
|
| 414 |
+
2646,165200,7.0698
|
| 415 |
+
2654,165600,10.9479
|
| 416 |
+
2661,166000,12.6783
|
| 417 |
+
2666,166400,16.1794
|
| 418 |
+
2671,166800,18.6718
|
| 419 |
+
2676,167200,13.5239
|
| 420 |
+
2681,167600,12.3507
|
| 421 |
+
2691,168000,7.2502
|
| 422 |
+
2700,168400,6.2099
|
| 423 |
+
2707,168800,13.9091
|
| 424 |
+
2712,169200,16.7988
|
| 425 |
+
2717,169600,22.2866
|
| 426 |
+
2728,170000,8.0224
|
| 427 |
+
2738,170400,11.7132
|
| 428 |
+
2743,170800,22.9338
|
| 429 |
+
2750,171200,15.1354
|
| 430 |
+
2762,171600,8.9432
|
| 431 |
+
2768,172000,14.02
|
| 432 |
+
2773,172400,19.0923
|
| 433 |
+
2780,172800,14.6205
|
| 434 |
+
2787,173200,12.9528
|
| 435 |
+
2796,173600,11.7497
|
| 436 |
+
2803,174000,14.0874
|
| 437 |
+
2808,174400,21.8773
|
| 438 |
+
2813,174800,16.507
|
| 439 |
+
2819,175200,19.8032
|
| 440 |
+
2827,175600,13.8242
|
| 441 |
+
2833,176000,19.0166
|
| 442 |
+
2838,176400,27.172
|
| 443 |
+
2846,176800,11.3364
|
| 444 |
+
2851,177200,19.2692
|
| 445 |
+
2857,177600,21.4003
|
| 446 |
+
2865,178000,13.1762
|
| 447 |
+
2871,178400,20.7159
|
| 448 |
+
2878,178800,12.615
|
| 449 |
+
2882,179200,17.292
|
| 450 |
+
2886,179600,22.37
|
| 451 |
+
2893,180000,16.9824
|
| 452 |
+
2903,180400,7.4276
|
| 453 |
+
2912,180800,12.7024
|
| 454 |
+
2918,181200,14.4444
|
| 455 |
+
2926,181600,14.2302
|
| 456 |
+
2932,182000,18.124
|
| 457 |
+
2938,182400,13.5542
|
| 458 |
+
2943,182800,33.0073
|
| 459 |
+
2950,183200,12.0173
|
| 460 |
+
2954,183600,22.766
|
| 461 |
+
2959,184000,16.2093
|
| 462 |
+
2965,184400,15.9019
|
| 463 |
+
2972,184800,13.7782
|
| 464 |
+
2976,185200,26.2554
|
| 465 |
+
2985,185600,10.2687
|
| 466 |
+
2991,186000,14.767
|
| 467 |
+
2998,186400,12.8508
|
| 468 |
+
3002,186800,19.9929
|
| 469 |
+
3008,187200,16.2693
|
| 470 |
+
3016,187600,14.6283
|
| 471 |
+
3026,188000,9.5896
|
| 472 |
+
3034,188400,11.7475
|
| 473 |
+
3041,188800,12.2039
|
| 474 |
+
3045,189200,23.8742
|
| 475 |
+
3056,189600,8.3229
|
| 476 |
+
3063,190000,11.436
|
| 477 |
+
3070,190400,10.1774
|
| 478 |
+
3077,190800,10.9202
|
| 479 |
+
3082,191200,16.139
|
| 480 |
+
3087,191600,17.0197
|
| 481 |
+
3096,192000,6.3408
|
| 482 |
+
3103,192400,10.6383
|
| 483 |
+
3108,192800,14.7467
|
| 484 |
+
3113,193200,16.4579
|
| 485 |
+
3121,193600,9.5428
|
| 486 |
+
3125,194000,22.9954
|
| 487 |
+
3129,194400,11.4853
|
| 488 |
+
3136,194800,8.377
|
| 489 |
+
3141,195200,13.0133
|
| 490 |
+
3145,195600,14.8357
|
| 491 |
+
3150,196000,20.832
|
| 492 |
+
3159,196400,8.4116
|
| 493 |
+
3166,196800,16.5597
|
| 494 |
+
3178,197200,6.8153
|
| 495 |
+
3186,197600,11.2551
|
| 496 |
+
3197,198000,9.8681
|
| 497 |
+
3202,198400,16.0912
|
| 498 |
+
3210,198800,13.4439
|
| 499 |
+
3216,199200,11.4534
|
| 500 |
+
3222,199600,16.3593
|
| 501 |
+
3229,200000,11.4777
|
code/Lake application/logs/frozen_lake_PPO/PPO_frozen_lake_log_5.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
25,400,1.1394
|
| 3 |
+
48,800,1.3002
|
| 4 |
+
55,1200,4.2174
|
| 5 |
+
65,1600,3.1185
|
| 6 |
+
72,2000,4.6995
|
| 7 |
+
77,2400,5.4912
|
| 8 |
+
83,2800,5.3572
|
| 9 |
+
87,3200,6.4659
|
| 10 |
+
93,3600,5.7893
|
| 11 |
+
97,4000,7.5009
|
| 12 |
+
101,4400,7.0671
|
| 13 |
+
105,4800,6.9336
|
| 14 |
+
109,5200,7.0397
|
| 15 |
+
114,5600,6.2202
|
| 16 |
+
120,6000,4.966
|
| 17 |
+
126,6400,5.4569
|
| 18 |
+
131,6800,5.7776
|
| 19 |
+
135,7200,7.3573
|
| 20 |
+
139,7600,6.3645
|
| 21 |
+
144,8000,6.1568
|
| 22 |
+
148,8400,7.3496
|
| 23 |
+
152,8800,6.4834
|
| 24 |
+
157,9200,6.0306
|
| 25 |
+
162,9600,6.5141
|
| 26 |
+
167,10000,6.2576
|
| 27 |
+
172,10400,5.8399
|
| 28 |
+
176,10800,5.7246
|
| 29 |
+
182,11200,5.6336
|
| 30 |
+
186,11600,6.3594
|
| 31 |
+
192,12000,5.9243
|
| 32 |
+
196,12400,7.3959
|
| 33 |
+
200,12800,7.3087
|
| 34 |
+
204,13200,7.4288
|
| 35 |
+
208,13600,6.0092
|
| 36 |
+
212,14000,7.4051
|
| 37 |
+
219,14400,5.0751
|
| 38 |
+
223,14800,6.1337
|
| 39 |
+
228,15200,6.0328
|
| 40 |
+
233,15600,6.2323
|
| 41 |
+
237,16000,7.2947
|
| 42 |
+
242,16400,6.21
|
| 43 |
+
247,16800,5.2467
|
| 44 |
+
251,17200,7.301
|
| 45 |
+
256,17600,6.7575
|
| 46 |
+
260,18000,7.3051
|
| 47 |
+
264,18400,7.3659
|
| 48 |
+
270,18800,4.9364
|
| 49 |
+
275,19200,4.7764
|
| 50 |
+
280,19600,6.6525
|
| 51 |
+
286,20000,5.1666
|
| 52 |
+
290,20400,7.305
|
| 53 |
+
295,20800,5.3332
|
| 54 |
+
301,21200,5.8009
|
| 55 |
+
306,21600,6.3444
|
| 56 |
+
311,22000,5.38
|
| 57 |
+
316,22400,5.8886
|
| 58 |
+
321,22800,5.9808
|
| 59 |
+
326,23200,6.1518
|
| 60 |
+
331,23600,6.2596
|
| 61 |
+
335,24000,7.4699
|
| 62 |
+
340,24400,5.0459
|
| 63 |
+
344,24800,7.1979
|
| 64 |
+
348,25200,7.1906
|
| 65 |
+
354,25600,5.5289
|
| 66 |
+
358,26000,7.4249
|
| 67 |
+
362,26400,7.3209
|
| 68 |
+
366,26800,6.8339
|
| 69 |
+
371,27200,6.2896
|
| 70 |
+
375,27600,5.8839
|
| 71 |
+
382,28000,4.5678
|
| 72 |
+
387,28400,6.4948
|
| 73 |
+
391,28800,7.3334
|
| 74 |
+
395,29200,6.8561
|
| 75 |
+
400,29600,5.7921
|
| 76 |
+
405,30000,5.775
|
| 77 |
+
411,30400,5.2429
|
| 78 |
+
416,30800,6.3733
|
| 79 |
+
420,31200,6.9394
|
| 80 |
+
424,31600,7.4063
|
| 81 |
+
430,32000,5.6989
|
| 82 |
+
434,32400,6.6094
|
| 83 |
+
439,32800,6.4591
|
| 84 |
+
443,33200,7.5158
|
| 85 |
+
448,33600,5.1683
|
| 86 |
+
457,34000,3.2012
|
| 87 |
+
462,34400,6.4385
|
| 88 |
+
466,34800,7.6264
|
| 89 |
+
471,35200,6.8552
|
| 90 |
+
475,35600,6.6808
|
| 91 |
+
479,36000,7.5093
|
| 92 |
+
485,36400,6.0369
|
| 93 |
+
491,36800,5.9012
|
| 94 |
+
497,37200,4.7016
|
| 95 |
+
507,37600,3.1521
|
| 96 |
+
513,38000,5.874
|
| 97 |
+
517,38400,7.713
|
| 98 |
+
522,38800,6.4352
|
| 99 |
+
526,39200,7.6985
|
| 100 |
+
530,39600,7.8557
|
| 101 |
+
540,40000,3.9398
|
| 102 |
+
546,40400,5.5794
|
| 103 |
+
551,40800,6.7952
|
| 104 |
+
555,41200,7.4607
|
| 105 |
+
561,41600,6.2385
|
| 106 |
+
565,42000,6.9421
|
| 107 |
+
571,42400,5.5255
|
| 108 |
+
577,42800,7.0349
|
| 109 |
+
583,43200,7.038
|
| 110 |
+
587,43600,8.1308
|
| 111 |
+
592,44000,6.715
|
| 112 |
+
598,44400,6.6102
|
| 113 |
+
603,44800,5.7423
|
| 114 |
+
609,45200,5.7222
|
| 115 |
+
616,45600,4.6904
|
| 116 |
+
621,46000,6.6802
|
| 117 |
+
627,46400,5.1794
|
| 118 |
+
631,46800,7.8428
|
| 119 |
+
636,47200,6.6569
|
| 120 |
+
640,47600,6.0637
|
| 121 |
+
645,48000,6.6306
|
| 122 |
+
649,48400,7.8827
|
| 123 |
+
654,48800,7.2829
|
| 124 |
+
660,49200,5.3484
|
| 125 |
+
666,49600,4.965
|
| 126 |
+
671,50000,6.2847
|
| 127 |
+
675,50400,7.2723
|
| 128 |
+
681,50800,5.6506
|
| 129 |
+
689,51200,4.2316
|
| 130 |
+
693,51600,6.588
|
| 131 |
+
699,52000,5.8609
|
| 132 |
+
704,52400,6.7229
|
| 133 |
+
710,52800,5.2104
|
| 134 |
+
716,53200,5.5105
|
| 135 |
+
720,53600,6.7428
|
| 136 |
+
728,54000,4.2067
|
| 137 |
+
733,54400,6.1901
|
| 138 |
+
737,54800,6.1446
|
| 139 |
+
743,55200,6.4328
|
| 140 |
+
747,55600,7.5352
|
| 141 |
+
753,56000,4.6297
|
| 142 |
+
758,56400,6.7488
|
| 143 |
+
765,56800,4.4836
|
| 144 |
+
769,57200,7.332
|
| 145 |
+
773,57600,7.3174
|
| 146 |
+
778,58000,7.5865
|
| 147 |
+
784,58400,5.1354
|
| 148 |
+
788,58800,7.5499
|
| 149 |
+
792,59200,7.6182
|
| 150 |
+
802,59600,2.9436
|
| 151 |
+
807,60000,6.7174
|
| 152 |
+
812,60400,6.5712
|
| 153 |
+
820,60800,3.8267
|
| 154 |
+
827,61200,5.0311
|
| 155 |
+
831,61600,7.6775
|
| 156 |
+
835,62000,6.2429
|
| 157 |
+
842,62400,5.1519
|
| 158 |
+
847,62800,5.1346
|
| 159 |
+
858,63200,3.2845
|
| 160 |
+
862,63600,6.7612
|
| 161 |
+
868,64000,5.4555
|
| 162 |
+
874,64400,5.6836
|
| 163 |
+
879,64800,5.4058
|
| 164 |
+
883,65200,7.5245
|
| 165 |
+
887,65600,7.5208
|
| 166 |
+
892,66000,7.063
|
| 167 |
+
897,66400,6.6028
|
| 168 |
+
903,66800,6.9216
|
| 169 |
+
908,67200,7.874
|
| 170 |
+
912,67600,6.5846
|
| 171 |
+
918,68000,6.0015
|
| 172 |
+
923,68400,6.5042
|
| 173 |
+
927,68800,6.4518
|
| 174 |
+
932,69200,7.5216
|
| 175 |
+
937,69600,7.0083
|
| 176 |
+
942,70000,6.8853
|
| 177 |
+
948,70400,5.5392
|
| 178 |
+
954,70800,8.848
|
| 179 |
+
960,71200,11.4058
|
| 180 |
+
967,71600,7.5975
|
| 181 |
+
972,72000,8.9093
|
| 182 |
+
979,72400,9.266
|
| 183 |
+
985,72800,9.023
|
| 184 |
+
991,73200,11.5379
|
| 185 |
+
1000,73600,7.4839
|
| 186 |
+
1006,74000,10.8982
|
| 187 |
+
1011,74400,9.6794
|
| 188 |
+
1016,74800,11.5398
|
| 189 |
+
1022,75200,12.7577
|
| 190 |
+
1032,75600,7.5257
|
| 191 |
+
1040,76000,10.313
|
| 192 |
+
1052,76400,4.9592
|
| 193 |
+
1057,76800,9.929
|
| 194 |
+
1065,77200,9.0269
|
| 195 |
+
1074,77600,10.0283
|
| 196 |
+
1080,78000,9.1994
|
| 197 |
+
1084,78400,17.318
|
| 198 |
+
1090,78800,8.6919
|
| 199 |
+
1099,79200,8.3069
|
| 200 |
+
1104,79600,14.1304
|
| 201 |
+
1110,80000,13.6171
|
| 202 |
+
1114,80400,16.2281
|
| 203 |
+
1119,80800,13.7672
|
| 204 |
+
1127,81200,9.2344
|
| 205 |
+
1133,81600,12.9287
|
| 206 |
+
1139,82000,9.0991
|
| 207 |
+
1146,82400,10.4085
|
| 208 |
+
1154,82800,6.4297
|
| 209 |
+
1161,83200,11.2657
|
| 210 |
+
1170,83600,8.6181
|
| 211 |
+
1176,84000,10.1173
|
| 212 |
+
1182,84400,10.5116
|
| 213 |
+
1189,84800,7.5418
|
| 214 |
+
1197,85200,7.8979
|
| 215 |
+
1204,85600,10.4355
|
| 216 |
+
1214,86000,5.9039
|
| 217 |
+
1228,86400,4.1987
|
| 218 |
+
1238,86800,6.374
|
| 219 |
+
1246,87200,5.9424
|
| 220 |
+
1251,87600,15.9749
|
| 221 |
+
1257,88000,14.0111
|
| 222 |
+
1261,88400,19.8135
|
| 223 |
+
1270,88800,7.7016
|
| 224 |
+
1276,89200,10.2966
|
| 225 |
+
1281,89600,12.6069
|
| 226 |
+
1288,90000,10.6588
|
| 227 |
+
1293,90400,17.1633
|
| 228 |
+
1300,90800,9.8388
|
| 229 |
+
1308,91200,9.1061
|
| 230 |
+
1314,91600,10.2858
|
| 231 |
+
1319,92000,15.5991
|
| 232 |
+
1323,92400,19.9744
|
| 233 |
+
1329,92800,13.7349
|
| 234 |
+
1333,93200,16.2973
|
| 235 |
+
1340,93600,12.1433
|
| 236 |
+
1346,94000,15.6216
|
| 237 |
+
1350,94400,16.604
|
| 238 |
+
1356,94800,17.9473
|
| 239 |
+
1360,95200,22.7261
|
| 240 |
+
1367,95600,9.7798
|
| 241 |
+
1372,96000,18.8177
|
| 242 |
+
1376,96400,23.3835
|
| 243 |
+
1380,96800,22.7676
|
| 244 |
+
1385,97200,17.9196
|
| 245 |
+
1391,97600,11.0045
|
| 246 |
+
1398,98000,12.9032
|
| 247 |
+
1403,98400,19.9428
|
| 248 |
+
1411,98800,10.0649
|
| 249 |
+
1415,99200,23.4105
|
| 250 |
+
1421,99600,14.4659
|
| 251 |
+
1427,100000,12.6086
|
| 252 |
+
1434,100400,11.2016
|
| 253 |
+
1440,100800,16.4195
|
| 254 |
+
1445,101200,14.8528
|
| 255 |
+
1451,101600,13.3423
|
| 256 |
+
1457,102000,15.6651
|
| 257 |
+
1462,102400,19.3589
|
| 258 |
+
1467,102800,19.2814
|
| 259 |
+
1472,103200,17.4048
|
| 260 |
+
1479,103600,10.9156
|
| 261 |
+
1484,104000,19.1347
|
| 262 |
+
1489,104400,16.7974
|
| 263 |
+
1500,104800,7.7077
|
| 264 |
+
1508,105200,8.3873
|
| 265 |
+
1515,105600,11.8502
|
| 266 |
+
1522,106000,12.4617
|
| 267 |
+
1529,106400,10.997
|
| 268 |
+
1534,106800,18.1086
|
| 269 |
+
1538,107200,21.5753
|
| 270 |
+
1542,107600,18.1229
|
| 271 |
+
1548,108000,19.0807
|
| 272 |
+
1553,108400,19.9151
|
| 273 |
+
1557,108800,24.3347
|
| 274 |
+
1565,109200,11.5838
|
| 275 |
+
1571,109600,10.4892
|
| 276 |
+
1576,110000,18.4124
|
| 277 |
+
1583,110400,9.6659
|
| 278 |
+
1589,110800,15.3845
|
| 279 |
+
1594,111200,19.4332
|
| 280 |
+
1603,111600,9.1848
|
| 281 |
+
1608,112000,19.8579
|
| 282 |
+
1614,112400,14.6327
|
| 283 |
+
1620,112800,15.4716
|
| 284 |
+
1628,113200,7.6968
|
| 285 |
+
1633,113600,14.4689
|
| 286 |
+
1637,114000,19.6793
|
| 287 |
+
1642,114400,20.0721
|
| 288 |
+
1647,114800,15.0668
|
| 289 |
+
1652,115200,17.4454
|
| 290 |
+
1657,115600,19.6026
|
| 291 |
+
1663,116000,14.572
|
| 292 |
+
1669,116400,12.3857
|
| 293 |
+
1675,116800,15.0434
|
| 294 |
+
1679,117200,23.0521
|
| 295 |
+
1685,117600,15.9115
|
| 296 |
+
1691,118000,16.4641
|
| 297 |
+
1695,118400,18.5005
|
| 298 |
+
1701,118800,13.3055
|
| 299 |
+
1705,119200,20.5855
|
| 300 |
+
1711,119600,15.2568
|
| 301 |
+
1716,120000,17.1653
|
| 302 |
+
1721,120400,16.2964
|
| 303 |
+
1726,120800,17.3911
|
| 304 |
+
1731,121200,18.9176
|
| 305 |
+
1735,121600,20.2643
|
| 306 |
+
1741,122000,21.9711
|
| 307 |
+
1748,122400,15.1474
|
| 308 |
+
1752,122800,21.0002
|
| 309 |
+
1756,123200,27.017
|
| 310 |
+
1761,123600,21.0847
|
| 311 |
+
1768,124000,16.9835
|
| 312 |
+
1774,124400,16.988
|
| 313 |
+
1780,124800,17.102
|
| 314 |
+
1784,125200,27.0598
|
| 315 |
+
1790,125600,18.8929
|
| 316 |
+
1795,126000,18.4346
|
| 317 |
+
1799,126400,27.4704
|
| 318 |
+
1803,126800,20.451
|
| 319 |
+
1812,127200,12.5548
|
| 320 |
+
1816,127600,24.9355
|
| 321 |
+
1821,128000,14.002
|
| 322 |
+
1826,128400,16.9177
|
| 323 |
+
1834,128800,9.9734
|
| 324 |
+
1847,129200,4.5403
|
| 325 |
+
1853,129600,10.6147
|
| 326 |
+
1865,130000,6.26
|
| 327 |
+
1872,130400,12.1423
|
| 328 |
+
1877,130800,16.8818
|
| 329 |
+
1882,131200,14.1034
|
| 330 |
+
1887,131600,19.5902
|
| 331 |
+
1894,132000,12.8515
|
| 332 |
+
1899,132400,16.2843
|
| 333 |
+
1904,132800,15.5745
|
| 334 |
+
1914,133200,8.3905
|
| 335 |
+
1922,133600,13.0687
|
| 336 |
+
1929,134000,13.9548
|
| 337 |
+
1935,134400,11.7435
|
| 338 |
+
1944,134800,12.2644
|
| 339 |
+
1949,135200,18.9015
|
| 340 |
+
1957,135600,10.5449
|
| 341 |
+
1968,136000,6.614
|
| 342 |
+
1979,136400,7.9006
|
| 343 |
+
1988,136800,8.6919
|
| 344 |
+
1993,137200,19.6558
|
| 345 |
+
1999,137600,13.7705
|
| 346 |
+
2004,138000,19.7431
|
| 347 |
+
2010,138400,16.1015
|
| 348 |
+
2018,138800,7.969
|
| 349 |
+
2024,139200,10.7627
|
| 350 |
+
2033,139600,9.8075
|
| 351 |
+
2038,140000,15.1353
|
| 352 |
+
2044,140400,14.33
|
| 353 |
+
2051,140800,13.0915
|
| 354 |
+
2059,141200,11.0496
|
| 355 |
+
2067,141600,8.5425
|
| 356 |
+
2074,142000,12.6574
|
| 357 |
+
2079,142400,18.6865
|
| 358 |
+
2083,142800,18.9614
|
| 359 |
+
2091,143200,7.6956
|
| 360 |
+
2097,143600,19.3319
|
| 361 |
+
2106,144000,6.1586
|
| 362 |
+
2112,144400,11.7879
|
| 363 |
+
2117,144800,14.6574
|
| 364 |
+
2124,145200,9.78
|
| 365 |
+
2131,145600,8.172
|
| 366 |
+
2138,146000,9.3161
|
| 367 |
+
2145,146400,10.1464
|
| 368 |
+
2151,146800,13.3546
|
| 369 |
+
2158,147200,10.2643
|
| 370 |
+
2162,147600,17.7297
|
| 371 |
+
2167,148000,12.2066
|
| 372 |
+
2174,148400,11.723
|
| 373 |
+
2181,148800,12.61
|
| 374 |
+
2185,149200,20.9512
|
| 375 |
+
2192,149600,9.257
|
| 376 |
+
2200,150000,13.0471
|
| 377 |
+
2206,150400,10.6689
|
| 378 |
+
2212,150800,16.0447
|
| 379 |
+
2219,151200,13.6559
|
| 380 |
+
2225,151600,13.2487
|
| 381 |
+
2235,152000,7.2764
|
| 382 |
+
2242,152400,11.6686
|
| 383 |
+
2248,152800,12.3615
|
| 384 |
+
2255,153200,13.5621
|
| 385 |
+
2263,153600,9.6251
|
| 386 |
+
2269,154000,9.0672
|
| 387 |
+
2276,154400,13.0372
|
| 388 |
+
2281,154800,16.5969
|
| 389 |
+
2286,155200,18.0225
|
| 390 |
+
2292,155600,14.2052
|
| 391 |
+
2298,156000,11.6988
|
| 392 |
+
2304,156400,9.5336
|
| 393 |
+
2312,156800,8.0191
|
| 394 |
+
2324,157200,5.3825
|
| 395 |
+
2330,157600,8.1571
|
| 396 |
+
2337,158000,10.3493
|
| 397 |
+
2344,158400,10.4621
|
| 398 |
+
2350,158800,10.5959
|
| 399 |
+
2356,159200,7.2691
|
| 400 |
+
2364,159600,6.3992
|
| 401 |
+
2372,160000,7.9295
|
| 402 |
+
2377,160400,7.4555
|
| 403 |
+
2384,160800,7.9996
|
| 404 |
+
2389,161200,13.726
|
| 405 |
+
2395,161600,7.7046
|
| 406 |
+
2399,162000,16.8889
|
| 407 |
+
2410,162400,5.528
|
| 408 |
+
2422,162800,4.9575
|
| 409 |
+
2429,163200,8.7608
|
| 410 |
+
2438,163600,7.2575
|
| 411 |
+
2446,164000,7.0835
|
| 412 |
+
2452,164400,10.8246
|
| 413 |
+
2459,164800,7.831
|
| 414 |
+
2467,165200,6.133
|
| 415 |
+
2476,165600,7.8923
|
| 416 |
+
2483,166000,8.0733
|
| 417 |
+
2489,166400,11.0754
|
| 418 |
+
2493,166800,20.1624
|
| 419 |
+
2500,167200,12.4293
|
| 420 |
+
2504,167600,15.0355
|
| 421 |
+
2510,168000,13.0286
|
| 422 |
+
2518,168400,7.4877
|
| 423 |
+
2523,168800,12.2261
|
| 424 |
+
2531,169200,7.3993
|
| 425 |
+
2536,169600,8.9622
|
| 426 |
+
2542,170000,10.8549
|
| 427 |
+
2547,170400,11.6566
|
| 428 |
+
2555,170800,8.8997
|
| 429 |
+
2562,171200,8.861
|
| 430 |
+
2568,171600,13.2091
|
| 431 |
+
2574,172000,10.3659
|
| 432 |
+
2580,172400,11.7853
|
| 433 |
+
2585,172800,14.792
|
| 434 |
+
2592,173200,10.6782
|
| 435 |
+
2602,173600,6.9546
|
| 436 |
+
2609,174000,9.9301
|
| 437 |
+
2614,174400,17.7772
|
| 438 |
+
2623,174800,6.2142
|
| 439 |
+
2630,175200,12.9292
|
| 440 |
+
2637,175600,10.1204
|
| 441 |
+
2645,176000,9.0597
|
| 442 |
+
2651,176400,15.3755
|
| 443 |
+
2657,176800,13.128
|
| 444 |
+
2662,177200,19.3868
|
| 445 |
+
2670,177600,10.7437
|
| 446 |
+
2679,178000,6.904
|
| 447 |
+
2686,178400,9.2907
|
| 448 |
+
2696,178800,6.4837
|
| 449 |
+
2705,179200,8.2248
|
| 450 |
+
2711,179600,12.1069
|
| 451 |
+
2720,180000,8.3973
|
| 452 |
+
2724,180400,22.0167
|
| 453 |
+
2729,180800,15.3768
|
| 454 |
+
2734,181200,15.6707
|
| 455 |
+
2738,181600,21.4503
|
| 456 |
+
2744,182000,14.5199
|
| 457 |
+
2750,182400,16.9138
|
| 458 |
+
2756,182800,12.2078
|
| 459 |
+
2762,183200,15.9948
|
| 460 |
+
2769,183600,12.4933
|
| 461 |
+
2775,184000,14.7625
|
| 462 |
+
2781,184400,16.4597
|
| 463 |
+
2786,184800,12.6036
|
| 464 |
+
2793,185200,11.1748
|
| 465 |
+
2799,185600,13.5976
|
| 466 |
+
2805,186000,13.175
|
| 467 |
+
2811,186400,14.25
|
| 468 |
+
2816,186800,22.0337
|
| 469 |
+
2822,187200,17.4297
|
| 470 |
+
2827,187600,17.1395
|
| 471 |
+
2832,188000,18.1786
|
| 472 |
+
2837,188400,16.0257
|
| 473 |
+
2844,188800,11.8928
|
| 474 |
+
2850,189200,16.6968
|
| 475 |
+
2855,189600,19.1383
|
| 476 |
+
2860,190000,21.8792
|
| 477 |
+
2864,190400,27.2875
|
| 478 |
+
2868,190800,25.2937
|
| 479 |
+
2873,191200,20.9754
|
| 480 |
+
2882,191600,12.5236
|
| 481 |
+
2886,192000,26.9158
|
| 482 |
+
2896,192400,9.8619
|
| 483 |
+
2912,192800,5.0885
|
| 484 |
+
2923,193200,4.6341
|
| 485 |
+
2930,193600,13.8767
|
| 486 |
+
2937,194000,11.1766
|
| 487 |
+
2944,194400,14.2145
|
| 488 |
+
2952,194800,7.6092
|
| 489 |
+
2961,195200,9.0705
|
| 490 |
+
2968,195600,10.5332
|
| 491 |
+
2973,196000,13.6747
|
| 492 |
+
2979,196400,17.6262
|
| 493 |
+
2986,196800,12.0028
|
| 494 |
+
2997,197200,5.1463
|
| 495 |
+
3008,197600,9.2429
|
| 496 |
+
3015,198000,11.3805
|
| 497 |
+
3022,198400,7.9289
|
| 498 |
+
3029,198800,12.3534
|
| 499 |
+
3033,199200,26.1072
|
| 500 |
+
3041,199600,13.3515
|
| 501 |
+
3048,200000,11.5205
|
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_1.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
23,400,1.2374
|
| 3 |
+
44,800,1.475
|
| 4 |
+
62,1200,1.6597
|
| 5 |
+
75,1600,2.2131
|
| 6 |
+
86,2000,2.8599
|
| 7 |
+
98,2400,2.246
|
| 8 |
+
109,2800,3.0708
|
| 9 |
+
120,3200,3.0907
|
| 10 |
+
134,3600,2.2625
|
| 11 |
+
148,4000,2.1622
|
| 12 |
+
162,4400,2.2727
|
| 13 |
+
177,4800,1.635
|
| 14 |
+
192,5200,2.4163
|
| 15 |
+
203,5600,2.6421
|
| 16 |
+
211,6000,4.1822
|
| 17 |
+
219,6400,3.2539
|
| 18 |
+
229,6800,3.5347
|
| 19 |
+
238,7200,3.3297
|
| 20 |
+
246,7600,3.7071
|
| 21 |
+
254,8000,3.3388
|
| 22 |
+
263,8400,3.4999
|
| 23 |
+
272,8800,3.3479
|
| 24 |
+
279,9200,3.8583
|
| 25 |
+
284,9600,6.0999
|
| 26 |
+
292,10000,3.6754
|
| 27 |
+
301,10400,3.3741
|
| 28 |
+
307,10800,5.4199
|
| 29 |
+
315,11200,3.3395
|
| 30 |
+
320,11600,6.3554
|
| 31 |
+
329,12000,3.8476
|
| 32 |
+
333,12400,7.4853
|
| 33 |
+
340,12800,4.5367
|
| 34 |
+
345,13200,5.1413
|
| 35 |
+
350,13600,5.9491
|
| 36 |
+
355,14000,6.9454
|
| 37 |
+
362,14400,4.5899
|
| 38 |
+
366,14800,7.3912
|
| 39 |
+
371,15200,5.3159
|
| 40 |
+
378,15600,4.6648
|
| 41 |
+
382,16000,6.0774
|
| 42 |
+
387,16400,6.5447
|
| 43 |
+
393,16800,4.8197
|
| 44 |
+
401,17200,4.653
|
| 45 |
+
405,17600,7.3019
|
| 46 |
+
410,18000,6.4426
|
| 47 |
+
415,18400,6.2152
|
| 48 |
+
420,18800,5.3748
|
| 49 |
+
424,19200,7.3437
|
| 50 |
+
428,19600,6.2297
|
| 51 |
+
432,20000,7.2185
|
| 52 |
+
438,20400,5.2351
|
| 53 |
+
442,20800,6.6825
|
| 54 |
+
448,21200,4.8845
|
| 55 |
+
455,21600,4.6814
|
| 56 |
+
459,22000,7.2773
|
| 57 |
+
463,22400,7.2303
|
| 58 |
+
467,22800,7.3327
|
| 59 |
+
474,23200,4.6069
|
| 60 |
+
479,23600,5.9596
|
| 61 |
+
483,24000,7.4753
|
| 62 |
+
487,24400,5.694
|
| 63 |
+
492,24800,6.0146
|
| 64 |
+
497,25200,6.2868
|
| 65 |
+
501,25600,7.4234
|
| 66 |
+
505,26000,7.4502
|
| 67 |
+
509,26400,7.501
|
| 68 |
+
514,26800,6.3456
|
| 69 |
+
518,27200,6.6853
|
| 70 |
+
523,27600,6.0334
|
| 71 |
+
527,28000,7.6037
|
| 72 |
+
531,28400,7.5199
|
| 73 |
+
535,28800,7.0901
|
| 74 |
+
540,29200,6.0599
|
| 75 |
+
545,29600,7.4143
|
| 76 |
+
549,30000,7.4906
|
| 77 |
+
553,30400,7.4618
|
| 78 |
+
558,30800,5.9038
|
| 79 |
+
562,31200,7.5903
|
| 80 |
+
567,31600,5.8108
|
| 81 |
+
572,32000,6.3685
|
| 82 |
+
577,32400,6.0121
|
| 83 |
+
581,32800,7.5516
|
| 84 |
+
585,33200,7.5793
|
| 85 |
+
589,33600,6.2426
|
| 86 |
+
593,34000,7.6518
|
| 87 |
+
597,34400,7.5643
|
| 88 |
+
601,34800,7.5213
|
| 89 |
+
606,35200,7.1744
|
| 90 |
+
610,35600,7.5771
|
| 91 |
+
614,36000,7.4704
|
| 92 |
+
618,36400,8.3538
|
| 93 |
+
622,36800,6.0225
|
| 94 |
+
626,37200,7.6223
|
| 95 |
+
631,37600,6.5412
|
| 96 |
+
636,38000,6.5792
|
| 97 |
+
641,38400,6.4666
|
| 98 |
+
645,38800,6.1159
|
| 99 |
+
649,39200,7.8856
|
| 100 |
+
653,39600,7.7125
|
| 101 |
+
658,40000,7.68
|
| 102 |
+
662,40400,6.3203
|
| 103 |
+
666,40800,7.7558
|
| 104 |
+
672,41200,5.3449
|
| 105 |
+
678,41600,5.3048
|
| 106 |
+
683,42000,6.0098
|
| 107 |
+
687,42400,7.7948
|
| 108 |
+
693,42800,5.2185
|
| 109 |
+
698,43200,6.8124
|
| 110 |
+
703,43600,7.1864
|
| 111 |
+
707,44000,6.9499
|
| 112 |
+
712,44400,7.3481
|
| 113 |
+
716,44800,6.9955
|
| 114 |
+
722,45200,5.2644
|
| 115 |
+
727,45600,7.1592
|
| 116 |
+
731,46000,7.8775
|
| 117 |
+
736,46400,6.8166
|
| 118 |
+
740,46800,7.7517
|
| 119 |
+
745,47200,6.382
|
| 120 |
+
749,47600,9.0147
|
| 121 |
+
755,48000,5.9239
|
| 122 |
+
760,48400,6.0407
|
| 123 |
+
766,48800,5.7661
|
| 124 |
+
770,49200,7.6221
|
| 125 |
+
777,49600,4.4481
|
| 126 |
+
782,50000,6.4733
|
| 127 |
+
786,50400,7.9791
|
| 128 |
+
790,50800,6.8551
|
| 129 |
+
797,51200,5.3038
|
| 130 |
+
801,51600,6.3203
|
| 131 |
+
808,52000,4.4792
|
| 132 |
+
813,52400,7.2173
|
| 133 |
+
817,52800,7.9572
|
| 134 |
+
821,53200,7.8356
|
| 135 |
+
826,53600,6.6151
|
| 136 |
+
830,54000,6.719
|
| 137 |
+
835,54400,6.5328
|
| 138 |
+
839,54800,7.9386
|
| 139 |
+
843,55200,7.8856
|
| 140 |
+
847,55600,7.8485
|
| 141 |
+
852,56000,6.087
|
| 142 |
+
858,56400,5.8267
|
| 143 |
+
862,56800,6.6001
|
| 144 |
+
869,57200,5.4358
|
| 145 |
+
874,57600,5.1707
|
| 146 |
+
879,58000,6.6192
|
| 147 |
+
883,58400,7.9342
|
| 148 |
+
887,58800,7.1165
|
| 149 |
+
891,59200,7.814
|
| 150 |
+
896,59600,6.5746
|
| 151 |
+
900,60000,7.7367
|
| 152 |
+
905,60400,7.0371
|
| 153 |
+
911,60800,6.1385
|
| 154 |
+
915,61200,6.8106
|
| 155 |
+
920,61600,6.758
|
| 156 |
+
924,62000,8.4806
|
| 157 |
+
928,62400,7.9427
|
| 158 |
+
933,62800,7.618
|
| 159 |
+
937,63200,7.3321
|
| 160 |
+
941,63600,7.6414
|
| 161 |
+
945,64000,7.5861
|
| 162 |
+
952,64400,5.5493
|
| 163 |
+
956,64800,7.9559
|
| 164 |
+
960,65200,7.2965
|
| 165 |
+
966,65600,6.0452
|
| 166 |
+
971,66000,6.785
|
| 167 |
+
975,66400,7.7964
|
| 168 |
+
979,66800,7.9213
|
| 169 |
+
983,67200,6.4723
|
| 170 |
+
987,67600,8.0026
|
| 171 |
+
992,68000,7.5901
|
| 172 |
+
996,68400,7.9942
|
| 173 |
+
1001,68800,5.7542
|
| 174 |
+
1007,69200,5.245
|
| 175 |
+
1011,69600,8.1308
|
| 176 |
+
1016,70000,6.861
|
| 177 |
+
1026,70400,4.9051
|
| 178 |
+
1033,70800,4.1145
|
| 179 |
+
1039,71200,8.9367
|
| 180 |
+
1045,71600,6.8587
|
| 181 |
+
1051,72000,6.208
|
| 182 |
+
1056,72400,9.627
|
| 183 |
+
1062,72800,6.7777
|
| 184 |
+
1068,73200,8.1144
|
| 185 |
+
1072,73600,10.0016
|
| 186 |
+
1078,74000,8.3931
|
| 187 |
+
1083,74400,8.5617
|
| 188 |
+
1090,74800,6.4565
|
| 189 |
+
1096,75200,7.7086
|
| 190 |
+
1102,75600,8.0011
|
| 191 |
+
1108,76000,7.8719
|
| 192 |
+
1117,76400,5.4102
|
| 193 |
+
1125,76800,5.3871
|
| 194 |
+
1132,77200,6.1708
|
| 195 |
+
1138,77600,9.6164
|
| 196 |
+
1143,78000,9.3242
|
| 197 |
+
1151,78400,6.2785
|
| 198 |
+
1156,78800,9.268
|
| 199 |
+
1163,79200,7.2284
|
| 200 |
+
1169,79600,7.7116
|
| 201 |
+
1174,80000,10.1517
|
| 202 |
+
1179,80400,10.7016
|
| 203 |
+
1184,80800,9.2447
|
| 204 |
+
1189,81200,9.5792
|
| 205 |
+
1194,81600,10.5578
|
| 206 |
+
1202,82000,6.7523
|
| 207 |
+
1206,82400,10.3011
|
| 208 |
+
1214,82800,8.1824
|
| 209 |
+
1219,83200,9.1268
|
| 210 |
+
1224,83600,9.169
|
| 211 |
+
1229,84000,11.3677
|
| 212 |
+
1234,84400,10.2567
|
| 213 |
+
1238,84800,12.7622
|
| 214 |
+
1242,85200,13.0512
|
| 215 |
+
1247,85600,11.1427
|
| 216 |
+
1253,86000,7.929
|
| 217 |
+
1258,86400,10.1415
|
| 218 |
+
1262,86800,13.3642
|
| 219 |
+
1267,87200,10.9775
|
| 220 |
+
1271,87600,12.6409
|
| 221 |
+
1276,88000,10.5992
|
| 222 |
+
1282,88400,9.3828
|
| 223 |
+
1289,88800,7.4124
|
| 224 |
+
1293,89200,11.4734
|
| 225 |
+
1298,89600,12.2623
|
| 226 |
+
1302,90000,10.1244
|
| 227 |
+
1307,90400,9.9046
|
| 228 |
+
1313,90800,9.779
|
| 229 |
+
1319,91200,8.0214
|
| 230 |
+
1325,91600,8.5971
|
| 231 |
+
1331,92000,9.6304
|
| 232 |
+
1336,92400,8.3368
|
| 233 |
+
1341,92800,9.7023
|
| 234 |
+
1345,93200,13.0409
|
| 235 |
+
1350,93600,11.4912
|
| 236 |
+
1356,94000,8.8157
|
| 237 |
+
1360,94400,11.3592
|
| 238 |
+
1366,94800,8.2193
|
| 239 |
+
1372,95200,10.369
|
| 240 |
+
1377,95600,9.2107
|
| 241 |
+
1381,96000,11.1205
|
| 242 |
+
1387,96400,9.8026
|
| 243 |
+
1392,96800,10.0977
|
| 244 |
+
1397,97200,11.0417
|
| 245 |
+
1401,97600,10.8291
|
| 246 |
+
1406,98000,10.6965
|
| 247 |
+
1413,98400,8.3182
|
| 248 |
+
1417,98800,12.0391
|
| 249 |
+
1422,99200,10.573
|
| 250 |
+
1427,99600,11.3273
|
| 251 |
+
1431,100000,13.3514
|
| 252 |
+
1436,100400,8.3171
|
| 253 |
+
1442,100800,9.6702
|
| 254 |
+
1446,101200,12.2739
|
| 255 |
+
1450,101600,13.6371
|
| 256 |
+
1454,102000,13.3332
|
| 257 |
+
1461,102400,8.6744
|
| 258 |
+
1468,102800,7.1006
|
| 259 |
+
1473,103200,9.9262
|
| 260 |
+
1479,103600,9.9459
|
| 261 |
+
1483,104000,12.8872
|
| 262 |
+
1487,104400,13.5411
|
| 263 |
+
1493,104800,8.3614
|
| 264 |
+
1501,105200,6.8721
|
| 265 |
+
1505,105600,12.9734
|
| 266 |
+
1510,106000,11.0086
|
| 267 |
+
1514,106400,13.0415
|
| 268 |
+
1519,106800,10.641
|
| 269 |
+
1525,107200,9.6682
|
| 270 |
+
1529,107600,11.3172
|
| 271 |
+
1537,108000,7.6408
|
| 272 |
+
1545,108400,7.5169
|
| 273 |
+
1554,108800,5.0547
|
| 274 |
+
1562,109200,8.1673
|
| 275 |
+
1566,109600,11.351
|
| 276 |
+
1572,110000,11.8269
|
| 277 |
+
1576,110400,14.5999
|
| 278 |
+
1581,110800,11.9343
|
| 279 |
+
1587,111200,9.0708
|
| 280 |
+
1599,111600,4.4076
|
| 281 |
+
1604,112000,9.6173
|
| 282 |
+
1610,112400,10.2097
|
| 283 |
+
1615,112800,11.4213
|
| 284 |
+
1619,113200,15.4919
|
| 285 |
+
1627,113600,8.0107
|
| 286 |
+
1633,114000,8.2533
|
| 287 |
+
1637,114400,15.4628
|
| 288 |
+
1643,114800,11.739
|
| 289 |
+
1648,115200,10.4027
|
| 290 |
+
1652,115600,14.405
|
| 291 |
+
1658,116000,10.397
|
| 292 |
+
1662,116400,15.0393
|
| 293 |
+
1667,116800,12.0286
|
| 294 |
+
1673,117200,10.1889
|
| 295 |
+
1678,117600,13.0175
|
| 296 |
+
1682,118000,13.202
|
| 297 |
+
1689,118400,8.8392
|
| 298 |
+
1694,118800,12.0206
|
| 299 |
+
1699,119200,15.2346
|
| 300 |
+
1706,119600,7.4044
|
| 301 |
+
1710,120000,15.8376
|
| 302 |
+
1716,120400,9.405
|
| 303 |
+
1721,120800,12.1716
|
| 304 |
+
1727,121200,10.639
|
| 305 |
+
1731,121600,12.4195
|
| 306 |
+
1735,122000,15.801
|
| 307 |
+
1740,122400,14.7642
|
| 308 |
+
1744,122800,14.9512
|
| 309 |
+
1749,123200,12.8902
|
| 310 |
+
1754,123600,12.6179
|
| 311 |
+
1758,124000,15.3085
|
| 312 |
+
1763,124400,15.2636
|
| 313 |
+
1767,124800,15.9938
|
| 314 |
+
1771,125200,13.2417
|
| 315 |
+
1776,125600,12.6163
|
| 316 |
+
1782,126000,10.864
|
| 317 |
+
1787,126400,12.165
|
| 318 |
+
1792,126800,12.2211
|
| 319 |
+
1796,127200,16.1173
|
| 320 |
+
1802,127600,9.7973
|
| 321 |
+
1811,128000,6.8105
|
| 322 |
+
1818,128400,7.4772
|
| 323 |
+
1824,128800,11.3689
|
| 324 |
+
1830,129200,9.7769
|
| 325 |
+
1837,129600,7.5446
|
| 326 |
+
1842,130000,12.6235
|
| 327 |
+
1848,130400,11.2806
|
| 328 |
+
1852,130800,13.906
|
| 329 |
+
1858,131200,12.9947
|
| 330 |
+
1865,131600,7.8085
|
| 331 |
+
1870,132000,12.0638
|
| 332 |
+
1876,132400,12.8332
|
| 333 |
+
1881,132800,12.0972
|
| 334 |
+
1888,133200,10.0171
|
| 335 |
+
1894,133600,8.1607
|
| 336 |
+
1906,134000,5.2001
|
| 337 |
+
1912,134400,10.769
|
| 338 |
+
1918,134800,10.4093
|
| 339 |
+
1922,135200,14.9802
|
| 340 |
+
1928,135600,12.3873
|
| 341 |
+
1932,136000,16.5533
|
| 342 |
+
1938,136400,10.1507
|
| 343 |
+
1944,136800,11.7987
|
| 344 |
+
1948,137200,16.9859
|
| 345 |
+
1953,137600,12.8739
|
| 346 |
+
1957,138000,15.2543
|
| 347 |
+
1962,138400,13.5113
|
| 348 |
+
1970,138800,9.3558
|
| 349 |
+
1974,139200,17.044
|
| 350 |
+
1980,139600,10.9737
|
| 351 |
+
1985,140000,12.4654
|
| 352 |
+
1991,140400,12.8888
|
| 353 |
+
1996,140800,14.0378
|
| 354 |
+
2003,141200,9.7178
|
| 355 |
+
2008,141600,14.7386
|
| 356 |
+
2013,142000,14.8715
|
| 357 |
+
2020,142400,9.5865
|
| 358 |
+
2025,142800,12.7104
|
| 359 |
+
2029,143200,16.0303
|
| 360 |
+
2034,143600,15.8244
|
| 361 |
+
2038,144000,16.9077
|
| 362 |
+
2042,144400,16.3177
|
| 363 |
+
2046,144800,17.1757
|
| 364 |
+
2050,145200,15.4536
|
| 365 |
+
2054,145600,16.8005
|
| 366 |
+
2063,146000,7.8445
|
| 367 |
+
2067,146400,13.3113
|
| 368 |
+
2074,146800,10.177
|
| 369 |
+
2085,147200,5.7266
|
| 370 |
+
2089,147600,13.1785
|
| 371 |
+
2097,148000,9.0101
|
| 372 |
+
2103,148400,11.5439
|
| 373 |
+
2110,148800,8.3925
|
| 374 |
+
2120,149200,7.0782
|
| 375 |
+
2127,149600,6.752
|
| 376 |
+
2136,150000,8.609
|
| 377 |
+
2141,150400,12.6775
|
| 378 |
+
2147,150800,11.4195
|
| 379 |
+
2152,151200,14.956
|
| 380 |
+
2159,151600,8.3099
|
| 381 |
+
2163,152000,13.0747
|
| 382 |
+
2169,152400,10.8855
|
| 383 |
+
2173,152800,16.1138
|
| 384 |
+
2179,153200,11.1642
|
| 385 |
+
2183,153600,17.5901
|
| 386 |
+
2189,154000,12.9443
|
| 387 |
+
2193,154400,15.0154
|
| 388 |
+
2199,154800,11.8207
|
| 389 |
+
2205,155200,12.6347
|
| 390 |
+
2209,155600,18.7259
|
| 391 |
+
2215,156000,12.7894
|
| 392 |
+
2224,156400,7.0928
|
| 393 |
+
2229,156800,13.0815
|
| 394 |
+
2237,157200,9.53
|
| 395 |
+
2244,157600,8.183
|
| 396 |
+
2250,158000,11.3156
|
| 397 |
+
2254,158400,17.8963
|
| 398 |
+
2264,158800,6.7863
|
| 399 |
+
2271,159200,9.1487
|
| 400 |
+
2278,159600,10.2853
|
| 401 |
+
2285,160000,11.1034
|
| 402 |
+
2291,160400,12.3193
|
| 403 |
+
2296,160800,12.8277
|
| 404 |
+
2304,161200,10.186
|
| 405 |
+
2311,161600,10.0795
|
| 406 |
+
2317,162000,13.2198
|
| 407 |
+
2322,162400,13.6536
|
| 408 |
+
2327,162800,16.2005
|
| 409 |
+
2333,163200,11.8695
|
| 410 |
+
2337,163600,18.9552
|
| 411 |
+
2341,164000,19.3696
|
| 412 |
+
2346,164400,15.9116
|
| 413 |
+
2351,164800,13.0325
|
| 414 |
+
2356,165200,14.5295
|
| 415 |
+
2362,165600,12.7752
|
| 416 |
+
2366,166000,20.2381
|
| 417 |
+
2370,166400,18.5701
|
| 418 |
+
2374,166800,19.7957
|
| 419 |
+
2378,167200,20.6729
|
| 420 |
+
2383,167600,17.1297
|
| 421 |
+
2388,168000,16.415
|
| 422 |
+
2392,168400,16.3995
|
| 423 |
+
2398,168800,14.5399
|
| 424 |
+
2403,169200,14.3127
|
| 425 |
+
2407,169600,20.0631
|
| 426 |
+
2412,170000,14.8381
|
| 427 |
+
2417,170400,16.6159
|
| 428 |
+
2422,170800,15.6721
|
| 429 |
+
2429,171200,11.7749
|
| 430 |
+
2435,171600,12.1638
|
| 431 |
+
2439,172000,20.5949
|
| 432 |
+
2443,172400,18.4239
|
| 433 |
+
2447,172800,20.2025
|
| 434 |
+
2453,173200,15.569
|
| 435 |
+
2459,173600,12.0643
|
| 436 |
+
2466,174000,11.6623
|
| 437 |
+
2471,174400,12.4459
|
| 438 |
+
2479,174800,8.9067
|
| 439 |
+
2483,175200,19.4989
|
| 440 |
+
2487,175600,19.945
|
| 441 |
+
2491,176000,18.5005
|
| 442 |
+
2497,176400,15.8793
|
| 443 |
+
2503,176800,11.8832
|
| 444 |
+
2509,177200,11.3501
|
| 445 |
+
2514,177600,11.7629
|
| 446 |
+
2518,178000,19.6937
|
| 447 |
+
2522,178400,19.9908
|
| 448 |
+
2526,178800,20.0871
|
| 449 |
+
2534,179200,9.8611
|
| 450 |
+
2539,179600,16.1641
|
| 451 |
+
2544,180000,16.6044
|
| 452 |
+
2551,180400,11.9178
|
| 453 |
+
2556,180800,15.7903
|
| 454 |
+
2565,181200,8.1818
|
| 455 |
+
2573,181600,6.7254
|
| 456 |
+
2582,182000,6.4099
|
| 457 |
+
2593,182400,6.8256
|
| 458 |
+
2608,182800,4.9001
|
| 459 |
+
2620,183200,5.3354
|
| 460 |
+
2625,183600,13.1512
|
| 461 |
+
2645,184000,3.785
|
| 462 |
+
2660,184400,3.8907
|
| 463 |
+
2667,184800,8.5334
|
| 464 |
+
2678,185200,6.3167
|
| 465 |
+
2688,185600,6.4812
|
| 466 |
+
2698,186000,5.3833
|
| 467 |
+
2706,186400,9.5081
|
| 468 |
+
2716,186800,6.8215
|
| 469 |
+
2722,187200,10.4982
|
| 470 |
+
2730,187600,9.5086
|
| 471 |
+
2738,188000,9.3461
|
| 472 |
+
2744,188400,14.2225
|
| 473 |
+
2757,188800,5.7347
|
| 474 |
+
2766,189200,7.2451
|
| 475 |
+
2772,189600,13.8376
|
| 476 |
+
2776,190000,19.2079
|
| 477 |
+
2785,190400,8.2667
|
| 478 |
+
2794,190800,7.8716
|
| 479 |
+
2800,191200,10.8995
|
| 480 |
+
2808,191600,11.1967
|
| 481 |
+
2813,192000,13.3437
|
| 482 |
+
2818,192400,15.5833
|
| 483 |
+
2825,192800,12.1659
|
| 484 |
+
2829,193200,18.1718
|
| 485 |
+
2841,193600,6.6037
|
| 486 |
+
2847,194000,13.7069
|
| 487 |
+
2853,194400,12.6538
|
| 488 |
+
2860,194800,12.0022
|
| 489 |
+
2868,195200,8.8985
|
| 490 |
+
2875,195600,12.2925
|
| 491 |
+
2881,196000,11.201
|
| 492 |
+
2889,196400,11.2774
|
| 493 |
+
2897,196800,9.0422
|
| 494 |
+
2903,197200,12.1459
|
| 495 |
+
2908,197600,17.9243
|
| 496 |
+
2914,198000,12.5648
|
| 497 |
+
2923,198400,8.3362
|
| 498 |
+
2931,198800,9.3883
|
| 499 |
+
2935,199200,19.7271
|
| 500 |
+
2941,199600,13.2621
|
| 501 |
+
2945,200000,18.3102
|
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_2.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
13,400,2.4027
|
| 3 |
+
32,800,1.617
|
| 4 |
+
46,1200,2.0829
|
| 5 |
+
55,1600,3.3664
|
| 6 |
+
70,2000,1.9602
|
| 7 |
+
77,2400,4.2734
|
| 8 |
+
87,2800,3.3484
|
| 9 |
+
96,3200,2.8441
|
| 10 |
+
108,3600,2.8373
|
| 11 |
+
113,4000,6.0449
|
| 12 |
+
122,4400,3.834
|
| 13 |
+
128,4800,4.6497
|
| 14 |
+
134,5200,5.7626
|
| 15 |
+
139,5600,5.8112
|
| 16 |
+
148,6000,3.9162
|
| 17 |
+
153,6400,6.4239
|
| 18 |
+
161,6800,3.2877
|
| 19 |
+
167,7200,6.0931
|
| 20 |
+
173,7600,5.6529
|
| 21 |
+
177,8000,8.1896
|
| 22 |
+
183,8400,5.4595
|
| 23 |
+
188,8800,5.5099
|
| 24 |
+
194,9200,5.9555
|
| 25 |
+
200,9600,5.4228
|
| 26 |
+
207,10000,5.1593
|
| 27 |
+
213,10400,5.8843
|
| 28 |
+
221,10800,3.7587
|
| 29 |
+
226,11200,6.9919
|
| 30 |
+
233,11600,5.0004
|
| 31 |
+
239,12000,5.6778
|
| 32 |
+
245,12400,6.7453
|
| 33 |
+
251,12800,5.6725
|
| 34 |
+
257,13200,5.7192
|
| 35 |
+
263,13600,6.5788
|
| 36 |
+
267,14000,7.3344
|
| 37 |
+
272,14400,7.4462
|
| 38 |
+
278,14800,6.0194
|
| 39 |
+
283,15200,7.349
|
| 40 |
+
289,15600,6.475
|
| 41 |
+
294,16000,8.7377
|
| 42 |
+
302,16400,5.5482
|
| 43 |
+
310,16800,4.4978
|
| 44 |
+
314,17200,12.6707
|
| 45 |
+
322,17600,6.9202
|
| 46 |
+
327,18000,8.4112
|
| 47 |
+
332,18400,7.8845
|
| 48 |
+
337,18800,8.8895
|
| 49 |
+
344,19200,7.0861
|
| 50 |
+
350,19600,8.066
|
| 51 |
+
357,20000,6.4104
|
| 52 |
+
364,20400,6.3238
|
| 53 |
+
369,20800,9.4534
|
| 54 |
+
373,21200,10.6902
|
| 55 |
+
378,21600,10.5201
|
| 56 |
+
383,22000,9.18
|
| 57 |
+
387,22400,11.8679
|
| 58 |
+
393,22800,7.6084
|
| 59 |
+
397,23200,11.8894
|
| 60 |
+
402,23600,8.3823
|
| 61 |
+
406,24000,11.8962
|
| 62 |
+
410,24400,11.9462
|
| 63 |
+
414,24800,12.0936
|
| 64 |
+
418,25200,12.2516
|
| 65 |
+
423,25600,11.6307
|
| 66 |
+
427,26000,12.2494
|
| 67 |
+
431,26400,10.3211
|
| 68 |
+
435,26800,12.193
|
| 69 |
+
441,27200,8.2828
|
| 70 |
+
447,27600,8.323
|
| 71 |
+
451,28000,12.0851
|
| 72 |
+
455,28400,11.8691
|
| 73 |
+
460,28800,10.3406
|
| 74 |
+
464,29200,11.2745
|
| 75 |
+
469,29600,9.7434
|
| 76 |
+
475,30000,6.0876
|
| 77 |
+
481,30400,9.8042
|
| 78 |
+
485,30800,9.8166
|
| 79 |
+
491,31200,8.5955
|
| 80 |
+
496,31600,8.8879
|
| 81 |
+
502,32000,9.6651
|
| 82 |
+
506,32400,12.4816
|
| 83 |
+
513,32800,5.3655
|
| 84 |
+
518,33200,10.5211
|
| 85 |
+
523,33600,8.8889
|
| 86 |
+
529,34000,8.49
|
| 87 |
+
533,34400,11.531
|
| 88 |
+
539,34800,8.149
|
| 89 |
+
543,35200,10.8391
|
| 90 |
+
548,35600,10.0555
|
| 91 |
+
552,36000,12.2689
|
| 92 |
+
557,36400,10.1142
|
| 93 |
+
562,36800,11.1685
|
| 94 |
+
568,37200,7.5849
|
| 95 |
+
574,37600,7.9313
|
| 96 |
+
578,38000,12.7468
|
| 97 |
+
582,38400,12.8371
|
| 98 |
+
587,38800,10.0817
|
| 99 |
+
593,39200,7.806
|
| 100 |
+
599,39600,8.1719
|
| 101 |
+
606,40000,7.7715
|
| 102 |
+
610,40400,12.7892
|
| 103 |
+
615,40800,8.8938
|
| 104 |
+
622,41200,6.7361
|
| 105 |
+
630,41600,6.5825
|
| 106 |
+
638,42000,6.4833
|
| 107 |
+
643,42400,8.2059
|
| 108 |
+
649,42800,10.0644
|
| 109 |
+
658,43200,5.4777
|
| 110 |
+
663,43600,8.1624
|
| 111 |
+
668,44000,10.306
|
| 112 |
+
673,44400,13.0668
|
| 113 |
+
681,44800,5.2228
|
| 114 |
+
686,45200,12.7062
|
| 115 |
+
691,45600,8.3513
|
| 116 |
+
698,46000,7.9629
|
| 117 |
+
702,46400,13.0319
|
| 118 |
+
709,46800,6.41
|
| 119 |
+
713,47200,13.8998
|
| 120 |
+
718,47600,10.1406
|
| 121 |
+
723,48000,11.4989
|
| 122 |
+
732,48400,5.7759
|
| 123 |
+
736,48800,13.8137
|
| 124 |
+
745,49200,3.7937
|
| 125 |
+
753,49600,8.4375
|
| 126 |
+
757,50000,13.3919
|
| 127 |
+
764,50400,7.7574
|
| 128 |
+
770,50800,7.3266
|
| 129 |
+
774,51200,14.4457
|
| 130 |
+
782,51600,7.6632
|
| 131 |
+
786,52000,14.5052
|
| 132 |
+
791,52400,12.7646
|
| 133 |
+
796,52800,12.3906
|
| 134 |
+
801,53200,12.5327
|
| 135 |
+
806,53600,9.7097
|
| 136 |
+
812,54000,9.998
|
| 137 |
+
821,54400,6.5391
|
| 138 |
+
826,54800,10.0426
|
| 139 |
+
830,55200,13.2772
|
| 140 |
+
837,55600,9.5483
|
| 141 |
+
841,56000,11.9928
|
| 142 |
+
848,56400,9.3228
|
| 143 |
+
852,56800,13.7227
|
| 144 |
+
858,57200,8.4757
|
| 145 |
+
862,57600,14.4407
|
| 146 |
+
868,58000,9.607
|
| 147 |
+
873,58400,12.6694
|
| 148 |
+
883,58800,4.7655
|
| 149 |
+
888,59200,11.5319
|
| 150 |
+
894,59600,10.6887
|
| 151 |
+
899,60000,11.8105
|
| 152 |
+
904,60400,10.2226
|
| 153 |
+
910,60800,8.0744
|
| 154 |
+
914,61200,14.8116
|
| 155 |
+
919,61600,12.2418
|
| 156 |
+
928,62000,6.0928
|
| 157 |
+
935,62400,8.7412
|
| 158 |
+
942,62800,8.2501
|
| 159 |
+
951,63200,7.3312
|
| 160 |
+
958,63600,6.9865
|
| 161 |
+
963,64000,13.1523
|
| 162 |
+
968,64400,11.9024
|
| 163 |
+
975,64800,10.2803
|
| 164 |
+
979,65200,15.9866
|
| 165 |
+
983,65600,13.8684
|
| 166 |
+
988,66000,13.0459
|
| 167 |
+
993,66400,13.1794
|
| 168 |
+
997,66800,16.858
|
| 169 |
+
1005,67200,9.2472
|
| 170 |
+
1011,67600,10.5944
|
| 171 |
+
1015,68000,18.199
|
| 172 |
+
1019,68400,16.954
|
| 173 |
+
1023,68800,16.9229
|
| 174 |
+
1028,69200,12.5304
|
| 175 |
+
1032,69600,17.6321
|
| 176 |
+
1039,70000,10.8713
|
| 177 |
+
1048,70400,8.1558
|
| 178 |
+
1053,70800,12.2909
|
| 179 |
+
1060,71200,10.5877
|
| 180 |
+
1066,71600,10.6469
|
| 181 |
+
1073,72000,10.9766
|
| 182 |
+
1077,72400,15.5587
|
| 183 |
+
1082,72800,14.5922
|
| 184 |
+
1088,73200,11.1597
|
| 185 |
+
1093,73600,16.6017
|
| 186 |
+
1098,74000,13.7014
|
| 187 |
+
1103,74400,10.6047
|
| 188 |
+
1108,74800,14.9205
|
| 189 |
+
1112,75200,18.965
|
| 190 |
+
1119,75600,12.518
|
| 191 |
+
1126,76000,7.3995
|
| 192 |
+
1131,76400,15.3457
|
| 193 |
+
1136,76800,13.6769
|
| 194 |
+
1140,77200,17.8605
|
| 195 |
+
1144,77600,18.1357
|
| 196 |
+
1151,78000,11.2644
|
| 197 |
+
1156,78400,11.4578
|
| 198 |
+
1161,78800,16.01
|
| 199 |
+
1167,79200,13.5079
|
| 200 |
+
1171,79600,15.4602
|
| 201 |
+
1176,80000,18.2113
|
| 202 |
+
1181,80400,17.2741
|
| 203 |
+
1185,80800,19.4672
|
| 204 |
+
1193,81200,8.1171
|
| 205 |
+
1198,81600,16.5683
|
| 206 |
+
1204,82000,12.9389
|
| 207 |
+
1209,82400,16.3976
|
| 208 |
+
1217,82800,9.9467
|
| 209 |
+
1221,83200,20.1043
|
| 210 |
+
1229,83600,8.9266
|
| 211 |
+
1235,84000,12.0714
|
| 212 |
+
1241,84400,14.5858
|
| 213 |
+
1245,84800,19.3948
|
| 214 |
+
1252,85200,9.0477
|
| 215 |
+
1256,85600,19.9998
|
| 216 |
+
1263,86000,11.7696
|
| 217 |
+
1271,86400,9.3787
|
| 218 |
+
1278,86800,8.5559
|
| 219 |
+
1284,87200,14.7973
|
| 220 |
+
1290,87600,12.839
|
| 221 |
+
1295,88000,14.3447
|
| 222 |
+
1299,88400,20.0166
|
| 223 |
+
1305,88800,13.8517
|
| 224 |
+
1310,89200,15.5374
|
| 225 |
+
1315,89600,11.2921
|
| 226 |
+
1322,90000,12.7502
|
| 227 |
+
1331,90400,8.0843
|
| 228 |
+
1335,90800,15.8949
|
| 229 |
+
1342,91200,11.7088
|
| 230 |
+
1347,91600,17.2511
|
| 231 |
+
1351,92000,18.5015
|
| 232 |
+
1357,92400,15.5406
|
| 233 |
+
1363,92800,11.4947
|
| 234 |
+
1367,93200,18.5403
|
| 235 |
+
1375,93600,10.876
|
| 236 |
+
1380,94000,16.1795
|
| 237 |
+
1388,94400,9.5337
|
| 238 |
+
1392,94800,18.751
|
| 239 |
+
1401,95200,9.2096
|
| 240 |
+
1406,95600,13.6803
|
| 241 |
+
1413,96000,11.6685
|
| 242 |
+
1424,96400,4.7407
|
| 243 |
+
1436,96800,6.0117
|
| 244 |
+
1448,97200,6.2754
|
| 245 |
+
1453,97600,14.8301
|
| 246 |
+
1457,98000,19.4724
|
| 247 |
+
1462,98400,12.7641
|
| 248 |
+
1470,98800,10.515
|
| 249 |
+
1479,99200,7.7847
|
| 250 |
+
1487,99600,8.115
|
| 251 |
+
1495,100000,9.6315
|
| 252 |
+
1503,100400,9.6751
|
| 253 |
+
1508,100800,14.0532
|
| 254 |
+
1514,101200,13.3345
|
| 255 |
+
1520,101600,14.4949
|
| 256 |
+
1528,102000,7.3491
|
| 257 |
+
1533,102400,16.749
|
| 258 |
+
1537,102800,16.6947
|
| 259 |
+
1544,103200,13.2561
|
| 260 |
+
1549,103600,16.2186
|
| 261 |
+
1555,104000,10.76
|
| 262 |
+
1561,104400,13.0782
|
| 263 |
+
1569,104800,9.7765
|
| 264 |
+
1576,105200,9.9622
|
| 265 |
+
1581,105600,16.9109
|
| 266 |
+
1587,106000,12.2365
|
| 267 |
+
1596,106400,8.4596
|
| 268 |
+
1602,106800,13.4264
|
| 269 |
+
1607,107200,13.8518
|
| 270 |
+
1614,107600,11.4566
|
| 271 |
+
1620,108000,14.6299
|
| 272 |
+
1628,108400,9.2927
|
| 273 |
+
1634,108800,12.3812
|
| 274 |
+
1641,109200,9.7538
|
| 275 |
+
1655,109600,4.8454
|
| 276 |
+
1660,110000,14.5315
|
| 277 |
+
1673,110400,3.9869
|
| 278 |
+
1680,110800,8.9558
|
| 279 |
+
1691,111200,5.7395
|
| 280 |
+
1695,111600,19.8293
|
| 281 |
+
1701,112000,13.327
|
| 282 |
+
1706,112400,13.7603
|
| 283 |
+
1711,112800,14.9665
|
| 284 |
+
1719,113200,7.5555
|
| 285 |
+
1726,113600,10.7098
|
| 286 |
+
1731,114000,14.8993
|
| 287 |
+
1736,114400,14.7975
|
| 288 |
+
1743,114800,7.8648
|
| 289 |
+
1751,115200,7.9691
|
| 290 |
+
1760,115600,9.1848
|
| 291 |
+
1767,116000,8.9283
|
| 292 |
+
1774,116400,11.6902
|
| 293 |
+
1778,116800,18.8354
|
| 294 |
+
1785,117200,12.2704
|
| 295 |
+
1789,117600,15.4445
|
| 296 |
+
1794,118000,16.3556
|
| 297 |
+
1806,118400,5.2073
|
| 298 |
+
1814,118800,9.6861
|
| 299 |
+
1818,119200,15.142
|
| 300 |
+
1825,119600,13.3942
|
| 301 |
+
1830,120000,14.1063
|
| 302 |
+
1836,120400,12.9716
|
| 303 |
+
1844,120800,8.7663
|
| 304 |
+
1853,121200,7.4106
|
| 305 |
+
1857,121600,20.3496
|
| 306 |
+
1863,122000,11.6921
|
| 307 |
+
1868,122400,14.598
|
| 308 |
+
1873,122800,16.1342
|
| 309 |
+
1878,123200,17.5161
|
| 310 |
+
1886,123600,8.8583
|
| 311 |
+
1894,124000,7.4233
|
| 312 |
+
1903,124400,9.8375
|
| 313 |
+
1908,124800,12.7669
|
| 314 |
+
1912,125200,18.0466
|
| 315 |
+
1919,125600,12.422
|
| 316 |
+
1924,126000,13.4632
|
| 317 |
+
1930,126400,14.4262
|
| 318 |
+
1935,126800,16.3081
|
| 319 |
+
1940,127200,17.1442
|
| 320 |
+
1945,127600,13.8432
|
| 321 |
+
1950,128000,17.8724
|
| 322 |
+
1954,128400,15.4276
|
| 323 |
+
1961,128800,11.0026
|
| 324 |
+
1967,129200,13.7937
|
| 325 |
+
1972,129600,13.3407
|
| 326 |
+
1980,130000,11.0252
|
| 327 |
+
1985,130400,13.7636
|
| 328 |
+
1989,130800,20.3436
|
| 329 |
+
1996,131200,9.9471
|
| 330 |
+
2001,131600,14.7954
|
| 331 |
+
2005,132000,19.5569
|
| 332 |
+
2010,132400,15.0541
|
| 333 |
+
2015,132800,16.94
|
| 334 |
+
2020,133200,16.584
|
| 335 |
+
2026,133600,12.1681
|
| 336 |
+
2030,134000,16.941
|
| 337 |
+
2035,134400,18.5808
|
| 338 |
+
2040,134800,16.5108
|
| 339 |
+
2048,135200,9.7247
|
| 340 |
+
2054,135600,13.6772
|
| 341 |
+
2060,136000,12.384
|
| 342 |
+
2066,136400,14.664
|
| 343 |
+
2071,136800,16.1609
|
| 344 |
+
2077,137200,11.9693
|
| 345 |
+
2085,137600,12.2337
|
| 346 |
+
2092,138000,8.7871
|
| 347 |
+
2096,138400,20.9815
|
| 348 |
+
2105,138800,9.5597
|
| 349 |
+
2111,139200,12.4106
|
| 350 |
+
2123,139600,5.885
|
| 351 |
+
2129,140000,12.46
|
| 352 |
+
2137,140400,8.6104
|
| 353 |
+
2141,140800,19.3967
|
| 354 |
+
2146,141200,18.9291
|
| 355 |
+
2153,141600,11.1995
|
| 356 |
+
2157,142000,21.0472
|
| 357 |
+
2166,142400,9.0968
|
| 358 |
+
2173,142800,11.4345
|
| 359 |
+
2177,143200,17.4054
|
| 360 |
+
2182,143600,16.7436
|
| 361 |
+
2187,144000,15.979
|
| 362 |
+
2191,144400,20.6625
|
| 363 |
+
2196,144800,17.967
|
| 364 |
+
2201,145200,14.4191
|
| 365 |
+
2206,145600,17.7912
|
| 366 |
+
2212,146000,12.9955
|
| 367 |
+
2216,146400,20.7862
|
| 368 |
+
2220,146800,18.8328
|
| 369 |
+
2225,147200,17.0398
|
| 370 |
+
2233,147600,9.1588
|
| 371 |
+
2238,148000,17.0543
|
| 372 |
+
2245,148400,11.4016
|
| 373 |
+
2250,148800,13.5677
|
| 374 |
+
2259,149200,9.3432
|
| 375 |
+
2264,149600,16.0615
|
| 376 |
+
2272,150000,7.0293
|
| 377 |
+
2278,150400,15.0398
|
| 378 |
+
2284,150800,13.835
|
| 379 |
+
2288,151200,15.2233
|
| 380 |
+
2294,151600,13.696
|
| 381 |
+
2302,152000,9.5545
|
| 382 |
+
2307,152400,15.6187
|
| 383 |
+
2313,152800,10.671
|
| 384 |
+
2319,153200,13.9607
|
| 385 |
+
2325,153600,10.3803
|
| 386 |
+
2332,154000,11.999
|
| 387 |
+
2339,154400,9.1082
|
| 388 |
+
2347,154800,9.2813
|
| 389 |
+
2355,155200,8.6008
|
| 390 |
+
2359,155600,19.8647
|
| 391 |
+
2369,156000,6.1099
|
| 392 |
+
2376,156400,13.2466
|
| 393 |
+
2381,156800,13.0148
|
| 394 |
+
2390,157200,7.6919
|
| 395 |
+
2395,157600,14.2158
|
| 396 |
+
2400,158000,17.9611
|
| 397 |
+
2408,158400,8.9897
|
| 398 |
+
2415,158800,10.4802
|
| 399 |
+
2420,159200,13.5744
|
| 400 |
+
2425,159600,16.1444
|
| 401 |
+
2430,160000,13.9878
|
| 402 |
+
2437,160400,12.0302
|
| 403 |
+
2445,160800,10.5921
|
| 404 |
+
2449,161200,15.9317
|
| 405 |
+
2453,161600,19.0136
|
| 406 |
+
2462,162000,9.6758
|
| 407 |
+
2467,162400,15.8064
|
| 408 |
+
2474,162800,9.9397
|
| 409 |
+
2478,163200,20.8661
|
| 410 |
+
2488,163600,6.9612
|
| 411 |
+
2496,164000,9.1259
|
| 412 |
+
2502,164400,12.425
|
| 413 |
+
2512,164800,5.2967
|
| 414 |
+
2521,165200,9.5709
|
| 415 |
+
2528,165600,8.9144
|
| 416 |
+
2537,166000,7.9899
|
| 417 |
+
2549,166400,4.6902
|
| 418 |
+
2558,166800,8.4622
|
| 419 |
+
2572,167200,4.6769
|
| 420 |
+
2579,167600,10.6164
|
| 421 |
+
2589,168000,5.4096
|
| 422 |
+
2597,168400,7.1444
|
| 423 |
+
2605,168800,11.1901
|
| 424 |
+
2610,169200,13.3372
|
| 425 |
+
2619,169600,8.1873
|
| 426 |
+
2626,170000,9.9578
|
| 427 |
+
2634,170400,8.5779
|
| 428 |
+
2639,170800,16.6372
|
| 429 |
+
2644,171200,17.5453
|
| 430 |
+
2649,171600,14.0101
|
| 431 |
+
2656,172000,11.4441
|
| 432 |
+
2661,172400,12.8794
|
| 433 |
+
2668,172800,11.5128
|
| 434 |
+
2674,173200,12.0959
|
| 435 |
+
2683,173600,5.4352
|
| 436 |
+
2687,174000,18.7393
|
| 437 |
+
2694,174400,11.3662
|
| 438 |
+
2700,174800,12.8577
|
| 439 |
+
2704,175200,16.8251
|
| 440 |
+
2708,175600,21.1851
|
| 441 |
+
2712,176000,19.6086
|
| 442 |
+
2717,176400,18.0934
|
| 443 |
+
2722,176800,15.998
|
| 444 |
+
2726,177200,20.8385
|
| 445 |
+
2731,177600,13.3021
|
| 446 |
+
2737,178000,15.9943
|
| 447 |
+
2743,178400,11.5759
|
| 448 |
+
2749,178800,13.4046
|
| 449 |
+
2756,179200,9.6437
|
| 450 |
+
2761,179600,16.2845
|
| 451 |
+
2766,180000,15.4722
|
| 452 |
+
2770,180400,16.5676
|
| 453 |
+
2777,180800,10.0904
|
| 454 |
+
2783,181200,13.448
|
| 455 |
+
2788,181600,14.414
|
| 456 |
+
2792,182000,20.08
|
| 457 |
+
2797,182400,16.0141
|
| 458 |
+
2801,182800,20.2584
|
| 459 |
+
2807,183200,14.7895
|
| 460 |
+
2812,183600,14.5707
|
| 461 |
+
2818,184000,13.1659
|
| 462 |
+
2825,184400,11.4905
|
| 463 |
+
2830,184800,15.3281
|
| 464 |
+
2835,185200,15.8941
|
| 465 |
+
2841,185600,11.6085
|
| 466 |
+
2846,186000,12.8389
|
| 467 |
+
2853,186400,12.1964
|
| 468 |
+
2858,186800,15.4969
|
| 469 |
+
2863,187200,16.2343
|
| 470 |
+
2868,187600,13.6736
|
| 471 |
+
2874,188000,13.7629
|
| 472 |
+
2879,188400,18.2208
|
| 473 |
+
2884,188800,14.7769
|
| 474 |
+
2888,189200,20.1099
|
| 475 |
+
2892,189600,20.665
|
| 476 |
+
2897,190000,14.9009
|
| 477 |
+
2902,190400,15.9363
|
| 478 |
+
2908,190800,13.7088
|
| 479 |
+
2914,191200,12.0374
|
| 480 |
+
2919,191600,11.6974
|
| 481 |
+
2923,192000,19.4542
|
| 482 |
+
2929,192400,12.6135
|
| 483 |
+
2935,192800,13.6599
|
| 484 |
+
2941,193200,14.4815
|
| 485 |
+
2945,193600,18.2334
|
| 486 |
+
2950,194000,13.9492
|
| 487 |
+
2956,194400,12.6527
|
| 488 |
+
2960,194800,19.9709
|
| 489 |
+
2964,195200,20.1638
|
| 490 |
+
2970,195600,12.4923
|
| 491 |
+
2976,196000,13.7684
|
| 492 |
+
2983,196400,11.1614
|
| 493 |
+
2988,196800,12.4877
|
| 494 |
+
2996,197200,6.8995
|
| 495 |
+
3005,197600,7.2801
|
| 496 |
+
3013,198000,10.8011
|
| 497 |
+
3018,198400,13.5368
|
| 498 |
+
3023,198800,15.2503
|
| 499 |
+
3032,199200,8.255
|
| 500 |
+
3037,199600,15.5807
|
| 501 |
+
3044,200000,9.8756
|
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_3.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
24,400,1.1789
|
| 3 |
+
42,800,2.1233
|
| 4 |
+
61,1200,1.5503
|
| 5 |
+
75,1600,2.1531
|
| 6 |
+
94,2000,1.8369
|
| 7 |
+
103,2400,3.5188
|
| 8 |
+
113,2800,3.0467
|
| 9 |
+
126,3200,2.5075
|
| 10 |
+
137,3600,2.784
|
| 11 |
+
142,4000,5.3221
|
| 12 |
+
149,4400,4.356
|
| 13 |
+
158,4800,3.5811
|
| 14 |
+
166,5200,4.0608
|
| 15 |
+
174,5600,3.8915
|
| 16 |
+
178,6000,7.7872
|
| 17 |
+
184,6400,5.5597
|
| 18 |
+
189,6800,6.3654
|
| 19 |
+
193,7200,6.812
|
| 20 |
+
198,7600,6.1092
|
| 21 |
+
202,8000,7.5749
|
| 22 |
+
207,8400,6.2928
|
| 23 |
+
213,8800,5.3826
|
| 24 |
+
217,9200,6.452
|
| 25 |
+
221,9600,7.7105
|
| 26 |
+
225,10000,7.7963
|
| 27 |
+
231,10400,5.4617
|
| 28 |
+
235,10800,7.5387
|
| 29 |
+
239,11200,7.7388
|
| 30 |
+
243,11600,7.2572
|
| 31 |
+
247,12000,7.8198
|
| 32 |
+
251,12400,7.7371
|
| 33 |
+
255,12800,7.8481
|
| 34 |
+
260,13200,7.7476
|
| 35 |
+
264,13600,7.8618
|
| 36 |
+
268,14000,6.7995
|
| 37 |
+
272,14400,7.8333
|
| 38 |
+
277,14800,7.2725
|
| 39 |
+
282,15200,5.7045
|
| 40 |
+
286,15600,7.8592
|
| 41 |
+
290,16000,7.9573
|
| 42 |
+
294,16400,7.996
|
| 43 |
+
298,16800,8.0243
|
| 44 |
+
302,17200,7.1519
|
| 45 |
+
307,17600,7.6693
|
| 46 |
+
312,18000,6.182
|
| 47 |
+
317,18400,6.3662
|
| 48 |
+
321,18800,6.6984
|
| 49 |
+
326,19200,7.1547
|
| 50 |
+
332,19600,5.4313
|
| 51 |
+
337,20000,6.58
|
| 52 |
+
341,20400,6.4143
|
| 53 |
+
348,20800,4.6998
|
| 54 |
+
354,21200,6.0579
|
| 55 |
+
359,21600,6.3869
|
| 56 |
+
363,22000,8.4537
|
| 57 |
+
367,22400,7.6145
|
| 58 |
+
371,22800,8.7217
|
| 59 |
+
375,23200,8.0302
|
| 60 |
+
379,23600,7.855
|
| 61 |
+
383,24000,7.9192
|
| 62 |
+
388,24400,7.1253
|
| 63 |
+
392,24800,7.2569
|
| 64 |
+
396,25200,8.0458
|
| 65 |
+
400,25600,8.0878
|
| 66 |
+
404,26000,7.6454
|
| 67 |
+
411,26400,5.4292
|
| 68 |
+
417,26800,6.2442
|
| 69 |
+
421,27200,7.1395
|
| 70 |
+
425,27600,8.138
|
| 71 |
+
429,28000,7.4518
|
| 72 |
+
433,28400,8.2299
|
| 73 |
+
437,28800,8.0705
|
| 74 |
+
443,29200,6.4598
|
| 75 |
+
448,29600,6.5685
|
| 76 |
+
453,30000,7.0016
|
| 77 |
+
457,30400,8.3606
|
| 78 |
+
461,30800,8.0059
|
| 79 |
+
466,31200,6.8728
|
| 80 |
+
470,31600,8.0419
|
| 81 |
+
475,32000,6.5841
|
| 82 |
+
479,32400,8.576
|
| 83 |
+
484,32800,7.5318
|
| 84 |
+
489,33200,7.0436
|
| 85 |
+
493,33600,8.4315
|
| 86 |
+
500,34000,5.9633
|
| 87 |
+
506,34400,6.3842
|
| 88 |
+
511,34800,8.348
|
| 89 |
+
515,35200,11.5177
|
| 90 |
+
520,35600,8.375
|
| 91 |
+
525,36000,8.8358
|
| 92 |
+
530,36400,8.3674
|
| 93 |
+
536,36800,8.5203
|
| 94 |
+
541,37200,8.0434
|
| 95 |
+
548,37600,5.7622
|
| 96 |
+
552,38000,12.1796
|
| 97 |
+
556,38400,11.9279
|
| 98 |
+
562,38800,9.0523
|
| 99 |
+
566,39200,10.6577
|
| 100 |
+
571,39600,9.9867
|
| 101 |
+
575,40000,12.9051
|
| 102 |
+
580,40400,9.6013
|
| 103 |
+
584,40800,9.0039
|
| 104 |
+
588,41200,11.6103
|
| 105 |
+
593,41600,11.3596
|
| 106 |
+
599,42000,8.0795
|
| 107 |
+
604,42400,9.4068
|
| 108 |
+
608,42800,10.1782
|
| 109 |
+
613,43200,10.2511
|
| 110 |
+
618,43600,8.7243
|
| 111 |
+
627,44000,6.9675
|
| 112 |
+
633,44400,6.1848
|
| 113 |
+
638,44800,12.508
|
| 114 |
+
643,45200,9.435
|
| 115 |
+
649,45600,10.0267
|
| 116 |
+
654,46000,12.8219
|
| 117 |
+
658,46400,14.4656
|
| 118 |
+
663,46800,12.4416
|
| 119 |
+
672,47200,7.6444
|
| 120 |
+
679,47600,6.7948
|
| 121 |
+
686,48000,10.5707
|
| 122 |
+
692,48400,11.3461
|
| 123 |
+
696,48800,15.5451
|
| 124 |
+
703,49200,7.6138
|
| 125 |
+
709,49600,14.2849
|
| 126 |
+
713,50000,17.6588
|
| 127 |
+
717,50400,16.8401
|
| 128 |
+
724,50800,9.6536
|
| 129 |
+
732,51200,7.0814
|
| 130 |
+
737,51600,15.1825
|
| 131 |
+
745,52000,9.6236
|
| 132 |
+
749,52400,16.5895
|
| 133 |
+
757,52800,8.9825
|
| 134 |
+
763,53200,13.4571
|
| 135 |
+
769,53600,12.3041
|
| 136 |
+
774,54000,13.3599
|
| 137 |
+
779,54400,16.2315
|
| 138 |
+
783,54800,18.7432
|
| 139 |
+
788,55200,14.2708
|
| 140 |
+
792,55600,19.0263
|
| 141 |
+
796,56000,19.6853
|
| 142 |
+
804,56400,8.6649
|
| 143 |
+
810,56800,13.5299
|
| 144 |
+
815,57200,15.4356
|
| 145 |
+
822,57600,8.4714
|
| 146 |
+
827,58000,14.8482
|
| 147 |
+
832,58400,13.1313
|
| 148 |
+
836,58800,17.7723
|
| 149 |
+
841,59200,15.1604
|
| 150 |
+
846,59600,16.5815
|
| 151 |
+
850,60000,18.4684
|
| 152 |
+
856,60400,11.723
|
| 153 |
+
860,60800,15.37
|
| 154 |
+
866,61200,14.931
|
| 155 |
+
871,61600,11.3223
|
| 156 |
+
876,62000,11.9169
|
| 157 |
+
883,62400,12.8174
|
| 158 |
+
888,62800,13.5601
|
| 159 |
+
893,63200,15.0672
|
| 160 |
+
898,63600,11.3614
|
| 161 |
+
903,64000,15.3905
|
| 162 |
+
908,64400,13.6269
|
| 163 |
+
912,64800,17.5681
|
| 164 |
+
917,65200,13.3706
|
| 165 |
+
922,65600,13.8827
|
| 166 |
+
927,66000,13.7525
|
| 167 |
+
932,66400,15.7249
|
| 168 |
+
937,66800,12.4379
|
| 169 |
+
943,67200,12.0458
|
| 170 |
+
947,67600,18.2474
|
| 171 |
+
953,68000,11.2208
|
| 172 |
+
961,68400,5.6857
|
| 173 |
+
966,68800,11.2325
|
| 174 |
+
977,69200,6.065
|
| 175 |
+
981,69600,12.7678
|
| 176 |
+
986,70000,15.0279
|
| 177 |
+
993,70400,11.0013
|
| 178 |
+
1001,70800,6.6314
|
| 179 |
+
1008,71200,8.9485
|
| 180 |
+
1015,71600,11.0937
|
| 181 |
+
1020,72000,13.6421
|
| 182 |
+
1027,72400,9.3296
|
| 183 |
+
1033,72800,12.498
|
| 184 |
+
1037,73200,13.5609
|
| 185 |
+
1042,73600,15.2572
|
| 186 |
+
1049,74000,9.7781
|
| 187 |
+
1059,74400,6.6146
|
| 188 |
+
1067,74800,8.4938
|
| 189 |
+
1078,75200,5.6932
|
| 190 |
+
1085,75600,9.7875
|
| 191 |
+
1089,76000,14.85
|
| 192 |
+
1095,76400,11.959
|
| 193 |
+
1099,76800,18.4955
|
| 194 |
+
1106,77200,11.1234
|
| 195 |
+
1110,77600,16.6448
|
| 196 |
+
1116,78000,14.7683
|
| 197 |
+
1121,78400,15.4418
|
| 198 |
+
1126,78800,15.6231
|
| 199 |
+
1133,79200,8.6739
|
| 200 |
+
1137,79600,19.0853
|
| 201 |
+
1141,80000,19.3869
|
| 202 |
+
1146,80400,15.0416
|
| 203 |
+
1151,80800,16.4105
|
| 204 |
+
1155,81200,18.5403
|
| 205 |
+
1162,81600,10.4387
|
| 206 |
+
1168,82000,12.8525
|
| 207 |
+
1173,82400,15.0793
|
| 208 |
+
1179,82800,11.1054
|
| 209 |
+
1184,83200,15.8964
|
| 210 |
+
1189,83600,15.9822
|
| 211 |
+
1193,84000,20.0662
|
| 212 |
+
1197,84400,19.8951
|
| 213 |
+
1203,84800,12.0668
|
| 214 |
+
1207,85200,19.6185
|
| 215 |
+
1216,85600,9.3892
|
| 216 |
+
1221,86000,13.0625
|
| 217 |
+
1228,86400,10.6035
|
| 218 |
+
1236,86800,8.5601
|
| 219 |
+
1243,87200,11.3572
|
| 220 |
+
1248,87600,12.3989
|
| 221 |
+
1255,88000,10.0261
|
| 222 |
+
1261,88400,12.0088
|
| 223 |
+
1265,88800,18.8742
|
| 224 |
+
1272,89200,13.0981
|
| 225 |
+
1281,89600,7.8559
|
| 226 |
+
1288,90000,11.3191
|
| 227 |
+
1293,90400,16.281
|
| 228 |
+
1299,90800,12.2251
|
| 229 |
+
1306,91200,10.0396
|
| 230 |
+
1318,91600,7.6743
|
| 231 |
+
1322,92000,17.0425
|
| 232 |
+
1328,92400,13.7841
|
| 233 |
+
1335,92800,11.843
|
| 234 |
+
1339,93200,18.7906
|
| 235 |
+
1348,93600,7.6509
|
| 236 |
+
1355,94000,11.5199
|
| 237 |
+
1360,94400,15.8789
|
| 238 |
+
1365,94800,16.2332
|
| 239 |
+
1372,95200,8.0954
|
| 240 |
+
1378,95600,12.8351
|
| 241 |
+
1386,96000,10.1123
|
| 242 |
+
1391,96400,13.7096
|
| 243 |
+
1398,96800,9.9747
|
| 244 |
+
1405,97200,9.7309
|
| 245 |
+
1413,97600,10.6216
|
| 246 |
+
1417,98000,19.8107
|
| 247 |
+
1425,98400,8.7529
|
| 248 |
+
1430,98800,12.5949
|
| 249 |
+
1437,99200,10.2796
|
| 250 |
+
1442,99600,14.1966
|
| 251 |
+
1448,100000,12.1246
|
| 252 |
+
1453,100400,14.1483
|
| 253 |
+
1459,100800,14.6867
|
| 254 |
+
1466,101200,11.5653
|
| 255 |
+
1470,101600,19.7992
|
| 256 |
+
1475,102000,16.2707
|
| 257 |
+
1480,102400,12.707
|
| 258 |
+
1490,102800,7.9692
|
| 259 |
+
1496,103200,14.6578
|
| 260 |
+
1504,103600,8.9977
|
| 261 |
+
1512,104000,8.5685
|
| 262 |
+
1516,104400,18.8262
|
| 263 |
+
1525,104800,8.1862
|
| 264 |
+
1532,105200,11.2229
|
| 265 |
+
1539,105600,10.9358
|
| 266 |
+
1543,106000,15.9632
|
| 267 |
+
1551,106400,11.0493
|
| 268 |
+
1555,106800,15.4412
|
| 269 |
+
1561,107200,12.9739
|
| 270 |
+
1568,107600,11.9314
|
| 271 |
+
1573,108000,15.9377
|
| 272 |
+
1580,108400,12.2091
|
| 273 |
+
1586,108800,13.3599
|
| 274 |
+
1592,109200,10.2974
|
| 275 |
+
1598,109600,12.8394
|
| 276 |
+
1605,110000,11.8069
|
| 277 |
+
1611,110400,15.0917
|
| 278 |
+
1619,110800,8.6425
|
| 279 |
+
1626,111200,11.7447
|
| 280 |
+
1632,111600,11.9684
|
| 281 |
+
1637,112000,17.0765
|
| 282 |
+
1643,112400,10.8535
|
| 283 |
+
1647,112800,20.3134
|
| 284 |
+
1654,113200,14.2449
|
| 285 |
+
1658,113600,15.6927
|
| 286 |
+
1663,114000,16.4525
|
| 287 |
+
1667,114400,20.7007
|
| 288 |
+
1675,114800,10.9193
|
| 289 |
+
1681,115200,13.8018
|
| 290 |
+
1686,115600,12.168
|
| 291 |
+
1691,116000,16.1065
|
| 292 |
+
1696,116400,16.9738
|
| 293 |
+
1703,116800,12.1336
|
| 294 |
+
1707,117200,17.4688
|
| 295 |
+
1711,117600,20.7223
|
| 296 |
+
1717,118000,12.8775
|
| 297 |
+
1724,118400,10.7785
|
| 298 |
+
1729,118800,16.0037
|
| 299 |
+
1737,119200,9.3322
|
| 300 |
+
1741,119600,15.5575
|
| 301 |
+
1746,120000,16.7479
|
| 302 |
+
1759,120400,4.1965
|
| 303 |
+
1774,120800,5.0799
|
| 304 |
+
1784,121200,7.0285
|
| 305 |
+
1797,121600,6.1574
|
| 306 |
+
1813,122000,4.1487
|
| 307 |
+
1824,122400,6.7635
|
| 308 |
+
1832,122800,10.5592
|
| 309 |
+
1838,123200,11.4983
|
| 310 |
+
1850,123600,6.0096
|
| 311 |
+
1857,124000,13.2751
|
| 312 |
+
1865,124400,10.6374
|
| 313 |
+
1872,124800,10.4461
|
| 314 |
+
1879,125200,10.8001
|
| 315 |
+
1887,125600,10.5679
|
| 316 |
+
1893,126000,13.6904
|
| 317 |
+
1899,126400,13.3654
|
| 318 |
+
1906,126800,11.1289
|
| 319 |
+
1911,127200,14.4808
|
| 320 |
+
1917,127600,14.7797
|
| 321 |
+
1922,128000,16.3632
|
| 322 |
+
1927,128400,14.8871
|
| 323 |
+
1931,128800,18.4437
|
| 324 |
+
1936,129200,13.8601
|
| 325 |
+
1941,129600,19.323
|
| 326 |
+
1948,130000,11.3219
|
| 327 |
+
1957,130400,6.0778
|
| 328 |
+
1963,130800,13.8097
|
| 329 |
+
1968,131200,17.4356
|
| 330 |
+
1972,131600,15.6235
|
| 331 |
+
1978,132000,13.9724
|
| 332 |
+
1983,132400,16.7818
|
| 333 |
+
1990,132800,11.7503
|
| 334 |
+
1995,133200,16.7384
|
| 335 |
+
2000,133600,11.0031
|
| 336 |
+
2007,134000,13.208
|
| 337 |
+
2013,134400,12.1374
|
| 338 |
+
2018,134800,10.6665
|
| 339 |
+
2022,135200,20.4056
|
| 340 |
+
2027,135600,13.8923
|
| 341 |
+
2033,136000,14.9732
|
| 342 |
+
2039,136400,11.7272
|
| 343 |
+
2049,136800,5.9728
|
| 344 |
+
2055,137200,13.4777
|
| 345 |
+
2061,137600,12.7418
|
| 346 |
+
2070,138000,7.9501
|
| 347 |
+
2076,138400,9.8375
|
| 348 |
+
2085,138800,8.8678
|
| 349 |
+
2096,139200,6.0538
|
| 350 |
+
2104,139600,7.5676
|
| 351 |
+
2112,140000,10.4544
|
| 352 |
+
2126,140400,3.3254
|
| 353 |
+
2135,140800,8.5089
|
| 354 |
+
2147,141200,5.0491
|
| 355 |
+
2155,141600,8.8535
|
| 356 |
+
2165,142000,4.7412
|
| 357 |
+
2174,142400,6.3371
|
| 358 |
+
2180,142800,13.6134
|
| 359 |
+
2189,143200,7.7075
|
| 360 |
+
2202,143600,4.1581
|
| 361 |
+
2206,144000,15.2914
|
| 362 |
+
2217,144400,7.21
|
| 363 |
+
2222,144800,14.1197
|
| 364 |
+
2229,145200,12.3017
|
| 365 |
+
2238,145600,7.5941
|
| 366 |
+
2245,146000,8.9586
|
| 367 |
+
2252,146400,11.0808
|
| 368 |
+
2257,146800,14.2912
|
| 369 |
+
2263,147200,13.1251
|
| 370 |
+
2269,147600,12.861
|
| 371 |
+
2274,148000,12.1163
|
| 372 |
+
2279,148400,16.2446
|
| 373 |
+
2284,148800,15.9297
|
| 374 |
+
2288,149200,14.7428
|
| 375 |
+
2295,149600,10.4804
|
| 376 |
+
2302,150000,12.5935
|
| 377 |
+
2308,150400,10.2608
|
| 378 |
+
2315,150800,11.9565
|
| 379 |
+
2321,151200,10.1254
|
| 380 |
+
2325,151600,17.2792
|
| 381 |
+
2332,152000,10.5685
|
| 382 |
+
2343,152400,7.1165
|
| 383 |
+
2355,152800,5.5324
|
| 384 |
+
2362,153200,10.2419
|
| 385 |
+
2367,153600,17.1911
|
| 386 |
+
2372,154000,16.5306
|
| 387 |
+
2377,154400,13.0943
|
| 388 |
+
2383,154800,17.1094
|
| 389 |
+
2390,155200,11.8845
|
| 390 |
+
2394,155600,21.3396
|
| 391 |
+
2400,156000,12.3499
|
| 392 |
+
2409,156400,7.6367
|
| 393 |
+
2414,156800,19.1307
|
| 394 |
+
2418,157200,21.5015
|
| 395 |
+
2424,157600,14.4416
|
| 396 |
+
2431,158000,11.1522
|
| 397 |
+
2438,158400,11.1949
|
| 398 |
+
2450,158800,7.2224
|
| 399 |
+
2457,159200,11.6111
|
| 400 |
+
2468,159600,5.0266
|
| 401 |
+
2476,160000,10.68
|
| 402 |
+
2487,160400,5.6073
|
| 403 |
+
2493,160800,13.4424
|
| 404 |
+
2503,161200,6.8739
|
| 405 |
+
2512,161600,7.4788
|
| 406 |
+
2520,162000,10.2704
|
| 407 |
+
2531,162400,6.3896
|
| 408 |
+
2537,162800,10.6474
|
| 409 |
+
2542,163200,14.1442
|
| 410 |
+
2555,163600,5.9226
|
| 411 |
+
2566,164000,6.0377
|
| 412 |
+
2575,164400,7.4593
|
| 413 |
+
2588,164800,4.9726
|
| 414 |
+
2594,165200,10.6298
|
| 415 |
+
2599,165600,12.0844
|
| 416 |
+
2616,166000,3.8811
|
| 417 |
+
2626,166400,4.2739
|
| 418 |
+
2634,166800,9.1184
|
| 419 |
+
2641,167200,8.623
|
| 420 |
+
2647,167600,12.8333
|
| 421 |
+
2657,168000,5.8736
|
| 422 |
+
2663,168400,7.3345
|
| 423 |
+
2671,168800,6.602
|
| 424 |
+
2680,169200,7.7097
|
| 425 |
+
2692,169600,3.3408
|
| 426 |
+
2698,170000,12.0658
|
| 427 |
+
2710,170400,4.496
|
| 428 |
+
2719,170800,7.4669
|
| 429 |
+
2729,171200,6.8099
|
| 430 |
+
2738,171600,7.104
|
| 431 |
+
2744,172000,11.9163
|
| 432 |
+
2751,172400,11.3711
|
| 433 |
+
2757,172800,10.847
|
| 434 |
+
2765,173200,7.2486
|
| 435 |
+
2774,173600,6.5974
|
| 436 |
+
2783,174000,9.2593
|
| 437 |
+
2790,174400,10.1808
|
| 438 |
+
2797,174800,7.8738
|
| 439 |
+
2805,175200,8.1315
|
| 440 |
+
2811,175600,12.4611
|
| 441 |
+
2817,176000,11.2274
|
| 442 |
+
2824,176400,9.6997
|
| 443 |
+
2829,176800,16.383
|
| 444 |
+
2835,177200,9.7906
|
| 445 |
+
2841,177600,9.8226
|
| 446 |
+
2850,178000,8.9189
|
| 447 |
+
2856,178400,8.719
|
| 448 |
+
2862,178800,14.3174
|
| 449 |
+
2868,179200,9.976
|
| 450 |
+
2873,179600,16.595
|
| 451 |
+
2879,180000,11.7942
|
| 452 |
+
2886,180400,8.4306
|
| 453 |
+
2890,180800,19.6647
|
| 454 |
+
2895,181200,16.7307
|
| 455 |
+
2899,181600,16.9061
|
| 456 |
+
2907,182000,8.0865
|
| 457 |
+
2912,182400,16.0249
|
| 458 |
+
2917,182800,12.6253
|
| 459 |
+
2924,183200,10.9561
|
| 460 |
+
2930,183600,9.5365
|
| 461 |
+
2935,184000,11.5344
|
| 462 |
+
2943,184400,7.9225
|
| 463 |
+
2949,184800,11.1286
|
| 464 |
+
2959,185200,7.7913
|
| 465 |
+
2964,185600,14.7933
|
| 466 |
+
2972,186000,7.3943
|
| 467 |
+
2979,186400,10.7152
|
| 468 |
+
2986,186800,8.1646
|
| 469 |
+
2992,187200,11.7603
|
| 470 |
+
2997,187600,13.334
|
| 471 |
+
3003,188000,12.5786
|
| 472 |
+
3009,188400,12.2962
|
| 473 |
+
3015,188800,14.3177
|
| 474 |
+
3019,189200,15.0194
|
| 475 |
+
3024,189600,19.2136
|
| 476 |
+
3029,190000,15.3857
|
| 477 |
+
3035,190400,9.9101
|
| 478 |
+
3043,190800,10.0892
|
| 479 |
+
3047,191200,17.6767
|
| 480 |
+
3051,191600,19.3897
|
| 481 |
+
3056,192000,15.6982
|
| 482 |
+
3060,192400,19.0069
|
| 483 |
+
3065,192800,17.3691
|
| 484 |
+
3070,193200,13.5637
|
| 485 |
+
3075,193600,14.7953
|
| 486 |
+
3080,194000,16.2313
|
| 487 |
+
3086,194400,13.6929
|
| 488 |
+
3091,194800,17.0558
|
| 489 |
+
3095,195200,17.121
|
| 490 |
+
3100,195600,14.9038
|
| 491 |
+
3108,196000,9.5739
|
| 492 |
+
3112,196400,16.1362
|
| 493 |
+
3118,196800,9.8704
|
| 494 |
+
3127,197200,8.0936
|
| 495 |
+
3132,197600,17.4588
|
| 496 |
+
3137,198000,12.1721
|
| 497 |
+
3143,198400,9.6148
|
| 498 |
+
3147,198800,18.6538
|
| 499 |
+
3151,199200,19.5497
|
| 500 |
+
3156,199600,15.3876
|
| 501 |
+
3163,200000,10.7067
|
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_4.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
20,400,1.5088
|
| 3 |
+
42,800,1.3079
|
| 4 |
+
65,1200,1.3319
|
| 5 |
+
88,1600,1.3245
|
| 6 |
+
101,2000,2.2609
|
| 7 |
+
116,2400,2.0004
|
| 8 |
+
122,2800,4.3646
|
| 9 |
+
130,3200,4.0763
|
| 10 |
+
138,3600,3.5934
|
| 11 |
+
147,4000,3.15
|
| 12 |
+
158,4400,3.0842
|
| 13 |
+
164,4800,4.0592
|
| 14 |
+
171,5200,4.5173
|
| 15 |
+
177,5600,4.8523
|
| 16 |
+
183,6000,4.215
|
| 17 |
+
190,6400,5.0604
|
| 18 |
+
197,6800,4.5393
|
| 19 |
+
201,7200,7.2405
|
| 20 |
+
207,7600,4.3459
|
| 21 |
+
213,8000,4.605
|
| 22 |
+
220,8400,3.9493
|
| 23 |
+
227,8800,4.0154
|
| 24 |
+
233,9200,5.1817
|
| 25 |
+
237,9600,7.2706
|
| 26 |
+
241,10000,6.1275
|
| 27 |
+
246,10400,6.517
|
| 28 |
+
251,10800,6.4718
|
| 29 |
+
256,11200,5.3332
|
| 30 |
+
263,11600,4.8526
|
| 31 |
+
267,12000,7.5372
|
| 32 |
+
273,12400,4.4563
|
| 33 |
+
278,12800,6.2474
|
| 34 |
+
283,13200,5.1259
|
| 35 |
+
288,13600,6.6405
|
| 36 |
+
293,14000,5.4922
|
| 37 |
+
297,14400,7.3697
|
| 38 |
+
301,14800,7.46
|
| 39 |
+
306,15200,6.8119
|
| 40 |
+
310,15600,7.5148
|
| 41 |
+
314,16000,7.566
|
| 42 |
+
318,16400,5.6766
|
| 43 |
+
324,16800,5.7109
|
| 44 |
+
328,17200,6.9979
|
| 45 |
+
333,17600,6.6446
|
| 46 |
+
337,18000,7.1488
|
| 47 |
+
342,18400,6.612
|
| 48 |
+
348,18800,5.6986
|
| 49 |
+
352,19200,7.4129
|
| 50 |
+
357,19600,6.3231
|
| 51 |
+
364,20000,4.8872
|
| 52 |
+
369,20400,6.2171
|
| 53 |
+
373,20800,7.6549
|
| 54 |
+
379,21200,5.0764
|
| 55 |
+
386,21600,3.8937
|
| 56 |
+
392,22000,4.8971
|
| 57 |
+
396,22400,7.7155
|
| 58 |
+
401,22800,6.522
|
| 59 |
+
405,23200,6.6372
|
| 60 |
+
410,23600,7.358
|
| 61 |
+
417,24000,4.7262
|
| 62 |
+
423,24400,4.3083
|
| 63 |
+
428,24800,6.6989
|
| 64 |
+
432,25200,7.4748
|
| 65 |
+
437,25600,6.8524
|
| 66 |
+
441,26000,7.4783
|
| 67 |
+
447,26400,5.58
|
| 68 |
+
451,26800,7.5209
|
| 69 |
+
457,27200,5.4526
|
| 70 |
+
461,27600,7.2869
|
| 71 |
+
466,28000,6.9935
|
| 72 |
+
470,28400,8.5876
|
| 73 |
+
475,28800,6.8761
|
| 74 |
+
480,29200,5.9352
|
| 75 |
+
486,29600,6.2676
|
| 76 |
+
490,30000,7.6652
|
| 77 |
+
494,30400,8.3381
|
| 78 |
+
499,30800,9.2616
|
| 79 |
+
504,31200,6.5226
|
| 80 |
+
509,31600,6.8202
|
| 81 |
+
513,32000,8.7099
|
| 82 |
+
519,32400,7.2073
|
| 83 |
+
523,32800,9.4631
|
| 84 |
+
530,33200,5.9642
|
| 85 |
+
534,33600,9.2221
|
| 86 |
+
539,34000,7.8802
|
| 87 |
+
545,34400,7.4494
|
| 88 |
+
549,34800,10.2656
|
| 89 |
+
554,35200,8.5635
|
| 90 |
+
559,35600,8.5277
|
| 91 |
+
564,36000,9.2301
|
| 92 |
+
570,36400,7.3293
|
| 93 |
+
577,36800,5.9939
|
| 94 |
+
583,37200,8.0029
|
| 95 |
+
590,37600,6.5089
|
| 96 |
+
595,38000,9.9265
|
| 97 |
+
600,38400,7.7368
|
| 98 |
+
605,38800,10.0703
|
| 99 |
+
610,39200,9.405
|
| 100 |
+
615,39600,9.2747
|
| 101 |
+
620,40000,10.6746
|
| 102 |
+
625,40400,10.8504
|
| 103 |
+
629,40800,12.3268
|
| 104 |
+
634,41200,7.9002
|
| 105 |
+
638,41600,12.2213
|
| 106 |
+
643,42000,10.5923
|
| 107 |
+
649,42400,8.3824
|
| 108 |
+
653,42800,12.3659
|
| 109 |
+
659,43200,7.8569
|
| 110 |
+
664,43600,7.7947
|
| 111 |
+
668,44000,11.9043
|
| 112 |
+
673,44400,11.3176
|
| 113 |
+
677,44800,9.5839
|
| 114 |
+
683,45200,8.7337
|
| 115 |
+
687,45600,11.8954
|
| 116 |
+
691,46000,10.9464
|
| 117 |
+
697,46400,9.0987
|
| 118 |
+
701,46800,11.5694
|
| 119 |
+
706,47200,10.0594
|
| 120 |
+
711,47600,8.6043
|
| 121 |
+
715,48000,11.3733
|
| 122 |
+
721,48400,7.4597
|
| 123 |
+
727,48800,8.6735
|
| 124 |
+
733,49200,9.2683
|
| 125 |
+
737,49600,12.2719
|
| 126 |
+
743,50000,8.5308
|
| 127 |
+
748,50400,7.8866
|
| 128 |
+
754,50800,9.0977
|
| 129 |
+
758,51200,10.5799
|
| 130 |
+
763,51600,10.2186
|
| 131 |
+
769,52000,9.4147
|
| 132 |
+
774,52400,9.8676
|
| 133 |
+
779,52800,9.8646
|
| 134 |
+
783,53200,11.9396
|
| 135 |
+
788,53600,8.3464
|
| 136 |
+
792,54000,10.8994
|
| 137 |
+
800,54400,7.1043
|
| 138 |
+
804,54800,11.0494
|
| 139 |
+
810,55200,7.2492
|
| 140 |
+
816,55600,9.008
|
| 141 |
+
820,56000,11.969
|
| 142 |
+
824,56400,12.3475
|
| 143 |
+
830,56800,8.1093
|
| 144 |
+
836,57200,8.6649
|
| 145 |
+
841,57600,10.3044
|
| 146 |
+
848,58000,6.3358
|
| 147 |
+
853,58400,8.6059
|
| 148 |
+
858,58800,10.5647
|
| 149 |
+
862,59200,11.937
|
| 150 |
+
867,59600,10.6032
|
| 151 |
+
871,60000,12.1926
|
| 152 |
+
879,60400,6.4127
|
| 153 |
+
887,60800,6.1397
|
| 154 |
+
892,61200,9.5413
|
| 155 |
+
897,61600,9.9852
|
| 156 |
+
905,62000,6.0649
|
| 157 |
+
909,62400,12.114
|
| 158 |
+
913,62800,10.7127
|
| 159 |
+
919,63200,7.5118
|
| 160 |
+
928,63600,5.1292
|
| 161 |
+
933,64000,9.7503
|
| 162 |
+
938,64400,11.1308
|
| 163 |
+
943,64800,9.8459
|
| 164 |
+
949,65200,9.6871
|
| 165 |
+
955,65600,9.5914
|
| 166 |
+
962,66000,6.487
|
| 167 |
+
970,66400,5.1927
|
| 168 |
+
980,66800,5.9668
|
| 169 |
+
984,67200,13.1207
|
| 170 |
+
988,67600,13.5101
|
| 171 |
+
995,68000,7.9993
|
| 172 |
+
1001,68400,7.3224
|
| 173 |
+
1006,68800,11.6941
|
| 174 |
+
1011,69200,8.4031
|
| 175 |
+
1015,69600,14.3545
|
| 176 |
+
1024,70000,6.3113
|
| 177 |
+
1029,70400,12.9317
|
| 178 |
+
1034,70800,12.4254
|
| 179 |
+
1040,71200,8.7256
|
| 180 |
+
1045,71600,12.1083
|
| 181 |
+
1049,72000,14.6145
|
| 182 |
+
1056,72400,8.6096
|
| 183 |
+
1062,72800,10.0664
|
| 184 |
+
1066,73200,14.5826
|
| 185 |
+
1071,73600,12.4316
|
| 186 |
+
1076,74000,11.2348
|
| 187 |
+
1081,74400,11.6494
|
| 188 |
+
1086,74800,8.9371
|
| 189 |
+
1092,75200,11.5723
|
| 190 |
+
1096,75600,14.9071
|
| 191 |
+
1100,76000,13.8022
|
| 192 |
+
1104,76400,12.762
|
| 193 |
+
1109,76800,12.5704
|
| 194 |
+
1114,77200,13.2544
|
| 195 |
+
1121,77600,7.8204
|
| 196 |
+
1127,78000,11.0577
|
| 197 |
+
1132,78400,10.2288
|
| 198 |
+
1137,78800,12.0947
|
| 199 |
+
1143,79200,10.4587
|
| 200 |
+
1150,79600,8.3463
|
| 201 |
+
1154,80000,15.744
|
| 202 |
+
1159,80400,13.1713
|
| 203 |
+
1166,80800,7.4983
|
| 204 |
+
1173,81200,9.3079
|
| 205 |
+
1177,81600,14.3302
|
| 206 |
+
1183,82000,10.1948
|
| 207 |
+
1187,82400,15.5844
|
| 208 |
+
1193,82800,10.8247
|
| 209 |
+
1198,83200,9.861
|
| 210 |
+
1203,83600,13.0967
|
| 211 |
+
1208,84000,14.2775
|
| 212 |
+
1212,84400,15.9856
|
| 213 |
+
1220,84800,7.0939
|
| 214 |
+
1224,85200,13.172
|
| 215 |
+
1229,85600,14.9623
|
| 216 |
+
1234,86000,10.7564
|
| 217 |
+
1240,86400,12.0071
|
| 218 |
+
1245,86800,12.1013
|
| 219 |
+
1250,87200,11.8996
|
| 220 |
+
1254,87600,14.3552
|
| 221 |
+
1258,88000,15.6836
|
| 222 |
+
1263,88400,14.0425
|
| 223 |
+
1267,88800,16.2951
|
| 224 |
+
1274,89200,8.0649
|
| 225 |
+
1279,89600,12.0771
|
| 226 |
+
1287,90000,8.9504
|
| 227 |
+
1293,90400,9.2019
|
| 228 |
+
1298,90800,13.5403
|
| 229 |
+
1302,91200,16.2009
|
| 230 |
+
1306,91600,15.8977
|
| 231 |
+
1311,92000,12.6036
|
| 232 |
+
1318,92400,9.4293
|
| 233 |
+
1324,92800,11.8471
|
| 234 |
+
1329,93200,9.6477
|
| 235 |
+
1334,93600,13.0578
|
| 236 |
+
1339,94000,12.0729
|
| 237 |
+
1346,94400,9.9596
|
| 238 |
+
1350,94800,16.223
|
| 239 |
+
1355,95200,13.1369
|
| 240 |
+
1364,95600,7.2675
|
| 241 |
+
1369,96000,11.9901
|
| 242 |
+
1377,96400,8.69
|
| 243 |
+
1382,96800,12.3589
|
| 244 |
+
1389,97200,8.9372
|
| 245 |
+
1400,97600,5.9625
|
| 246 |
+
1405,98000,9.6908
|
| 247 |
+
1412,98400,8.4351
|
| 248 |
+
1418,98800,12.2907
|
| 249 |
+
1426,99200,7.9709
|
| 250 |
+
1432,99600,8.709
|
| 251 |
+
1437,100000,12.9995
|
| 252 |
+
1442,100400,12.5061
|
| 253 |
+
1448,100800,11.813
|
| 254 |
+
1453,101200,10.3663
|
| 255 |
+
1459,101600,11.3195
|
| 256 |
+
1471,102000,4.8996
|
| 257 |
+
1482,102400,5.2443
|
| 258 |
+
1492,102800,5.7095
|
| 259 |
+
1498,103200,10.861
|
| 260 |
+
1505,103600,8.0669
|
| 261 |
+
1511,104000,11.5714
|
| 262 |
+
1516,104400,11.8947
|
| 263 |
+
1522,104800,13.1965
|
| 264 |
+
1527,105200,11.7688
|
| 265 |
+
1532,105600,14.6079
|
| 266 |
+
1537,106000,12.5179
|
| 267 |
+
1545,106400,6.0034
|
| 268 |
+
1550,106800,13.8795
|
| 269 |
+
1555,107200,12.5438
|
| 270 |
+
1560,107600,12.3526
|
| 271 |
+
1565,108000,14.0173
|
| 272 |
+
1572,108400,6.5151
|
| 273 |
+
1578,108800,11.821
|
| 274 |
+
1584,109200,10.7337
|
| 275 |
+
1589,109600,12.987
|
| 276 |
+
1593,110000,15.0823
|
| 277 |
+
1598,110400,13.6488
|
| 278 |
+
1603,110800,15.9419
|
| 279 |
+
1607,111200,17.6469
|
| 280 |
+
1613,111600,11.8659
|
| 281 |
+
1620,112000,8.6666
|
| 282 |
+
1626,112400,8.0076
|
| 283 |
+
1634,112800,8.2332
|
| 284 |
+
1639,113200,11.4569
|
| 285 |
+
1645,113600,7.8428
|
| 286 |
+
1651,114000,9.6165
|
| 287 |
+
1659,114400,7.2423
|
| 288 |
+
1665,114800,10.0212
|
| 289 |
+
1670,115200,15.7796
|
| 290 |
+
1675,115600,13.3568
|
| 291 |
+
1681,116000,11.9107
|
| 292 |
+
1686,116400,16.4987
|
| 293 |
+
1690,116800,17.6908
|
| 294 |
+
1695,117200,13.7199
|
| 295 |
+
1700,117600,13.2496
|
| 296 |
+
1704,118000,18.4471
|
| 297 |
+
1709,118400,12.1934
|
| 298 |
+
1714,118800,16.5036
|
| 299 |
+
1720,119200,13.0416
|
| 300 |
+
1726,119600,9.5647
|
| 301 |
+
1734,120000,9.7985
|
| 302 |
+
1742,120400,7.6858
|
| 303 |
+
1747,120800,11.444
|
| 304 |
+
1751,121200,17.1469
|
| 305 |
+
1758,121600,11.2027
|
| 306 |
+
1764,122000,11.57
|
| 307 |
+
1771,122400,11.0437
|
| 308 |
+
1776,122800,12.1007
|
| 309 |
+
1788,123200,7.5098
|
| 310 |
+
1793,123600,15.3339
|
| 311 |
+
1798,124000,11.8126
|
| 312 |
+
1805,124400,11.7518
|
| 313 |
+
1810,124800,15.431
|
| 314 |
+
1817,125200,9.2215
|
| 315 |
+
1822,125600,16.9219
|
| 316 |
+
1827,126000,13.1771
|
| 317 |
+
1832,126400,19.0296
|
| 318 |
+
1838,126800,12.1479
|
| 319 |
+
1842,127200,20.0777
|
| 320 |
+
1847,127600,12.5869
|
| 321 |
+
1857,128000,7.5145
|
| 322 |
+
1862,128400,17.6462
|
| 323 |
+
1871,128800,5.5968
|
| 324 |
+
1880,129200,7.7816
|
| 325 |
+
1890,129600,7.5515
|
| 326 |
+
1897,130000,9.2213
|
| 327 |
+
1904,130400,12.0633
|
| 328 |
+
1913,130800,9.4187
|
| 329 |
+
1923,131200,6.6914
|
| 330 |
+
1933,131600,6.9577
|
| 331 |
+
1938,132000,15.6559
|
| 332 |
+
1943,132400,15.711
|
| 333 |
+
1948,132800,15.8596
|
| 334 |
+
1955,133200,10.39
|
| 335 |
+
1961,133600,13.9018
|
| 336 |
+
1968,134000,10.9876
|
| 337 |
+
1975,134400,8.5861
|
| 338 |
+
1983,134800,9.3426
|
| 339 |
+
1988,135200,16.0291
|
| 340 |
+
1996,135600,10.6449
|
| 341 |
+
2000,136000,15.9081
|
| 342 |
+
2008,136400,9.7453
|
| 343 |
+
2014,136800,14.5408
|
| 344 |
+
2019,137200,13.6499
|
| 345 |
+
2023,137600,17.35
|
| 346 |
+
2030,138000,11.9688
|
| 347 |
+
2038,138400,9.7757
|
| 348 |
+
2043,138800,15.0422
|
| 349 |
+
2049,139200,14.1278
|
| 350 |
+
2054,139600,11.3104
|
| 351 |
+
2060,140000,10.5947
|
| 352 |
+
2065,140400,14.9903
|
| 353 |
+
2071,140800,12.8763
|
| 354 |
+
2076,141200,12.2992
|
| 355 |
+
2085,141600,8.4617
|
| 356 |
+
2091,142000,12.2624
|
| 357 |
+
2099,142400,10.5275
|
| 358 |
+
2114,142800,3.2717
|
| 359 |
+
2125,143200,4.7307
|
| 360 |
+
2136,143600,8.0355
|
| 361 |
+
2147,144000,6.7636
|
| 362 |
+
2154,144400,8.4627
|
| 363 |
+
2164,144800,8.6816
|
| 364 |
+
2173,145200,7.4853
|
| 365 |
+
2179,145600,9.5195
|
| 366 |
+
2186,146000,10.8791
|
| 367 |
+
2193,146400,12.8798
|
| 368 |
+
2198,146800,14.4826
|
| 369 |
+
2204,147200,13.2357
|
| 370 |
+
2210,147600,14.1333
|
| 371 |
+
2216,148000,12.9477
|
| 372 |
+
2221,148400,14.1298
|
| 373 |
+
2229,148800,10.4178
|
| 374 |
+
2234,149200,15.747
|
| 375 |
+
2239,149600,13.7127
|
| 376 |
+
2245,150000,13.5562
|
| 377 |
+
2252,150400,10.7229
|
| 378 |
+
2258,150800,13.8351
|
| 379 |
+
2263,151200,16.2056
|
| 380 |
+
2268,151600,15.9527
|
| 381 |
+
2275,152000,9.0862
|
| 382 |
+
2279,152400,19.7776
|
| 383 |
+
2285,152800,14.6447
|
| 384 |
+
2293,153200,10.0589
|
| 385 |
+
2298,153600,15.9818
|
| 386 |
+
2308,154000,7.2806
|
| 387 |
+
2315,154400,11.1871
|
| 388 |
+
2320,154800,16.5332
|
| 389 |
+
2326,155200,11.4502
|
| 390 |
+
2332,155600,11.2749
|
| 391 |
+
2337,156000,18.7918
|
| 392 |
+
2345,156400,9.0297
|
| 393 |
+
2351,156800,13.5346
|
| 394 |
+
2355,157200,20.3685
|
| 395 |
+
2359,157600,17.5759
|
| 396 |
+
2363,158000,20.9134
|
| 397 |
+
2372,158400,8.7563
|
| 398 |
+
2377,158800,16.3223
|
| 399 |
+
2383,159200,12.3571
|
| 400 |
+
2388,159600,14.8996
|
| 401 |
+
2393,160000,16.1612
|
| 402 |
+
2400,160400,10.695
|
| 403 |
+
2404,160800,15.8143
|
| 404 |
+
2409,161200,14.1756
|
| 405 |
+
2417,161600,10.3059
|
| 406 |
+
2421,162000,15.7669
|
| 407 |
+
2426,162400,16.7237
|
| 408 |
+
2432,162800,13.186
|
| 409 |
+
2436,163200,15.9777
|
| 410 |
+
2442,163600,15.1247
|
| 411 |
+
2451,164000,6.9288
|
| 412 |
+
2455,164400,19.8019
|
| 413 |
+
2459,164800,16.5844
|
| 414 |
+
2463,165200,19.4641
|
| 415 |
+
2469,165600,12.5474
|
| 416 |
+
2475,166000,12.4718
|
| 417 |
+
2482,166400,9.2596
|
| 418 |
+
2492,166800,7.4804
|
| 419 |
+
2499,167200,9.2925
|
| 420 |
+
2506,167600,9.6121
|
| 421 |
+
2512,168000,11.8263
|
| 422 |
+
2519,168400,11.1036
|
| 423 |
+
2524,168800,11.8451
|
| 424 |
+
2530,169200,12.3773
|
| 425 |
+
2535,169600,16.2471
|
| 426 |
+
2541,170000,11.3649
|
| 427 |
+
2545,170400,15.6326
|
| 428 |
+
2550,170800,16.8937
|
| 429 |
+
2554,171200,19.5632
|
| 430 |
+
2558,171600,17.4009
|
| 431 |
+
2564,172000,15.0096
|
| 432 |
+
2569,172400,12.1716
|
| 433 |
+
2574,172800,16.2656
|
| 434 |
+
2579,173200,12.1121
|
| 435 |
+
2584,173600,15.5555
|
| 436 |
+
2590,174000,14.2854
|
| 437 |
+
2597,174400,11.301
|
| 438 |
+
2601,174800,17.2093
|
| 439 |
+
2606,175200,15.6553
|
| 440 |
+
2611,175600,13.2231
|
| 441 |
+
2618,176000,12.4886
|
| 442 |
+
2623,176400,11.6754
|
| 443 |
+
2627,176800,19.8814
|
| 444 |
+
2633,177200,15.667
|
| 445 |
+
2637,177600,17.0859
|
| 446 |
+
2642,178000,14.0034
|
| 447 |
+
2650,178400,9.8961
|
| 448 |
+
2654,178800,19.2615
|
| 449 |
+
2663,179200,8.5405
|
| 450 |
+
2670,179600,9.0917
|
| 451 |
+
2684,180000,3.9164
|
| 452 |
+
2690,180400,12.0612
|
| 453 |
+
2697,180800,10.1366
|
| 454 |
+
2707,181200,7.161
|
| 455 |
+
2713,181600,10.5804
|
| 456 |
+
2719,182000,11.7124
|
| 457 |
+
2725,182400,12.7422
|
| 458 |
+
2735,182800,5.839
|
| 459 |
+
2743,183200,9.3809
|
| 460 |
+
2748,183600,14.0109
|
| 461 |
+
2754,184000,12.5838
|
| 462 |
+
2772,184400,2.7956
|
| 463 |
+
2782,184800,5.9967
|
| 464 |
+
2790,185200,6.7712
|
| 465 |
+
2799,185600,8.4177
|
| 466 |
+
2805,186000,11.2619
|
| 467 |
+
2818,186400,4.5222
|
| 468 |
+
2827,186800,7.9052
|
| 469 |
+
2834,187200,8.1854
|
| 470 |
+
2845,187600,5.2657
|
| 471 |
+
2852,188000,9.6327
|
| 472 |
+
2862,188400,5.4335
|
| 473 |
+
2870,188800,9.4833
|
| 474 |
+
2876,189200,10.3303
|
| 475 |
+
2881,189600,14.843
|
| 476 |
+
2891,190000,6.7218
|
| 477 |
+
2896,190400,13.4077
|
| 478 |
+
2902,190800,12.8647
|
| 479 |
+
2908,191200,9.9087
|
| 480 |
+
2913,191600,16.5216
|
| 481 |
+
2920,192000,9.6137
|
| 482 |
+
2924,192400,15.5697
|
| 483 |
+
2932,192800,8.3219
|
| 484 |
+
2942,193200,4.7098
|
| 485 |
+
2949,193600,11.8933
|
| 486 |
+
2954,194000,14.2076
|
| 487 |
+
2961,194400,10.3591
|
| 488 |
+
2966,194800,12.7551
|
| 489 |
+
2972,195200,12.0142
|
| 490 |
+
2978,195600,11.9555
|
| 491 |
+
2982,196000,17.2151
|
| 492 |
+
2988,196400,11.6288
|
| 493 |
+
2994,196800,11.5739
|
| 494 |
+
2998,197200,15.9745
|
| 495 |
+
3005,197600,10.5003
|
| 496 |
+
3010,198000,12.1237
|
| 497 |
+
3017,198400,11.9958
|
| 498 |
+
3022,198800,13.6748
|
| 499 |
+
3031,199200,8.644
|
| 500 |
+
3036,199600,15.9198
|
| 501 |
+
3043,200000,11.7405
|
code/Lake application/logs/results_1/PDPPO_frozen_lake_log_5.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
30,400,1.0363
|
| 3 |
+
55,800,1.1656
|
| 4 |
+
74,1200,1.2812
|
| 5 |
+
87,1600,2.8849
|
| 6 |
+
101,2000,2.0512
|
| 7 |
+
109,2400,4.4421
|
| 8 |
+
120,2800,2.7307
|
| 9 |
+
130,3200,2.9113
|
| 10 |
+
136,3600,4.8513
|
| 11 |
+
143,4000,4.6759
|
| 12 |
+
147,4400,6.8206
|
| 13 |
+
156,4800,3.5122
|
| 14 |
+
160,5200,7.4682
|
| 15 |
+
167,5600,4.2985
|
| 16 |
+
173,6000,4.6158
|
| 17 |
+
178,6400,6.3235
|
| 18 |
+
182,6800,5.9121
|
| 19 |
+
187,7200,6.0524
|
| 20 |
+
192,7600,6.2336
|
| 21 |
+
196,8000,6.7958
|
| 22 |
+
201,8400,5.8009
|
| 23 |
+
205,8800,7.3525
|
| 24 |
+
209,9200,7.4518
|
| 25 |
+
214,9600,6.8613
|
| 26 |
+
220,10000,4.8876
|
| 27 |
+
224,10400,7.4277
|
| 28 |
+
228,10800,6.8252
|
| 29 |
+
232,11200,7.0368
|
| 30 |
+
236,11600,7.4591
|
| 31 |
+
241,12000,6.5145
|
| 32 |
+
245,12400,7.3888
|
| 33 |
+
250,12800,5.1931
|
| 34 |
+
255,13200,6.1031
|
| 35 |
+
259,13600,7.5133
|
| 36 |
+
264,14000,6.6363
|
| 37 |
+
268,14400,7.6229
|
| 38 |
+
273,14800,6.0136
|
| 39 |
+
277,15200,8.2029
|
| 40 |
+
282,15600,6.2847
|
| 41 |
+
287,16000,6.6472
|
| 42 |
+
291,16400,7.3617
|
| 43 |
+
295,16800,8.1746
|
| 44 |
+
299,17200,6.8719
|
| 45 |
+
304,17600,6.1261
|
| 46 |
+
309,18000,7.0801
|
| 47 |
+
313,18400,7.6909
|
| 48 |
+
317,18800,7.4924
|
| 49 |
+
321,19200,7.4629
|
| 50 |
+
327,19600,5.5724
|
| 51 |
+
333,20000,4.8738
|
| 52 |
+
337,20400,7.1132
|
| 53 |
+
342,20800,6.5451
|
| 54 |
+
347,21200,5.6767
|
| 55 |
+
351,21600,7.6065
|
| 56 |
+
355,22000,7.5147
|
| 57 |
+
360,22400,7.0767
|
| 58 |
+
364,22800,7.5414
|
| 59 |
+
368,23200,7.7461
|
| 60 |
+
373,23600,5.5045
|
| 61 |
+
377,24000,6.8692
|
| 62 |
+
382,24400,6.7735
|
| 63 |
+
386,24800,7.1926
|
| 64 |
+
391,25200,6.3948
|
| 65 |
+
395,25600,6.506
|
| 66 |
+
400,26000,6.8513
|
| 67 |
+
404,26400,6.7321
|
| 68 |
+
410,26800,5.3182
|
| 69 |
+
415,27200,6.3436
|
| 70 |
+
424,27600,3.9625
|
| 71 |
+
433,28000,4.0086
|
| 72 |
+
438,28400,7.455
|
| 73 |
+
448,28800,4.1049
|
| 74 |
+
455,29200,4.8118
|
| 75 |
+
460,29600,8.8025
|
| 76 |
+
468,30000,5.2449
|
| 77 |
+
474,30400,6.9961
|
| 78 |
+
479,30800,9.5919
|
| 79 |
+
486,31200,4.9522
|
| 80 |
+
494,31600,6.2399
|
| 81 |
+
501,32000,6.2058
|
| 82 |
+
507,32400,7.817
|
| 83 |
+
512,32800,10.4552
|
| 84 |
+
519,33200,6.8586
|
| 85 |
+
528,33600,5.2227
|
| 86 |
+
532,34000,11.0335
|
| 87 |
+
537,34400,9.8253
|
| 88 |
+
548,34800,4.943
|
| 89 |
+
552,35200,12.3006
|
| 90 |
+
557,35600,8.6328
|
| 91 |
+
565,36000,6.8881
|
| 92 |
+
573,36400,6.9154
|
| 93 |
+
578,36800,9.7087
|
| 94 |
+
583,37200,11.4344
|
| 95 |
+
588,37600,8.4428
|
| 96 |
+
594,38000,9.4412
|
| 97 |
+
598,38400,13.0931
|
| 98 |
+
602,38800,13.7591
|
| 99 |
+
608,39200,9.2528
|
| 100 |
+
616,39600,5.39
|
| 101 |
+
622,40000,11.192
|
| 102 |
+
628,40400,9.3021
|
| 103 |
+
635,40800,6.3386
|
| 104 |
+
640,41200,13.3335
|
| 105 |
+
645,41600,10.7252
|
| 106 |
+
650,42000,9.8878
|
| 107 |
+
657,42400,7.6309
|
| 108 |
+
662,42800,13.1219
|
| 109 |
+
667,43200,11.3305
|
| 110 |
+
674,43600,8.0326
|
| 111 |
+
682,44000,7.1185
|
| 112 |
+
686,44400,11.0479
|
| 113 |
+
691,44800,12.8963
|
| 114 |
+
696,45200,12.0313
|
| 115 |
+
700,45600,11.8897
|
| 116 |
+
709,46000,7.0573
|
| 117 |
+
714,46400,10.6651
|
| 118 |
+
720,46800,10.3476
|
| 119 |
+
727,47200,10.3755
|
| 120 |
+
731,47600,13.4045
|
| 121 |
+
738,48000,9.1209
|
| 122 |
+
742,48400,13.8504
|
| 123 |
+
749,48800,9.6356
|
| 124 |
+
755,49200,9.325
|
| 125 |
+
761,49600,10.3474
|
| 126 |
+
765,50000,14.3104
|
| 127 |
+
770,50400,12.0887
|
| 128 |
+
774,50800,15.2546
|
| 129 |
+
779,51200,12.14
|
| 130 |
+
784,51600,12.2329
|
| 131 |
+
788,52000,15.0146
|
| 132 |
+
793,52400,10.5181
|
| 133 |
+
799,52800,10.1783
|
| 134 |
+
805,53200,8.4057
|
| 135 |
+
812,53600,8.7315
|
| 136 |
+
818,54000,11.7951
|
| 137 |
+
822,54400,15.2027
|
| 138 |
+
827,54800,11.7442
|
| 139 |
+
831,55200,13.5781
|
| 140 |
+
836,55600,12.2977
|
| 141 |
+
842,56000,10.4294
|
| 142 |
+
847,56400,10.9495
|
| 143 |
+
853,56800,10.9578
|
| 144 |
+
859,57200,11.0167
|
| 145 |
+
865,57600,9.7521
|
| 146 |
+
869,58000,15.5817
|
| 147 |
+
875,58400,9.9763
|
| 148 |
+
880,58800,13.0464
|
| 149 |
+
884,59200,13.8061
|
| 150 |
+
889,59600,12.9356
|
| 151 |
+
896,60000,10.3463
|
| 152 |
+
901,60400,10.6919
|
| 153 |
+
908,60800,8.6734
|
| 154 |
+
914,61200,12.4219
|
| 155 |
+
919,61600,10.0202
|
| 156 |
+
924,62000,15.9255
|
| 157 |
+
929,62400,11.2685
|
| 158 |
+
935,62800,10.8452
|
| 159 |
+
940,63200,11.7721
|
| 160 |
+
946,63600,11.407
|
| 161 |
+
952,64000,12.8059
|
| 162 |
+
958,64400,8.4392
|
| 163 |
+
964,64800,11.9307
|
| 164 |
+
969,65200,15.1005
|
| 165 |
+
974,65600,13.4469
|
| 166 |
+
984,66000,7.6713
|
| 167 |
+
988,66400,14.245
|
| 168 |
+
995,66800,9.1666
|
| 169 |
+
1000,67200,15.6424
|
| 170 |
+
1005,67600,16.3647
|
| 171 |
+
1011,68000,15.1433
|
| 172 |
+
1017,68400,12.4149
|
| 173 |
+
1022,68800,13.4503
|
| 174 |
+
1030,69200,10.6481
|
| 175 |
+
1039,69600,8.0946
|
| 176 |
+
1046,70000,7.7302
|
| 177 |
+
1051,70400,16.2667
|
| 178 |
+
1056,70800,17.1896
|
| 179 |
+
1062,71200,12.1892
|
| 180 |
+
1067,71600,15.6327
|
| 181 |
+
1074,72000,10.8082
|
| 182 |
+
1078,72400,20.0766
|
| 183 |
+
1086,72800,9.649
|
| 184 |
+
1092,73200,11.0206
|
| 185 |
+
1098,73600,11.8751
|
| 186 |
+
1107,74000,8.5445
|
| 187 |
+
1111,74400,16.1052
|
| 188 |
+
1118,74800,11.2793
|
| 189 |
+
1122,75200,17.9738
|
| 190 |
+
1129,75600,11.9216
|
| 191 |
+
1135,76000,15.1108
|
| 192 |
+
1141,76400,10.5198
|
| 193 |
+
1147,76800,16.3762
|
| 194 |
+
1151,77200,18.503
|
| 195 |
+
1160,77600,8.741
|
| 196 |
+
1167,78000,9.7747
|
| 197 |
+
1173,78400,13.8605
|
| 198 |
+
1180,78800,9.6619
|
| 199 |
+
1185,79200,15.3712
|
| 200 |
+
1190,79600,16.7442
|
| 201 |
+
1196,80000,11.6345
|
| 202 |
+
1202,80400,14.0943
|
| 203 |
+
1207,80800,14.2296
|
| 204 |
+
1212,81200,16.3014
|
| 205 |
+
1220,81600,9.8765
|
| 206 |
+
1228,82000,8.7464
|
| 207 |
+
1235,82400,10.1393
|
| 208 |
+
1240,82800,16.1224
|
| 209 |
+
1246,83200,15.2376
|
| 210 |
+
1252,83600,10.7445
|
| 211 |
+
1259,84000,10.0961
|
| 212 |
+
1264,84400,15.7153
|
| 213 |
+
1271,84800,13.0846
|
| 214 |
+
1280,85200,7.0547
|
| 215 |
+
1285,85600,15.492
|
| 216 |
+
1292,86000,9.3012
|
| 217 |
+
1297,86400,12.5845
|
| 218 |
+
1303,86800,12.9457
|
| 219 |
+
1308,87200,15.8783
|
| 220 |
+
1314,87600,12.6196
|
| 221 |
+
1319,88000,13.5926
|
| 222 |
+
1325,88400,9.8846
|
| 223 |
+
1329,88800,19.8844
|
| 224 |
+
1336,89200,11.9627
|
| 225 |
+
1340,89600,19.6276
|
| 226 |
+
1345,90000,11.4204
|
| 227 |
+
1355,90400,9.3768
|
| 228 |
+
1360,90800,13.957
|
| 229 |
+
1365,91200,15.7235
|
| 230 |
+
1372,91600,10.9691
|
| 231 |
+
1376,92000,17.8047
|
| 232 |
+
1380,92400,19.6958
|
| 233 |
+
1385,92800,19.2327
|
| 234 |
+
1391,93200,9.4353
|
| 235 |
+
1400,93600,8.9945
|
| 236 |
+
1407,94000,9.5239
|
| 237 |
+
1416,94400,9.7381
|
| 238 |
+
1422,94800,12.1216
|
| 239 |
+
1429,95200,11.5348
|
| 240 |
+
1437,95600,6.6356
|
| 241 |
+
1447,96000,8.2667
|
| 242 |
+
1456,96400,5.5346
|
| 243 |
+
1463,96800,13.0559
|
| 244 |
+
1471,97200,8.4067
|
| 245 |
+
1483,97600,5.107
|
| 246 |
+
1494,98000,6.8124
|
| 247 |
+
1501,98400,8.9277
|
| 248 |
+
1515,98800,5.11
|
| 249 |
+
1524,99200,8.4644
|
| 250 |
+
1538,99600,4.2993
|
| 251 |
+
1547,100000,6.5226
|
| 252 |
+
1552,100400,16.8617
|
| 253 |
+
1559,100800,10.7992
|
| 254 |
+
1569,101200,7.4671
|
| 255 |
+
1577,101600,8.0795
|
| 256 |
+
1586,102000,8.0661
|
| 257 |
+
1595,102400,7.4557
|
| 258 |
+
1602,102800,9.2675
|
| 259 |
+
1612,103200,7.525
|
| 260 |
+
1621,103600,8.0409
|
| 261 |
+
1628,104000,10.8619
|
| 262 |
+
1638,104400,7.0936
|
| 263 |
+
1648,104800,6.5034
|
| 264 |
+
1656,105200,7.5528
|
| 265 |
+
1666,105600,7.0664
|
| 266 |
+
1674,106000,9.2449
|
| 267 |
+
1684,106400,6.3384
|
| 268 |
+
1695,106800,4.2875
|
| 269 |
+
1701,107200,15.9626
|
| 270 |
+
1710,107600,8.014
|
| 271 |
+
1720,108000,7.4489
|
| 272 |
+
1729,108400,8.3794
|
| 273 |
+
1737,108800,8.7056
|
| 274 |
+
1745,109200,7.9263
|
| 275 |
+
1751,109600,11.078
|
| 276 |
+
1758,110000,12.3928
|
| 277 |
+
1763,110400,12.4602
|
| 278 |
+
1769,110800,16.5946
|
| 279 |
+
1775,111200,12.9342
|
| 280 |
+
1780,111600,15.1111
|
| 281 |
+
1785,112000,13.7242
|
| 282 |
+
1791,112400,14.9992
|
| 283 |
+
1797,112800,11.5528
|
| 284 |
+
1802,113200,16.7522
|
| 285 |
+
1806,113600,14.0396
|
| 286 |
+
1813,114000,12.6857
|
| 287 |
+
1817,114400,16.7741
|
| 288 |
+
1822,114800,12.9523
|
| 289 |
+
1827,115200,16.5158
|
| 290 |
+
1832,115600,17.1408
|
| 291 |
+
1839,116000,9.8454
|
| 292 |
+
1843,116400,17.8182
|
| 293 |
+
1849,116800,11.2671
|
| 294 |
+
1855,117200,14.7182
|
| 295 |
+
1860,117600,14.9587
|
| 296 |
+
1867,118000,8.7827
|
| 297 |
+
1874,118400,11.6531
|
| 298 |
+
1879,118800,12.538
|
| 299 |
+
1887,119200,9.8758
|
| 300 |
+
1892,119600,14.5453
|
| 301 |
+
1897,120000,14.467
|
| 302 |
+
1904,120400,12.7871
|
| 303 |
+
1909,120800,14.9706
|
| 304 |
+
1914,121200,13.51
|
| 305 |
+
1921,121600,12.2767
|
| 306 |
+
1926,122000,16.5016
|
| 307 |
+
1930,122400,20.9919
|
| 308 |
+
1934,122800,18.5356
|
| 309 |
+
1939,123200,11.7085
|
| 310 |
+
1945,123600,14.2539
|
| 311 |
+
1955,124000,7.2888
|
| 312 |
+
1965,124400,6.2359
|
| 313 |
+
1974,124800,5.9543
|
| 314 |
+
1989,125200,3.2663
|
| 315 |
+
2001,125600,4.2557
|
| 316 |
+
2015,126000,5.6426
|
| 317 |
+
2032,126400,2.605
|
| 318 |
+
2046,126800,4.8766
|
| 319 |
+
2059,127200,4.8677
|
| 320 |
+
2070,127600,5.7594
|
| 321 |
+
2081,128000,5.4336
|
| 322 |
+
2092,128400,5.9113
|
| 323 |
+
2103,128800,4.5887
|
| 324 |
+
2114,129200,4.1911
|
| 325 |
+
2122,129600,5.8893
|
| 326 |
+
2134,130000,3.7628
|
| 327 |
+
2144,130400,4.9745
|
| 328 |
+
2152,130800,7.0797
|
| 329 |
+
2158,131200,9.2503
|
| 330 |
+
2168,131600,6.9398
|
| 331 |
+
2176,132000,7.7078
|
| 332 |
+
2185,132400,6.6676
|
| 333 |
+
2192,132800,7.4282
|
| 334 |
+
2201,133200,7.9222
|
| 335 |
+
2208,133600,8.5393
|
| 336 |
+
2214,134000,11.7517
|
| 337 |
+
2219,134400,13.6645
|
| 338 |
+
2225,134800,11.5794
|
| 339 |
+
2232,135200,9.2883
|
| 340 |
+
2237,135600,15.8834
|
| 341 |
+
2242,136000,13.0821
|
| 342 |
+
2249,136400,11.0747
|
| 343 |
+
2255,136800,12.4104
|
| 344 |
+
2263,137200,6.2022
|
| 345 |
+
2268,137600,15.7658
|
| 346 |
+
2272,138000,18.5306
|
| 347 |
+
2283,138400,6.9062
|
| 348 |
+
2289,138800,10.1887
|
| 349 |
+
2296,139200,12.4334
|
| 350 |
+
2300,139600,13.9879
|
| 351 |
+
2304,140000,19.1339
|
| 352 |
+
2312,140400,10.1769
|
| 353 |
+
2317,140800,14.9144
|
| 354 |
+
2322,141200,15.6578
|
| 355 |
+
2327,141600,12.3308
|
| 356 |
+
2333,142000,13.2801
|
| 357 |
+
2340,142400,8.8488
|
| 358 |
+
2345,142800,15.9321
|
| 359 |
+
2351,143200,14.0744
|
| 360 |
+
2357,143600,13.7016
|
| 361 |
+
2361,144000,17.1141
|
| 362 |
+
2366,144400,16.093
|
| 363 |
+
2371,144800,15.3966
|
| 364 |
+
2377,145200,11.9135
|
| 365 |
+
2383,145600,12.9568
|
| 366 |
+
2388,146000,16.5526
|
| 367 |
+
2394,146400,13.9278
|
| 368 |
+
2400,146800,13.1973
|
| 369 |
+
2407,147200,10.658
|
| 370 |
+
2411,147600,17.6656
|
| 371 |
+
2418,148000,9.9268
|
| 372 |
+
2424,148400,12.6209
|
| 373 |
+
2429,148800,15.6302
|
| 374 |
+
2434,149200,15.4722
|
| 375 |
+
2438,149600,15.8488
|
| 376 |
+
2443,150000,15.5069
|
| 377 |
+
2452,150400,8.9388
|
| 378 |
+
2458,150800,11.1183
|
| 379 |
+
2465,151200,12.9985
|
| 380 |
+
2472,151600,9.298
|
| 381 |
+
2482,152000,6.9406
|
| 382 |
+
2488,152400,10.9889
|
| 383 |
+
2494,152800,13.8809
|
| 384 |
+
2500,153200,9.4922
|
| 385 |
+
2508,153600,9.8473
|
| 386 |
+
2517,154000,5.4676
|
| 387 |
+
2521,154400,17.306
|
| 388 |
+
2529,154800,10.5859
|
| 389 |
+
2535,155200,8.9943
|
| 390 |
+
2539,155600,19.018
|
| 391 |
+
2545,156000,10.541
|
| 392 |
+
2553,156400,7.199
|
| 393 |
+
2559,156800,13.1617
|
| 394 |
+
2564,157200,13.507
|
| 395 |
+
2568,157600,19.7962
|
| 396 |
+
2574,158000,15.0976
|
| 397 |
+
2581,158400,9.5981
|
| 398 |
+
2587,158800,11.6193
|
| 399 |
+
2593,159200,11.8875
|
| 400 |
+
2599,159600,12.6915
|
| 401 |
+
2608,160000,7.097
|
| 402 |
+
2616,160400,10.2592
|
| 403 |
+
2622,160800,11.4696
|
| 404 |
+
2627,161200,14.5994
|
| 405 |
+
2632,161600,14.607
|
| 406 |
+
2637,162000,17.4054
|
| 407 |
+
2643,162400,13.1822
|
| 408 |
+
2649,162800,13.4214
|
| 409 |
+
2654,163200,15.3761
|
| 410 |
+
2658,163600,20.4652
|
| 411 |
+
2664,164000,11.7819
|
| 412 |
+
2668,164400,21.1348
|
| 413 |
+
2672,164800,19.7497
|
| 414 |
+
2676,165200,19.9861
|
| 415 |
+
2680,165600,20.6264
|
| 416 |
+
2686,166000,15.0114
|
| 417 |
+
2690,166400,16.8975
|
| 418 |
+
2696,166800,16.6914
|
| 419 |
+
2703,167200,9.2727
|
| 420 |
+
2711,167600,10.1419
|
| 421 |
+
2717,168000,8.6377
|
| 422 |
+
2722,168400,15.6912
|
| 423 |
+
2729,168800,11.2312
|
| 424 |
+
2737,169200,6.9751
|
| 425 |
+
2743,169600,14.1282
|
| 426 |
+
2748,170000,14.9263
|
| 427 |
+
2753,170400,16.8727
|
| 428 |
+
2757,170800,20.6722
|
| 429 |
+
2761,171200,21.0708
|
| 430 |
+
2767,171600,10.5146
|
| 431 |
+
2772,172000,18.3501
|
| 432 |
+
2777,172400,16.4924
|
| 433 |
+
2782,172800,15.8319
|
| 434 |
+
2786,173200,18.9688
|
| 435 |
+
2792,173600,14.616
|
| 436 |
+
2796,174000,18.8745
|
| 437 |
+
2802,174400,13.8155
|
| 438 |
+
2806,174800,20.1499
|
| 439 |
+
2810,175200,16.8318
|
| 440 |
+
2817,175600,10.7324
|
| 441 |
+
2824,176000,11.1007
|
| 442 |
+
2830,176400,12.4775
|
| 443 |
+
2841,176800,5.9412
|
| 444 |
+
2847,177200,13.0488
|
| 445 |
+
2855,177600,9.6263
|
| 446 |
+
2861,178000,9.2555
|
| 447 |
+
2868,178400,13.6159
|
| 448 |
+
2874,178800,10.7991
|
| 449 |
+
2880,179200,13.2894
|
| 450 |
+
2885,179600,12.8225
|
| 451 |
+
2894,180000,10.0183
|
| 452 |
+
2898,180400,18.6829
|
| 453 |
+
2904,180800,11.2808
|
| 454 |
+
2909,181200,18.8581
|
| 455 |
+
2914,181600,15.8593
|
| 456 |
+
2919,182000,15.171
|
| 457 |
+
2923,182400,15.3617
|
| 458 |
+
2929,182800,12.993
|
| 459 |
+
2934,183200,19.4859
|
| 460 |
+
2938,183600,16.5612
|
| 461 |
+
2943,184000,18.0036
|
| 462 |
+
2948,184400,14.3423
|
| 463 |
+
2954,184800,14.2031
|
| 464 |
+
2958,185200,17.8896
|
| 465 |
+
2964,185600,14.1217
|
| 466 |
+
2969,186000,15.7024
|
| 467 |
+
2976,186400,10.0496
|
| 468 |
+
2982,186800,15.1974
|
| 469 |
+
2990,187200,8.1909
|
| 470 |
+
2996,187600,11.4299
|
| 471 |
+
3002,188000,9.0973
|
| 472 |
+
3007,188400,18.8057
|
| 473 |
+
3019,188800,5.6121
|
| 474 |
+
3029,189200,6.4934
|
| 475 |
+
3035,189600,10.5633
|
| 476 |
+
3040,190000,14.1835
|
| 477 |
+
3049,190400,6.6539
|
| 478 |
+
3055,190800,11.4925
|
| 479 |
+
3061,191200,12.6726
|
| 480 |
+
3066,191600,13.2938
|
| 481 |
+
3072,192000,12.4183
|
| 482 |
+
3079,192400,9.0459
|
| 483 |
+
3084,192800,15.8784
|
| 484 |
+
3089,193200,14.8581
|
| 485 |
+
3095,193600,11.5812
|
| 486 |
+
3099,194000,19.8301
|
| 487 |
+
3107,194400,4.9365
|
| 488 |
+
3115,194800,8.6926
|
| 489 |
+
3122,195200,10.0389
|
| 490 |
+
3131,195600,8.3146
|
| 491 |
+
3136,196000,12.6695
|
| 492 |
+
3141,196400,16.2173
|
| 493 |
+
3146,196800,15.5667
|
| 494 |
+
3152,197200,13.6761
|
| 495 |
+
3158,197600,9.9176
|
| 496 |
+
3164,198000,12.4659
|
| 497 |
+
3170,198400,10.7513
|
| 498 |
+
3175,198800,10.0257
|
| 499 |
+
3179,199200,17.8125
|
| 500 |
+
3184,199600,15.2902
|
| 501 |
+
3188,200000,13.6833
|
code/Lake application/logs/results_1/PPO_frozen_lake_log_1.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
27,400,1.0685
|
| 3 |
+
52,800,1.1781
|
| 4 |
+
66,1200,2.0866
|
| 5 |
+
74,1600,3.6635
|
| 6 |
+
83,2000,3.21
|
| 7 |
+
89,2400,4.7845
|
| 8 |
+
96,2800,4.0053
|
| 9 |
+
104,3200,3.8626
|
| 10 |
+
110,3600,4.2643
|
| 11 |
+
117,4000,4.8568
|
| 12 |
+
122,4400,5.5491
|
| 13 |
+
126,4800,7.4627
|
| 14 |
+
131,5200,6.4184
|
| 15 |
+
136,5600,4.5238
|
| 16 |
+
141,6000,6.3553
|
| 17 |
+
145,6400,7.4742
|
| 18 |
+
150,6800,6.7629
|
| 19 |
+
154,7200,6.507
|
| 20 |
+
159,7600,6.5995
|
| 21 |
+
164,8000,5.5815
|
| 22 |
+
168,8400,7.363
|
| 23 |
+
172,8800,7.377
|
| 24 |
+
176,9200,7.3108
|
| 25 |
+
180,9600,7.5117
|
| 26 |
+
184,10000,7.5528
|
| 27 |
+
189,10400,6.4767
|
| 28 |
+
194,10800,6.4779
|
| 29 |
+
199,11200,6.0249
|
| 30 |
+
204,11600,5.7716
|
| 31 |
+
209,12000,6.2201
|
| 32 |
+
213,12400,6.6677
|
| 33 |
+
217,12800,7.5942
|
| 34 |
+
222,13200,6.4747
|
| 35 |
+
228,13600,5.2064
|
| 36 |
+
234,14000,4.7457
|
| 37 |
+
240,14400,4.9933
|
| 38 |
+
245,14800,7.1465
|
| 39 |
+
250,15200,5.3496
|
| 40 |
+
254,15600,6.5622
|
| 41 |
+
259,16000,6.4103
|
| 42 |
+
263,16400,7.161
|
| 43 |
+
268,16800,6.8279
|
| 44 |
+
273,17200,6.076
|
| 45 |
+
278,17600,6.2577
|
| 46 |
+
282,18000,6.3496
|
| 47 |
+
287,18400,6.1414
|
| 48 |
+
292,18800,6.9705
|
| 49 |
+
298,19200,4.5771
|
| 50 |
+
302,19600,7.4473
|
| 51 |
+
306,20000,7.2296
|
| 52 |
+
310,20400,7.5263
|
| 53 |
+
315,20800,5.9007
|
| 54 |
+
319,21200,7.457
|
| 55 |
+
323,21600,7.0129
|
| 56 |
+
327,22000,7.5711
|
| 57 |
+
331,22400,7.4269
|
| 58 |
+
335,22800,7.6694
|
| 59 |
+
339,23200,7.4846
|
| 60 |
+
346,23600,4.3524
|
| 61 |
+
350,24000,7.8691
|
| 62 |
+
354,24400,7.6455
|
| 63 |
+
358,24800,7.412
|
| 64 |
+
363,25200,6.1338
|
| 65 |
+
367,25600,7.6492
|
| 66 |
+
372,26000,7.0091
|
| 67 |
+
376,26400,7.6802
|
| 68 |
+
380,26800,7.2539
|
| 69 |
+
384,27200,7.7116
|
| 70 |
+
388,27600,7.6681
|
| 71 |
+
394,28000,4.7247
|
| 72 |
+
400,28400,4.8051
|
| 73 |
+
404,28800,7.619
|
| 74 |
+
410,29200,6.0282
|
| 75 |
+
414,29600,7.0344
|
| 76 |
+
419,30000,6.6914
|
| 77 |
+
424,30400,5.9087
|
| 78 |
+
428,30800,6.2718
|
| 79 |
+
434,31200,6.0214
|
| 80 |
+
438,31600,7.7125
|
| 81 |
+
444,32000,5.1105
|
| 82 |
+
448,32400,6.5991
|
| 83 |
+
452,32800,7.6193
|
| 84 |
+
458,33200,5.5863
|
| 85 |
+
464,33600,4.8851
|
| 86 |
+
468,34000,6.7645
|
| 87 |
+
474,34400,5.4311
|
| 88 |
+
479,34800,6.3864
|
| 89 |
+
487,35200,3.7461
|
| 90 |
+
494,35600,4.4228
|
| 91 |
+
500,36000,4.2904
|
| 92 |
+
506,36400,5.8171
|
| 93 |
+
510,36800,7.8094
|
| 94 |
+
514,37200,7.1195
|
| 95 |
+
519,37600,6.8759
|
| 96 |
+
524,38000,5.3536
|
| 97 |
+
535,38400,2.7984
|
| 98 |
+
542,38800,4.5735
|
| 99 |
+
549,39200,4.2022
|
| 100 |
+
557,39600,3.886
|
| 101 |
+
562,40000,5.98
|
| 102 |
+
567,40400,5.694
|
| 103 |
+
572,40800,6.5385
|
| 104 |
+
577,41200,5.555
|
| 105 |
+
581,41600,7.3993
|
| 106 |
+
586,42000,5.9369
|
| 107 |
+
591,42400,6.7453
|
| 108 |
+
595,42800,7.2735
|
| 109 |
+
599,43200,7.2897
|
| 110 |
+
604,43600,5.4537
|
| 111 |
+
609,44000,5.0761
|
| 112 |
+
615,44400,5.6804
|
| 113 |
+
620,44800,5.6548
|
| 114 |
+
624,45200,7.1059
|
| 115 |
+
628,45600,7.38
|
| 116 |
+
632,46000,6.6452
|
| 117 |
+
637,46400,6.9389
|
| 118 |
+
641,46800,7.5441
|
| 119 |
+
645,47200,7.4802
|
| 120 |
+
653,47600,3.7831
|
| 121 |
+
659,48000,4.9193
|
| 122 |
+
665,48400,4.5841
|
| 123 |
+
670,48800,5.2625
|
| 124 |
+
676,49200,5.5027
|
| 125 |
+
681,49600,6.0169
|
| 126 |
+
689,50000,3.5328
|
| 127 |
+
694,50400,6.1266
|
| 128 |
+
700,50800,4.9486
|
| 129 |
+
707,51200,4.3697
|
| 130 |
+
713,51600,5.4997
|
| 131 |
+
719,52000,4.4305
|
| 132 |
+
724,52400,5.7043
|
| 133 |
+
731,52800,5.1429
|
| 134 |
+
739,53200,3.6578
|
| 135 |
+
745,53600,5.5292
|
| 136 |
+
751,54000,4.4774
|
| 137 |
+
757,54400,5.5445
|
| 138 |
+
761,54800,7.1383
|
| 139 |
+
766,55200,6.4968
|
| 140 |
+
770,55600,7.5131
|
| 141 |
+
774,56000,7.5454
|
| 142 |
+
779,56400,5.1354
|
| 143 |
+
783,56800,6.7832
|
| 144 |
+
789,57200,5.6604
|
| 145 |
+
793,57600,7.4366
|
| 146 |
+
797,58000,7.4868
|
| 147 |
+
801,58400,7.4437
|
| 148 |
+
805,58800,7.4123
|
| 149 |
+
812,59200,4.6388
|
| 150 |
+
816,59600,7.4665
|
| 151 |
+
820,60000,7.4706
|
| 152 |
+
824,60400,6.3475
|
| 153 |
+
828,60800,7.5096
|
| 154 |
+
832,61200,7.1812
|
| 155 |
+
837,61600,6.3239
|
| 156 |
+
841,62000,6.8265
|
| 157 |
+
847,62400,5.2963
|
| 158 |
+
853,62800,4.758
|
| 159 |
+
857,63200,7.345
|
| 160 |
+
862,63600,6.2916
|
| 161 |
+
866,64000,7.0709
|
| 162 |
+
874,64400,3.7731
|
| 163 |
+
880,64800,4.6813
|
| 164 |
+
888,65200,4.1506
|
| 165 |
+
893,65600,5.6099
|
| 166 |
+
897,66000,6.8958
|
| 167 |
+
903,66400,5.7361
|
| 168 |
+
910,66800,4.2763
|
| 169 |
+
915,67200,5.9698
|
| 170 |
+
922,67600,3.6519
|
| 171 |
+
926,68000,7.6562
|
| 172 |
+
932,68400,6.2379
|
| 173 |
+
936,68800,7.0506
|
| 174 |
+
942,69200,5.6851
|
| 175 |
+
946,69600,7.5529
|
| 176 |
+
950,70000,7.6569
|
| 177 |
+
954,70400,7.489
|
| 178 |
+
958,70800,6.3997
|
| 179 |
+
962,71200,7.6004
|
| 180 |
+
967,71600,6.2412
|
| 181 |
+
971,72000,7.6256
|
| 182 |
+
977,72400,5.6385
|
| 183 |
+
981,72800,7.6622
|
| 184 |
+
985,73200,6.9613
|
| 185 |
+
990,73600,6.4998
|
| 186 |
+
994,74000,7.4835
|
| 187 |
+
998,74400,6.362
|
| 188 |
+
1003,74800,6.4975
|
| 189 |
+
1009,75200,6.0932
|
| 190 |
+
1013,75600,7.3275
|
| 191 |
+
1018,76000,6.3136
|
| 192 |
+
1022,76400,7.918
|
| 193 |
+
1026,76800,7.8966
|
| 194 |
+
1030,77200,8.1455
|
| 195 |
+
1035,77600,6.7381
|
| 196 |
+
1040,78000,6.5592
|
| 197 |
+
1045,78400,6.0788
|
| 198 |
+
1049,78800,7.1046
|
| 199 |
+
1054,79200,7.1041
|
| 200 |
+
1058,79600,7.6982
|
| 201 |
+
1062,80000,8.7302
|
| 202 |
+
1066,80400,6.6165
|
| 203 |
+
1071,80800,6.9873
|
| 204 |
+
1077,81200,6.9054
|
| 205 |
+
1082,81600,7.456
|
| 206 |
+
1087,82000,8.4276
|
| 207 |
+
1093,82400,6.369
|
| 208 |
+
1097,82800,8.6679
|
| 209 |
+
1101,83200,6.9598
|
| 210 |
+
1107,83600,5.4595
|
| 211 |
+
1112,84000,7.8471
|
| 212 |
+
1116,84400,6.8466
|
| 213 |
+
1121,84800,6.1909
|
| 214 |
+
1127,85200,6.153
|
| 215 |
+
1131,85600,7.6274
|
| 216 |
+
1136,86000,6.3568
|
| 217 |
+
1140,86400,7.0727
|
| 218 |
+
1144,86800,8.7111
|
| 219 |
+
1148,87200,7.0302
|
| 220 |
+
1152,87600,7.515
|
| 221 |
+
1157,88000,6.5863
|
| 222 |
+
1162,88400,6.3346
|
| 223 |
+
1166,88800,6.8116
|
| 224 |
+
1171,89200,6.1335
|
| 225 |
+
1175,89600,7.3719
|
| 226 |
+
1179,90000,6.3049
|
| 227 |
+
1183,90400,7.4515
|
| 228 |
+
1188,90800,6.5131
|
| 229 |
+
1193,91200,7.0855
|
| 230 |
+
1198,91600,5.4287
|
| 231 |
+
1203,92000,6.8094
|
| 232 |
+
1209,92400,5.0978
|
| 233 |
+
1214,92800,5.9272
|
| 234 |
+
1220,93200,4.52
|
| 235 |
+
1225,93600,6.2555
|
| 236 |
+
1231,94000,5.4839
|
| 237 |
+
1236,94400,6.4755
|
| 238 |
+
1245,94800,3.9834
|
| 239 |
+
1251,95200,4.4569
|
| 240 |
+
1257,95600,5.508
|
| 241 |
+
1261,96000,7.6455
|
| 242 |
+
1265,96400,7.3739
|
| 243 |
+
1269,96800,7.8249
|
| 244 |
+
1275,97200,5.2307
|
| 245 |
+
1279,97600,7.6121
|
| 246 |
+
1284,98000,6.1267
|
| 247 |
+
1288,98400,7.3416
|
| 248 |
+
1292,98800,7.5182
|
| 249 |
+
1296,99200,7.3272
|
| 250 |
+
1301,99600,6.8465
|
| 251 |
+
1306,100000,6.4113
|
| 252 |
+
1311,100400,5.7506
|
| 253 |
+
1316,100800,7.3543
|
| 254 |
+
1320,101200,7.6077
|
| 255 |
+
1326,101600,5.5944
|
| 256 |
+
1331,102000,7.3631
|
| 257 |
+
1337,102400,5.2
|
| 258 |
+
1343,102800,4.8723
|
| 259 |
+
1348,103200,6.1586
|
| 260 |
+
1355,103600,4.7696
|
| 261 |
+
1360,104000,6.1973
|
| 262 |
+
1364,104400,6.7792
|
| 263 |
+
1368,104800,7.2198
|
| 264 |
+
1373,105200,7.1729
|
| 265 |
+
1379,105600,5.4069
|
| 266 |
+
1385,106000,4.6109
|
| 267 |
+
1390,106400,7.1239
|
| 268 |
+
1395,106800,5.8867
|
| 269 |
+
1399,107200,7.495
|
| 270 |
+
1403,107600,6.6621
|
| 271 |
+
1407,108000,7.4027
|
| 272 |
+
1412,108400,6.3171
|
| 273 |
+
1416,108800,5.6887
|
| 274 |
+
1421,109200,6.6554
|
| 275 |
+
1425,109600,6.75
|
| 276 |
+
1431,110000,5.4458
|
| 277 |
+
1435,110400,7.2516
|
| 278 |
+
1439,110800,6.3838
|
| 279 |
+
1443,111200,6.9752
|
| 280 |
+
1447,111600,7.2899
|
| 281 |
+
1451,112000,7.285
|
| 282 |
+
1455,112400,6.905
|
| 283 |
+
1460,112800,6.7635
|
| 284 |
+
1464,113200,7.2151
|
| 285 |
+
1468,113600,7.3228
|
| 286 |
+
1472,114000,7.2727
|
| 287 |
+
1477,114400,5.1432
|
| 288 |
+
1484,114800,4.5119
|
| 289 |
+
1488,115200,7.5247
|
| 290 |
+
1493,115600,6.6993
|
| 291 |
+
1497,116000,6.583
|
| 292 |
+
1502,116400,5.6599
|
| 293 |
+
1506,116800,7.3572
|
| 294 |
+
1512,117200,4.6733
|
| 295 |
+
1517,117600,6.9534
|
| 296 |
+
1522,118000,5.8504
|
| 297 |
+
1527,118400,5.7037
|
| 298 |
+
1532,118800,5.3677
|
| 299 |
+
1537,119200,6.6015
|
| 300 |
+
1545,119600,4.1934
|
| 301 |
+
1550,120000,6.0054
|
| 302 |
+
1555,120400,6.3411
|
| 303 |
+
1559,120800,7.6877
|
| 304 |
+
1563,121200,7.6184
|
| 305 |
+
1569,121600,4.5052
|
| 306 |
+
1573,122000,7.209
|
| 307 |
+
1579,122400,6.2706
|
| 308 |
+
1584,122800,6.129
|
| 309 |
+
1588,123200,9.4211
|
| 310 |
+
1595,123600,5.3557
|
| 311 |
+
1601,124000,5.9072
|
| 312 |
+
1608,124400,4.905
|
| 313 |
+
1613,124800,8.0799
|
| 314 |
+
1619,125200,5.1058
|
| 315 |
+
1625,125600,5.3964
|
| 316 |
+
1630,126000,6.4494
|
| 317 |
+
1634,126400,6.4167
|
| 318 |
+
1640,126800,5.6178
|
| 319 |
+
1644,127200,6.6824
|
| 320 |
+
1650,127600,5.9365
|
| 321 |
+
1656,128000,4.972
|
| 322 |
+
1663,128400,4.6724
|
| 323 |
+
1669,128800,4.5071
|
| 324 |
+
1676,129200,4.7821
|
| 325 |
+
1683,129600,4.2253
|
| 326 |
+
1689,130000,5.028
|
| 327 |
+
1693,130400,6.5605
|
| 328 |
+
1699,130800,6.1124
|
| 329 |
+
1705,131200,5.982
|
| 330 |
+
1711,131600,5.4071
|
| 331 |
+
1716,132000,6.5883
|
| 332 |
+
1720,132400,8.6701
|
| 333 |
+
1725,132800,5.8316
|
| 334 |
+
1730,133200,6.5105
|
| 335 |
+
1734,133600,8.651
|
| 336 |
+
1739,134000,7.6199
|
| 337 |
+
1743,134400,7.9109
|
| 338 |
+
1749,134800,7.469
|
| 339 |
+
1753,135200,7.2578
|
| 340 |
+
1760,135600,7.0038
|
| 341 |
+
1765,136000,8.4482
|
| 342 |
+
1770,136400,10.9777
|
| 343 |
+
1776,136800,7.8633
|
| 344 |
+
1781,137200,10.7422
|
| 345 |
+
1786,137600,7.7002
|
| 346 |
+
1791,138000,9.8498
|
| 347 |
+
1796,138400,8.3565
|
| 348 |
+
1802,138800,7.2121
|
| 349 |
+
1808,139200,7.437
|
| 350 |
+
1815,139600,5.7484
|
| 351 |
+
1822,140000,7.3232
|
| 352 |
+
1828,140400,8.0316
|
| 353 |
+
1836,140800,5.3387
|
| 354 |
+
1841,141200,7.3611
|
| 355 |
+
1849,141600,6.0461
|
| 356 |
+
1857,142000,5.8818
|
| 357 |
+
1861,142400,8.7706
|
| 358 |
+
1867,142800,12.6086
|
| 359 |
+
1875,143200,4.9306
|
| 360 |
+
1881,143600,8.4787
|
| 361 |
+
1889,144000,6.6935
|
| 362 |
+
1894,144400,9.2201
|
| 363 |
+
1899,144800,9.3988
|
| 364 |
+
1904,145200,11.3811
|
| 365 |
+
1909,145600,8.435
|
| 366 |
+
1916,146000,7.3488
|
| 367 |
+
1922,146400,10.672
|
| 368 |
+
1927,146800,11.5718
|
| 369 |
+
1934,147200,7.5663
|
| 370 |
+
1940,147600,8.4638
|
| 371 |
+
1944,148000,10.8035
|
| 372 |
+
1954,148400,5.3482
|
| 373 |
+
1961,148800,7.711
|
| 374 |
+
1967,149200,11.0491
|
| 375 |
+
1975,149600,6.2135
|
| 376 |
+
1981,150000,10.6626
|
| 377 |
+
1991,150400,5.506
|
| 378 |
+
1996,150800,9.1663
|
| 379 |
+
2004,151200,5.6931
|
| 380 |
+
2009,151600,11.9436
|
| 381 |
+
2018,152000,8.0473
|
| 382 |
+
2025,152400,7.8392
|
| 383 |
+
2032,152800,8.6977
|
| 384 |
+
2041,153200,5.9514
|
| 385 |
+
2045,153600,15.2402
|
| 386 |
+
2052,154000,9.4472
|
| 387 |
+
2059,154400,5.8932
|
| 388 |
+
2065,154800,9.2129
|
| 389 |
+
2074,155200,6.222
|
| 390 |
+
2078,155600,15.2026
|
| 391 |
+
2090,156000,6.9467
|
| 392 |
+
2099,156400,6.1649
|
| 393 |
+
2108,156800,7.4966
|
| 394 |
+
2117,157200,6.7418
|
| 395 |
+
2126,157600,8.2676
|
| 396 |
+
2133,158000,9.3655
|
| 397 |
+
2141,158400,6.6173
|
| 398 |
+
2145,158800,17.2168
|
| 399 |
+
2154,159200,8.7089
|
| 400 |
+
2160,159600,11.2436
|
| 401 |
+
2165,160000,12.7216
|
| 402 |
+
2169,160400,21.6313
|
| 403 |
+
2174,160800,23.2383
|
| 404 |
+
2179,161200,17.6359
|
| 405 |
+
2185,161600,16.5861
|
| 406 |
+
2191,162000,16.226
|
| 407 |
+
2197,162400,17.9568
|
| 408 |
+
2204,162800,12.871
|
| 409 |
+
2209,163200,13.3918
|
| 410 |
+
2216,163600,15.4123
|
| 411 |
+
2223,164000,11.4062
|
| 412 |
+
2228,164400,14.1367
|
| 413 |
+
2237,164800,8.0556
|
| 414 |
+
2244,165200,8.6179
|
| 415 |
+
2256,165600,6.4841
|
| 416 |
+
2266,166000,9.5171
|
| 417 |
+
2274,166400,11.5372
|
| 418 |
+
2283,166800,9.7408
|
| 419 |
+
2287,167200,21.5043
|
| 420 |
+
2291,167600,23.8779
|
| 421 |
+
2296,168000,20.5695
|
| 422 |
+
2301,168400,23.0533
|
| 423 |
+
2309,168800,11.9759
|
| 424 |
+
2314,169200,23.7854
|
| 425 |
+
2320,169600,17.3165
|
| 426 |
+
2327,170000,15.7337
|
| 427 |
+
2332,170400,21.5258
|
| 428 |
+
2337,170800,22.1934
|
| 429 |
+
2344,171200,18.7651
|
| 430 |
+
2351,171600,10.7436
|
| 431 |
+
2360,172000,10.7671
|
| 432 |
+
2369,172400,10.4788
|
| 433 |
+
2382,172800,7.1276
|
| 434 |
+
2386,173200,21.2732
|
| 435 |
+
2392,173600,18.6327
|
| 436 |
+
2397,174000,19.4716
|
| 437 |
+
2404,174400,12.1719
|
| 438 |
+
2409,174800,14.6277
|
| 439 |
+
2416,175200,14.5393
|
| 440 |
+
2422,175600,15.4089
|
| 441 |
+
2429,176000,14.0683
|
| 442 |
+
2435,176400,15.4539
|
| 443 |
+
2441,176800,13.9568
|
| 444 |
+
2446,177200,19.1191
|
| 445 |
+
2456,177600,9.5304
|
| 446 |
+
2466,178000,7.2711
|
| 447 |
+
2472,178400,10.6102
|
| 448 |
+
2478,178800,11.5563
|
| 449 |
+
2482,179200,19.9556
|
| 450 |
+
2490,179600,9.0751
|
| 451 |
+
2497,180000,6.2264
|
| 452 |
+
2502,180400,18.2541
|
| 453 |
+
2507,180800,9.1061
|
| 454 |
+
2513,181200,15.3433
|
| 455 |
+
2517,181600,19.7729
|
| 456 |
+
2522,182000,13.1969
|
| 457 |
+
2527,182400,18.7246
|
| 458 |
+
2531,182800,14.651
|
| 459 |
+
2537,183200,13.6844
|
| 460 |
+
2543,183600,13.292
|
| 461 |
+
2550,184000,16.4504
|
| 462 |
+
2559,184400,8.7936
|
| 463 |
+
2563,184800,22.5295
|
| 464 |
+
2569,185200,15.0557
|
| 465 |
+
2577,185600,10.3489
|
| 466 |
+
2585,186000,11.0047
|
| 467 |
+
2595,186400,9.2111
|
| 468 |
+
2604,186800,11.11
|
| 469 |
+
2611,187200,10.4671
|
| 470 |
+
2617,187600,12.7915
|
| 471 |
+
2624,188000,13.962
|
| 472 |
+
2633,188400,10.0636
|
| 473 |
+
2637,188800,18.8204
|
| 474 |
+
2642,189200,19.8767
|
| 475 |
+
2647,189600,17.3735
|
| 476 |
+
2654,190000,12.2333
|
| 477 |
+
2664,190400,5.6146
|
| 478 |
+
2670,190800,14.4501
|
| 479 |
+
2675,191200,15.3957
|
| 480 |
+
2685,191600,6.7485
|
| 481 |
+
2689,192000,20.351
|
| 482 |
+
2702,192400,4.2184
|
| 483 |
+
2707,192800,16.1191
|
| 484 |
+
2714,193200,11.0071
|
| 485 |
+
2722,193600,6.7454
|
| 486 |
+
2730,194000,4.1497
|
| 487 |
+
2735,194400,6.5892
|
| 488 |
+
2743,194800,10.7816
|
| 489 |
+
2754,195200,3.9205
|
| 490 |
+
2761,195600,10.0928
|
| 491 |
+
2767,196000,14.8486
|
| 492 |
+
2772,196400,17.4269
|
| 493 |
+
2779,196800,10.2417
|
| 494 |
+
2783,197200,13.4426
|
| 495 |
+
2790,197600,10.6094
|
| 496 |
+
2794,198000,17.6529
|
| 497 |
+
2801,198400,7.4825
|
| 498 |
+
2807,198800,9.2602
|
| 499 |
+
2816,199200,4.0249
|
| 500 |
+
2824,199600,6.6758
|
| 501 |
+
2829,200000,16.2271
|
code/Lake application/logs/results_1/PPO_frozen_lake_log_2.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
28,400,1.0712
|
| 3 |
+
50,800,1.338
|
| 4 |
+
64,1200,2.2055
|
| 5 |
+
75,1600,2.7995
|
| 6 |
+
81,2000,5.4941
|
| 7 |
+
92,2400,3.3249
|
| 8 |
+
96,2800,6.413
|
| 9 |
+
101,3200,6.3269
|
| 10 |
+
106,3600,5.3282
|
| 11 |
+
112,4000,5.6833
|
| 12 |
+
117,4400,5.6105
|
| 13 |
+
123,4800,5.1394
|
| 14 |
+
127,5200,7.3238
|
| 15 |
+
131,5600,7.129
|
| 16 |
+
137,6000,7.7241
|
| 17 |
+
142,6400,4.6711
|
| 18 |
+
148,6800,7.9164
|
| 19 |
+
152,7200,6.7923
|
| 20 |
+
157,7600,7.6097
|
| 21 |
+
162,8000,5.8306
|
| 22 |
+
169,8400,5.6442
|
| 23 |
+
173,8800,8.2231
|
| 24 |
+
179,9200,8.5511
|
| 25 |
+
185,9600,6.1404
|
| 26 |
+
189,10000,10.3032
|
| 27 |
+
194,10400,16.185
|
| 28 |
+
198,10800,20.0498
|
| 29 |
+
202,11200,16.4263
|
| 30 |
+
212,11600,6.5354
|
| 31 |
+
217,12000,13.0853
|
| 32 |
+
222,12400,19.0939
|
| 33 |
+
232,12800,7.205
|
| 34 |
+
241,13200,8.4478
|
| 35 |
+
251,13600,7.8135
|
| 36 |
+
261,14000,6.7748
|
| 37 |
+
271,14400,8.0458
|
| 38 |
+
277,14800,15.136
|
| 39 |
+
287,15200,8.4088
|
| 40 |
+
296,15600,9.5744
|
| 41 |
+
301,16000,17.8938
|
| 42 |
+
307,16400,14.878
|
| 43 |
+
317,16800,9.3055
|
| 44 |
+
323,17200,13.5246
|
| 45 |
+
328,17600,18.3417
|
| 46 |
+
334,18000,12.9367
|
| 47 |
+
341,18400,14.7582
|
| 48 |
+
345,18800,19.8885
|
| 49 |
+
352,19200,11.2014
|
| 50 |
+
359,19600,12.73
|
| 51 |
+
365,20000,11.939
|
| 52 |
+
373,20400,10.1943
|
| 53 |
+
380,20800,12.1353
|
| 54 |
+
394,21200,5.5529
|
| 55 |
+
402,21600,8.7544
|
| 56 |
+
411,22000,7.7306
|
| 57 |
+
422,22400,6.3769
|
| 58 |
+
431,22800,8.1486
|
| 59 |
+
441,23200,6.2044
|
| 60 |
+
446,23600,13.7331
|
| 61 |
+
454,24000,10.2596
|
| 62 |
+
460,24400,10.6293
|
| 63 |
+
469,24800,8.1278
|
| 64 |
+
473,25200,20.7824
|
| 65 |
+
480,25600,14.1365
|
| 66 |
+
484,26000,18.8189
|
| 67 |
+
489,26400,15.9557
|
| 68 |
+
499,26800,6.2855
|
| 69 |
+
504,27200,16.2915
|
| 70 |
+
511,27600,7.7517
|
| 71 |
+
518,28000,11.3698
|
| 72 |
+
522,28400,20.7648
|
| 73 |
+
527,28800,19.0116
|
| 74 |
+
533,29200,16.3582
|
| 75 |
+
539,29600,16.8797
|
| 76 |
+
545,30000,12.4403
|
| 77 |
+
552,30400,16.3851
|
| 78 |
+
557,30800,21.8059
|
| 79 |
+
561,31200,23.8707
|
| 80 |
+
569,31600,9.6084
|
| 81 |
+
577,32000,13.8038
|
| 82 |
+
589,32400,5.1129
|
| 83 |
+
597,32800,13.5648
|
| 84 |
+
605,33200,12.1758
|
| 85 |
+
612,33600,11.9826
|
| 86 |
+
620,34000,8.8992
|
| 87 |
+
628,34400,10.1155
|
| 88 |
+
636,34800,7.6714
|
| 89 |
+
643,35200,9.6841
|
| 90 |
+
648,35600,18.1897
|
| 91 |
+
654,36000,10.757
|
| 92 |
+
664,36400,8.2339
|
| 93 |
+
670,36800,14.7556
|
| 94 |
+
677,37200,12.949
|
| 95 |
+
683,37600,12.4321
|
| 96 |
+
694,38000,7.0777
|
| 97 |
+
699,38400,16.2456
|
| 98 |
+
712,38800,7.5439
|
| 99 |
+
720,39200,13.3273
|
| 100 |
+
725,39600,20.2522
|
| 101 |
+
731,40000,20.2798
|
| 102 |
+
737,40400,19.7969
|
| 103 |
+
742,40800,23.1972
|
| 104 |
+
747,41200,21.0181
|
| 105 |
+
754,41600,15.7453
|
| 106 |
+
764,42000,10.3466
|
| 107 |
+
775,42400,9.1961
|
| 108 |
+
782,42800,13.7833
|
| 109 |
+
787,43200,22.5446
|
| 110 |
+
793,43600,20.7285
|
| 111 |
+
797,44000,20.3011
|
| 112 |
+
803,44400,18.0368
|
| 113 |
+
810,44800,17.1513
|
| 114 |
+
816,45200,16.1653
|
| 115 |
+
822,45600,20.1987
|
| 116 |
+
827,46000,17.5718
|
| 117 |
+
834,46400,15.5697
|
| 118 |
+
844,46800,10.5578
|
| 119 |
+
849,47200,22.3577
|
| 120 |
+
854,47600,22.7982
|
| 121 |
+
862,48000,11.8983
|
| 122 |
+
873,48400,8.1632
|
| 123 |
+
880,48800,13.9313
|
| 124 |
+
888,49200,11.4741
|
| 125 |
+
892,49600,24.8084
|
| 126 |
+
899,50000,17.164
|
| 127 |
+
904,50400,19.6258
|
| 128 |
+
911,50800,15.8299
|
| 129 |
+
918,51200,9.6712
|
| 130 |
+
926,51600,10.8926
|
| 131 |
+
933,52000,12.6279
|
| 132 |
+
941,52400,10.5669
|
| 133 |
+
946,52800,19.1119
|
| 134 |
+
955,53200,6.5965
|
| 135 |
+
962,53600,14.0385
|
| 136 |
+
972,54000,5.9704
|
| 137 |
+
977,54400,18.2561
|
| 138 |
+
984,54800,11.3252
|
| 139 |
+
994,55200,7.534
|
| 140 |
+
999,55600,13.1451
|
| 141 |
+
1005,56000,17.4585
|
| 142 |
+
1011,56400,10.8942
|
| 143 |
+
1017,56800,16.6066
|
| 144 |
+
1023,57200,16.5959
|
| 145 |
+
1027,57600,24.5556
|
| 146 |
+
1037,58000,8.4738
|
| 147 |
+
1042,58400,17.3071
|
| 148 |
+
1049,58800,9.78
|
| 149 |
+
1055,59200,15.1944
|
| 150 |
+
1067,59600,5.3205
|
| 151 |
+
1075,60000,11.3803
|
| 152 |
+
1092,60400,3.6245
|
| 153 |
+
1102,60800,5.8269
|
| 154 |
+
1111,61200,9.5032
|
| 155 |
+
1121,61600,5.4864
|
| 156 |
+
1128,62000,6.7626
|
| 157 |
+
1137,62400,9.5821
|
| 158 |
+
1142,62800,16.4103
|
| 159 |
+
1148,63200,11.2262
|
| 160 |
+
1156,63600,8.8738
|
| 161 |
+
1164,64000,9.1704
|
| 162 |
+
1169,64400,10.2279
|
| 163 |
+
1176,64800,11.3308
|
| 164 |
+
1183,65200,10.4271
|
| 165 |
+
1191,65600,7.8356
|
| 166 |
+
1196,66000,13.8851
|
| 167 |
+
1202,66400,18.9843
|
| 168 |
+
1206,66800,24.7673
|
| 169 |
+
1211,67200,18.6684
|
| 170 |
+
1218,67600,16.0951
|
| 171 |
+
1222,68000,22.9115
|
| 172 |
+
1226,68400,23.6065
|
| 173 |
+
1233,68800,11.1388
|
| 174 |
+
1238,69200,14.0507
|
| 175 |
+
1244,69600,15.8132
|
| 176 |
+
1249,70000,13.9042
|
| 177 |
+
1253,70400,24.436
|
| 178 |
+
1263,70800,9.9095
|
| 179 |
+
1268,71200,15.1151
|
| 180 |
+
1273,71600,18.5476
|
| 181 |
+
1278,72000,18.5341
|
| 182 |
+
1283,72400,16.7705
|
| 183 |
+
1290,72800,12.7882
|
| 184 |
+
1297,73200,14.3823
|
| 185 |
+
1303,73600,11.6998
|
| 186 |
+
1309,74000,17.2901
|
| 187 |
+
1315,74400,15.6958
|
| 188 |
+
1323,74800,10.383
|
| 189 |
+
1329,75200,11.5377
|
| 190 |
+
1334,75600,18.225
|
| 191 |
+
1340,76000,16.318
|
| 192 |
+
1347,76400,11.497
|
| 193 |
+
1353,76800,13.7727
|
| 194 |
+
1360,77200,9.5276
|
| 195 |
+
1367,77600,12.1409
|
| 196 |
+
1373,78000,7.7239
|
| 197 |
+
1383,78400,9.336
|
| 198 |
+
1388,78800,14.569
|
| 199 |
+
1392,79200,17.7591
|
| 200 |
+
1396,79600,21.1039
|
| 201 |
+
1401,80000,15.0516
|
| 202 |
+
1407,80400,14.6828
|
| 203 |
+
1412,80800,15.2347
|
| 204 |
+
1422,81200,8.8604
|
| 205 |
+
1429,81600,10.5288
|
| 206 |
+
1436,82000,14.4741
|
| 207 |
+
1443,82400,9.9426
|
| 208 |
+
1450,82800,13.5522
|
| 209 |
+
1457,83200,12.1773
|
| 210 |
+
1466,83600,12.6106
|
| 211 |
+
1473,84000,15.5286
|
| 212 |
+
1483,84400,9.095
|
| 213 |
+
1489,84800,16.7606
|
| 214 |
+
1493,85200,16.0469
|
| 215 |
+
1499,85600,11.1895
|
| 216 |
+
1504,86000,16.8995
|
| 217 |
+
1509,86400,14.7574
|
| 218 |
+
1514,86800,17.2417
|
| 219 |
+
1521,87200,8.4962
|
| 220 |
+
1533,87600,5.2503
|
| 221 |
+
1539,88000,13.6948
|
| 222 |
+
1546,88400,10.7947
|
| 223 |
+
1553,88800,11.7415
|
| 224 |
+
1563,89200,5.8864
|
| 225 |
+
1572,89600,7.1169
|
| 226 |
+
1577,90000,11.0446
|
| 227 |
+
1585,90400,7.4355
|
| 228 |
+
1591,90800,7.9803
|
| 229 |
+
1598,91200,8.522
|
| 230 |
+
1603,91600,10.4511
|
| 231 |
+
1608,92000,12.7399
|
| 232 |
+
1614,92400,8.2374
|
| 233 |
+
1619,92800,12.8639
|
| 234 |
+
1623,93200,11.7356
|
| 235 |
+
1629,93600,12.5085
|
| 236 |
+
1635,94000,8.7022
|
| 237 |
+
1640,94400,13.0884
|
| 238 |
+
1644,94800,18.9683
|
| 239 |
+
1648,95200,17.3632
|
| 240 |
+
1654,95600,13.1206
|
| 241 |
+
1659,96000,16.7717
|
| 242 |
+
1667,96400,7.295
|
| 243 |
+
1672,96800,14.4554
|
| 244 |
+
1679,97200,10.9414
|
| 245 |
+
1686,97600,11.4577
|
| 246 |
+
1694,98000,10.4921
|
| 247 |
+
1699,98400,11.4891
|
| 248 |
+
1706,98800,12.5354
|
| 249 |
+
1715,99200,9.2944
|
| 250 |
+
1721,99600,15.9709
|
| 251 |
+
1725,100000,17.2995
|
| 252 |
+
1734,100400,6.9714
|
| 253 |
+
1738,100800,15.647
|
| 254 |
+
1746,101200,8.3676
|
| 255 |
+
1754,101600,9.6176
|
| 256 |
+
1761,102000,7.512
|
| 257 |
+
1768,102400,10.3183
|
| 258 |
+
1774,102800,13.3747
|
| 259 |
+
1780,103200,11.1231
|
| 260 |
+
1785,103600,13.8813
|
| 261 |
+
1791,104000,17.1172
|
| 262 |
+
1796,104400,12.4113
|
| 263 |
+
1801,104800,16.1103
|
| 264 |
+
1809,105200,6.39
|
| 265 |
+
1818,105600,8.2949
|
| 266 |
+
1825,106000,11.5045
|
| 267 |
+
1831,106400,10.9331
|
| 268 |
+
1837,106800,11.3621
|
| 269 |
+
1842,107200,18.0518
|
| 270 |
+
1848,107600,12.5318
|
| 271 |
+
1855,108000,13.3056
|
| 272 |
+
1861,108400,12.4526
|
| 273 |
+
1867,108800,12.8945
|
| 274 |
+
1876,109200,8.2563
|
| 275 |
+
1882,109600,11.2807
|
| 276 |
+
1888,110000,12.192
|
| 277 |
+
1896,110400,7.9317
|
| 278 |
+
1903,110800,11.2608
|
| 279 |
+
1908,111200,18.0233
|
| 280 |
+
1912,111600,17.8104
|
| 281 |
+
1918,112000,13.7657
|
| 282 |
+
1925,112400,7.795
|
| 283 |
+
1931,112800,13.0999
|
| 284 |
+
1936,113200,12.6435
|
| 285 |
+
1942,113600,9.7847
|
| 286 |
+
1947,114000,15.6282
|
| 287 |
+
1953,114400,13.0466
|
| 288 |
+
1959,114800,13.1941
|
| 289 |
+
1963,115200,17.225
|
| 290 |
+
1970,115600,10.4272
|
| 291 |
+
1976,116000,10.2674
|
| 292 |
+
1984,116400,7.0663
|
| 293 |
+
1991,116800,7.4921
|
| 294 |
+
1996,117200,12.674
|
| 295 |
+
2005,117600,9.7954
|
| 296 |
+
2010,118000,15.9189
|
| 297 |
+
2016,118400,13.78
|
| 298 |
+
2023,118800,10.5507
|
| 299 |
+
2030,119200,7.0213
|
| 300 |
+
2036,119600,16.1446
|
| 301 |
+
2040,120000,20.1122
|
| 302 |
+
2044,120400,17.4097
|
| 303 |
+
2049,120800,13.002
|
| 304 |
+
2054,121200,15.4103
|
| 305 |
+
2058,121600,13.7301
|
| 306 |
+
2062,122000,17.3055
|
| 307 |
+
2069,122400,12.1489
|
| 308 |
+
2075,122800,10.2724
|
| 309 |
+
2084,123200,8.3912
|
| 310 |
+
2088,123600,20.9833
|
| 311 |
+
2094,124000,9.3535
|
| 312 |
+
2102,124400,12.5569
|
| 313 |
+
2106,124800,22.0025
|
| 314 |
+
2111,125200,15.3937
|
| 315 |
+
2117,125600,13.1022
|
| 316 |
+
2127,126000,7.0628
|
| 317 |
+
2132,126400,19.1772
|
| 318 |
+
2138,126800,12.9516
|
| 319 |
+
2146,127200,9.1282
|
| 320 |
+
2154,127600,8.3276
|
| 321 |
+
2160,128000,9.1102
|
| 322 |
+
2167,128400,8.3557
|
| 323 |
+
2175,128800,9.8998
|
| 324 |
+
2181,129200,8.9511
|
| 325 |
+
2190,129600,9.2503
|
| 326 |
+
2197,130000,8.1281
|
| 327 |
+
2203,130400,15.1956
|
| 328 |
+
2207,130800,16.8667
|
| 329 |
+
2221,131200,4.3684
|
| 330 |
+
2232,131600,6.8054
|
| 331 |
+
2237,132000,14.4013
|
| 332 |
+
2244,132400,16.4632
|
| 333 |
+
2254,132800,7.111
|
| 334 |
+
2261,133200,12.7119
|
| 335 |
+
2268,133600,11.1943
|
| 336 |
+
2277,134000,10.9562
|
| 337 |
+
2284,134400,10.6057
|
| 338 |
+
2291,134800,16.0464
|
| 339 |
+
2298,135200,12.4611
|
| 340 |
+
2302,135600,22.5867
|
| 341 |
+
2308,136000,14.8831
|
| 342 |
+
2315,136400,10.9013
|
| 343 |
+
2319,136800,17.5025
|
| 344 |
+
2327,137200,13.1141
|
| 345 |
+
2333,137600,16.9285
|
| 346 |
+
2343,138000,12.4663
|
| 347 |
+
2350,138400,13.3258
|
| 348 |
+
2357,138800,16.7013
|
| 349 |
+
2361,139200,22.2587
|
| 350 |
+
2366,139600,25.103
|
| 351 |
+
2371,140000,25.5381
|
| 352 |
+
2376,140400,16.0396
|
| 353 |
+
2381,140800,12.2065
|
| 354 |
+
2387,141200,22.5733
|
| 355 |
+
2392,141600,25.1197
|
| 356 |
+
2399,142000,13.928
|
| 357 |
+
2403,142400,22.8321
|
| 358 |
+
2412,142800,11.8089
|
| 359 |
+
2419,143200,12.0245
|
| 360 |
+
2426,143600,11.2781
|
| 361 |
+
2433,144000,9.814
|
| 362 |
+
2440,144400,11.4397
|
| 363 |
+
2445,144800,15.5671
|
| 364 |
+
2451,145200,11.2167
|
| 365 |
+
2457,145600,11.7414
|
| 366 |
+
2465,146000,11.176
|
| 367 |
+
2470,146400,17.8477
|
| 368 |
+
2474,146800,23.4131
|
| 369 |
+
2479,147200,17.3667
|
| 370 |
+
2484,147600,15.4173
|
| 371 |
+
2490,148000,15.5236
|
| 372 |
+
2496,148400,11.9975
|
| 373 |
+
2500,148800,15.5211
|
| 374 |
+
2506,149200,9.2949
|
| 375 |
+
2512,149600,13.2005
|
| 376 |
+
2522,150000,6.7119
|
| 377 |
+
2526,150400,13.047
|
| 378 |
+
2532,150800,10.975
|
| 379 |
+
2537,151200,12.4601
|
| 380 |
+
2545,151600,10.3919
|
| 381 |
+
2549,152000,17.1736
|
| 382 |
+
2555,152400,10.9481
|
| 383 |
+
2561,152800,6.5293
|
| 384 |
+
2565,153200,16.5226
|
| 385 |
+
2574,153600,6.976
|
| 386 |
+
2580,154000,9.178
|
| 387 |
+
2586,154400,9.0912
|
| 388 |
+
2593,154800,8.2036
|
| 389 |
+
2599,155200,11.2736
|
| 390 |
+
2604,155600,11.8792
|
| 391 |
+
2612,156000,8.9914
|
| 392 |
+
2619,156400,7.5987
|
| 393 |
+
2624,156800,16.6534
|
| 394 |
+
2631,157200,9.9542
|
| 395 |
+
2637,157600,11.2406
|
| 396 |
+
2646,158000,5.5051
|
| 397 |
+
2651,158400,13.1209
|
| 398 |
+
2659,158800,11.2929
|
| 399 |
+
2666,159200,9.2889
|
| 400 |
+
2673,159600,6.4488
|
| 401 |
+
2679,160000,8.9596
|
| 402 |
+
2685,160400,12.8978
|
| 403 |
+
2691,160800,12.8375
|
| 404 |
+
2696,161200,16.7479
|
| 405 |
+
2701,161600,10.707
|
| 406 |
+
2709,162000,7.5865
|
| 407 |
+
2720,162400,5.9772
|
| 408 |
+
2727,162800,14.3091
|
| 409 |
+
2735,163200,10.0529
|
| 410 |
+
2741,163600,16.2379
|
| 411 |
+
2750,164000,9.3317
|
| 412 |
+
2761,164400,9.3682
|
| 413 |
+
2772,164800,7.5992
|
| 414 |
+
2777,165200,13.7422
|
| 415 |
+
2785,165600,11.4302
|
| 416 |
+
2796,166000,5.5863
|
| 417 |
+
2800,166400,20.5644
|
| 418 |
+
2809,166800,9.1758
|
| 419 |
+
2819,167200,10.6074
|
| 420 |
+
2824,167600,19.2911
|
| 421 |
+
2829,168000,14.7837
|
| 422 |
+
2834,168400,20.5648
|
| 423 |
+
2839,168800,21.9035
|
| 424 |
+
2846,169200,12.8552
|
| 425 |
+
2852,169600,14.4815
|
| 426 |
+
2860,170000,10.327
|
| 427 |
+
2867,170400,16.0522
|
| 428 |
+
2877,170800,7.9339
|
| 429 |
+
2885,171200,9.3754
|
| 430 |
+
2895,171600,8.846
|
| 431 |
+
2901,172000,18.2031
|
| 432 |
+
2911,172400,9.8781
|
| 433 |
+
2920,172800,8.4863
|
| 434 |
+
2925,173200,14.941
|
| 435 |
+
2933,173600,12.9186
|
| 436 |
+
2941,174000,10.819
|
| 437 |
+
2946,174400,17.9883
|
| 438 |
+
2951,174800,16.5405
|
| 439 |
+
2956,175200,18.8158
|
| 440 |
+
2960,175600,23.2525
|
| 441 |
+
2967,176000,13.0999
|
| 442 |
+
2973,176400,13.952
|
| 443 |
+
2977,176800,21.9684
|
| 444 |
+
2985,177200,10.9067
|
| 445 |
+
2991,177600,11.2303
|
| 446 |
+
2997,178000,13.2209
|
| 447 |
+
3005,178400,11.2834
|
| 448 |
+
3009,178800,16.5292
|
| 449 |
+
3013,179200,21.2372
|
| 450 |
+
3022,179600,11.0619
|
| 451 |
+
3031,180000,7.3537
|
| 452 |
+
3036,180400,14.5772
|
| 453 |
+
3045,180800,9.2121
|
| 454 |
+
3057,181200,5.2037
|
| 455 |
+
3067,181600,6.669
|
| 456 |
+
3075,182000,8.7553
|
| 457 |
+
3081,182400,10.2706
|
| 458 |
+
3087,182800,11.9766
|
| 459 |
+
3098,183200,7.7188
|
| 460 |
+
3104,183600,10.9377
|
| 461 |
+
3112,184000,8.698
|
| 462 |
+
3118,184400,8.7785
|
| 463 |
+
3126,184800,7.6664
|
| 464 |
+
3131,185200,13.2695
|
| 465 |
+
3138,185600,8.0803
|
| 466 |
+
3145,186000,12.0663
|
| 467 |
+
3152,186400,9.1547
|
| 468 |
+
3158,186800,10.6509
|
| 469 |
+
3165,187200,11.0623
|
| 470 |
+
3169,187600,19.4497
|
| 471 |
+
3173,188000,18.2049
|
| 472 |
+
3178,188400,14.0501
|
| 473 |
+
3185,188800,11.2683
|
| 474 |
+
3194,189200,6.596
|
| 475 |
+
3202,189600,11.3825
|
| 476 |
+
3210,190000,7.7631
|
| 477 |
+
3214,190400,12.7471
|
| 478 |
+
3222,190800,12.4214
|
| 479 |
+
3228,191200,11.1834
|
| 480 |
+
3234,191600,12.8345
|
| 481 |
+
3242,192000,10.284
|
| 482 |
+
3248,192400,7.1057
|
| 483 |
+
3253,192800,11.0905
|
| 484 |
+
3258,193200,18.9309
|
| 485 |
+
3263,193600,18.4142
|
| 486 |
+
3269,194000,14.4793
|
| 487 |
+
3273,194400,21.8498
|
| 488 |
+
3277,194800,19.3599
|
| 489 |
+
3283,195200,19.47
|
| 490 |
+
3287,195600,26.6692
|
| 491 |
+
3291,196000,21.4347
|
| 492 |
+
3296,196400,20.3488
|
| 493 |
+
3301,196800,18.7587
|
| 494 |
+
3309,197200,8.8581
|
| 495 |
+
3314,197600,8.1858
|
| 496 |
+
3321,198000,11.0028
|
| 497 |
+
3325,198400,17.812
|
| 498 |
+
3329,198800,21.4082
|
| 499 |
+
3333,199200,20.2986
|
| 500 |
+
3337,199600,25.3884
|
| 501 |
+
3342,200000,18.7209
|
code/Lake application/logs/results_1/PPO_frozen_lake_log_3.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
28,400,0.9697
|
| 3 |
+
49,800,1.4299
|
| 4 |
+
59,1200,3.1168
|
| 5 |
+
67,1600,3.4994
|
| 6 |
+
74,2000,4.3512
|
| 7 |
+
78,2400,6.7968
|
| 8 |
+
84,2800,4.9979
|
| 9 |
+
90,3200,4.8983
|
| 10 |
+
97,3600,5.1283
|
| 11 |
+
104,4000,4.4929
|
| 12 |
+
113,4400,3.0465
|
| 13 |
+
121,4800,3.6254
|
| 14 |
+
127,5200,5.7239
|
| 15 |
+
131,5600,6.4944
|
| 16 |
+
138,6000,4.5873
|
| 17 |
+
143,6400,5.5102
|
| 18 |
+
148,6800,6.1279
|
| 19 |
+
155,7200,4.6213
|
| 20 |
+
160,7600,5.1025
|
| 21 |
+
164,8000,7.2864
|
| 22 |
+
168,8400,7.325
|
| 23 |
+
174,8800,5.1145
|
| 24 |
+
179,9200,5.9985
|
| 25 |
+
183,9600,7.3481
|
| 26 |
+
189,10000,5.0562
|
| 27 |
+
194,10400,5.74
|
| 28 |
+
198,10800,6.7543
|
| 29 |
+
205,11200,4.0238
|
| 30 |
+
210,11600,7.0676
|
| 31 |
+
215,12000,4.578
|
| 32 |
+
220,12400,6.2174
|
| 33 |
+
225,12800,6.4531
|
| 34 |
+
231,13200,4.6471
|
| 35 |
+
237,13600,5.7269
|
| 36 |
+
241,14000,7.5008
|
| 37 |
+
247,14400,5.3946
|
| 38 |
+
251,14800,6.9517
|
| 39 |
+
255,15200,7.4972
|
| 40 |
+
261,15600,5.5219
|
| 41 |
+
265,16000,6.6453
|
| 42 |
+
271,16400,5.998
|
| 43 |
+
279,16800,3.6128
|
| 44 |
+
285,17200,4.8651
|
| 45 |
+
290,17600,7.1438
|
| 46 |
+
295,18000,6.1627
|
| 47 |
+
300,18400,6.7364
|
| 48 |
+
305,18800,5.0593
|
| 49 |
+
311,19200,5.698
|
| 50 |
+
317,19600,5.0444
|
| 51 |
+
322,20000,6.426
|
| 52 |
+
329,20400,5.4239
|
| 53 |
+
334,20800,5.0805
|
| 54 |
+
339,21200,7.5365
|
| 55 |
+
343,21600,6.5818
|
| 56 |
+
348,22000,6.7635
|
| 57 |
+
353,22400,6.5158
|
| 58 |
+
360,22800,4.8907
|
| 59 |
+
365,23200,6.5301
|
| 60 |
+
369,23600,7.0247
|
| 61 |
+
376,24000,4.7928
|
| 62 |
+
381,24400,6.8979
|
| 63 |
+
390,24800,3.7186
|
| 64 |
+
397,25200,4.4294
|
| 65 |
+
403,25600,5.0304
|
| 66 |
+
408,26000,6.3346
|
| 67 |
+
412,26400,8.2039
|
| 68 |
+
417,26800,6.1453
|
| 69 |
+
422,27200,7.8806
|
| 70 |
+
429,27600,4.8061
|
| 71 |
+
434,28000,6.9867
|
| 72 |
+
441,28400,4.2048
|
| 73 |
+
447,28800,6.2038
|
| 74 |
+
454,29200,5.0084
|
| 75 |
+
462,29600,4.0663
|
| 76 |
+
466,30000,6.9928
|
| 77 |
+
471,30400,6.8849
|
| 78 |
+
478,30800,4.8687
|
| 79 |
+
482,31200,7.0303
|
| 80 |
+
487,31600,6.6575
|
| 81 |
+
492,32000,7.9083
|
| 82 |
+
496,32400,7.7999
|
| 83 |
+
502,32800,7.0862
|
| 84 |
+
506,33200,7.493
|
| 85 |
+
511,33600,7.9607
|
| 86 |
+
515,34000,10.1515
|
| 87 |
+
519,34400,10.5739
|
| 88 |
+
523,34800,7.9336
|
| 89 |
+
529,35200,8.113
|
| 90 |
+
535,35600,7.7863
|
| 91 |
+
541,36000,7.2677
|
| 92 |
+
546,36400,7.9735
|
| 93 |
+
552,36800,7.6501
|
| 94 |
+
557,37200,7.2945
|
| 95 |
+
565,37600,4.6367
|
| 96 |
+
569,38000,8.4382
|
| 97 |
+
578,38400,4.0414
|
| 98 |
+
583,38800,7.4847
|
| 99 |
+
588,39200,5.5446
|
| 100 |
+
594,39600,6.5673
|
| 101 |
+
599,40000,6.8408
|
| 102 |
+
604,40400,6.5415
|
| 103 |
+
609,40800,6.8515
|
| 104 |
+
613,41200,7.0606
|
| 105 |
+
617,41600,8.0479
|
| 106 |
+
623,42000,5.6694
|
| 107 |
+
627,42400,7.9936
|
| 108 |
+
632,42800,6.7543
|
| 109 |
+
637,43200,5.8305
|
| 110 |
+
643,43600,6.281
|
| 111 |
+
650,44000,3.5108
|
| 112 |
+
656,44400,5.8348
|
| 113 |
+
660,44800,7.7714
|
| 114 |
+
664,45200,7.4986
|
| 115 |
+
671,45600,4.8578
|
| 116 |
+
679,46000,3.9887
|
| 117 |
+
684,46400,6.534
|
| 118 |
+
689,46800,6.7332
|
| 119 |
+
693,47200,7.6821
|
| 120 |
+
698,47600,6.0771
|
| 121 |
+
704,48000,5.4061
|
| 122 |
+
712,48400,3.1593
|
| 123 |
+
717,48800,7.2291
|
| 124 |
+
723,49200,5.0261
|
| 125 |
+
728,49600,8.1633
|
| 126 |
+
733,50000,6.6328
|
| 127 |
+
738,50400,7.6968
|
| 128 |
+
742,50800,9.8616
|
| 129 |
+
747,51200,8.1682
|
| 130 |
+
753,51600,6.0632
|
| 131 |
+
757,52000,6.9269
|
| 132 |
+
761,52400,8.9436
|
| 133 |
+
768,52800,5.1674
|
| 134 |
+
775,53200,5.4459
|
| 135 |
+
779,53600,8.1192
|
| 136 |
+
785,54000,6.4501
|
| 137 |
+
791,54400,5.4877
|
| 138 |
+
795,54800,9.5873
|
| 139 |
+
800,55200,8.2846
|
| 140 |
+
806,55600,6.569
|
| 141 |
+
812,56000,8.7981
|
| 142 |
+
816,56400,9.4428
|
| 143 |
+
823,56800,5.6861
|
| 144 |
+
829,57200,6.7182
|
| 145 |
+
835,57600,6.808
|
| 146 |
+
840,58000,9.3055
|
| 147 |
+
845,58400,7.6702
|
| 148 |
+
851,58800,6.8338
|
| 149 |
+
855,59200,10.6899
|
| 150 |
+
859,59600,9.518
|
| 151 |
+
864,60000,8.8663
|
| 152 |
+
869,60400,7.8428
|
| 153 |
+
873,60800,8.1849
|
| 154 |
+
879,61200,5.1644
|
| 155 |
+
886,61600,6.1759
|
| 156 |
+
893,62000,4.8597
|
| 157 |
+
901,62400,4.9238
|
| 158 |
+
908,62800,4.5957
|
| 159 |
+
913,63200,5.2139
|
| 160 |
+
920,63600,4.4207
|
| 161 |
+
929,64000,3.8686
|
| 162 |
+
933,64400,7.7389
|
| 163 |
+
937,64800,7.038
|
| 164 |
+
942,65200,6.4901
|
| 165 |
+
946,65600,8.3237
|
| 166 |
+
951,66000,6.4676
|
| 167 |
+
956,66400,7.9483
|
| 168 |
+
960,66800,7.4556
|
| 169 |
+
966,67200,5.8683
|
| 170 |
+
972,67600,4.9513
|
| 171 |
+
977,68000,6.3514
|
| 172 |
+
982,68400,7.6564
|
| 173 |
+
989,68800,3.9463
|
| 174 |
+
996,69200,4.8304
|
| 175 |
+
1000,69600,7.4777
|
| 176 |
+
1007,70000,5.3967
|
| 177 |
+
1012,70400,5.9162
|
| 178 |
+
1016,70800,9.1032
|
| 179 |
+
1022,71200,5.6404
|
| 180 |
+
1027,71600,7.1579
|
| 181 |
+
1033,72000,5.5076
|
| 182 |
+
1039,72400,5.1194
|
| 183 |
+
1044,72800,5.7785
|
| 184 |
+
1049,73200,7.2613
|
| 185 |
+
1054,73600,6.0494
|
| 186 |
+
1059,74000,7.0271
|
| 187 |
+
1063,74400,7.5499
|
| 188 |
+
1067,74800,7.0446
|
| 189 |
+
1072,75200,6.0907
|
| 190 |
+
1076,75600,6.7148
|
| 191 |
+
1081,76000,7.3562
|
| 192 |
+
1086,76400,5.7122
|
| 193 |
+
1092,76800,4.6709
|
| 194 |
+
1099,77200,4.9494
|
| 195 |
+
1104,77600,5.152
|
| 196 |
+
1109,78000,7.1534
|
| 197 |
+
1115,78400,5.9638
|
| 198 |
+
1119,78800,7.1621
|
| 199 |
+
1123,79200,8.3579
|
| 200 |
+
1128,79600,8.5829
|
| 201 |
+
1137,80000,4.075
|
| 202 |
+
1144,80400,3.9743
|
| 203 |
+
1149,80800,7.1503
|
| 204 |
+
1155,81200,5.8118
|
| 205 |
+
1161,81600,5.5331
|
| 206 |
+
1167,82000,5.4701
|
| 207 |
+
1176,82400,4.1435
|
| 208 |
+
1180,82800,6.5754
|
| 209 |
+
1186,83200,5.7
|
| 210 |
+
1192,83600,6.4401
|
| 211 |
+
1198,84000,5.9802
|
| 212 |
+
1202,84400,7.2848
|
| 213 |
+
1207,84800,6.2365
|
| 214 |
+
1213,85200,6.6385
|
| 215 |
+
1218,85600,7.7489
|
| 216 |
+
1224,86000,5.8286
|
| 217 |
+
1229,86400,6.656
|
| 218 |
+
1234,86800,7.9111
|
| 219 |
+
1240,87200,5.2668
|
| 220 |
+
1244,87600,8.2528
|
| 221 |
+
1250,88000,5.2783
|
| 222 |
+
1254,88400,7.6867
|
| 223 |
+
1258,88800,7.9634
|
| 224 |
+
1263,89200,7.2483
|
| 225 |
+
1270,89600,6.3149
|
| 226 |
+
1274,90000,7.6798
|
| 227 |
+
1281,90400,6.6529
|
| 228 |
+
1286,90800,6.4735
|
| 229 |
+
1296,91200,3.4323
|
| 230 |
+
1301,91600,7.3159
|
| 231 |
+
1306,92000,8.4848
|
| 232 |
+
1311,92400,6.9908
|
| 233 |
+
1316,92800,8.4663
|
| 234 |
+
1326,93200,3.5854
|
| 235 |
+
1331,93600,8.0148
|
| 236 |
+
1336,94000,7.4098
|
| 237 |
+
1341,94400,6.6913
|
| 238 |
+
1348,94800,4.7574
|
| 239 |
+
1355,95200,5.4236
|
| 240 |
+
1361,95600,6.3331
|
| 241 |
+
1369,96000,4.811
|
| 242 |
+
1373,96400,8.9366
|
| 243 |
+
1377,96800,8.122
|
| 244 |
+
1383,97200,6.3531
|
| 245 |
+
1388,97600,5.7501
|
| 246 |
+
1395,98000,5.7337
|
| 247 |
+
1400,98400,6.524
|
| 248 |
+
1405,98800,5.8413
|
| 249 |
+
1414,99200,3.6238
|
| 250 |
+
1419,99600,6.8038
|
| 251 |
+
1427,100000,4.1713
|
| 252 |
+
1431,100400,6.4416
|
| 253 |
+
1435,100800,7.8524
|
| 254 |
+
1440,101200,7.2999
|
| 255 |
+
1444,101600,7.8906
|
| 256 |
+
1448,102000,7.2445
|
| 257 |
+
1452,102400,7.8529
|
| 258 |
+
1457,102800,7.3003
|
| 259 |
+
1463,103200,5.6059
|
| 260 |
+
1467,103600,7.8356
|
| 261 |
+
1472,104000,6.6397
|
| 262 |
+
1477,104400,6.4739
|
| 263 |
+
1481,104800,6.9889
|
| 264 |
+
1486,105200,6.2231
|
| 265 |
+
1490,105600,7.6594
|
| 266 |
+
1495,106000,6.1959
|
| 267 |
+
1501,106400,5.118
|
| 268 |
+
1507,106800,4.5929
|
| 269 |
+
1513,107200,5.9776
|
| 270 |
+
1518,107600,6.0486
|
| 271 |
+
1522,108000,7.2906
|
| 272 |
+
1526,108400,6.2824
|
| 273 |
+
1531,108800,5.9725
|
| 274 |
+
1537,109200,5.3613
|
| 275 |
+
1541,109600,6.6071
|
| 276 |
+
1546,110000,6.5305
|
| 277 |
+
1551,110400,5.922
|
| 278 |
+
1555,110800,6.7349
|
| 279 |
+
1561,111200,5.5868
|
| 280 |
+
1565,111600,6.026
|
| 281 |
+
1570,112000,6.0956
|
| 282 |
+
1575,112400,6.1964
|
| 283 |
+
1580,112800,6.6359
|
| 284 |
+
1584,113200,6.554
|
| 285 |
+
1590,113600,6.2213
|
| 286 |
+
1598,114000,4.2887
|
| 287 |
+
1604,114400,4.9337
|
| 288 |
+
1610,114800,5.8288
|
| 289 |
+
1614,115200,8.2018
|
| 290 |
+
1618,115600,7.1357
|
| 291 |
+
1623,116000,7.2247
|
| 292 |
+
1629,116400,5.1857
|
| 293 |
+
1633,116800,6.6046
|
| 294 |
+
1638,117200,5.8635
|
| 295 |
+
1642,117600,7.5453
|
| 296 |
+
1648,118000,6.806
|
| 297 |
+
1652,118400,7.8409
|
| 298 |
+
1656,118800,8.5085
|
| 299 |
+
1661,119200,6.7829
|
| 300 |
+
1666,119600,6.8868
|
| 301 |
+
1675,120000,4.2945
|
| 302 |
+
1682,120400,4.4064
|
| 303 |
+
1689,120800,5.0664
|
| 304 |
+
1694,121200,6.5454
|
| 305 |
+
1701,121600,5.6222
|
| 306 |
+
1708,122000,5.4888
|
| 307 |
+
1712,122400,11.0368
|
| 308 |
+
1717,122800,7.0496
|
| 309 |
+
1722,123200,8.6749
|
| 310 |
+
1729,123600,6.9978
|
| 311 |
+
1737,124000,5.4495
|
| 312 |
+
1747,124400,4.0671
|
| 313 |
+
1753,124800,7.6818
|
| 314 |
+
1759,125200,6.4432
|
| 315 |
+
1763,125600,10.7613
|
| 316 |
+
1770,126000,6.4108
|
| 317 |
+
1778,126400,4.3716
|
| 318 |
+
1784,126800,6.7246
|
| 319 |
+
1788,127200,10.4382
|
| 320 |
+
1794,127600,7.1896
|
| 321 |
+
1800,128000,7.588
|
| 322 |
+
1805,128400,9.4403
|
| 323 |
+
1809,128800,11.0176
|
| 324 |
+
1813,129200,11.7979
|
| 325 |
+
1818,129600,8.75
|
| 326 |
+
1827,130000,4.6859
|
| 327 |
+
1834,130400,6.7395
|
| 328 |
+
1839,130800,8.993
|
| 329 |
+
1843,131200,11.3917
|
| 330 |
+
1849,131600,7.4701
|
| 331 |
+
1855,132000,5.7999
|
| 332 |
+
1862,132400,8.5014
|
| 333 |
+
1866,132800,11.0123
|
| 334 |
+
1879,133200,3.7213
|
| 335 |
+
1884,133600,10.6388
|
| 336 |
+
1892,134000,5.8487
|
| 337 |
+
1898,134400,7.1305
|
| 338 |
+
1904,134800,8.8896
|
| 339 |
+
1910,135200,6.4877
|
| 340 |
+
1918,135600,6.8896
|
| 341 |
+
1924,136000,6.4605
|
| 342 |
+
1930,136400,9.0044
|
| 343 |
+
1934,136800,10.5351
|
| 344 |
+
1940,137200,9.2294
|
| 345 |
+
1945,137600,10.9699
|
| 346 |
+
1949,138000,10.0906
|
| 347 |
+
1954,138400,10.2141
|
| 348 |
+
1959,138800,11.3224
|
| 349 |
+
1964,139200,9.5563
|
| 350 |
+
1969,139600,6.9219
|
| 351 |
+
1974,140000,11.1704
|
| 352 |
+
1978,140400,11.1142
|
| 353 |
+
1982,140800,11.9859
|
| 354 |
+
1986,141200,11.8602
|
| 355 |
+
1990,141600,11.6316
|
| 356 |
+
1994,142000,11.6433
|
| 357 |
+
2000,142400,9.2738
|
| 358 |
+
2006,142800,7.5194
|
| 359 |
+
2013,143200,6.4846
|
| 360 |
+
2019,143600,7.8079
|
| 361 |
+
2023,144000,9.8408
|
| 362 |
+
2031,144400,6.9516
|
| 363 |
+
2035,144800,9.8839
|
| 364 |
+
2041,145200,7.7938
|
| 365 |
+
2049,145600,7.6839
|
| 366 |
+
2054,146000,7.4943
|
| 367 |
+
2061,146400,8.0578
|
| 368 |
+
2070,146800,4.8268
|
| 369 |
+
2075,147200,10.4681
|
| 370 |
+
2081,147600,7.9008
|
| 371 |
+
2086,148000,11.0989
|
| 372 |
+
2091,148400,11.994
|
| 373 |
+
2095,148800,9.9046
|
| 374 |
+
2105,149200,4.7372
|
| 375 |
+
2111,149600,9.8328
|
| 376 |
+
2116,150000,11.0763
|
| 377 |
+
2122,150400,10.9598
|
| 378 |
+
2129,150800,7.7438
|
| 379 |
+
2135,151200,8.6623
|
| 380 |
+
2140,151600,12.6654
|
| 381 |
+
2146,152000,8.1182
|
| 382 |
+
2151,152400,9.0062
|
| 383 |
+
2157,152800,8.5988
|
| 384 |
+
2163,153200,8.9284
|
| 385 |
+
2167,153600,14.2702
|
| 386 |
+
2173,154000,10.585
|
| 387 |
+
2178,154400,11.7006
|
| 388 |
+
2186,154800,6.7155
|
| 389 |
+
2192,155200,7.7146
|
| 390 |
+
2198,155600,8.8118
|
| 391 |
+
2203,156000,11.7081
|
| 392 |
+
2212,156400,4.9161
|
| 393 |
+
2217,156800,11.632
|
| 394 |
+
2222,157200,12.2152
|
| 395 |
+
2227,157600,12.1379
|
| 396 |
+
2236,158000,4.0621
|
| 397 |
+
2244,158400,7.7841
|
| 398 |
+
2250,158800,8.4301
|
| 399 |
+
2260,159200,5.0246
|
| 400 |
+
2266,159600,7.0603
|
| 401 |
+
2274,160000,6.5755
|
| 402 |
+
2282,160400,7.0485
|
| 403 |
+
2287,160800,9.5418
|
| 404 |
+
2297,161200,5.8611
|
| 405 |
+
2305,161600,7.2947
|
| 406 |
+
2312,162000,7.2398
|
| 407 |
+
2316,162400,13.7584
|
| 408 |
+
2322,162800,11.7808
|
| 409 |
+
2329,163200,11.604
|
| 410 |
+
2334,163600,13.0979
|
| 411 |
+
2342,164000,8.5548
|
| 412 |
+
2347,164400,14.6783
|
| 413 |
+
2353,164800,12.7043
|
| 414 |
+
2358,165200,13.3911
|
| 415 |
+
2363,165600,11.383
|
| 416 |
+
2371,166000,8.1752
|
| 417 |
+
2376,166400,11.7496
|
| 418 |
+
2382,166800,12.7269
|
| 419 |
+
2388,167200,8.3856
|
| 420 |
+
2397,167600,7.333
|
| 421 |
+
2406,168000,6.2778
|
| 422 |
+
2411,168400,11.0526
|
| 423 |
+
2418,168800,9.0626
|
| 424 |
+
2424,169200,7.5325
|
| 425 |
+
2436,169600,4.0862
|
| 426 |
+
2441,170000,10.966
|
| 427 |
+
2446,170400,9.7353
|
| 428 |
+
2452,170800,8.3233
|
| 429 |
+
2459,171200,5.748
|
| 430 |
+
2465,171600,7.1726
|
| 431 |
+
2472,172000,7.998
|
| 432 |
+
2481,172400,6.561
|
| 433 |
+
2488,172800,6.2343
|
| 434 |
+
2495,173200,7.5135
|
| 435 |
+
2504,173600,4.6772
|
| 436 |
+
2509,174000,9.5455
|
| 437 |
+
2516,174400,7.1717
|
| 438 |
+
2521,174800,9.9983
|
| 439 |
+
2526,175200,8.8316
|
| 440 |
+
2535,175600,4.6613
|
| 441 |
+
2543,176000,4.4261
|
| 442 |
+
2551,176400,7.2283
|
| 443 |
+
2556,176800,14.7301
|
| 444 |
+
2562,177200,11.5262
|
| 445 |
+
2569,177600,9.4323
|
| 446 |
+
2574,178000,14.3105
|
| 447 |
+
2580,178400,11.4739
|
| 448 |
+
2584,178800,14.4407
|
| 449 |
+
2590,179200,13.8673
|
| 450 |
+
2596,179600,13.3536
|
| 451 |
+
2600,180000,12.2952
|
| 452 |
+
2607,180400,11.8378
|
| 453 |
+
2611,180800,18.9288
|
| 454 |
+
2615,181200,13.6898
|
| 455 |
+
2622,181600,10.7245
|
| 456 |
+
2626,182000,17.0192
|
| 457 |
+
2631,182400,17.5475
|
| 458 |
+
2636,182800,14.2294
|
| 459 |
+
2640,183200,18.8072
|
| 460 |
+
2646,183600,10.0944
|
| 461 |
+
2650,184000,14.8466
|
| 462 |
+
2655,184400,12.8789
|
| 463 |
+
2663,184800,7.384
|
| 464 |
+
2668,185200,12.7785
|
| 465 |
+
2680,185600,5.2209
|
| 466 |
+
2687,186000,8.8431
|
| 467 |
+
2695,186400,8.6145
|
| 468 |
+
2703,186800,6.0656
|
| 469 |
+
2708,187200,11.2342
|
| 470 |
+
2714,187600,9.9512
|
| 471 |
+
2723,188000,6.9898
|
| 472 |
+
2729,188400,10.1041
|
| 473 |
+
2739,188800,6.2203
|
| 474 |
+
2744,189200,12.3465
|
| 475 |
+
2754,189600,5.1606
|
| 476 |
+
2761,190000,10.6421
|
| 477 |
+
2767,190400,10.2982
|
| 478 |
+
2777,190800,5.2088
|
| 479 |
+
2782,191200,10.9174
|
| 480 |
+
2786,191600,13.2552
|
| 481 |
+
2791,192000,12.1285
|
| 482 |
+
2798,192400,8.8285
|
| 483 |
+
2805,192800,7.9108
|
| 484 |
+
2814,193200,7.3404
|
| 485 |
+
2819,193600,12.2333
|
| 486 |
+
2825,194000,9.6662
|
| 487 |
+
2832,194400,8.0742
|
| 488 |
+
2837,194800,12.2136
|
| 489 |
+
2843,195200,12.3488
|
| 490 |
+
2850,195600,10.1218
|
| 491 |
+
2856,196000,12.0153
|
| 492 |
+
2862,196400,11.8017
|
| 493 |
+
2869,196800,10.381
|
| 494 |
+
2874,197200,14.0652
|
| 495 |
+
2879,197600,14.9152
|
| 496 |
+
2887,198000,8.0053
|
| 497 |
+
2893,198400,12.0596
|
| 498 |
+
2897,198800,15.0738
|
| 499 |
+
2902,199200,16.9263
|
| 500 |
+
2907,199600,12.2609
|
| 501 |
+
2912,200000,15.858
|
code/Lake application/logs/results_1/PPO_frozen_lake_log_4.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
22,400,1.3382
|
| 3 |
+
40,800,1.8287
|
| 4 |
+
52,1200,2.5231
|
| 5 |
+
66,1600,2.103
|
| 6 |
+
77,2000,3.0155
|
| 7 |
+
85,2400,3.7714
|
| 8 |
+
90,2800,5.8035
|
| 9 |
+
96,3200,5.2274
|
| 10 |
+
100,3600,6.3303
|
| 11 |
+
107,4000,4.8019
|
| 12 |
+
111,4400,6.4724
|
| 13 |
+
117,4800,5.5563
|
| 14 |
+
122,5200,6.2998
|
| 15 |
+
128,5600,5.1646
|
| 16 |
+
132,6000,7.1287
|
| 17 |
+
138,6400,5.8314
|
| 18 |
+
142,6800,7.4052
|
| 19 |
+
147,7200,6.2875
|
| 20 |
+
152,7600,5.6015
|
| 21 |
+
157,8000,6.1754
|
| 22 |
+
162,8400,7.082
|
| 23 |
+
167,8800,6.4028
|
| 24 |
+
171,9200,7.2559
|
| 25 |
+
176,9600,6.9444
|
| 26 |
+
181,10000,6.2933
|
| 27 |
+
186,10400,5.2366
|
| 28 |
+
195,10800,3.9987
|
| 29 |
+
201,11200,5.395
|
| 30 |
+
207,11600,4.9618
|
| 31 |
+
213,12000,5.1983
|
| 32 |
+
219,12400,4.4254
|
| 33 |
+
225,12800,5.7096
|
| 34 |
+
229,13200,7.1898
|
| 35 |
+
235,13600,5.3824
|
| 36 |
+
242,14000,4.5048
|
| 37 |
+
250,14400,3.7881
|
| 38 |
+
255,14800,5.2267
|
| 39 |
+
260,15200,7.031
|
| 40 |
+
264,15600,6.9285
|
| 41 |
+
268,16000,7.5476
|
| 42 |
+
273,16400,5.6767
|
| 43 |
+
278,16800,6.5262
|
| 44 |
+
283,17200,6.1492
|
| 45 |
+
288,17600,5.6468
|
| 46 |
+
292,18000,7.6928
|
| 47 |
+
297,18400,6.6286
|
| 48 |
+
301,18800,7.7454
|
| 49 |
+
305,19200,6.5254
|
| 50 |
+
312,19600,4.8168
|
| 51 |
+
317,20000,6.5406
|
| 52 |
+
322,20400,6.5504
|
| 53 |
+
326,20800,6.7464
|
| 54 |
+
331,21200,6.2836
|
| 55 |
+
336,21600,6.7817
|
| 56 |
+
340,22000,6.6796
|
| 57 |
+
344,22400,7.2005
|
| 58 |
+
351,22800,4.3842
|
| 59 |
+
355,23200,7.6366
|
| 60 |
+
359,23600,7.4904
|
| 61 |
+
365,24000,6.1521
|
| 62 |
+
369,24400,7.6973
|
| 63 |
+
373,24800,7.0403
|
| 64 |
+
377,25200,6.9523
|
| 65 |
+
382,25600,7.1983
|
| 66 |
+
387,26000,6.4586
|
| 67 |
+
391,26400,6.4252
|
| 68 |
+
397,26800,5.499
|
| 69 |
+
402,27200,5.0302
|
| 70 |
+
407,27600,6.8429
|
| 71 |
+
413,28000,4.7347
|
| 72 |
+
418,28400,6.183
|
| 73 |
+
423,28800,6.8443
|
| 74 |
+
427,29200,7.5077
|
| 75 |
+
431,29600,6.0028
|
| 76 |
+
437,30000,6.0114
|
| 77 |
+
441,30400,6.4562
|
| 78 |
+
446,30800,6.7718
|
| 79 |
+
450,31200,6.276
|
| 80 |
+
454,31600,7.5279
|
| 81 |
+
458,32000,7.9159
|
| 82 |
+
462,32400,8.0556
|
| 83 |
+
468,32800,5.5321
|
| 84 |
+
473,33200,5.7863
|
| 85 |
+
479,33600,5.8919
|
| 86 |
+
484,34000,5.3791
|
| 87 |
+
488,34400,7.7837
|
| 88 |
+
494,34800,5.618
|
| 89 |
+
498,35200,7.0261
|
| 90 |
+
502,35600,7.828
|
| 91 |
+
507,36000,6.4049
|
| 92 |
+
512,36400,6.4361
|
| 93 |
+
517,36800,5.7041
|
| 94 |
+
522,37200,7.132
|
| 95 |
+
526,37600,7.4088
|
| 96 |
+
530,38000,7.8924
|
| 97 |
+
535,38400,5.7086
|
| 98 |
+
540,38800,6.5723
|
| 99 |
+
544,39200,7.588
|
| 100 |
+
548,39600,6.1673
|
| 101 |
+
552,40000,7.8064
|
| 102 |
+
557,40400,6.5526
|
| 103 |
+
562,40800,7.5729
|
| 104 |
+
566,41200,6.3143
|
| 105 |
+
570,41600,8.4047
|
| 106 |
+
575,42000,7.2824
|
| 107 |
+
579,42400,7.7924
|
| 108 |
+
583,42800,7.4971
|
| 109 |
+
587,43200,6.6631
|
| 110 |
+
592,43600,7.1095
|
| 111 |
+
596,44000,6.4556
|
| 112 |
+
600,44400,7.6746
|
| 113 |
+
604,44800,7.6558
|
| 114 |
+
609,45200,6.194
|
| 115 |
+
614,45600,7.1185
|
| 116 |
+
619,46000,5.7002
|
| 117 |
+
624,46400,6.5519
|
| 118 |
+
629,46800,6.8268
|
| 119 |
+
633,47200,6.9014
|
| 120 |
+
638,47600,6.301
|
| 121 |
+
644,48000,6.1982
|
| 122 |
+
651,48400,4.9436
|
| 123 |
+
655,48800,6.7595
|
| 124 |
+
661,49200,6.5292
|
| 125 |
+
666,49600,5.158
|
| 126 |
+
671,50000,6.6904
|
| 127 |
+
675,50400,8.0493
|
| 128 |
+
679,50800,7.9991
|
| 129 |
+
687,51200,4.3244
|
| 130 |
+
692,51600,7.5411
|
| 131 |
+
696,52000,9.858
|
| 132 |
+
700,52400,8.5624
|
| 133 |
+
704,52800,7.9155
|
| 134 |
+
709,53200,8.9057
|
| 135 |
+
713,53600,9.5471
|
| 136 |
+
718,54000,7.1981
|
| 137 |
+
722,54400,8.6158
|
| 138 |
+
726,54800,10.1275
|
| 139 |
+
736,55200,4.2344
|
| 140 |
+
742,55600,7.8258
|
| 141 |
+
747,56000,7.7521
|
| 142 |
+
753,56400,9.0208
|
| 143 |
+
759,56800,7.7654
|
| 144 |
+
765,57200,8.6142
|
| 145 |
+
771,57600,6.8121
|
| 146 |
+
776,58000,10.9618
|
| 147 |
+
781,58400,10.3827
|
| 148 |
+
785,58800,10.0289
|
| 149 |
+
791,59200,9.8831
|
| 150 |
+
800,59600,6.9521
|
| 151 |
+
806,60000,7.7517
|
| 152 |
+
815,60400,8.8211
|
| 153 |
+
821,60800,8.9909
|
| 154 |
+
827,61200,8.7221
|
| 155 |
+
836,61600,7.7684
|
| 156 |
+
841,62000,9.6272
|
| 157 |
+
845,62400,14.9976
|
| 158 |
+
850,62800,11.6372
|
| 159 |
+
857,63200,8.8488
|
| 160 |
+
865,63600,7.6496
|
| 161 |
+
871,64000,10.0977
|
| 162 |
+
876,64400,10.3548
|
| 163 |
+
883,64800,10.7205
|
| 164 |
+
889,65200,12.5606
|
| 165 |
+
896,65600,9.843
|
| 166 |
+
902,66000,11.6978
|
| 167 |
+
909,66400,10.94
|
| 168 |
+
915,66800,11.5443
|
| 169 |
+
920,67200,14.4685
|
| 170 |
+
927,67600,7.6312
|
| 171 |
+
934,68000,9.3931
|
| 172 |
+
941,68400,7.5232
|
| 173 |
+
946,68800,13.3119
|
| 174 |
+
952,69200,15.4347
|
| 175 |
+
956,69600,14.1605
|
| 176 |
+
963,70000,12.82
|
| 177 |
+
968,70400,14.5149
|
| 178 |
+
973,70800,14.5627
|
| 179 |
+
979,71200,12.8107
|
| 180 |
+
984,71600,16.1239
|
| 181 |
+
992,72000,8.8389
|
| 182 |
+
997,72400,15.4108
|
| 183 |
+
1008,72800,3.5878
|
| 184 |
+
1015,73200,11.6583
|
| 185 |
+
1023,73600,4.6755
|
| 186 |
+
1029,74000,7.929
|
| 187 |
+
1034,74400,8.3184
|
| 188 |
+
1043,74800,4.4589
|
| 189 |
+
1049,75200,5.8413
|
| 190 |
+
1055,75600,5.8525
|
| 191 |
+
1061,76000,5.7395
|
| 192 |
+
1066,76400,7.915
|
| 193 |
+
1072,76800,6.9911
|
| 194 |
+
1077,77200,9.5236
|
| 195 |
+
1085,77600,6.0534
|
| 196 |
+
1091,78000,6.3831
|
| 197 |
+
1098,78400,5.0208
|
| 198 |
+
1104,78800,8.7623
|
| 199 |
+
1109,79200,6.9585
|
| 200 |
+
1117,79600,6.3551
|
| 201 |
+
1121,80000,14.1976
|
| 202 |
+
1127,80400,9.871
|
| 203 |
+
1131,80800,13.4573
|
| 204 |
+
1138,81200,7.47
|
| 205 |
+
1143,81600,7.7708
|
| 206 |
+
1149,82000,10.2425
|
| 207 |
+
1154,82400,13.5466
|
| 208 |
+
1160,82800,8.8403
|
| 209 |
+
1166,83200,9.1941
|
| 210 |
+
1171,83600,12.7902
|
| 211 |
+
1179,84000,6.3707
|
| 212 |
+
1186,84400,8.839
|
| 213 |
+
1195,84800,6.0659
|
| 214 |
+
1202,85200,9.7067
|
| 215 |
+
1210,85600,5.3875
|
| 216 |
+
1219,86000,7.3249
|
| 217 |
+
1228,86400,8.1348
|
| 218 |
+
1240,86800,4.2997
|
| 219 |
+
1246,87200,7.7715
|
| 220 |
+
1253,87600,9.9431
|
| 221 |
+
1261,88000,5.8097
|
| 222 |
+
1268,88400,9.7243
|
| 223 |
+
1275,88800,9.9173
|
| 224 |
+
1281,89200,8.526
|
| 225 |
+
1291,89600,6.5095
|
| 226 |
+
1296,90000,10.6242
|
| 227 |
+
1305,90400,6.8888
|
| 228 |
+
1313,90800,6.5251
|
| 229 |
+
1320,91200,7.6252
|
| 230 |
+
1330,91600,6.4161
|
| 231 |
+
1339,92000,6.192
|
| 232 |
+
1345,92400,10.4853
|
| 233 |
+
1352,92800,6.5388
|
| 234 |
+
1358,93200,12.3501
|
| 235 |
+
1365,93600,7.43
|
| 236 |
+
1373,94000,7.5638
|
| 237 |
+
1378,94400,15.0633
|
| 238 |
+
1384,94800,14.7513
|
| 239 |
+
1396,95200,6.5226
|
| 240 |
+
1403,95600,8.8394
|
| 241 |
+
1410,96000,7.9987
|
| 242 |
+
1420,96400,7.5662
|
| 243 |
+
1427,96800,8.6258
|
| 244 |
+
1435,97200,9.7086
|
| 245 |
+
1444,97600,8.0059
|
| 246 |
+
1451,98000,10.653
|
| 247 |
+
1459,98400,7.4015
|
| 248 |
+
1471,98800,4.6864
|
| 249 |
+
1479,99200,7.8408
|
| 250 |
+
1485,99600,10.5011
|
| 251 |
+
1491,100000,14.4978
|
| 252 |
+
1498,100400,13.2709
|
| 253 |
+
1504,100800,11.8503
|
| 254 |
+
1511,101200,10.111
|
| 255 |
+
1518,101600,7.1249
|
| 256 |
+
1524,102000,11.0255
|
| 257 |
+
1533,102400,9.3004
|
| 258 |
+
1545,102800,4.3136
|
| 259 |
+
1549,103200,20.1579
|
| 260 |
+
1562,103600,5.6588
|
| 261 |
+
1567,104000,11.4403
|
| 262 |
+
1574,104400,7.7448
|
| 263 |
+
1582,104800,8.4585
|
| 264 |
+
1591,105200,7.9123
|
| 265 |
+
1596,105600,12.7572
|
| 266 |
+
1602,106000,9.7458
|
| 267 |
+
1613,106400,6.0184
|
| 268 |
+
1621,106800,7.3456
|
| 269 |
+
1627,107200,10.2534
|
| 270 |
+
1632,107600,14.4112
|
| 271 |
+
1637,108000,10.8765
|
| 272 |
+
1647,108400,8.215
|
| 273 |
+
1652,108800,14.8476
|
| 274 |
+
1658,109200,13.0967
|
| 275 |
+
1666,109600,9.5115
|
| 276 |
+
1672,110000,12.5662
|
| 277 |
+
1680,110400,9.9636
|
| 278 |
+
1685,110800,17.9725
|
| 279 |
+
1690,111200,21.6135
|
| 280 |
+
1695,111600,16.7148
|
| 281 |
+
1702,112000,11.3757
|
| 282 |
+
1709,112400,11.6943
|
| 283 |
+
1714,112800,20.2134
|
| 284 |
+
1718,113200,15.6719
|
| 285 |
+
1724,113600,17.1174
|
| 286 |
+
1728,114000,21.9856
|
| 287 |
+
1733,114400,14.545
|
| 288 |
+
1741,114800,9.2946
|
| 289 |
+
1745,115200,13.9682
|
| 290 |
+
1751,115600,11.2764
|
| 291 |
+
1758,116000,10.046
|
| 292 |
+
1765,116400,6.6395
|
| 293 |
+
1770,116800,12.6278
|
| 294 |
+
1778,117200,7.5994
|
| 295 |
+
1786,117600,5.1644
|
| 296 |
+
1794,118000,6.6251
|
| 297 |
+
1801,118400,5.2048
|
| 298 |
+
1806,118800,8.7313
|
| 299 |
+
1813,119200,6.7291
|
| 300 |
+
1821,119600,5.8351
|
| 301 |
+
1829,120000,8.9222
|
| 302 |
+
1835,120400,8.7741
|
| 303 |
+
1842,120800,7.2133
|
| 304 |
+
1849,121200,8.8234
|
| 305 |
+
1855,121600,9.6317
|
| 306 |
+
1862,122000,8.2559
|
| 307 |
+
1867,122400,11.0191
|
| 308 |
+
1872,122800,13.0496
|
| 309 |
+
1878,123200,12.1102
|
| 310 |
+
1884,123600,15.2622
|
| 311 |
+
1890,124000,14.949
|
| 312 |
+
1897,124400,14.1959
|
| 313 |
+
1905,124800,10.2858
|
| 314 |
+
1912,125200,10.4039
|
| 315 |
+
1920,125600,9.2182
|
| 316 |
+
1925,126000,18.0285
|
| 317 |
+
1931,126400,14.5642
|
| 318 |
+
1936,126800,15.7313
|
| 319 |
+
1942,127200,16.071
|
| 320 |
+
1949,127600,12.4836
|
| 321 |
+
1955,128000,14.5803
|
| 322 |
+
1964,128400,6.5171
|
| 323 |
+
1973,128800,10.3231
|
| 324 |
+
1979,129200,11.6592
|
| 325 |
+
1985,129600,13.9441
|
| 326 |
+
1991,130000,11.8742
|
| 327 |
+
1997,130400,14.2094
|
| 328 |
+
2002,130800,16.5023
|
| 329 |
+
2008,131200,7.9427
|
| 330 |
+
2013,131600,13.3196
|
| 331 |
+
2017,132000,22.0345
|
| 332 |
+
2023,132400,9.6949
|
| 333 |
+
2029,132800,8.3864
|
| 334 |
+
2033,133200,15.5384
|
| 335 |
+
2038,133600,14.2444
|
| 336 |
+
2043,134000,16.1699
|
| 337 |
+
2047,134400,14.6686
|
| 338 |
+
2052,134800,11.1139
|
| 339 |
+
2057,135200,15.3049
|
| 340 |
+
2061,135600,18.3799
|
| 341 |
+
2066,136000,10.9603
|
| 342 |
+
2070,136400,12.5289
|
| 343 |
+
2076,136800,12.3745
|
| 344 |
+
2080,137200,11.4958
|
| 345 |
+
2086,137600,11.5674
|
| 346 |
+
2094,138000,8.5145
|
| 347 |
+
2099,138400,15.1147
|
| 348 |
+
2104,138800,12.0235
|
| 349 |
+
2110,139200,10.5698
|
| 350 |
+
2116,139600,11.9205
|
| 351 |
+
2123,140000,6.9545
|
| 352 |
+
2130,140400,10.1053
|
| 353 |
+
2136,140800,11.9632
|
| 354 |
+
2143,141200,10.4299
|
| 355 |
+
2151,141600,8.3429
|
| 356 |
+
2165,142000,4.3628
|
| 357 |
+
2172,142400,6.8153
|
| 358 |
+
2179,142800,12.8716
|
| 359 |
+
2187,143200,5.8168
|
| 360 |
+
2196,143600,8.4523
|
| 361 |
+
2202,144000,11.4057
|
| 362 |
+
2208,144400,10.0338
|
| 363 |
+
2219,144800,6.4849
|
| 364 |
+
2224,145200,12.969
|
| 365 |
+
2228,145600,18.6417
|
| 366 |
+
2233,146000,12.536
|
| 367 |
+
2238,146400,15.4672
|
| 368 |
+
2243,146800,13.8039
|
| 369 |
+
2248,147200,14.223
|
| 370 |
+
2254,147600,12.1347
|
| 371 |
+
2261,148000,8.5235
|
| 372 |
+
2267,148400,15.2563
|
| 373 |
+
2272,148800,11.314
|
| 374 |
+
2278,149200,12.9462
|
| 375 |
+
2285,149600,9.7554
|
| 376 |
+
2291,150000,14.0195
|
| 377 |
+
2296,150400,15.3385
|
| 378 |
+
2303,150800,9.973
|
| 379 |
+
2308,151200,18.1009
|
| 380 |
+
2312,151600,20.9784
|
| 381 |
+
2321,152000,10.5002
|
| 382 |
+
2335,152400,4.4978
|
| 383 |
+
2342,152800,10.5598
|
| 384 |
+
2350,153200,9.1714
|
| 385 |
+
2357,153600,12.176
|
| 386 |
+
2364,154000,11.5123
|
| 387 |
+
2368,154400,20.0547
|
| 388 |
+
2373,154800,17.2117
|
| 389 |
+
2379,155200,15.459
|
| 390 |
+
2387,155600,11.3856
|
| 391 |
+
2393,156000,10.4216
|
| 392 |
+
2398,156400,18.6778
|
| 393 |
+
2406,156800,9.8121
|
| 394 |
+
2413,157200,15.4442
|
| 395 |
+
2420,157600,8.2398
|
| 396 |
+
2429,158000,7.9804
|
| 397 |
+
2436,158400,10.0033
|
| 398 |
+
2444,158800,11.684
|
| 399 |
+
2451,159200,11.7212
|
| 400 |
+
2458,159600,11.1657
|
| 401 |
+
2463,160000,12.9049
|
| 402 |
+
2474,160400,7.2923
|
| 403 |
+
2480,160800,11.1158
|
| 404 |
+
2488,161200,7.0536
|
| 405 |
+
2496,161600,12.9054
|
| 406 |
+
2503,162000,10.8589
|
| 407 |
+
2510,162400,7.6154
|
| 408 |
+
2516,162800,15.1175
|
| 409 |
+
2526,163200,6.0549
|
| 410 |
+
2535,163600,6.8174
|
| 411 |
+
2543,164000,11.3806
|
| 412 |
+
2548,164400,14.9169
|
| 413 |
+
2555,164800,8.7519
|
| 414 |
+
2561,165200,12.8943
|
| 415 |
+
2568,165600,7.4888
|
| 416 |
+
2577,166000,12.9756
|
| 417 |
+
2586,166400,9.7037
|
| 418 |
+
2595,166800,7.6698
|
| 419 |
+
2604,167200,8.0272
|
| 420 |
+
2609,167600,17.4422
|
| 421 |
+
2614,168000,21.842
|
| 422 |
+
2619,168400,24.0025
|
| 423 |
+
2624,168800,21.9548
|
| 424 |
+
2629,169200,16.2903
|
| 425 |
+
2636,169600,15.3797
|
| 426 |
+
2644,170000,12.3509
|
| 427 |
+
2651,170400,17.7225
|
| 428 |
+
2657,170800,17.4392
|
| 429 |
+
2667,171200,7.6192
|
| 430 |
+
2672,171600,19.9947
|
| 431 |
+
2677,172000,21.4869
|
| 432 |
+
2682,172400,16.9837
|
| 433 |
+
2689,172800,15.026
|
| 434 |
+
2698,173200,8.2427
|
| 435 |
+
2704,173600,9.9491
|
| 436 |
+
2712,174000,12.5259
|
| 437 |
+
2717,174400,17.4708
|
| 438 |
+
2723,174800,12.1165
|
| 439 |
+
2729,175200,15.3909
|
| 440 |
+
2736,175600,12.777
|
| 441 |
+
2741,176000,21.2902
|
| 442 |
+
2745,176400,26.1009
|
| 443 |
+
2751,176800,17.7154
|
| 444 |
+
2757,177200,14.3114
|
| 445 |
+
2762,177600,21.2451
|
| 446 |
+
2766,178000,24.6657
|
| 447 |
+
2773,178400,11.323
|
| 448 |
+
2778,178800,18.9868
|
| 449 |
+
2789,179200,10.2768
|
| 450 |
+
2795,179600,16.099
|
| 451 |
+
2802,180000,9.7036
|
| 452 |
+
2811,180400,10.1027
|
| 453 |
+
2817,180800,14.6335
|
| 454 |
+
2821,181200,20.4911
|
| 455 |
+
2825,181600,23.7041
|
| 456 |
+
2830,182000,19.1257
|
| 457 |
+
2834,182400,21.7177
|
| 458 |
+
2838,182800,22.6593
|
| 459 |
+
2843,183200,21.3371
|
| 460 |
+
2849,183600,12.1633
|
| 461 |
+
2855,184000,18.8629
|
| 462 |
+
2859,184400,16.2873
|
| 463 |
+
2866,184800,16.9789
|
| 464 |
+
2871,185200,13.7611
|
| 465 |
+
2878,185600,12.7885
|
| 466 |
+
2883,186000,18.4539
|
| 467 |
+
2888,186400,19.4485
|
| 468 |
+
2894,186800,17.3323
|
| 469 |
+
2899,187200,16.1448
|
| 470 |
+
2909,187600,9.09
|
| 471 |
+
2921,188000,5.7227
|
| 472 |
+
2930,188400,9.5406
|
| 473 |
+
2939,188800,8.9963
|
| 474 |
+
2948,189200,9.0039
|
| 475 |
+
2959,189600,6.1213
|
| 476 |
+
2968,190000,10.7098
|
| 477 |
+
2973,190400,27.5424
|
| 478 |
+
2981,190800,11.4104
|
| 479 |
+
2988,191200,14.7688
|
| 480 |
+
2996,191600,8.1336
|
| 481 |
+
3006,192000,9.3062
|
| 482 |
+
3013,192400,9.5427
|
| 483 |
+
3020,192800,12.1444
|
| 484 |
+
3027,193200,13.9121
|
| 485 |
+
3033,193600,11.6091
|
| 486 |
+
3043,194000,9.565
|
| 487 |
+
3050,194400,12.3977
|
| 488 |
+
3058,194800,13.0409
|
| 489 |
+
3064,195200,19.9469
|
| 490 |
+
3071,195600,12.6553
|
| 491 |
+
3078,196000,11.394
|
| 492 |
+
3082,196400,23.2945
|
| 493 |
+
3087,196800,25.3409
|
| 494 |
+
3093,197200,20.015
|
| 495 |
+
3099,197600,16.1347
|
| 496 |
+
3103,198000,15.8059
|
| 497 |
+
3108,198400,16.2553
|
| 498 |
+
3116,198800,12.0655
|
| 499 |
+
3123,199200,7.5759
|
| 500 |
+
3133,199600,8.3166
|
| 501 |
+
3146,200000,5.2906
|
code/Lake application/logs/results_1/PPO_frozen_lake_log_5.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
24,400,1.1896
|
| 3 |
+
45,800,1.4148
|
| 4 |
+
59,1200,2.3332
|
| 5 |
+
66,1600,5.1201
|
| 6 |
+
70,2000,6.5847
|
| 7 |
+
78,2400,5.1845
|
| 8 |
+
84,2800,5.4028
|
| 9 |
+
91,3200,4.5706
|
| 10 |
+
98,3600,4.8473
|
| 11 |
+
103,4000,6.01
|
| 12 |
+
108,4400,5.9648
|
| 13 |
+
114,4800,5.9022
|
| 14 |
+
118,5200,7.4097
|
| 15 |
+
123,5600,6.5452
|
| 16 |
+
128,6000,6.3157
|
| 17 |
+
133,6400,7.2965
|
| 18 |
+
143,6800,3.5149
|
| 19 |
+
147,7200,7.2122
|
| 20 |
+
151,7600,7.6719
|
| 21 |
+
156,8000,7.8449
|
| 22 |
+
160,8400,6.3771
|
| 23 |
+
168,8800,4.1202
|
| 24 |
+
172,9200,7.7297
|
| 25 |
+
179,9600,5.2521
|
| 26 |
+
184,10000,5.0265
|
| 27 |
+
190,10400,6.301
|
| 28 |
+
195,10800,6.4812
|
| 29 |
+
200,11200,5.9884
|
| 30 |
+
204,11600,8.2287
|
| 31 |
+
208,12000,6.8769
|
| 32 |
+
213,12400,8.2376
|
| 33 |
+
217,12800,6.7639
|
| 34 |
+
222,13200,7.491
|
| 35 |
+
226,13600,7.14
|
| 36 |
+
234,14000,4.8056
|
| 37 |
+
238,14400,8.2315
|
| 38 |
+
244,14800,5.6201
|
| 39 |
+
250,15200,5.6978
|
| 40 |
+
254,15600,8.6572
|
| 41 |
+
259,16000,7.1277
|
| 42 |
+
264,16400,6.3588
|
| 43 |
+
276,16800,3.2533
|
| 44 |
+
285,17200,4.354
|
| 45 |
+
295,17600,3.6256
|
| 46 |
+
302,18000,5.482
|
| 47 |
+
312,18400,4.9095
|
| 48 |
+
324,18800,3.2072
|
| 49 |
+
332,19200,4.2376
|
| 50 |
+
337,19600,7.9829
|
| 51 |
+
343,20000,7.3154
|
| 52 |
+
352,20400,5.8945
|
| 53 |
+
359,20800,5.4014
|
| 54 |
+
367,21200,6.2058
|
| 55 |
+
372,21600,8.3138
|
| 56 |
+
377,22000,7.7193
|
| 57 |
+
385,22400,5.7761
|
| 58 |
+
392,22800,6.301
|
| 59 |
+
397,23200,7.7678
|
| 60 |
+
402,23600,7.7383
|
| 61 |
+
408,24000,8.0968
|
| 62 |
+
414,24400,7.2615
|
| 63 |
+
420,24800,8.6811
|
| 64 |
+
428,25200,6.1812
|
| 65 |
+
434,25600,5.421
|
| 66 |
+
439,26000,10.3048
|
| 67 |
+
444,26400,8.6396
|
| 68 |
+
450,26800,5.8203
|
| 69 |
+
455,27200,8.955
|
| 70 |
+
460,27600,10.1203
|
| 71 |
+
464,28000,10.9113
|
| 72 |
+
469,28400,10.8469
|
| 73 |
+
475,28800,8.0835
|
| 74 |
+
481,29200,8.4523
|
| 75 |
+
486,29600,9.6786
|
| 76 |
+
494,30000,5.8029
|
| 77 |
+
499,30400,9.7283
|
| 78 |
+
503,30800,10.3865
|
| 79 |
+
509,31200,7.4219
|
| 80 |
+
516,31600,6.7019
|
| 81 |
+
521,32000,8.9637
|
| 82 |
+
526,32400,8.0468
|
| 83 |
+
531,32800,9.7588
|
| 84 |
+
538,33200,7.8747
|
| 85 |
+
542,33600,8.1689
|
| 86 |
+
546,34000,11.1529
|
| 87 |
+
550,34400,11.3951
|
| 88 |
+
555,34800,9.1521
|
| 89 |
+
559,35200,10.907
|
| 90 |
+
563,35600,11.4909
|
| 91 |
+
569,36000,8.8749
|
| 92 |
+
573,36400,11.4543
|
| 93 |
+
577,36800,10.4455
|
| 94 |
+
583,37200,6.9897
|
| 95 |
+
587,37600,9.4309
|
| 96 |
+
592,38000,10.4979
|
| 97 |
+
596,38400,10.9061
|
| 98 |
+
600,38800,11.3928
|
| 99 |
+
606,39200,9.6524
|
| 100 |
+
610,39600,11.4807
|
| 101 |
+
616,40000,7.0779
|
| 102 |
+
620,40400,7.8871
|
| 103 |
+
630,40800,5.189
|
| 104 |
+
637,41200,4.9294
|
| 105 |
+
645,41600,5.8549
|
| 106 |
+
649,42000,9.1055
|
| 107 |
+
655,42400,7.5844
|
| 108 |
+
660,42800,7.9374
|
| 109 |
+
665,43200,8.81
|
| 110 |
+
669,43600,11.0942
|
| 111 |
+
675,44000,8.1994
|
| 112 |
+
681,44400,5.7097
|
| 113 |
+
686,44800,5.6185
|
| 114 |
+
693,45200,6.8876
|
| 115 |
+
697,45600,9.3624
|
| 116 |
+
702,46000,10.4714
|
| 117 |
+
707,46400,8.134
|
| 118 |
+
711,46800,8.3317
|
| 119 |
+
717,47200,8.2672
|
| 120 |
+
722,47600,8.0033
|
| 121 |
+
729,48000,5.1115
|
| 122 |
+
737,48400,6.7697
|
| 123 |
+
743,48800,6.1466
|
| 124 |
+
750,49200,6.2071
|
| 125 |
+
756,49600,8.233
|
| 126 |
+
761,50000,6.4818
|
| 127 |
+
766,50400,7.5639
|
| 128 |
+
771,50800,8.9587
|
| 129 |
+
778,51200,7.244
|
| 130 |
+
783,51600,8.6157
|
| 131 |
+
790,52000,6.8854
|
| 132 |
+
796,52400,6.3743
|
| 133 |
+
800,52800,7.0537
|
| 134 |
+
806,53200,5.9831
|
| 135 |
+
812,53600,7.1028
|
| 136 |
+
817,54000,7.4769
|
| 137 |
+
822,54400,6.6734
|
| 138 |
+
828,54800,7.8816
|
| 139 |
+
833,55200,7.9481
|
| 140 |
+
837,55600,8.0056
|
| 141 |
+
842,56000,8.3952
|
| 142 |
+
847,56400,7.193
|
| 143 |
+
852,56800,6.8126
|
| 144 |
+
857,57200,6.8457
|
| 145 |
+
861,57600,8.5708
|
| 146 |
+
867,58000,7.7666
|
| 147 |
+
872,58400,8.4442
|
| 148 |
+
877,58800,8.728
|
| 149 |
+
882,59200,6.7609
|
| 150 |
+
887,59600,6.3465
|
| 151 |
+
892,60000,9.3179
|
| 152 |
+
896,60400,10.0812
|
| 153 |
+
901,60800,7.6496
|
| 154 |
+
906,61200,8.6439
|
| 155 |
+
914,61600,5.8768
|
| 156 |
+
921,62000,5.4985
|
| 157 |
+
928,62400,6.8348
|
| 158 |
+
932,62800,11.426
|
| 159 |
+
938,63200,7.9831
|
| 160 |
+
942,63600,11.7064
|
| 161 |
+
947,64000,8.5804
|
| 162 |
+
956,64400,4.5109
|
| 163 |
+
963,64800,6.216
|
| 164 |
+
969,65200,6.0191
|
| 165 |
+
976,65600,5.3416
|
| 166 |
+
983,66000,5.4151
|
| 167 |
+
989,66400,6.187
|
| 168 |
+
993,66800,8.3532
|
| 169 |
+
998,67200,5.5044
|
| 170 |
+
1003,67600,7.4823
|
| 171 |
+
1009,68000,6.0798
|
| 172 |
+
1015,68400,5.3716
|
| 173 |
+
1021,68800,6.6076
|
| 174 |
+
1029,69200,4.1927
|
| 175 |
+
1035,69600,5.568
|
| 176 |
+
1040,70000,8.2748
|
| 177 |
+
1044,70400,8.0315
|
| 178 |
+
1049,70800,7.3339
|
| 179 |
+
1054,71200,8.792
|
| 180 |
+
1059,71600,5.9685
|
| 181 |
+
1064,72000,9.0921
|
| 182 |
+
1070,72400,6.5296
|
| 183 |
+
1075,72800,6.9532
|
| 184 |
+
1080,73200,7.8572
|
| 185 |
+
1087,73600,6.9338
|
| 186 |
+
1092,74000,7.5691
|
| 187 |
+
1096,74400,7.7686
|
| 188 |
+
1105,74800,5.2799
|
| 189 |
+
1111,75200,6.5266
|
| 190 |
+
1121,75600,4.676
|
| 191 |
+
1131,76000,4.0666
|
| 192 |
+
1136,76400,7.5432
|
| 193 |
+
1142,76800,9.6043
|
| 194 |
+
1147,77200,10.452
|
| 195 |
+
1153,77600,9.5079
|
| 196 |
+
1161,78000,5.3496
|
| 197 |
+
1168,78400,7.87
|
| 198 |
+
1172,78800,12.6175
|
| 199 |
+
1177,79200,11.3827
|
| 200 |
+
1184,79600,9.1898
|
| 201 |
+
1189,80000,9.0837
|
| 202 |
+
1194,80400,10.6039
|
| 203 |
+
1204,80800,5.8527
|
| 204 |
+
1213,81200,4.7043
|
| 205 |
+
1221,81600,5.8605
|
| 206 |
+
1228,82000,6.6981
|
| 207 |
+
1233,82400,9.605
|
| 208 |
+
1241,82800,5.4104
|
| 209 |
+
1246,83200,8.432
|
| 210 |
+
1251,83600,7.9885
|
| 211 |
+
1255,84000,10.6539
|
| 212 |
+
1261,84400,7.0366
|
| 213 |
+
1268,84800,5.8069
|
| 214 |
+
1273,85200,6.9464
|
| 215 |
+
1280,85600,5.2223
|
| 216 |
+
1286,86000,5.3272
|
| 217 |
+
1292,86400,6.4975
|
| 218 |
+
1301,86800,3.7054
|
| 219 |
+
1307,87200,5.0905
|
| 220 |
+
1314,87600,5.9251
|
| 221 |
+
1320,88000,5.9757
|
| 222 |
+
1325,88400,7.8605
|
| 223 |
+
1330,88800,6.1258
|
| 224 |
+
1335,89200,6.4772
|
| 225 |
+
1342,89600,4.6821
|
| 226 |
+
1348,90000,6.1312
|
| 227 |
+
1356,90400,4.4604
|
| 228 |
+
1360,90800,8.8793
|
| 229 |
+
1366,91200,5.4831
|
| 230 |
+
1372,91600,6.6693
|
| 231 |
+
1377,92000,6.0235
|
| 232 |
+
1382,92400,7.023
|
| 233 |
+
1386,92800,7.991
|
| 234 |
+
1391,93200,9.647
|
| 235 |
+
1399,93600,4.4356
|
| 236 |
+
1404,94000,7.8905
|
| 237 |
+
1413,94400,4.2128
|
| 238 |
+
1418,94800,7.0636
|
| 239 |
+
1423,95200,8.5693
|
| 240 |
+
1430,95600,6.583
|
| 241 |
+
1436,96000,7.1519
|
| 242 |
+
1444,96400,4.0687
|
| 243 |
+
1454,96800,3.9372
|
| 244 |
+
1460,97200,6.4719
|
| 245 |
+
1467,97600,7.3715
|
| 246 |
+
1477,98000,4.4424
|
| 247 |
+
1486,98400,5.2201
|
| 248 |
+
1495,98800,5.0595
|
| 249 |
+
1500,99200,8.9895
|
| 250 |
+
1507,99600,5.1904
|
| 251 |
+
1512,100000,7.0454
|
| 252 |
+
1520,100400,5.0524
|
| 253 |
+
1526,100800,6.979
|
| 254 |
+
1530,101200,8.5726
|
| 255 |
+
1538,101600,5.8817
|
| 256 |
+
1545,102000,4.4648
|
| 257 |
+
1552,102400,6.082
|
| 258 |
+
1560,102800,5.4946
|
| 259 |
+
1567,103200,5.3969
|
| 260 |
+
1574,103600,5.1586
|
| 261 |
+
1580,104000,5.9475
|
| 262 |
+
1588,104400,5.6989
|
| 263 |
+
1595,104800,7.2335
|
| 264 |
+
1602,105200,5.8916
|
| 265 |
+
1606,105600,9.3439
|
| 266 |
+
1613,106000,9.341
|
| 267 |
+
1622,106400,6.1297
|
| 268 |
+
1626,106800,12.5539
|
| 269 |
+
1631,107200,8.9207
|
| 270 |
+
1637,107600,9.1629
|
| 271 |
+
1643,108000,8.1442
|
| 272 |
+
1650,108400,6.787
|
| 273 |
+
1656,108800,8.6608
|
| 274 |
+
1664,109200,5.6852
|
| 275 |
+
1673,109600,5.7596
|
| 276 |
+
1678,110000,11.7301
|
| 277 |
+
1684,110400,6.4299
|
| 278 |
+
1691,110800,8.9632
|
| 279 |
+
1700,111200,5.1691
|
| 280 |
+
1707,111600,6.5305
|
| 281 |
+
1717,112000,4.326
|
| 282 |
+
1723,112400,8.8668
|
| 283 |
+
1731,112800,6.4173
|
| 284 |
+
1737,113200,8.5648
|
| 285 |
+
1741,113600,11.226
|
| 286 |
+
1750,114000,5.9103
|
| 287 |
+
1757,114400,8.2391
|
| 288 |
+
1761,114800,11.72
|
| 289 |
+
1765,115200,12.409
|
| 290 |
+
1770,115600,9.8668
|
| 291 |
+
1775,116000,9.7725
|
| 292 |
+
1783,116400,5.4277
|
| 293 |
+
1787,116800,11.838
|
| 294 |
+
1793,117200,8.9856
|
| 295 |
+
1802,117600,4.9619
|
| 296 |
+
1809,118000,7.6212
|
| 297 |
+
1814,118400,9.6064
|
| 298 |
+
1820,118800,9.063
|
| 299 |
+
1826,119200,8.8093
|
| 300 |
+
1832,119600,8.2819
|
| 301 |
+
1839,120000,6.0866
|
| 302 |
+
1844,120400,9.4807
|
| 303 |
+
1849,120800,11.4309
|
| 304 |
+
1854,121200,8.7056
|
| 305 |
+
1859,121600,9.2045
|
| 306 |
+
1866,122000,7.9125
|
| 307 |
+
1873,122400,7.3911
|
| 308 |
+
1883,122800,3.7769
|
| 309 |
+
1889,123200,7.0604
|
| 310 |
+
1896,123600,5.8807
|
| 311 |
+
1902,124000,8.3782
|
| 312 |
+
1907,124400,9.9949
|
| 313 |
+
1911,124800,12.1608
|
| 314 |
+
1916,125200,10.2111
|
| 315 |
+
1923,125600,6.3728
|
| 316 |
+
1927,126000,11.6701
|
| 317 |
+
1932,126400,10.0707
|
| 318 |
+
1937,126800,9.8646
|
| 319 |
+
1943,127200,8.6133
|
| 320 |
+
1948,127600,6.8825
|
| 321 |
+
1952,128000,11.6098
|
| 322 |
+
1957,128400,10.7833
|
| 323 |
+
1967,128800,4.9028
|
| 324 |
+
1974,129200,4.8043
|
| 325 |
+
1981,129600,7.1724
|
| 326 |
+
1988,130000,6.6265
|
| 327 |
+
1992,130400,12.4505
|
| 328 |
+
1997,130800,10.4089
|
| 329 |
+
2002,131200,9.3429
|
| 330 |
+
2008,131600,8.49
|
| 331 |
+
2016,132000,5.6689
|
| 332 |
+
2022,132400,7.8804
|
| 333 |
+
2027,132800,10.9685
|
| 334 |
+
2033,133200,7.3669
|
| 335 |
+
2040,133600,7.722
|
| 336 |
+
2046,134000,7.4917
|
| 337 |
+
2050,134400,12.4121
|
| 338 |
+
2055,134800,8.4809
|
| 339 |
+
2060,135200,8.2016
|
| 340 |
+
2065,135600,9.6964
|
| 341 |
+
2069,136000,11.8806
|
| 342 |
+
2074,136400,10.897
|
| 343 |
+
2081,136800,6.2054
|
| 344 |
+
2088,137200,6.4216
|
| 345 |
+
2097,137600,5.6088
|
| 346 |
+
2105,138000,6.3134
|
| 347 |
+
2111,138400,7.3101
|
| 348 |
+
2118,138800,7.7567
|
| 349 |
+
2122,139200,11.3647
|
| 350 |
+
2127,139600,7.5965
|
| 351 |
+
2133,140000,8.5471
|
| 352 |
+
2140,140400,7.6505
|
| 353 |
+
2148,140800,5.5044
|
| 354 |
+
2153,141200,10.6672
|
| 355 |
+
2158,141600,9.3921
|
| 356 |
+
2164,142000,9.4556
|
| 357 |
+
2169,142400,8.4184
|
| 358 |
+
2174,142800,12.2423
|
| 359 |
+
2180,143200,10.4702
|
| 360 |
+
2192,143600,3.7695
|
| 361 |
+
2199,144000,7.5407
|
| 362 |
+
2209,144400,5.2107
|
| 363 |
+
2218,144800,5.2279
|
| 364 |
+
2223,145200,10.1944
|
| 365 |
+
2232,145600,6.5609
|
| 366 |
+
2239,146000,7.3471
|
| 367 |
+
2246,146400,9.7593
|
| 368 |
+
2256,146800,6.1584
|
| 369 |
+
2265,147200,6.7017
|
| 370 |
+
2272,147600,8.3735
|
| 371 |
+
2278,148000,12.8356
|
| 372 |
+
2284,148400,10.6073
|
| 373 |
+
2290,148800,11.7075
|
| 374 |
+
2294,149200,15.3994
|
| 375 |
+
2298,149600,14.141
|
| 376 |
+
2304,150000,12.4809
|
| 377 |
+
2310,150400,9.9578
|
| 378 |
+
2319,150800,5.6223
|
| 379 |
+
2327,151200,9.5834
|
| 380 |
+
2333,151600,10.7389
|
| 381 |
+
2341,152000,7.72
|
| 382 |
+
2346,152400,7.7508
|
| 383 |
+
2352,152800,12.4856
|
| 384 |
+
2357,153200,11.9052
|
| 385 |
+
2363,153600,10.7156
|
| 386 |
+
2373,154000,4.0923
|
| 387 |
+
2382,154400,6.618
|
| 388 |
+
2388,154800,7.4782
|
| 389 |
+
2399,155200,5.56
|
| 390 |
+
2409,155600,4.0632
|
| 391 |
+
2418,156000,6.6121
|
| 392 |
+
2425,156400,8.951
|
| 393 |
+
2433,156800,6.1173
|
| 394 |
+
2439,157200,10.4851
|
| 395 |
+
2449,157600,6.8041
|
| 396 |
+
2455,158000,9.7919
|
| 397 |
+
2461,158400,14.9732
|
| 398 |
+
2466,158800,17.2664
|
| 399 |
+
2476,159200,7.0055
|
| 400 |
+
2480,159600,18.4795
|
| 401 |
+
2488,160000,7.4399
|
| 402 |
+
2493,160400,13.4248
|
| 403 |
+
2497,160800,17.9206
|
| 404 |
+
2505,161200,10.3671
|
| 405 |
+
2513,161600,7.8602
|
| 406 |
+
2520,162000,7.5847
|
| 407 |
+
2530,162400,6.8028
|
| 408 |
+
2537,162800,8.7183
|
| 409 |
+
2544,163200,10.3386
|
| 410 |
+
2551,163600,11.3944
|
| 411 |
+
2556,164000,10.885
|
| 412 |
+
2566,164400,7.0053
|
| 413 |
+
2575,164800,7.0606
|
| 414 |
+
2580,165200,14.0458
|
| 415 |
+
2584,165600,16.733
|
| 416 |
+
2595,166000,6.4971
|
| 417 |
+
2606,166400,5.5624
|
| 418 |
+
2612,166800,12.141
|
| 419 |
+
2616,167200,20.5602
|
| 420 |
+
2625,167600,7.7261
|
| 421 |
+
2634,168000,7.5007
|
| 422 |
+
2639,168400,13.3516
|
| 423 |
+
2645,168800,13.6817
|
| 424 |
+
2651,169200,10.8423
|
| 425 |
+
2657,169600,11.7492
|
| 426 |
+
2664,170000,9.1222
|
| 427 |
+
2670,170400,10.5513
|
| 428 |
+
2677,170800,10.756
|
| 429 |
+
2682,171200,14.8444
|
| 430 |
+
2687,171600,13.6451
|
| 431 |
+
2692,172000,11.6598
|
| 432 |
+
2699,172400,10.1388
|
| 433 |
+
2708,172800,8.8513
|
| 434 |
+
2720,173200,4.2224
|
| 435 |
+
2727,173600,8.3196
|
| 436 |
+
2732,174000,11.9491
|
| 437 |
+
2740,174400,6.6871
|
| 438 |
+
2748,174800,7.4226
|
| 439 |
+
2753,175200,10.0185
|
| 440 |
+
2760,175600,10.3176
|
| 441 |
+
2764,176000,11.3026
|
| 442 |
+
2770,176400,9.9799
|
| 443 |
+
2776,176800,9.911
|
| 444 |
+
2782,177200,7.9342
|
| 445 |
+
2789,177600,6.5082
|
| 446 |
+
2796,178000,6.9011
|
| 447 |
+
2801,178400,11.9291
|
| 448 |
+
2806,178800,9.9604
|
| 449 |
+
2814,179200,7.9044
|
| 450 |
+
2820,179600,13.3649
|
| 451 |
+
2826,180000,9.8404
|
| 452 |
+
2832,180400,12.8414
|
| 453 |
+
2836,180800,18.641
|
| 454 |
+
2843,181200,11.7401
|
| 455 |
+
2850,181600,12.4612
|
| 456 |
+
2859,182000,8.9368
|
| 457 |
+
2864,182400,12.2839
|
| 458 |
+
2869,182800,15.863
|
| 459 |
+
2877,183200,8.5494
|
| 460 |
+
2884,183600,11.6935
|
| 461 |
+
2896,184000,6.2298
|
| 462 |
+
2903,184400,10.0828
|
| 463 |
+
2909,184800,11.6092
|
| 464 |
+
2914,185200,11.3742
|
| 465 |
+
2922,185600,10.9442
|
| 466 |
+
2932,186000,5.9435
|
| 467 |
+
2936,186400,15.5259
|
| 468 |
+
2944,186800,8.2002
|
| 469 |
+
2950,187200,12.5212
|
| 470 |
+
2957,187600,9.7587
|
| 471 |
+
2962,188000,12.3502
|
| 472 |
+
2969,188400,10.8997
|
| 473 |
+
2976,188800,8.21
|
| 474 |
+
2984,189200,8.5636
|
| 475 |
+
2990,189600,12.3993
|
| 476 |
+
2996,190000,12.8622
|
| 477 |
+
3000,190400,19.3607
|
| 478 |
+
3006,190800,9.842
|
| 479 |
+
3010,191200,14.5131
|
| 480 |
+
3020,191600,5.0736
|
| 481 |
+
3030,192000,4.7978
|
| 482 |
+
3042,192400,3.6577
|
| 483 |
+
3049,192800,8.4608
|
| 484 |
+
3056,193200,7.3668
|
| 485 |
+
3068,193600,4.3313
|
| 486 |
+
3075,194000,7.6891
|
| 487 |
+
3080,194400,12.0177
|
| 488 |
+
3086,194800,8.6174
|
| 489 |
+
3093,195200,7.7047
|
| 490 |
+
3099,195600,8.0317
|
| 491 |
+
3105,196000,8.0931
|
| 492 |
+
3114,196400,5.4667
|
| 493 |
+
3122,196800,6.6043
|
| 494 |
+
3132,197200,5.0706
|
| 495 |
+
3140,197600,8.1802
|
| 496 |
+
3149,198000,6.0144
|
| 497 |
+
3160,198400,5.1743
|
| 498 |
+
3168,198800,6.8536
|
| 499 |
+
3178,199200,3.8362
|
| 500 |
+
3186,199600,6.7096
|
| 501 |
+
3192,200000,8.9841
|
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_1.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
22,400,1.2526
|
| 3 |
+
48,800,1.1762
|
| 4 |
+
66,1200,1.698
|
| 5 |
+
86,1600,1.5699
|
| 6 |
+
100,2000,2.2196
|
| 7 |
+
114,2400,2.2243
|
| 8 |
+
130,2800,1.9494
|
| 9 |
+
144,3200,2.1269
|
| 10 |
+
161,3600,1.8416
|
| 11 |
+
173,4000,2.4108
|
| 12 |
+
186,4400,2.3831
|
| 13 |
+
195,4800,3.7798
|
| 14 |
+
204,5200,3.3905
|
| 15 |
+
212,5600,3.7893
|
| 16 |
+
221,6000,2.6417
|
| 17 |
+
230,6400,3.9088
|
| 18 |
+
245,6800,2.3282
|
| 19 |
+
256,7200,2.7405
|
| 20 |
+
263,7600,4.6528
|
| 21 |
+
273,8000,3.0146
|
| 22 |
+
281,8400,3.2375
|
| 23 |
+
291,8800,3.553
|
| 24 |
+
297,9200,4.7911
|
| 25 |
+
307,9600,3.4636
|
| 26 |
+
314,10000,4.4748
|
| 27 |
+
320,10400,4.6664
|
| 28 |
+
329,10800,3.4853
|
| 29 |
+
336,11200,4.7085
|
| 30 |
+
341,11600,6.6859
|
| 31 |
+
349,12000,3.6321
|
| 32 |
+
355,12400,5.3428
|
| 33 |
+
362,12800,4.1236
|
| 34 |
+
367,13200,6.1173
|
| 35 |
+
373,13600,5.3324
|
| 36 |
+
377,14000,7.2656
|
| 37 |
+
381,14400,7.7223
|
| 38 |
+
386,14800,5.9847
|
| 39 |
+
390,15200,7.6658
|
| 40 |
+
395,15600,5.8334
|
| 41 |
+
400,16000,7.1939
|
| 42 |
+
404,16400,7.1478
|
| 43 |
+
408,16800,7.5988
|
| 44 |
+
414,17200,6.1732
|
| 45 |
+
419,17600,6.1007
|
| 46 |
+
423,18000,7.2673
|
| 47 |
+
427,18400,7.41
|
| 48 |
+
432,18800,7.0193
|
| 49 |
+
438,19200,5.3807
|
| 50 |
+
443,19600,5.9541
|
| 51 |
+
447,20000,7.6451
|
| 52 |
+
452,20400,7.0583
|
| 53 |
+
457,20800,7.3468
|
| 54 |
+
461,21200,7.6243
|
| 55 |
+
466,21600,6.6661
|
| 56 |
+
473,22000,4.9625
|
| 57 |
+
479,22400,4.5857
|
| 58 |
+
484,22800,6.9212
|
| 59 |
+
488,23200,6.1686
|
| 60 |
+
493,23600,7.897
|
| 61 |
+
497,24000,7.9243
|
| 62 |
+
501,24400,8.608
|
| 63 |
+
506,24800,6.6952
|
| 64 |
+
511,25200,6.2059
|
| 65 |
+
516,25600,7.1384
|
| 66 |
+
522,26000,5.7323
|
| 67 |
+
527,26400,5.9145
|
| 68 |
+
534,26800,5.6198
|
| 69 |
+
538,27200,7.6158
|
| 70 |
+
542,27600,7.7017
|
| 71 |
+
546,28000,7.8191
|
| 72 |
+
551,28400,8.2935
|
| 73 |
+
556,28800,7.3137
|
| 74 |
+
562,29200,5.8557
|
| 75 |
+
570,29600,4.3915
|
| 76 |
+
576,30000,4.9182
|
| 77 |
+
581,30400,6.3954
|
| 78 |
+
585,30800,7.2091
|
| 79 |
+
590,31200,9.0975
|
| 80 |
+
594,31600,5.9126
|
| 81 |
+
600,32000,5.1805
|
| 82 |
+
606,32400,6.9075
|
| 83 |
+
611,32800,6.2059
|
| 84 |
+
616,33200,4.6794
|
| 85 |
+
621,33600,7.0498
|
| 86 |
+
626,34000,7.4649
|
| 87 |
+
631,34400,6.6183
|
| 88 |
+
635,34800,8.8603
|
| 89 |
+
639,35200,6.275
|
| 90 |
+
643,35600,9.1126
|
| 91 |
+
648,36000,6.9308
|
| 92 |
+
653,36400,6.7373
|
| 93 |
+
657,36800,7.6857
|
| 94 |
+
662,37200,7.836
|
| 95 |
+
666,37600,6.3725
|
| 96 |
+
671,38000,6.2922
|
| 97 |
+
676,38400,6.6979
|
| 98 |
+
680,38800,7.6388
|
| 99 |
+
684,39200,7.0079
|
| 100 |
+
688,39600,7.5892
|
| 101 |
+
692,40000,7.7355
|
| 102 |
+
697,40400,6.5238
|
| 103 |
+
702,40800,6.181
|
| 104 |
+
706,41200,6.5753
|
| 105 |
+
711,41600,6.499
|
| 106 |
+
716,42000,6.1642
|
| 107 |
+
721,42400,6.3709
|
| 108 |
+
725,42800,6.5148
|
| 109 |
+
730,43200,6.1071
|
| 110 |
+
734,43600,7.2231
|
| 111 |
+
739,44000,6.9884
|
| 112 |
+
743,44400,7.6752
|
| 113 |
+
747,44800,7.645
|
| 114 |
+
751,45200,7.6253
|
| 115 |
+
756,45600,6.9888
|
| 116 |
+
760,46000,6.551
|
| 117 |
+
764,46400,8.8765
|
| 118 |
+
768,46800,7.644
|
| 119 |
+
772,47200,7.7078
|
| 120 |
+
776,47600,7.7402
|
| 121 |
+
780,48000,7.7096
|
| 122 |
+
786,48400,6.4542
|
| 123 |
+
790,48800,6.8511
|
| 124 |
+
795,49200,6.7727
|
| 125 |
+
799,49600,7.6417
|
| 126 |
+
803,50000,7.6993
|
| 127 |
+
807,50400,7.7043
|
| 128 |
+
812,50800,6.8261
|
| 129 |
+
816,51200,6.92
|
| 130 |
+
820,51600,7.9919
|
| 131 |
+
825,52000,6.4103
|
| 132 |
+
830,52400,5.7618
|
| 133 |
+
834,52800,7.0424
|
| 134 |
+
838,53200,7.6928
|
| 135 |
+
842,53600,7.7354
|
| 136 |
+
847,54000,7.3411
|
| 137 |
+
852,54400,6.3078
|
| 138 |
+
856,54800,7.4612
|
| 139 |
+
860,55200,6.6696
|
| 140 |
+
865,55600,6.7569
|
| 141 |
+
869,56000,6.9279
|
| 142 |
+
874,56400,6.5996
|
| 143 |
+
878,56800,7.7271
|
| 144 |
+
882,57200,7.6932
|
| 145 |
+
886,57600,7.5902
|
| 146 |
+
890,58000,7.1683
|
| 147 |
+
897,58400,4.5812
|
| 148 |
+
903,58800,6.1682
|
| 149 |
+
908,59200,6.1325
|
| 150 |
+
912,59600,7.8386
|
| 151 |
+
916,60000,6.3943
|
| 152 |
+
921,60400,6.8943
|
| 153 |
+
925,60800,7.8821
|
| 154 |
+
932,61200,4.9096
|
| 155 |
+
937,61600,6.3275
|
| 156 |
+
941,62000,8.5893
|
| 157 |
+
945,62400,7.9071
|
| 158 |
+
949,62800,7.8808
|
| 159 |
+
953,63200,7.8324
|
| 160 |
+
957,63600,8.5609
|
| 161 |
+
961,64000,8.4952
|
| 162 |
+
965,64400,7.8793
|
| 163 |
+
971,64800,5.9811
|
| 164 |
+
975,65200,6.3148
|
| 165 |
+
979,65600,8.3047
|
| 166 |
+
985,66000,6.6049
|
| 167 |
+
991,66400,5.5465
|
| 168 |
+
995,66800,6.7472
|
| 169 |
+
1000,67200,6.688
|
| 170 |
+
1004,67600,8.5059
|
| 171 |
+
1008,68000,8.0416
|
| 172 |
+
1012,68400,9.3594
|
| 173 |
+
1017,68800,7.135
|
| 174 |
+
1022,69200,7.7882
|
| 175 |
+
1028,69600,5.1304
|
| 176 |
+
1032,70000,7.9267
|
| 177 |
+
1036,70400,7.9253
|
| 178 |
+
1040,70800,7.522
|
| 179 |
+
1045,71200,6.7979
|
| 180 |
+
1050,71600,7.0769
|
| 181 |
+
1055,72000,7.1814
|
| 182 |
+
1059,72400,7.9156
|
| 183 |
+
1063,72800,6.9876
|
| 184 |
+
1069,73200,5.6939
|
| 185 |
+
1073,73600,7.603
|
| 186 |
+
1078,74000,6.9544
|
| 187 |
+
1085,74400,5.1872
|
| 188 |
+
1089,74800,8.0712
|
| 189 |
+
1094,75200,5.4866
|
| 190 |
+
1099,75600,6.856
|
| 191 |
+
1104,76000,6.6695
|
| 192 |
+
1111,76400,5.2366
|
| 193 |
+
1115,76800,8.231
|
| 194 |
+
1120,77200,6.3017
|
| 195 |
+
1125,77600,6.1984
|
| 196 |
+
1129,78000,8.0981
|
| 197 |
+
1134,78400,5.6874
|
| 198 |
+
1139,78800,6.7436
|
| 199 |
+
1144,79200,6.7781
|
| 200 |
+
1148,79600,8.035
|
| 201 |
+
1153,80000,7.0241
|
| 202 |
+
1157,80400,8.1216
|
| 203 |
+
1161,80800,7.7847
|
| 204 |
+
1165,81200,7.0458
|
| 205 |
+
1172,81600,4.9159
|
| 206 |
+
1176,82000,8.8252
|
| 207 |
+
1180,82400,8.1435
|
| 208 |
+
1184,82800,8.0794
|
| 209 |
+
1188,83200,8.4439
|
| 210 |
+
1194,83600,6.5179
|
| 211 |
+
1198,84000,9.5129
|
| 212 |
+
1202,84400,9.5982
|
| 213 |
+
1206,84800,8.1605
|
| 214 |
+
1213,85200,5.1735
|
| 215 |
+
1218,85600,6.3955
|
| 216 |
+
1222,86000,6.5836
|
| 217 |
+
1227,86400,7.8827
|
| 218 |
+
1231,86800,9.0583
|
| 219 |
+
1236,87200,7.4464
|
| 220 |
+
1242,87600,6.9449
|
| 221 |
+
1247,88000,7.6785
|
| 222 |
+
1251,88400,7.7551
|
| 223 |
+
1255,88800,9.1958
|
| 224 |
+
1260,89200,7.8329
|
| 225 |
+
1265,89600,9.6998
|
| 226 |
+
1269,90000,12.2941
|
| 227 |
+
1274,90400,7.8743
|
| 228 |
+
1281,90800,6.7261
|
| 229 |
+
1285,91200,10.3406
|
| 230 |
+
1289,91600,10.3629
|
| 231 |
+
1294,92000,12.8416
|
| 232 |
+
1298,92400,11.8637
|
| 233 |
+
1302,92800,10.95
|
| 234 |
+
1307,93200,12.6815
|
| 235 |
+
1312,93600,10.2359
|
| 236 |
+
1316,94000,14.5616
|
| 237 |
+
1320,94400,12.2057
|
| 238 |
+
1325,94800,10.3496
|
| 239 |
+
1331,95200,10.0749
|
| 240 |
+
1336,95600,12.5332
|
| 241 |
+
1341,96000,7.9488
|
| 242 |
+
1347,96400,11.6734
|
| 243 |
+
1353,96800,8.6258
|
| 244 |
+
1359,97200,8.6982
|
| 245 |
+
1363,97600,16.8782
|
| 246 |
+
1369,98000,13.076
|
| 247 |
+
1374,98400,12.6862
|
| 248 |
+
1380,98800,13.0069
|
| 249 |
+
1385,99200,13.2064
|
| 250 |
+
1394,99600,7.5732
|
| 251 |
+
1399,100000,13.571
|
| 252 |
+
1406,100400,11.8885
|
| 253 |
+
1411,100800,14.0567
|
| 254 |
+
1416,101200,14.4459
|
| 255 |
+
1422,101600,14.7572
|
| 256 |
+
1430,102000,9.0038
|
| 257 |
+
1436,102400,11.7543
|
| 258 |
+
1443,102800,12.3206
|
| 259 |
+
1448,103200,13.6686
|
| 260 |
+
1454,103600,14.6563
|
| 261 |
+
1461,104000,10.5416
|
| 262 |
+
1467,104400,11.8187
|
| 263 |
+
1473,104800,12.6766
|
| 264 |
+
1478,105200,13.234
|
| 265 |
+
1487,105600,10.4377
|
| 266 |
+
1494,106000,10.4682
|
| 267 |
+
1499,106400,14.2136
|
| 268 |
+
1505,106800,13.3214
|
| 269 |
+
1512,107200,9.9131
|
| 270 |
+
1518,107600,11.3795
|
| 271 |
+
1524,108000,13.6525
|
| 272 |
+
1531,108400,10.5166
|
| 273 |
+
1536,108800,15.6816
|
| 274 |
+
1541,109200,13.5686
|
| 275 |
+
1546,109600,15.8877
|
| 276 |
+
1552,110000,10.798
|
| 277 |
+
1556,110400,15.505
|
| 278 |
+
1564,110800,8.7457
|
| 279 |
+
1571,111200,9.8442
|
| 280 |
+
1576,111600,16.783
|
| 281 |
+
1581,112000,16.6507
|
| 282 |
+
1587,112400,12.5058
|
| 283 |
+
1597,112800,7.0879
|
| 284 |
+
1604,113200,9.4795
|
| 285 |
+
1612,113600,8.8247
|
| 286 |
+
1619,114000,9.8475
|
| 287 |
+
1626,114400,9.1637
|
| 288 |
+
1631,114800,14.0166
|
| 289 |
+
1638,115200,8.0027
|
| 290 |
+
1642,115600,17.5209
|
| 291 |
+
1650,116000,7.7073
|
| 292 |
+
1654,116400,20.1086
|
| 293 |
+
1661,116800,7.9623
|
| 294 |
+
1668,117200,12.2066
|
| 295 |
+
1675,117600,8.7988
|
| 296 |
+
1681,118000,14.56
|
| 297 |
+
1686,118400,16.3382
|
| 298 |
+
1691,118800,12.1992
|
| 299 |
+
1699,119200,10.4266
|
| 300 |
+
1703,119600,19.3562
|
| 301 |
+
1708,120000,11.3536
|
| 302 |
+
1712,120400,20.2356
|
| 303 |
+
1716,120800,19.5323
|
| 304 |
+
1721,121200,17.4551
|
| 305 |
+
1728,121600,10.1801
|
| 306 |
+
1732,122000,16.947
|
| 307 |
+
1739,122400,9.827
|
| 308 |
+
1745,122800,13.3383
|
| 309 |
+
1749,123200,19.4348
|
| 310 |
+
1755,123600,12.4996
|
| 311 |
+
1761,124000,13.8883
|
| 312 |
+
1765,124400,20.16
|
| 313 |
+
1769,124800,19.6822
|
| 314 |
+
1776,125200,10.0647
|
| 315 |
+
1780,125600,18.9827
|
| 316 |
+
1785,126000,10.8816
|
| 317 |
+
1791,126400,12.6382
|
| 318 |
+
1797,126800,11.2938
|
| 319 |
+
1806,127200,8.8997
|
| 320 |
+
1811,127600,15.7514
|
| 321 |
+
1817,128000,9.8611
|
| 322 |
+
1825,128400,10.0803
|
| 323 |
+
1833,128800,9.5121
|
| 324 |
+
1841,129200,7.1428
|
| 325 |
+
1850,129600,8.5931
|
| 326 |
+
1858,130000,9.1178
|
| 327 |
+
1866,130400,10.6061
|
| 328 |
+
1871,130800,15.7645
|
| 329 |
+
1875,131200,19.0111
|
| 330 |
+
1881,131600,10.6172
|
| 331 |
+
1885,132000,20.6998
|
| 332 |
+
1889,132400,20.2484
|
| 333 |
+
1894,132800,16.4684
|
| 334 |
+
1900,133200,13.5329
|
| 335 |
+
1904,133600,20.9186
|
| 336 |
+
1910,134000,15.0977
|
| 337 |
+
1915,134400,14.7358
|
| 338 |
+
1920,134800,15.5285
|
| 339 |
+
1928,135200,8.3647
|
| 340 |
+
1934,135600,11.9479
|
| 341 |
+
1940,136000,14.4666
|
| 342 |
+
1947,136400,9.6302
|
| 343 |
+
1953,136800,13.6634
|
| 344 |
+
1958,137200,14.0396
|
| 345 |
+
1966,137600,9.1948
|
| 346 |
+
1973,138000,9.3788
|
| 347 |
+
1982,138400,9.0964
|
| 348 |
+
1991,138800,7.105
|
| 349 |
+
1998,139200,11.6642
|
| 350 |
+
2005,139600,8.2024
|
| 351 |
+
2014,140000,7.7126
|
| 352 |
+
2021,140400,11.0422
|
| 353 |
+
2027,140800,11.2108
|
| 354 |
+
2032,141200,15.9589
|
| 355 |
+
2039,141600,8.9692
|
| 356 |
+
2046,142000,12.0016
|
| 357 |
+
2052,142400,13.3516
|
| 358 |
+
2056,142800,19.3885
|
| 359 |
+
2061,143200,14.0276
|
| 360 |
+
2066,143600,14.164
|
| 361 |
+
2073,144000,11.3607
|
| 362 |
+
2082,144400,6.5908
|
| 363 |
+
2089,144800,10.36
|
| 364 |
+
2094,145200,14.2824
|
| 365 |
+
2105,145600,4.9849
|
| 366 |
+
2111,146000,11.1523
|
| 367 |
+
2116,146400,12.4602
|
| 368 |
+
2123,146800,10.201
|
| 369 |
+
2129,147200,11.5534
|
| 370 |
+
2138,147600,5.9679
|
| 371 |
+
2142,148000,16.4718
|
| 372 |
+
2147,148400,17.5833
|
| 373 |
+
2152,148800,16.768
|
| 374 |
+
2156,149200,19.1331
|
| 375 |
+
2162,149600,12.3424
|
| 376 |
+
2166,150000,18.0845
|
| 377 |
+
2170,150400,18.9557
|
| 378 |
+
2175,150800,14.0846
|
| 379 |
+
2179,151200,20.1474
|
| 380 |
+
2184,151600,15.3493
|
| 381 |
+
2188,152000,18.1315
|
| 382 |
+
2193,152400,15.8044
|
| 383 |
+
2197,152800,17.7751
|
| 384 |
+
2203,153200,11.6984
|
| 385 |
+
2209,153600,13.0837
|
| 386 |
+
2213,154000,15.855
|
| 387 |
+
2218,154400,16.8814
|
| 388 |
+
2223,154800,16.0311
|
| 389 |
+
2227,155200,15.3192
|
| 390 |
+
2234,155600,11.457
|
| 391 |
+
2239,156000,16.1934
|
| 392 |
+
2246,156400,11.9881
|
| 393 |
+
2252,156800,9.2926
|
| 394 |
+
2257,157200,16.73
|
| 395 |
+
2261,157600,14.6868
|
| 396 |
+
2268,158000,7.9747
|
| 397 |
+
2273,158400,10.7901
|
| 398 |
+
2277,158800,18.3051
|
| 399 |
+
2283,159200,14.0301
|
| 400 |
+
2288,159600,15.5108
|
| 401 |
+
2294,160000,10.0966
|
| 402 |
+
2300,160400,8.0898
|
| 403 |
+
2306,160800,8.0323
|
| 404 |
+
2315,161200,5.836
|
| 405 |
+
2321,161600,10.9128
|
| 406 |
+
2329,162000,5.4973
|
| 407 |
+
2334,162400,11.0091
|
| 408 |
+
2340,162800,8.3576
|
| 409 |
+
2349,163200,6.2928
|
| 410 |
+
2354,163600,13.008
|
| 411 |
+
2358,164000,15.7582
|
| 412 |
+
2364,164400,10.6657
|
| 413 |
+
2372,164800,7.3352
|
| 414 |
+
2378,165200,7.9416
|
| 415 |
+
2385,165600,9.7033
|
| 416 |
+
2390,166000,14.8003
|
| 417 |
+
2395,166400,13.5949
|
| 418 |
+
2399,166800,16.5582
|
| 419 |
+
2405,167200,13.1281
|
| 420 |
+
2410,167600,13.6583
|
| 421 |
+
2415,168000,13.3859
|
| 422 |
+
2422,168400,12.7336
|
| 423 |
+
2427,168800,13.4928
|
| 424 |
+
2432,169200,14.222
|
| 425 |
+
2438,169600,12.2052
|
| 426 |
+
2442,170000,17.538
|
| 427 |
+
2449,170400,13.4975
|
| 428 |
+
2454,170800,13.4316
|
| 429 |
+
2460,171200,14.3904
|
| 430 |
+
2466,171600,14.1914
|
| 431 |
+
2472,172000,10.7212
|
| 432 |
+
2478,172400,13.2043
|
| 433 |
+
2484,172800,12.3505
|
| 434 |
+
2488,173200,18.4707
|
| 435 |
+
2493,173600,13.7594
|
| 436 |
+
2498,174000,15.2434
|
| 437 |
+
2503,174400,17.6234
|
| 438 |
+
2509,174800,11.5482
|
| 439 |
+
2513,175200,18.5919
|
| 440 |
+
2518,175600,18.2289
|
| 441 |
+
2525,176000,10.1992
|
| 442 |
+
2529,176400,19.2187
|
| 443 |
+
2533,176800,20.1763
|
| 444 |
+
2538,177200,15.8706
|
| 445 |
+
2542,177600,20.1437
|
| 446 |
+
2547,178000,14.6606
|
| 447 |
+
2552,178400,16.5006
|
| 448 |
+
2557,178800,17.2947
|
| 449 |
+
2561,179200,20.4127
|
| 450 |
+
2566,179600,16.8026
|
| 451 |
+
2571,180000,16.4537
|
| 452 |
+
2575,180400,19.0096
|
| 453 |
+
2580,180800,12.4961
|
| 454 |
+
2586,181200,13.702
|
| 455 |
+
2590,181600,19.4121
|
| 456 |
+
2595,182000,19.1734
|
| 457 |
+
2599,182400,15.6252
|
| 458 |
+
2604,182800,17.4543
|
| 459 |
+
2608,183200,18.4527
|
| 460 |
+
2613,183600,17.0079
|
| 461 |
+
2617,184000,15.8574
|
| 462 |
+
2623,184400,14.6486
|
| 463 |
+
2627,184800,16.7431
|
| 464 |
+
2632,185200,16.3204
|
| 465 |
+
2636,185600,18.1816
|
| 466 |
+
2640,186000,19.3138
|
| 467 |
+
2646,186400,13.6693
|
| 468 |
+
2652,186800,12.1288
|
| 469 |
+
2656,187200,19.5385
|
| 470 |
+
2662,187600,12.7194
|
| 471 |
+
2666,188000,18.8978
|
| 472 |
+
2671,188400,17.7266
|
| 473 |
+
2675,188800,20.3572
|
| 474 |
+
2680,189200,16.1169
|
| 475 |
+
2685,189600,14.712
|
| 476 |
+
2692,190000,11.1334
|
| 477 |
+
2696,190400,17.4639
|
| 478 |
+
2701,190800,16.8716
|
| 479 |
+
2705,191200,19.8286
|
| 480 |
+
2710,191600,18.7337
|
| 481 |
+
2714,192000,19.8553
|
| 482 |
+
2719,192400,14.2238
|
| 483 |
+
2725,192800,12.8373
|
| 484 |
+
2735,193200,7.0151
|
| 485 |
+
2739,193600,13.3644
|
| 486 |
+
2745,194000,13.3385
|
| 487 |
+
2753,194400,8.739
|
| 488 |
+
2758,194800,13.8917
|
| 489 |
+
2764,195200,10.3893
|
| 490 |
+
2773,195600,4.8033
|
| 491 |
+
2784,196000,5.5534
|
| 492 |
+
2792,196400,7.2659
|
| 493 |
+
2796,196800,13.9818
|
| 494 |
+
2802,197200,13.2211
|
| 495 |
+
2807,197600,16.0152
|
| 496 |
+
2814,198000,9.9562
|
| 497 |
+
2823,198400,6.7525
|
| 498 |
+
2828,198800,14.4238
|
| 499 |
+
2833,199200,10.7252
|
| 500 |
+
2838,199600,13.7153
|
| 501 |
+
2842,200000,19.2825
|
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_2.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
25,400,1.2001
|
| 3 |
+
50,800,1.1974
|
| 4 |
+
67,1200,1.7766
|
| 5 |
+
87,1600,1.4873
|
| 6 |
+
104,2000,1.7077
|
| 7 |
+
114,2400,2.5908
|
| 8 |
+
134,2800,1.8992
|
| 9 |
+
145,3200,2.8642
|
| 10 |
+
154,3600,2.9615
|
| 11 |
+
163,4000,3.7629
|
| 12 |
+
170,4400,4.1044
|
| 13 |
+
180,4800,3.4985
|
| 14 |
+
187,5200,4.3872
|
| 15 |
+
192,5600,5.6852
|
| 16 |
+
198,6000,5.7852
|
| 17 |
+
205,6400,4.5497
|
| 18 |
+
210,6800,5.1203
|
| 19 |
+
215,7200,6.3622
|
| 20 |
+
222,7600,4.9101
|
| 21 |
+
230,8000,3.7216
|
| 22 |
+
237,8400,3.9412
|
| 23 |
+
243,8800,5.1797
|
| 24 |
+
250,9200,4.9764
|
| 25 |
+
255,9600,5.0597
|
| 26 |
+
260,10000,6.6622
|
| 27 |
+
266,10400,5.4034
|
| 28 |
+
274,10800,3.7803
|
| 29 |
+
279,11200,4.8601
|
| 30 |
+
284,11600,6.8704
|
| 31 |
+
289,12000,5.884
|
| 32 |
+
298,12400,3.1085
|
| 33 |
+
306,12800,3.7402
|
| 34 |
+
317,13200,2.4474
|
| 35 |
+
325,13600,4.697
|
| 36 |
+
333,14000,3.7155
|
| 37 |
+
340,14400,4.1133
|
| 38 |
+
345,14800,6.3342
|
| 39 |
+
354,15200,3.4713
|
| 40 |
+
363,15600,2.973
|
| 41 |
+
369,16000,5.3844
|
| 42 |
+
376,16400,3.9942
|
| 43 |
+
384,16800,4.3917
|
| 44 |
+
390,17200,3.9032
|
| 45 |
+
396,17600,5.5522
|
| 46 |
+
402,18000,5.0959
|
| 47 |
+
410,18400,4.0555
|
| 48 |
+
419,18800,3.8592
|
| 49 |
+
424,19200,5.5717
|
| 50 |
+
430,19600,5.4533
|
| 51 |
+
436,20000,4.4112
|
| 52 |
+
442,20400,5.2485
|
| 53 |
+
453,20800,3.6337
|
| 54 |
+
458,21200,5.7439
|
| 55 |
+
464,21600,5.8554
|
| 56 |
+
470,22000,6.1356
|
| 57 |
+
478,22400,4.1016
|
| 58 |
+
483,22800,6.9287
|
| 59 |
+
487,23200,7.9029
|
| 60 |
+
492,23600,7.2121
|
| 61 |
+
496,24000,6.8316
|
| 62 |
+
501,24400,7.3779
|
| 63 |
+
506,24800,7.0025
|
| 64 |
+
511,25200,7.3224
|
| 65 |
+
516,25600,6.0262
|
| 66 |
+
521,26000,7.3493
|
| 67 |
+
525,26400,8.5734
|
| 68 |
+
530,26800,7.505
|
| 69 |
+
536,27200,5.6146
|
| 70 |
+
541,27600,7.32
|
| 71 |
+
547,28000,5.8791
|
| 72 |
+
551,28400,8.3691
|
| 73 |
+
555,28800,8.5242
|
| 74 |
+
560,29200,6.8016
|
| 75 |
+
564,29600,6.4966
|
| 76 |
+
570,30000,6.5884
|
| 77 |
+
574,30400,7.9429
|
| 78 |
+
579,30800,8.2591
|
| 79 |
+
586,31200,5.1781
|
| 80 |
+
590,31600,6.725
|
| 81 |
+
595,32000,7.3339
|
| 82 |
+
602,32400,5.5153
|
| 83 |
+
606,32800,7.294
|
| 84 |
+
611,33200,7.861
|
| 85 |
+
617,33600,5.8614
|
| 86 |
+
622,34000,6.2895
|
| 87 |
+
626,34400,7.641
|
| 88 |
+
631,34800,7.1765
|
| 89 |
+
635,35200,8.3742
|
| 90 |
+
639,35600,8.5644
|
| 91 |
+
644,36000,7.5061
|
| 92 |
+
648,36400,7.137
|
| 93 |
+
654,36800,6.317
|
| 94 |
+
659,37200,7.5171
|
| 95 |
+
663,37600,8.4671
|
| 96 |
+
667,38000,8.8055
|
| 97 |
+
672,38400,7.2286
|
| 98 |
+
677,38800,7.0709
|
| 99 |
+
682,39200,6.0232
|
| 100 |
+
687,39600,7.7556
|
| 101 |
+
691,40000,7.7135
|
| 102 |
+
695,40400,8.3451
|
| 103 |
+
701,40800,6.0775
|
| 104 |
+
705,41200,8.3363
|
| 105 |
+
709,41600,8.6071
|
| 106 |
+
715,42000,5.3694
|
| 107 |
+
720,42400,7.6011
|
| 108 |
+
724,42800,8.7117
|
| 109 |
+
729,43200,7.0684
|
| 110 |
+
735,43600,5.433
|
| 111 |
+
739,44000,8.6411
|
| 112 |
+
743,44400,8.5924
|
| 113 |
+
747,44800,8.9168
|
| 114 |
+
752,45200,7.2905
|
| 115 |
+
756,45600,8.9831
|
| 116 |
+
761,46000,7.4007
|
| 117 |
+
766,46400,6.664
|
| 118 |
+
770,46800,7.3928
|
| 119 |
+
777,47200,5.7024
|
| 120 |
+
783,47600,5.151
|
| 121 |
+
788,48000,7.2108
|
| 122 |
+
793,48400,7.2763
|
| 123 |
+
799,48800,7.0493
|
| 124 |
+
803,49200,8.5485
|
| 125 |
+
808,49600,7.5421
|
| 126 |
+
812,50000,9.5257
|
| 127 |
+
816,50400,9.1144
|
| 128 |
+
822,50800,5.6572
|
| 129 |
+
828,51200,7.8776
|
| 130 |
+
832,51600,10.0686
|
| 131 |
+
838,52000,6.2858
|
| 132 |
+
842,52400,8.7905
|
| 133 |
+
847,52800,6.9064
|
| 134 |
+
852,53200,8.591
|
| 135 |
+
857,53600,7.9326
|
| 136 |
+
861,54000,10.7555
|
| 137 |
+
865,54400,11.0084
|
| 138 |
+
870,54800,8.5343
|
| 139 |
+
874,55200,10.2597
|
| 140 |
+
878,55600,11.0706
|
| 141 |
+
883,56000,9.7567
|
| 142 |
+
887,56400,11.0066
|
| 143 |
+
891,56800,11.0205
|
| 144 |
+
897,57200,8.6028
|
| 145 |
+
902,57600,7.8706
|
| 146 |
+
906,58000,11.6827
|
| 147 |
+
911,58400,11.5678
|
| 148 |
+
916,58800,7.3302
|
| 149 |
+
920,59200,11.8686
|
| 150 |
+
925,59600,9.6659
|
| 151 |
+
929,60000,11.8218
|
| 152 |
+
933,60400,11.753
|
| 153 |
+
938,60800,10.8838
|
| 154 |
+
942,61200,11.9606
|
| 155 |
+
946,61600,11.9891
|
| 156 |
+
950,62000,10.7408
|
| 157 |
+
955,62400,9.9649
|
| 158 |
+
959,62800,10.1192
|
| 159 |
+
963,63200,11.9243
|
| 160 |
+
967,63600,12.0996
|
| 161 |
+
971,64000,12.2641
|
| 162 |
+
975,64400,12.2119
|
| 163 |
+
980,64800,9.6896
|
| 164 |
+
984,65200,11.985
|
| 165 |
+
988,65600,12.0389
|
| 166 |
+
993,66000,9.7261
|
| 167 |
+
998,66400,10.0918
|
| 168 |
+
1002,66800,11.9409
|
| 169 |
+
1007,67200,9.7976
|
| 170 |
+
1011,67600,11.9562
|
| 171 |
+
1015,68000,11.9393
|
| 172 |
+
1019,68400,11.9489
|
| 173 |
+
1023,68800,10.6665
|
| 174 |
+
1029,69200,8.6481
|
| 175 |
+
1035,69600,8.2744
|
| 176 |
+
1040,70000,9.8761
|
| 177 |
+
1044,70400,12.7648
|
| 178 |
+
1048,70800,12.0824
|
| 179 |
+
1053,71200,9.9615
|
| 180 |
+
1059,71600,7.5596
|
| 181 |
+
1063,72000,12.066
|
| 182 |
+
1068,72400,10.3717
|
| 183 |
+
1072,72800,11.9771
|
| 184 |
+
1076,73200,11.9456
|
| 185 |
+
1080,73600,9.1042
|
| 186 |
+
1084,74000,12.076
|
| 187 |
+
1090,74400,9.4681
|
| 188 |
+
1094,74800,11.529
|
| 189 |
+
1098,75200,12.0008
|
| 190 |
+
1103,75600,10.1566
|
| 191 |
+
1108,76000,9.751
|
| 192 |
+
1112,76400,12.3104
|
| 193 |
+
1116,76800,9.2405
|
| 194 |
+
1120,77200,12.0348
|
| 195 |
+
1125,77600,10.0069
|
| 196 |
+
1130,78000,10.0386
|
| 197 |
+
1136,78400,10.4839
|
| 198 |
+
1142,78800,7.6819
|
| 199 |
+
1149,79200,5.9398
|
| 200 |
+
1156,79600,7.5437
|
| 201 |
+
1163,80000,7.4114
|
| 202 |
+
1167,80400,9.315
|
| 203 |
+
1172,80800,11.2425
|
| 204 |
+
1176,81200,10.7758
|
| 205 |
+
1180,81600,12.0153
|
| 206 |
+
1185,82000,10.7721
|
| 207 |
+
1190,82400,9.8986
|
| 208 |
+
1194,82800,11.9053
|
| 209 |
+
1198,83200,11.9558
|
| 210 |
+
1203,83600,9.3269
|
| 211 |
+
1207,84000,12.2099
|
| 212 |
+
1213,84400,8.4628
|
| 213 |
+
1217,84800,10.7038
|
| 214 |
+
1221,85200,12.2837
|
| 215 |
+
1226,85600,9.9915
|
| 216 |
+
1231,86000,9.9481
|
| 217 |
+
1236,86400,10.1545
|
| 218 |
+
1240,86800,12.2327
|
| 219 |
+
1244,87200,12.075
|
| 220 |
+
1248,87600,12.01
|
| 221 |
+
1252,88000,10.5855
|
| 222 |
+
1256,88400,12.134
|
| 223 |
+
1260,88800,12.3757
|
| 224 |
+
1264,89200,11.9803
|
| 225 |
+
1269,89600,9.3804
|
| 226 |
+
1274,90000,9.8415
|
| 227 |
+
1278,90400,11.9713
|
| 228 |
+
1284,90800,8.3152
|
| 229 |
+
1289,91200,8.8216
|
| 230 |
+
1293,91600,12.3776
|
| 231 |
+
1299,92000,8.471
|
| 232 |
+
1303,92400,12.5027
|
| 233 |
+
1307,92800,12.517
|
| 234 |
+
1313,93200,8.4122
|
| 235 |
+
1317,93600,13.1839
|
| 236 |
+
1322,94000,10.4515
|
| 237 |
+
1326,94400,11.1728
|
| 238 |
+
1330,94800,13.0548
|
| 239 |
+
1334,95200,12.0536
|
| 240 |
+
1338,95600,12.4526
|
| 241 |
+
1344,96000,9.0872
|
| 242 |
+
1351,96400,7.9055
|
| 243 |
+
1355,96800,11.2378
|
| 244 |
+
1360,97200,11.1121
|
| 245 |
+
1364,97600,12.7524
|
| 246 |
+
1368,98000,12.6111
|
| 247 |
+
1373,98400,11.2153
|
| 248 |
+
1378,98800,8.2927
|
| 249 |
+
1382,99200,12.5971
|
| 250 |
+
1388,99600,9.538
|
| 251 |
+
1392,100000,12.5941
|
| 252 |
+
1397,100400,10.2156
|
| 253 |
+
1401,100800,12.5003
|
| 254 |
+
1406,101200,9.1204
|
| 255 |
+
1411,101600,11.3426
|
| 256 |
+
1418,102000,6.7031
|
| 257 |
+
1422,102400,13.0065
|
| 258 |
+
1427,102800,10.4644
|
| 259 |
+
1431,103200,12.6849
|
| 260 |
+
1435,103600,12.3948
|
| 261 |
+
1441,104000,9.7853
|
| 262 |
+
1446,104400,9.4532
|
| 263 |
+
1450,104800,10.5596
|
| 264 |
+
1456,105200,8.6362
|
| 265 |
+
1461,105600,11.1625
|
| 266 |
+
1465,106000,9.7383
|
| 267 |
+
1471,106400,9.5542
|
| 268 |
+
1476,106800,10.1016
|
| 269 |
+
1480,107200,11.3806
|
| 270 |
+
1484,107600,12.8012
|
| 271 |
+
1488,108000,12.754
|
| 272 |
+
1493,108400,9.2045
|
| 273 |
+
1497,108800,12.9866
|
| 274 |
+
1504,109200,7.3446
|
| 275 |
+
1509,109600,11.8225
|
| 276 |
+
1514,110000,8.9346
|
| 277 |
+
1518,110400,12.7821
|
| 278 |
+
1523,110800,11.2293
|
| 279 |
+
1527,111200,12.976
|
| 280 |
+
1532,111600,8.6775
|
| 281 |
+
1536,112000,12.7469
|
| 282 |
+
1540,112400,12.7591
|
| 283 |
+
1545,112800,10.3778
|
| 284 |
+
1550,113200,10.3744
|
| 285 |
+
1554,113600,12.7611
|
| 286 |
+
1558,114000,11.8172
|
| 287 |
+
1562,114400,12.8452
|
| 288 |
+
1566,114800,12.6974
|
| 289 |
+
1570,115200,12.7894
|
| 290 |
+
1575,115600,10.9595
|
| 291 |
+
1579,116000,13.0989
|
| 292 |
+
1583,116400,12.9278
|
| 293 |
+
1589,116800,9.0682
|
| 294 |
+
1593,117200,12.9978
|
| 295 |
+
1597,117600,10.7087
|
| 296 |
+
1601,118000,13.5158
|
| 297 |
+
1605,118400,13.435
|
| 298 |
+
1610,118800,11.5973
|
| 299 |
+
1614,119200,13.0533
|
| 300 |
+
1618,119600,13.0017
|
| 301 |
+
1623,120000,10.8166
|
| 302 |
+
1629,120400,9.0293
|
| 303 |
+
1634,120800,9.2539
|
| 304 |
+
1639,121200,10.6134
|
| 305 |
+
1644,121600,11.5216
|
| 306 |
+
1648,122000,13.3206
|
| 307 |
+
1652,122400,12.207
|
| 308 |
+
1657,122800,11.3809
|
| 309 |
+
1661,123200,10.1333
|
| 310 |
+
1665,123600,13.4154
|
| 311 |
+
1669,124000,13.7245
|
| 312 |
+
1673,124400,13.2766
|
| 313 |
+
1677,124800,13.6751
|
| 314 |
+
1681,125200,13.3002
|
| 315 |
+
1686,125600,11.3154
|
| 316 |
+
1690,126000,14.442
|
| 317 |
+
1697,126400,9.1752
|
| 318 |
+
1701,126800,12.6002
|
| 319 |
+
1710,127200,8.7327
|
| 320 |
+
1715,127600,11.4596
|
| 321 |
+
1722,128000,9.0605
|
| 322 |
+
1730,128400,6.6071
|
| 323 |
+
1738,128800,7.8556
|
| 324 |
+
1743,129200,8.6705
|
| 325 |
+
1748,129600,13.2619
|
| 326 |
+
1757,130000,8.0688
|
| 327 |
+
1762,130400,11.255
|
| 328 |
+
1769,130800,10.1434
|
| 329 |
+
1776,131200,7.9637
|
| 330 |
+
1780,131600,16.234
|
| 331 |
+
1784,132000,16.009
|
| 332 |
+
1790,132400,11.5696
|
| 333 |
+
1795,132800,13.03
|
| 334 |
+
1801,133200,11.5952
|
| 335 |
+
1807,133600,10.1351
|
| 336 |
+
1811,134000,14.54
|
| 337 |
+
1823,134400,4.607
|
| 338 |
+
1829,134800,12.6576
|
| 339 |
+
1833,135200,16.8332
|
| 340 |
+
1838,135600,11.8201
|
| 341 |
+
1842,136000,17.1005
|
| 342 |
+
1846,136400,16.8673
|
| 343 |
+
1851,136800,15.3565
|
| 344 |
+
1857,137200,8.8723
|
| 345 |
+
1862,137600,13.1234
|
| 346 |
+
1868,138000,12.9135
|
| 347 |
+
1876,138400,6.6299
|
| 348 |
+
1883,138800,8.6404
|
| 349 |
+
1892,139200,8.0842
|
| 350 |
+
1896,139600,16.6064
|
| 351 |
+
1900,140000,12.9285
|
| 352 |
+
1912,140400,5.249
|
| 353 |
+
1918,140800,9.7863
|
| 354 |
+
1926,141200,7.8766
|
| 355 |
+
1932,141600,7.9322
|
| 356 |
+
1942,142000,5.3181
|
| 357 |
+
1947,142400,12.7024
|
| 358 |
+
1956,142800,8.2081
|
| 359 |
+
1968,143200,2.9574
|
| 360 |
+
1975,143600,6.7944
|
| 361 |
+
1981,144000,11.6649
|
| 362 |
+
1994,144400,5.4107
|
| 363 |
+
2003,144800,6.3419
|
| 364 |
+
2012,145200,6.9728
|
| 365 |
+
2023,145600,5.0183
|
| 366 |
+
2031,146000,7.8319
|
| 367 |
+
2041,146400,4.6116
|
| 368 |
+
2054,146800,4.337
|
| 369 |
+
2063,147200,5.3691
|
| 370 |
+
2069,147600,12.9849
|
| 371 |
+
2078,148000,8.5405
|
| 372 |
+
2091,148400,4.9827
|
| 373 |
+
2108,148800,3.026
|
| 374 |
+
2123,149200,3.3827
|
| 375 |
+
2132,149600,6.8501
|
| 376 |
+
2141,150000,7.5638
|
| 377 |
+
2152,150400,5.2852
|
| 378 |
+
2162,150800,7.5752
|
| 379 |
+
2170,151200,7.9508
|
| 380 |
+
2177,151600,10.2038
|
| 381 |
+
2191,152000,3.0956
|
| 382 |
+
2200,152400,7.7211
|
| 383 |
+
2208,152800,6.9913
|
| 384 |
+
2215,153200,10.9929
|
| 385 |
+
2219,153600,15.1937
|
| 386 |
+
2226,154000,12.1173
|
| 387 |
+
2235,154400,6.3827
|
| 388 |
+
2244,154800,9.904
|
| 389 |
+
2251,155200,10.4535
|
| 390 |
+
2257,155600,11.1928
|
| 391 |
+
2262,156000,11.8628
|
| 392 |
+
2269,156400,11.1129
|
| 393 |
+
2275,156800,10.8419
|
| 394 |
+
2282,157200,10.6974
|
| 395 |
+
2288,157600,12.6491
|
| 396 |
+
2293,158000,17.1227
|
| 397 |
+
2297,158400,15.1925
|
| 398 |
+
2302,158800,15.8225
|
| 399 |
+
2310,159200,10.5351
|
| 400 |
+
2314,159600,20.5124
|
| 401 |
+
2318,160000,20.7472
|
| 402 |
+
2323,160400,16.1619
|
| 403 |
+
2328,160800,17.0157
|
| 404 |
+
2332,161200,19.5865
|
| 405 |
+
2336,161600,20.3359
|
| 406 |
+
2340,162000,15.7826
|
| 407 |
+
2344,162400,20.7786
|
| 408 |
+
2349,162800,19.027
|
| 409 |
+
2353,163200,16.8306
|
| 410 |
+
2358,163600,15.0345
|
| 411 |
+
2364,164000,13.7065
|
| 412 |
+
2370,164400,15.8193
|
| 413 |
+
2375,164800,15.9792
|
| 414 |
+
2379,165200,16.8467
|
| 415 |
+
2384,165600,17.832
|
| 416 |
+
2388,166000,20.4626
|
| 417 |
+
2393,166400,14.8119
|
| 418 |
+
2399,166800,12.4114
|
| 419 |
+
2403,167200,20.9186
|
| 420 |
+
2408,167600,13.3934
|
| 421 |
+
2412,168000,20.5788
|
| 422 |
+
2418,168400,16.2933
|
| 423 |
+
2422,168800,18.8223
|
| 424 |
+
2427,169200,19.3578
|
| 425 |
+
2432,169600,13.3396
|
| 426 |
+
2437,170000,17.3548
|
| 427 |
+
2443,170400,16.2848
|
| 428 |
+
2448,170800,18.1538
|
| 429 |
+
2453,171200,16.7561
|
| 430 |
+
2457,171600,17.7607
|
| 431 |
+
2463,172000,15.1953
|
| 432 |
+
2469,172400,13.134
|
| 433 |
+
2476,172800,12.5457
|
| 434 |
+
2482,173200,14.6165
|
| 435 |
+
2487,173600,18.5189
|
| 436 |
+
2492,174000,19.1314
|
| 437 |
+
2496,174400,17.353
|
| 438 |
+
2500,174800,22.18
|
| 439 |
+
2506,175200,15.5115
|
| 440 |
+
2511,175600,19.8744
|
| 441 |
+
2515,176000,18.1736
|
| 442 |
+
2519,176400,21.4187
|
| 443 |
+
2525,176800,17.5231
|
| 444 |
+
2531,177200,12.1789
|
| 445 |
+
2536,177600,17.784
|
| 446 |
+
2540,178000,20.6193
|
| 447 |
+
2545,178400,17.4618
|
| 448 |
+
2549,178800,21.499
|
| 449 |
+
2553,179200,17.4586
|
| 450 |
+
2557,179600,20.6227
|
| 451 |
+
2561,180000,20.1806
|
| 452 |
+
2567,180400,13.3524
|
| 453 |
+
2573,180800,14.6734
|
| 454 |
+
2577,181200,21.1076
|
| 455 |
+
2582,181600,17.1151
|
| 456 |
+
2587,182000,12.7979
|
| 457 |
+
2592,182400,15.9493
|
| 458 |
+
2599,182800,14.3687
|
| 459 |
+
2604,183200,15.514
|
| 460 |
+
2610,183600,12.1476
|
| 461 |
+
2616,184000,13.7643
|
| 462 |
+
2622,184400,14.6858
|
| 463 |
+
2627,184800,13.8197
|
| 464 |
+
2631,185200,20.2988
|
| 465 |
+
2637,185600,13.8708
|
| 466 |
+
2641,186000,21.6494
|
| 467 |
+
2646,186400,15.6542
|
| 468 |
+
2653,186800,11.9435
|
| 469 |
+
2659,187200,13.8883
|
| 470 |
+
2666,187600,12.5814
|
| 471 |
+
2671,188000,17.3771
|
| 472 |
+
2678,188400,11.9801
|
| 473 |
+
2683,188800,13.1353
|
| 474 |
+
2692,189200,10.337
|
| 475 |
+
2698,189600,11.3343
|
| 476 |
+
2705,190000,12.8742
|
| 477 |
+
2711,190400,14.6771
|
| 478 |
+
2717,190800,11.9189
|
| 479 |
+
2726,191200,8.7059
|
| 480 |
+
2732,191600,13.1508
|
| 481 |
+
2739,192000,9.7473
|
| 482 |
+
2743,192400,19.009
|
| 483 |
+
2748,192800,16.8758
|
| 484 |
+
2754,193200,15.7392
|
| 485 |
+
2758,193600,20.0703
|
| 486 |
+
2764,194000,12.4249
|
| 487 |
+
2768,194400,21.7073
|
| 488 |
+
2773,194800,18.1804
|
| 489 |
+
2779,195200,15.5854
|
| 490 |
+
2785,195600,12.3091
|
| 491 |
+
2790,196000,15.0442
|
| 492 |
+
2794,196400,20.9159
|
| 493 |
+
2802,196800,11.6643
|
| 494 |
+
2811,197200,7.4276
|
| 495 |
+
2819,197600,8.6076
|
| 496 |
+
2826,198000,10.6653
|
| 497 |
+
2831,198400,16.6902
|
| 498 |
+
2837,198800,13.4796
|
| 499 |
+
2843,199200,14.7899
|
| 500 |
+
2848,199600,14.6657
|
| 501 |
+
2855,200000,11.2761
|
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_3.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
20,400,1.4511
|
| 3 |
+
42,800,1.4159
|
| 4 |
+
60,1200,1.7077
|
| 5 |
+
73,1600,2.1135
|
| 6 |
+
84,2000,2.7468
|
| 7 |
+
105,2400,1.7674
|
| 8 |
+
113,2800,3.7814
|
| 9 |
+
122,3200,3.7896
|
| 10 |
+
134,3600,2.7067
|
| 11 |
+
146,4000,2.5524
|
| 12 |
+
152,4400,6.5864
|
| 13 |
+
157,4800,6.5195
|
| 14 |
+
162,5200,6.1349
|
| 15 |
+
168,5600,5.317
|
| 16 |
+
177,6000,3.6724
|
| 17 |
+
183,6400,6.1446
|
| 18 |
+
189,6800,5.5608
|
| 19 |
+
195,7200,6.1726
|
| 20 |
+
199,7600,6.344
|
| 21 |
+
205,8000,5.8716
|
| 22 |
+
213,8400,4.0247
|
| 23 |
+
217,8800,6.9902
|
| 24 |
+
224,9200,6.2118
|
| 25 |
+
229,9600,6.7983
|
| 26 |
+
236,10000,6.4537
|
| 27 |
+
241,10400,9.274
|
| 28 |
+
246,10800,7.4492
|
| 29 |
+
253,11200,6.7536
|
| 30 |
+
257,11600,10.0378
|
| 31 |
+
264,12000,5.3493
|
| 32 |
+
270,12400,6.8639
|
| 33 |
+
274,12800,9.7271
|
| 34 |
+
278,13200,10.6744
|
| 35 |
+
284,13600,7.8632
|
| 36 |
+
290,14000,8.3459
|
| 37 |
+
296,14400,6.5538
|
| 38 |
+
301,14800,9.4489
|
| 39 |
+
307,15200,8.4332
|
| 40 |
+
313,15600,7.6024
|
| 41 |
+
318,16000,9.6646
|
| 42 |
+
323,16400,7.5682
|
| 43 |
+
327,16800,12.2827
|
| 44 |
+
332,17200,11.2367
|
| 45 |
+
336,17600,10.6158
|
| 46 |
+
341,18000,8.7318
|
| 47 |
+
347,18400,8.9077
|
| 48 |
+
352,18800,10.0014
|
| 49 |
+
357,19200,9.9727
|
| 50 |
+
362,19600,8.4138
|
| 51 |
+
368,20000,8.7518
|
| 52 |
+
372,20400,12.3645
|
| 53 |
+
376,20800,10.8224
|
| 54 |
+
381,21200,11.2952
|
| 55 |
+
387,21600,7.5035
|
| 56 |
+
392,22000,10.1362
|
| 57 |
+
398,22400,8.0769
|
| 58 |
+
402,22800,11.4559
|
| 59 |
+
408,23200,10.9315
|
| 60 |
+
412,23600,12.5869
|
| 61 |
+
416,24000,9.7607
|
| 62 |
+
421,24400,10.3972
|
| 63 |
+
427,24800,9.1654
|
| 64 |
+
434,25200,6.5726
|
| 65 |
+
440,25600,7.9518
|
| 66 |
+
445,26000,12.163
|
| 67 |
+
449,26400,12.5389
|
| 68 |
+
453,26800,12.4047
|
| 69 |
+
457,27200,12.8434
|
| 70 |
+
461,27600,12.508
|
| 71 |
+
465,28000,12.7977
|
| 72 |
+
469,28400,12.7722
|
| 73 |
+
475,28800,7.5211
|
| 74 |
+
481,29200,8.8252
|
| 75 |
+
485,29600,11.5555
|
| 76 |
+
489,30000,11.477
|
| 77 |
+
493,30400,12.9982
|
| 78 |
+
497,30800,13.1206
|
| 79 |
+
504,31200,9.1732
|
| 80 |
+
509,31600,9.6877
|
| 81 |
+
515,32000,8.65
|
| 82 |
+
522,32400,6.6807
|
| 83 |
+
529,32800,7.7346
|
| 84 |
+
534,33200,7.8476
|
| 85 |
+
541,33600,7.9549
|
| 86 |
+
548,34000,6.9946
|
| 87 |
+
555,34400,5.7617
|
| 88 |
+
562,34800,7.4759
|
| 89 |
+
567,35200,9.4369
|
| 90 |
+
574,35600,8.9111
|
| 91 |
+
581,36000,6.5248
|
| 92 |
+
586,36400,10.6508
|
| 93 |
+
594,36800,4.8766
|
| 94 |
+
606,37200,5.0255
|
| 95 |
+
611,37600,8.7737
|
| 96 |
+
619,38000,7.297
|
| 97 |
+
625,38400,8.7064
|
| 98 |
+
631,38800,7.6392
|
| 99 |
+
637,39200,8.1329
|
| 100 |
+
643,39600,8.6996
|
| 101 |
+
648,40000,12.1593
|
| 102 |
+
654,40400,8.0097
|
| 103 |
+
658,40800,13.7355
|
| 104 |
+
663,41200,10.8767
|
| 105 |
+
667,41600,13.4623
|
| 106 |
+
671,42000,13.7128
|
| 107 |
+
677,42400,7.9521
|
| 108 |
+
685,42800,7.0389
|
| 109 |
+
693,43200,7.1228
|
| 110 |
+
699,43600,7.1906
|
| 111 |
+
704,44000,10.2499
|
| 112 |
+
711,44400,8.8439
|
| 113 |
+
715,44800,10.723
|
| 114 |
+
722,45200,8.0561
|
| 115 |
+
726,45600,12.8837
|
| 116 |
+
730,46000,11.6005
|
| 117 |
+
735,46400,10.5382
|
| 118 |
+
740,46800,10.9579
|
| 119 |
+
744,47200,12.8439
|
| 120 |
+
749,47600,8.5832
|
| 121 |
+
755,48000,10.2625
|
| 122 |
+
759,48400,11.0394
|
| 123 |
+
764,48800,10.5021
|
| 124 |
+
771,49200,7.5662
|
| 125 |
+
775,49600,11.8596
|
| 126 |
+
779,50000,10.9197
|
| 127 |
+
785,50400,9.6613
|
| 128 |
+
791,50800,7.1842
|
| 129 |
+
797,51200,8.9205
|
| 130 |
+
810,51600,3.7799
|
| 131 |
+
814,52000,11.7011
|
| 132 |
+
818,52400,13.1067
|
| 133 |
+
825,52800,6.8794
|
| 134 |
+
837,53200,4.1435
|
| 135 |
+
844,53600,7.8758
|
| 136 |
+
850,54000,6.8512
|
| 137 |
+
857,54400,7.279
|
| 138 |
+
863,54800,8.8461
|
| 139 |
+
870,55200,5.1664
|
| 140 |
+
876,55600,8.291
|
| 141 |
+
882,56000,7.4186
|
| 142 |
+
886,56400,12.7099
|
| 143 |
+
895,56800,5.8991
|
| 144 |
+
900,57200,10.3267
|
| 145 |
+
905,57600,10.4081
|
| 146 |
+
910,58000,8.2201
|
| 147 |
+
917,58400,7.2555
|
| 148 |
+
923,58800,8.6902
|
| 149 |
+
929,59200,8.4311
|
| 150 |
+
935,59600,9.3899
|
| 151 |
+
940,60000,9.48
|
| 152 |
+
947,60400,8.8701
|
| 153 |
+
952,60800,10.6712
|
| 154 |
+
957,61200,12.1303
|
| 155 |
+
963,61600,10.0041
|
| 156 |
+
967,62000,11.8506
|
| 157 |
+
975,62400,7.9768
|
| 158 |
+
981,62800,10.121
|
| 159 |
+
985,63200,12.7484
|
| 160 |
+
992,63600,8.6506
|
| 161 |
+
996,64000,12.0166
|
| 162 |
+
1003,64400,8.6989
|
| 163 |
+
1007,64800,13.5841
|
| 164 |
+
1012,65200,11.8948
|
| 165 |
+
1020,65600,7.5362
|
| 166 |
+
1024,66000,14.5183
|
| 167 |
+
1031,66400,8.4667
|
| 168 |
+
1039,66800,6.099
|
| 169 |
+
1044,67200,9.9433
|
| 170 |
+
1051,67600,10.2187
|
| 171 |
+
1056,68000,12.4386
|
| 172 |
+
1065,68400,4.846
|
| 173 |
+
1071,68800,12.9973
|
| 174 |
+
1076,69200,11.6068
|
| 175 |
+
1084,69600,9.6249
|
| 176 |
+
1090,70000,8.821
|
| 177 |
+
1098,70400,8.9721
|
| 178 |
+
1104,70800,10.8627
|
| 179 |
+
1110,71200,12.9207
|
| 180 |
+
1117,71600,9.6223
|
| 181 |
+
1124,72000,9.5217
|
| 182 |
+
1129,72400,12.7009
|
| 183 |
+
1137,72800,7.1291
|
| 184 |
+
1142,73200,15.8915
|
| 185 |
+
1147,73600,12.773
|
| 186 |
+
1152,74000,14.0152
|
| 187 |
+
1158,74400,12.8197
|
| 188 |
+
1165,74800,7.6715
|
| 189 |
+
1171,75200,11.6273
|
| 190 |
+
1176,75600,14.7481
|
| 191 |
+
1182,76000,10.9798
|
| 192 |
+
1186,76400,17.6803
|
| 193 |
+
1193,76800,10.2361
|
| 194 |
+
1198,77200,11.135
|
| 195 |
+
1203,77600,13.5392
|
| 196 |
+
1207,78000,16.3684
|
| 197 |
+
1213,78400,12.9063
|
| 198 |
+
1218,78800,11.9918
|
| 199 |
+
1226,79200,8.0133
|
| 200 |
+
1231,79600,13.8717
|
| 201 |
+
1236,80000,13.592
|
| 202 |
+
1240,80400,16.6494
|
| 203 |
+
1247,80800,9.0394
|
| 204 |
+
1253,81200,11.6721
|
| 205 |
+
1257,81600,12.4117
|
| 206 |
+
1263,82000,13.1805
|
| 207 |
+
1268,82400,12.8848
|
| 208 |
+
1273,82800,13.0017
|
| 209 |
+
1279,83200,11.932
|
| 210 |
+
1286,83600,10.3167
|
| 211 |
+
1292,84000,11.0747
|
| 212 |
+
1298,84400,12.7847
|
| 213 |
+
1302,84800,16.4195
|
| 214 |
+
1307,85200,14.5389
|
| 215 |
+
1313,85600,11.3515
|
| 216 |
+
1317,86000,19.0045
|
| 217 |
+
1324,86400,12.204
|
| 218 |
+
1328,86800,18.172
|
| 219 |
+
1334,87200,11.4799
|
| 220 |
+
1339,87600,15.2817
|
| 221 |
+
1346,88000,12.8543
|
| 222 |
+
1351,88400,15.2124
|
| 223 |
+
1355,88800,20.012
|
| 224 |
+
1360,89200,15.6753
|
| 225 |
+
1364,89600,18.9953
|
| 226 |
+
1369,90000,14.7316
|
| 227 |
+
1373,90400,18.3781
|
| 228 |
+
1379,90800,12.0495
|
| 229 |
+
1383,91200,19.1038
|
| 230 |
+
1388,91600,15.5228
|
| 231 |
+
1394,92000,13.1508
|
| 232 |
+
1399,92400,13.739
|
| 233 |
+
1404,92800,15.4669
|
| 234 |
+
1409,93200,12.2052
|
| 235 |
+
1413,93600,18.4534
|
| 236 |
+
1417,94000,18.4736
|
| 237 |
+
1421,94400,19.451
|
| 238 |
+
1426,94800,17.0253
|
| 239 |
+
1431,95200,13.6257
|
| 240 |
+
1436,95600,15.849
|
| 241 |
+
1447,96000,6.1302
|
| 242 |
+
1452,96400,13.5995
|
| 243 |
+
1460,96800,10.4263
|
| 244 |
+
1466,97200,12.8175
|
| 245 |
+
1470,97600,19.8985
|
| 246 |
+
1474,98000,19.4499
|
| 247 |
+
1480,98400,12.2035
|
| 248 |
+
1485,98800,15.5921
|
| 249 |
+
1493,99200,9.945
|
| 250 |
+
1500,99600,8.3822
|
| 251 |
+
1504,100000,20.1108
|
| 252 |
+
1511,100400,13.2678
|
| 253 |
+
1517,100800,13.3653
|
| 254 |
+
1521,101200,20.5694
|
| 255 |
+
1526,101600,12.9576
|
| 256 |
+
1531,102000,18.6283
|
| 257 |
+
1537,102400,13.6185
|
| 258 |
+
1542,102800,13.5109
|
| 259 |
+
1547,103200,14.7459
|
| 260 |
+
1555,103600,11.7803
|
| 261 |
+
1562,104000,8.6873
|
| 262 |
+
1572,104400,8.425
|
| 263 |
+
1577,104800,12.4127
|
| 264 |
+
1583,105200,11.9189
|
| 265 |
+
1590,105600,12.8694
|
| 266 |
+
1597,106000,10.6233
|
| 267 |
+
1604,106400,8.3289
|
| 268 |
+
1613,106800,9.3008
|
| 269 |
+
1622,107200,7.9757
|
| 270 |
+
1627,107600,15.1024
|
| 271 |
+
1633,108000,12.5267
|
| 272 |
+
1641,108400,10.7986
|
| 273 |
+
1646,108800,10.9844
|
| 274 |
+
1650,109200,20.3921
|
| 275 |
+
1657,109600,12.9882
|
| 276 |
+
1664,110000,8.8107
|
| 277 |
+
1671,110400,13.2145
|
| 278 |
+
1677,110800,13.7578
|
| 279 |
+
1683,111200,11.6441
|
| 280 |
+
1691,111600,10.6352
|
| 281 |
+
1698,112000,9.8465
|
| 282 |
+
1704,112400,13.4316
|
| 283 |
+
1708,112800,19.3443
|
| 284 |
+
1715,113200,11.1754
|
| 285 |
+
1722,113600,10.6371
|
| 286 |
+
1728,114000,9.1292
|
| 287 |
+
1734,114400,14.4382
|
| 288 |
+
1741,114800,13.7379
|
| 289 |
+
1745,115200,20.8923
|
| 290 |
+
1749,115600,16.3165
|
| 291 |
+
1755,116000,15.3684
|
| 292 |
+
1762,116400,9.6267
|
| 293 |
+
1766,116800,17.2582
|
| 294 |
+
1772,117200,14.4907
|
| 295 |
+
1777,117600,16.4743
|
| 296 |
+
1783,118000,13.0177
|
| 297 |
+
1792,118400,7.393
|
| 298 |
+
1799,118800,10.4686
|
| 299 |
+
1804,119200,17.1223
|
| 300 |
+
1814,119600,7.6813
|
| 301 |
+
1820,120000,10.9129
|
| 302 |
+
1826,120400,15.8259
|
| 303 |
+
1830,120800,20.2207
|
| 304 |
+
1837,121200,9.1836
|
| 305 |
+
1843,121600,14.5043
|
| 306 |
+
1848,122000,12.8295
|
| 307 |
+
1853,122400,18.3265
|
| 308 |
+
1858,122800,16.1205
|
| 309 |
+
1863,123200,19.6436
|
| 310 |
+
1869,123600,11.276
|
| 311 |
+
1873,124000,20.749
|
| 312 |
+
1882,124400,9.1372
|
| 313 |
+
1888,124800,13.6346
|
| 314 |
+
1893,125200,17.135
|
| 315 |
+
1899,125600,10.655
|
| 316 |
+
1904,126000,20.1275
|
| 317 |
+
1908,126400,19.847
|
| 318 |
+
1913,126800,15.7564
|
| 319 |
+
1918,127200,14.3012
|
| 320 |
+
1923,127600,15.9916
|
| 321 |
+
1928,128000,15.8541
|
| 322 |
+
1932,128400,20.358
|
| 323 |
+
1937,128800,16.4342
|
| 324 |
+
1941,129200,21.0778
|
| 325 |
+
1946,129600,15.8157
|
| 326 |
+
1952,130000,14.1684
|
| 327 |
+
1956,130400,17.5727
|
| 328 |
+
1962,130800,13.636
|
| 329 |
+
1967,131200,16.7871
|
| 330 |
+
1971,131600,19.9122
|
| 331 |
+
1976,132000,17.2468
|
| 332 |
+
1984,132400,10.4792
|
| 333 |
+
1989,132800,15.3806
|
| 334 |
+
1994,133200,15.9361
|
| 335 |
+
1998,133600,20.595
|
| 336 |
+
2003,134000,14.5683
|
| 337 |
+
2010,134400,12.9443
|
| 338 |
+
2017,134800,11.1499
|
| 339 |
+
2021,135200,16.4114
|
| 340 |
+
2028,135600,11.15
|
| 341 |
+
2033,136000,16.1641
|
| 342 |
+
2039,136400,11.8905
|
| 343 |
+
2045,136800,12.094
|
| 344 |
+
2051,137200,13.1055
|
| 345 |
+
2057,137600,12.3794
|
| 346 |
+
2062,138000,16.7795
|
| 347 |
+
2066,138400,15.2186
|
| 348 |
+
2071,138800,17.5386
|
| 349 |
+
2075,139200,19.7387
|
| 350 |
+
2080,139600,17.7345
|
| 351 |
+
2087,140000,9.286
|
| 352 |
+
2091,140400,20.1843
|
| 353 |
+
2095,140800,20.6579
|
| 354 |
+
2099,141200,20.4451
|
| 355 |
+
2105,141600,12.451
|
| 356 |
+
2111,142000,13.8034
|
| 357 |
+
2116,142400,16.789
|
| 358 |
+
2121,142800,17.3337
|
| 359 |
+
2127,143200,12.512
|
| 360 |
+
2132,143600,18.0873
|
| 361 |
+
2137,144000,16.5148
|
| 362 |
+
2143,144400,14.2245
|
| 363 |
+
2150,144800,11.4969
|
| 364 |
+
2158,145200,6.4741
|
| 365 |
+
2167,145600,10.4108
|
| 366 |
+
2173,146000,9.8208
|
| 367 |
+
2180,146400,14.0351
|
| 368 |
+
2186,146800,11.3705
|
| 369 |
+
2191,147200,17.1942
|
| 370 |
+
2196,147600,16.5483
|
| 371 |
+
2201,148000,17.2101
|
| 372 |
+
2206,148400,15.8254
|
| 373 |
+
2211,148800,16.944
|
| 374 |
+
2216,149200,13.2711
|
| 375 |
+
2222,149600,16.0584
|
| 376 |
+
2227,150000,15.4412
|
| 377 |
+
2231,150400,20.0904
|
| 378 |
+
2235,150800,18.9388
|
| 379 |
+
2241,151200,10.2277
|
| 380 |
+
2248,151600,9.7938
|
| 381 |
+
2257,152000,6.8651
|
| 382 |
+
2265,152400,10.0151
|
| 383 |
+
2269,152800,16.5873
|
| 384 |
+
2275,153200,13.0192
|
| 385 |
+
2280,153600,16.4787
|
| 386 |
+
2284,154000,19.9945
|
| 387 |
+
2288,154400,21.1409
|
| 388 |
+
2293,154800,17.4874
|
| 389 |
+
2298,155200,16.9422
|
| 390 |
+
2302,155600,16.5356
|
| 391 |
+
2307,156000,17.7096
|
| 392 |
+
2314,156400,11.4627
|
| 393 |
+
2318,156800,20.033
|
| 394 |
+
2323,157200,14.9603
|
| 395 |
+
2327,157600,19.5649
|
| 396 |
+
2334,158000,10.3648
|
| 397 |
+
2339,158400,17.1039
|
| 398 |
+
2343,158800,21.143
|
| 399 |
+
2348,159200,17.2194
|
| 400 |
+
2352,159600,21.0071
|
| 401 |
+
2356,160000,21.8045
|
| 402 |
+
2361,160400,17.6473
|
| 403 |
+
2367,160800,15.5177
|
| 404 |
+
2371,161200,20.4818
|
| 405 |
+
2376,161600,17.1564
|
| 406 |
+
2381,162000,14.4102
|
| 407 |
+
2386,162400,18.4027
|
| 408 |
+
2390,162800,22.3808
|
| 409 |
+
2395,163200,14.4186
|
| 410 |
+
2400,163600,13.4343
|
| 411 |
+
2404,164000,19.605
|
| 412 |
+
2409,164400,14.7853
|
| 413 |
+
2415,164800,11.0992
|
| 414 |
+
2421,165200,12.6908
|
| 415 |
+
2426,165600,17.289
|
| 416 |
+
2431,166000,17.1976
|
| 417 |
+
2435,166400,21.6014
|
| 418 |
+
2441,166800,12.793
|
| 419 |
+
2445,167200,14.232
|
| 420 |
+
2450,167600,15.6217
|
| 421 |
+
2457,168000,14.2949
|
| 422 |
+
2461,168400,16.1677
|
| 423 |
+
2465,168800,21.0584
|
| 424 |
+
2470,169200,19.4555
|
| 425 |
+
2474,169600,16.2403
|
| 426 |
+
2481,170000,10.8878
|
| 427 |
+
2491,170400,4.966
|
| 428 |
+
2497,170800,11.4854
|
| 429 |
+
2509,171200,4.8386
|
| 430 |
+
2517,171600,9.0965
|
| 431 |
+
2532,172000,4.4732
|
| 432 |
+
2539,172400,10.3322
|
| 433 |
+
2548,172800,6.617
|
| 434 |
+
2559,173200,6.6052
|
| 435 |
+
2566,173600,8.1639
|
| 436 |
+
2571,174000,14.7295
|
| 437 |
+
2575,174400,19.1853
|
| 438 |
+
2581,174800,11.4953
|
| 439 |
+
2586,175200,15.2959
|
| 440 |
+
2591,175600,17.3706
|
| 441 |
+
2597,176000,16.6239
|
| 442 |
+
2602,176400,16.1638
|
| 443 |
+
2608,176800,13.7811
|
| 444 |
+
2614,177200,11.7461
|
| 445 |
+
2624,177600,8.2969
|
| 446 |
+
2629,178000,16.7226
|
| 447 |
+
2636,178400,10.9358
|
| 448 |
+
2642,178800,16.5572
|
| 449 |
+
2649,179200,10.4538
|
| 450 |
+
2655,179600,12.1145
|
| 451 |
+
2660,180000,17.3922
|
| 452 |
+
2669,180400,10.0921
|
| 453 |
+
2675,180800,14.2412
|
| 454 |
+
2680,181200,16.8869
|
| 455 |
+
2684,181600,17.4712
|
| 456 |
+
2689,182000,19.3122
|
| 457 |
+
2693,182400,22.4468
|
| 458 |
+
2699,182800,15.3699
|
| 459 |
+
2704,183200,18.0721
|
| 460 |
+
2710,183600,13.5026
|
| 461 |
+
2717,184000,12.2414
|
| 462 |
+
2721,184400,17.901
|
| 463 |
+
2726,184800,16.2374
|
| 464 |
+
2731,185200,17.4196
|
| 465 |
+
2736,185600,18.1275
|
| 466 |
+
2740,186000,20.9471
|
| 467 |
+
2745,186400,20.6423
|
| 468 |
+
2750,186800,18.2358
|
| 469 |
+
2755,187200,17.1191
|
| 470 |
+
2760,187600,17.9405
|
| 471 |
+
2766,188000,12.8474
|
| 472 |
+
2770,188400,19.6427
|
| 473 |
+
2776,188800,15.7317
|
| 474 |
+
2780,189200,19.7856
|
| 475 |
+
2787,189600,10.9979
|
| 476 |
+
2792,190000,18.7782
|
| 477 |
+
2798,190400,13.124
|
| 478 |
+
2803,190800,15.1497
|
| 479 |
+
2808,191200,15.8551
|
| 480 |
+
2818,191600,6.6157
|
| 481 |
+
2824,192000,15.3035
|
| 482 |
+
2831,192400,11.4718
|
| 483 |
+
2835,192800,22.8164
|
| 484 |
+
2841,193200,16.0213
|
| 485 |
+
2846,193600,17.6302
|
| 486 |
+
2851,194000,14.8716
|
| 487 |
+
2855,194400,20.6271
|
| 488 |
+
2862,194800,14.8134
|
| 489 |
+
2868,195200,10.8108
|
| 490 |
+
2872,195600,22.52
|
| 491 |
+
2879,196000,13.9515
|
| 492 |
+
2885,196400,12.6253
|
| 493 |
+
2893,196800,8.5209
|
| 494 |
+
2900,197200,11.9898
|
| 495 |
+
2906,197600,13.4315
|
| 496 |
+
2911,198000,18.833
|
| 497 |
+
2918,198400,11.5714
|
| 498 |
+
2923,198800,15.1536
|
| 499 |
+
2931,199200,9.8851
|
| 500 |
+
2937,199600,12.3961
|
| 501 |
+
2943,200000,10.9256
|
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_4.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
27,400,1.0785
|
| 3 |
+
45,800,1.6504
|
| 4 |
+
65,1200,1.4004
|
| 5 |
+
75,1600,2.9704
|
| 6 |
+
92,2000,1.7748
|
| 7 |
+
101,2400,3.44
|
| 8 |
+
112,2800,2.7486
|
| 9 |
+
118,3200,4.5942
|
| 10 |
+
124,3600,5.5212
|
| 11 |
+
132,4000,3.7269
|
| 12 |
+
139,4400,4.3077
|
| 13 |
+
146,4800,4.8111
|
| 14 |
+
151,5200,5.5228
|
| 15 |
+
157,5600,6.1189
|
| 16 |
+
163,6000,5.2543
|
| 17 |
+
168,6400,5.2306
|
| 18 |
+
175,6800,5.3845
|
| 19 |
+
182,7200,4.3935
|
| 20 |
+
187,7600,5.7774
|
| 21 |
+
192,8000,6.1537
|
| 22 |
+
197,8400,5.7906
|
| 23 |
+
202,8800,7.0905
|
| 24 |
+
209,9200,4.5122
|
| 25 |
+
213,9600,6.5891
|
| 26 |
+
218,10000,7.7331
|
| 27 |
+
223,10400,6.6175
|
| 28 |
+
227,10800,10.1737
|
| 29 |
+
231,11200,8.0792
|
| 30 |
+
238,11600,5.0695
|
| 31 |
+
243,12000,6.8807
|
| 32 |
+
247,12400,6.8492
|
| 33 |
+
252,12800,7.4324
|
| 34 |
+
256,13200,7.9897
|
| 35 |
+
263,13600,4.5812
|
| 36 |
+
270,14000,4.8277
|
| 37 |
+
275,14400,6.4227
|
| 38 |
+
281,14800,7.0743
|
| 39 |
+
287,15200,5.8331
|
| 40 |
+
292,15600,5.6509
|
| 41 |
+
297,16000,7.8764
|
| 42 |
+
303,16400,4.8805
|
| 43 |
+
308,16800,6.1394
|
| 44 |
+
312,17200,7.3848
|
| 45 |
+
319,17600,5.4064
|
| 46 |
+
326,18000,4.2903
|
| 47 |
+
333,18400,4.4865
|
| 48 |
+
338,18800,6.5078
|
| 49 |
+
344,19200,5.3164
|
| 50 |
+
348,19600,6.5957
|
| 51 |
+
352,20000,8.4949
|
| 52 |
+
357,20400,8.0414
|
| 53 |
+
361,20800,8.259
|
| 54 |
+
366,21200,8.7431
|
| 55 |
+
371,21600,8.1425
|
| 56 |
+
375,22000,7.6031
|
| 57 |
+
379,22400,10.6469
|
| 58 |
+
384,22800,7.4268
|
| 59 |
+
389,23200,7.8948
|
| 60 |
+
393,23600,9.7095
|
| 61 |
+
399,24000,7.9959
|
| 62 |
+
403,24400,12.6126
|
| 63 |
+
407,24800,10.4783
|
| 64 |
+
414,25200,8.8362
|
| 65 |
+
418,25600,12.1951
|
| 66 |
+
422,26000,12.6242
|
| 67 |
+
426,26400,13.1493
|
| 68 |
+
430,26800,11.1338
|
| 69 |
+
434,27200,11.6175
|
| 70 |
+
440,27600,7.7618
|
| 71 |
+
445,28000,10.8874
|
| 72 |
+
451,28400,9.7606
|
| 73 |
+
456,28800,11.5253
|
| 74 |
+
462,29200,6.7856
|
| 75 |
+
467,29600,11.7964
|
| 76 |
+
472,30000,8.8168
|
| 77 |
+
476,30400,13.8556
|
| 78 |
+
481,30800,11.2407
|
| 79 |
+
485,31200,13.1211
|
| 80 |
+
491,31600,10.3695
|
| 81 |
+
496,32000,8.6598
|
| 82 |
+
500,32400,13.3756
|
| 83 |
+
506,32800,9.0459
|
| 84 |
+
511,33200,10.4668
|
| 85 |
+
516,33600,9.0482
|
| 86 |
+
522,34000,9.8797
|
| 87 |
+
526,34400,13.0444
|
| 88 |
+
532,34800,7.3031
|
| 89 |
+
536,35200,12.3843
|
| 90 |
+
541,35600,11.6548
|
| 91 |
+
545,36000,12.1808
|
| 92 |
+
550,36400,8.937
|
| 93 |
+
554,36800,13.32
|
| 94 |
+
559,37200,10.2187
|
| 95 |
+
563,37600,11.1425
|
| 96 |
+
569,38000,10.4837
|
| 97 |
+
573,38400,9.4084
|
| 98 |
+
579,38800,9.5927
|
| 99 |
+
585,39200,8.1512
|
| 100 |
+
591,39600,7.4306
|
| 101 |
+
598,40000,7.4929
|
| 102 |
+
603,40400,12.0014
|
| 103 |
+
609,40800,6.8877
|
| 104 |
+
615,41200,8.7736
|
| 105 |
+
622,41600,6.575
|
| 106 |
+
632,42000,3.9331
|
| 107 |
+
638,42400,9.7754
|
| 108 |
+
644,42800,8.4288
|
| 109 |
+
649,43200,9.7733
|
| 110 |
+
656,43600,6.2225
|
| 111 |
+
666,44000,4.1808
|
| 112 |
+
672,44400,6.0964
|
| 113 |
+
685,44800,3.3983
|
| 114 |
+
694,45200,5.6596
|
| 115 |
+
702,45600,6.6803
|
| 116 |
+
708,46000,7.2517
|
| 117 |
+
716,46400,5.9717
|
| 118 |
+
729,46800,3.6724
|
| 119 |
+
740,47200,4.6841
|
| 120 |
+
744,47600,12.0424
|
| 121 |
+
751,48000,8.4528
|
| 122 |
+
757,48400,7.1028
|
| 123 |
+
761,48800,12.6503
|
| 124 |
+
770,49200,4.9012
|
| 125 |
+
775,49600,9.6465
|
| 126 |
+
782,50000,7.875
|
| 127 |
+
787,50400,12.0186
|
| 128 |
+
793,50800,9.2714
|
| 129 |
+
798,51200,9.0638
|
| 130 |
+
804,51600,9.1859
|
| 131 |
+
808,52000,13.3962
|
| 132 |
+
814,52400,9.1845
|
| 133 |
+
819,52800,8.6704
|
| 134 |
+
823,53200,12.922
|
| 135 |
+
827,53600,13.1999
|
| 136 |
+
832,54000,10.8399
|
| 137 |
+
837,54400,12.6242
|
| 138 |
+
842,54800,11.2661
|
| 139 |
+
847,55200,10.5994
|
| 140 |
+
853,55600,8.1275
|
| 141 |
+
858,56000,12.1206
|
| 142 |
+
866,56400,6.3821
|
| 143 |
+
871,56800,8.825
|
| 144 |
+
875,57200,13.9679
|
| 145 |
+
880,57600,13.0994
|
| 146 |
+
884,58000,14.0452
|
| 147 |
+
888,58400,10.9111
|
| 148 |
+
892,58800,13.7891
|
| 149 |
+
897,59200,12.1729
|
| 150 |
+
902,59600,12.7809
|
| 151 |
+
907,60000,10.9993
|
| 152 |
+
914,60400,6.609
|
| 153 |
+
922,60800,6.1909
|
| 154 |
+
927,61200,10.7844
|
| 155 |
+
934,61600,7.0903
|
| 156 |
+
945,62000,4.2601
|
| 157 |
+
953,62400,5.6534
|
| 158 |
+
958,62800,10.075
|
| 159 |
+
963,63200,10.4977
|
| 160 |
+
967,63600,11.9044
|
| 161 |
+
971,64000,13.6212
|
| 162 |
+
975,64400,13.086
|
| 163 |
+
980,64800,12.0809
|
| 164 |
+
987,65200,8.3579
|
| 165 |
+
993,65600,8.2868
|
| 166 |
+
997,66000,14.3792
|
| 167 |
+
1001,66400,11.228
|
| 168 |
+
1008,66800,8.2525
|
| 169 |
+
1017,67200,6.1501
|
| 170 |
+
1022,67600,9.1965
|
| 171 |
+
1027,68000,11.0591
|
| 172 |
+
1032,68400,11.1621
|
| 173 |
+
1038,68800,10.1783
|
| 174 |
+
1042,69200,10.0313
|
| 175 |
+
1052,69600,5.234
|
| 176 |
+
1060,70000,7.2449
|
| 177 |
+
1064,70400,11.1294
|
| 178 |
+
1070,70800,7.2192
|
| 179 |
+
1075,71200,10.5247
|
| 180 |
+
1082,71600,8.6408
|
| 181 |
+
1087,72000,12.3906
|
| 182 |
+
1091,72400,10.4689
|
| 183 |
+
1095,72800,14.3503
|
| 184 |
+
1100,73200,13.3414
|
| 185 |
+
1104,73600,14.6343
|
| 186 |
+
1108,74000,12.0234
|
| 187 |
+
1113,74400,10.4043
|
| 188 |
+
1118,74800,13.1958
|
| 189 |
+
1122,75200,14.1113
|
| 190 |
+
1128,75600,8.9121
|
| 191 |
+
1133,76000,9.4241
|
| 192 |
+
1138,76400,12.1937
|
| 193 |
+
1142,76800,10.8917
|
| 194 |
+
1148,77200,9.6075
|
| 195 |
+
1152,77600,12.5465
|
| 196 |
+
1157,78000,12.2911
|
| 197 |
+
1161,78400,14.3991
|
| 198 |
+
1166,78800,11.7178
|
| 199 |
+
1170,79200,14.2131
|
| 200 |
+
1176,79600,9.4352
|
| 201 |
+
1182,80000,9.8783
|
| 202 |
+
1187,80400,8.9671
|
| 203 |
+
1192,80800,11.4625
|
| 204 |
+
1197,81200,13.3021
|
| 205 |
+
1201,81600,14.9308
|
| 206 |
+
1205,82000,11.3369
|
| 207 |
+
1212,82400,9.6094
|
| 208 |
+
1217,82800,11.86
|
| 209 |
+
1221,83200,12.7279
|
| 210 |
+
1227,83600,10.7913
|
| 211 |
+
1232,84000,11.0473
|
| 212 |
+
1236,84400,14.2463
|
| 213 |
+
1242,84800,9.8461
|
| 214 |
+
1246,85200,14.7624
|
| 215 |
+
1252,85600,10.4951
|
| 216 |
+
1259,86000,8.9123
|
| 217 |
+
1265,86400,10.1258
|
| 218 |
+
1271,86800,9.0527
|
| 219 |
+
1276,87200,12.5009
|
| 220 |
+
1280,87600,14.9112
|
| 221 |
+
1286,88000,9.7683
|
| 222 |
+
1291,88400,12.2766
|
| 223 |
+
1295,88800,14.6523
|
| 224 |
+
1303,89200,8.0044
|
| 225 |
+
1307,89600,14.4251
|
| 226 |
+
1311,90000,14.7435
|
| 227 |
+
1316,90400,13.5846
|
| 228 |
+
1322,90800,11.1363
|
| 229 |
+
1326,91200,13.1161
|
| 230 |
+
1330,91600,14.8782
|
| 231 |
+
1335,92000,12.9898
|
| 232 |
+
1341,92400,10.5745
|
| 233 |
+
1349,92800,7.7201
|
| 234 |
+
1355,93200,9.7362
|
| 235 |
+
1362,93600,9.0283
|
| 236 |
+
1371,94000,5.595
|
| 237 |
+
1376,94400,9.584
|
| 238 |
+
1384,94800,7.7162
|
| 239 |
+
1389,95200,9.5471
|
| 240 |
+
1395,95600,11.8725
|
| 241 |
+
1400,96000,11.4859
|
| 242 |
+
1406,96400,10.0133
|
| 243 |
+
1411,96800,10.0898
|
| 244 |
+
1419,97200,7.1929
|
| 245 |
+
1425,97600,10.2308
|
| 246 |
+
1431,98000,10.8156
|
| 247 |
+
1436,98400,11.1904
|
| 248 |
+
1441,98800,13.0126
|
| 249 |
+
1448,99200,9.5278
|
| 250 |
+
1456,99600,7.7219
|
| 251 |
+
1460,100000,11.838
|
| 252 |
+
1467,100400,10.3963
|
| 253 |
+
1471,100800,15.1954
|
| 254 |
+
1477,101200,9.7102
|
| 255 |
+
1483,101600,10.8302
|
| 256 |
+
1487,102000,13.6986
|
| 257 |
+
1491,102400,17.6513
|
| 258 |
+
1497,102800,11.7696
|
| 259 |
+
1502,103200,14.6363
|
| 260 |
+
1506,103600,12.3268
|
| 261 |
+
1510,104000,15.2753
|
| 262 |
+
1515,104400,13.8737
|
| 263 |
+
1520,104800,9.0522
|
| 264 |
+
1524,105200,15.7886
|
| 265 |
+
1529,105600,14.429
|
| 266 |
+
1534,106000,12.0745
|
| 267 |
+
1539,106400,13.0085
|
| 268 |
+
1543,106800,16.5044
|
| 269 |
+
1547,107200,12.8213
|
| 270 |
+
1552,107600,10.883
|
| 271 |
+
1557,108000,12.4299
|
| 272 |
+
1561,108400,12.7274
|
| 273 |
+
1565,108800,15.701
|
| 274 |
+
1571,109200,11.4077
|
| 275 |
+
1576,109600,13.4852
|
| 276 |
+
1580,110000,15.1326
|
| 277 |
+
1585,110400,12.6496
|
| 278 |
+
1592,110800,11.1331
|
| 279 |
+
1598,111200,10.001
|
| 280 |
+
1604,111600,9.8463
|
| 281 |
+
1610,112000,9.5269
|
| 282 |
+
1615,112400,12.4523
|
| 283 |
+
1623,112800,8.4184
|
| 284 |
+
1627,113200,14.1469
|
| 285 |
+
1634,113600,7.9029
|
| 286 |
+
1640,114000,11.989
|
| 287 |
+
1646,114400,10.0607
|
| 288 |
+
1654,114800,7.935
|
| 289 |
+
1658,115200,15.8146
|
| 290 |
+
1662,115600,16.4529
|
| 291 |
+
1666,116000,16.031
|
| 292 |
+
1670,116400,16.961
|
| 293 |
+
1674,116800,16.9752
|
| 294 |
+
1680,117200,11.4042
|
| 295 |
+
1687,117600,10.9484
|
| 296 |
+
1693,118000,11.3393
|
| 297 |
+
1697,118400,19.1626
|
| 298 |
+
1702,118800,16.0651
|
| 299 |
+
1707,119200,12.5378
|
| 300 |
+
1713,119600,14.3033
|
| 301 |
+
1720,120000,11.5588
|
| 302 |
+
1724,120400,15.8731
|
| 303 |
+
1729,120800,16.0878
|
| 304 |
+
1734,121200,16.3894
|
| 305 |
+
1739,121600,14.4359
|
| 306 |
+
1745,122000,14.7653
|
| 307 |
+
1752,122400,10.6896
|
| 308 |
+
1756,122800,17.8908
|
| 309 |
+
1762,123200,14.3452
|
| 310 |
+
1768,123600,13.2548
|
| 311 |
+
1772,124000,19.6647
|
| 312 |
+
1776,124400,19.1847
|
| 313 |
+
1780,124800,16.8379
|
| 314 |
+
1786,125200,14.3642
|
| 315 |
+
1790,125600,16.4554
|
| 316 |
+
1794,126000,19.7687
|
| 317 |
+
1801,126400,10.1183
|
| 318 |
+
1806,126800,18.1265
|
| 319 |
+
1812,127200,11.8105
|
| 320 |
+
1818,127600,11.0402
|
| 321 |
+
1823,128000,16.0082
|
| 322 |
+
1830,128400,9.7205
|
| 323 |
+
1836,128800,10.7887
|
| 324 |
+
1841,129200,12.7927
|
| 325 |
+
1845,129600,17.2088
|
| 326 |
+
1852,130000,11.4728
|
| 327 |
+
1857,130400,13.019
|
| 328 |
+
1861,130800,18.7369
|
| 329 |
+
1868,131200,11.284
|
| 330 |
+
1873,131600,13.2389
|
| 331 |
+
1878,132000,15.4627
|
| 332 |
+
1882,132400,19.8252
|
| 333 |
+
1889,132800,10.8438
|
| 334 |
+
1894,133200,12.0874
|
| 335 |
+
1901,133600,11.009
|
| 336 |
+
1906,134000,13.7517
|
| 337 |
+
1915,134400,8.6863
|
| 338 |
+
1921,134800,10.9831
|
| 339 |
+
1928,135200,11.1524
|
| 340 |
+
1934,135600,11.4495
|
| 341 |
+
1941,136000,9.4189
|
| 342 |
+
1945,136400,18.9608
|
| 343 |
+
1952,136800,9.5868
|
| 344 |
+
1958,137200,10.5218
|
| 345 |
+
1964,137600,13.0402
|
| 346 |
+
1972,138000,7.7796
|
| 347 |
+
1980,138400,7.3364
|
| 348 |
+
1986,138800,13.7623
|
| 349 |
+
1991,139200,16.2419
|
| 350 |
+
1996,139600,11.4699
|
| 351 |
+
2003,140000,10.983
|
| 352 |
+
2012,140400,7.5045
|
| 353 |
+
2016,140800,20.0741
|
| 354 |
+
2022,141200,12.6223
|
| 355 |
+
2027,141600,15.3017
|
| 356 |
+
2033,142000,12.7772
|
| 357 |
+
2037,142400,20.7311
|
| 358 |
+
2041,142800,18.8427
|
| 359 |
+
2046,143200,19.43
|
| 360 |
+
2051,143600,15.0906
|
| 361 |
+
2056,144000,13.0377
|
| 362 |
+
2065,144400,7.2051
|
| 363 |
+
2071,144800,13.2424
|
| 364 |
+
2078,145200,11.0955
|
| 365 |
+
2088,145600,5.3895
|
| 366 |
+
2093,146000,12.318
|
| 367 |
+
2100,146400,12.8436
|
| 368 |
+
2105,146800,14.4873
|
| 369 |
+
2109,147200,19.8853
|
| 370 |
+
2117,147600,10.7438
|
| 371 |
+
2123,148000,13.8923
|
| 372 |
+
2128,148400,15.6226
|
| 373 |
+
2132,148800,18.0464
|
| 374 |
+
2137,149200,16.3704
|
| 375 |
+
2144,149600,12.576
|
| 376 |
+
2148,150000,20.3183
|
| 377 |
+
2156,150400,9.807
|
| 378 |
+
2161,150800,12.9068
|
| 379 |
+
2165,151200,21.0426
|
| 380 |
+
2171,151600,15.371
|
| 381 |
+
2175,152000,17.7154
|
| 382 |
+
2180,152400,17.2565
|
| 383 |
+
2185,152800,18.8765
|
| 384 |
+
2190,153200,17.2862
|
| 385 |
+
2194,153600,19.4167
|
| 386 |
+
2200,154000,13.2546
|
| 387 |
+
2205,154400,15.271
|
| 388 |
+
2211,154800,14.3329
|
| 389 |
+
2215,155200,21.1205
|
| 390 |
+
2221,155600,12.1206
|
| 391 |
+
2227,156000,14.4072
|
| 392 |
+
2232,156400,16.5247
|
| 393 |
+
2236,156800,21.231
|
| 394 |
+
2240,157200,20.8302
|
| 395 |
+
2244,157600,19.4892
|
| 396 |
+
2248,158000,20.936
|
| 397 |
+
2252,158400,21.2178
|
| 398 |
+
2256,158800,21.1056
|
| 399 |
+
2261,159200,16.8762
|
| 400 |
+
2267,159600,14.0483
|
| 401 |
+
2272,160000,13.3591
|
| 402 |
+
2279,160400,12.6142
|
| 403 |
+
2285,160800,9.0069
|
| 404 |
+
2291,161200,13.7843
|
| 405 |
+
2300,161600,7.8473
|
| 406 |
+
2307,162000,9.9078
|
| 407 |
+
2312,162400,15.3595
|
| 408 |
+
2317,162800,14.6767
|
| 409 |
+
2324,163200,10.7115
|
| 410 |
+
2331,163600,9.728
|
| 411 |
+
2336,164000,14.8431
|
| 412 |
+
2341,164400,15.2026
|
| 413 |
+
2346,164800,12.322
|
| 414 |
+
2351,165200,11.1342
|
| 415 |
+
2356,165600,14.9371
|
| 416 |
+
2361,166000,13.2367
|
| 417 |
+
2365,166400,20.3463
|
| 418 |
+
2371,166800,10.1723
|
| 419 |
+
2377,167200,10.4504
|
| 420 |
+
2381,167600,18.0134
|
| 421 |
+
2388,168000,10.336
|
| 422 |
+
2395,168400,11.1223
|
| 423 |
+
2403,168800,6.6491
|
| 424 |
+
2410,169200,9.6499
|
| 425 |
+
2418,169600,7.0802
|
| 426 |
+
2423,170000,10.1417
|
| 427 |
+
2431,170400,6.2054
|
| 428 |
+
2440,170800,7.8601
|
| 429 |
+
2449,171200,7.7198
|
| 430 |
+
2454,171600,14.5012
|
| 431 |
+
2459,172000,15.7605
|
| 432 |
+
2467,172400,7.7054
|
| 433 |
+
2473,172800,12.5347
|
| 434 |
+
2478,173200,15.5908
|
| 435 |
+
2486,173600,5.7416
|
| 436 |
+
2494,174000,9.3572
|
| 437 |
+
2501,174400,9.0669
|
| 438 |
+
2505,174800,11.2773
|
| 439 |
+
2512,175200,6.6277
|
| 440 |
+
2518,175600,4.9397
|
| 441 |
+
2524,176000,6.4387
|
| 442 |
+
2529,176400,9.8694
|
| 443 |
+
2534,176800,8.5331
|
| 444 |
+
2538,177200,8.5411
|
| 445 |
+
2545,177600,6.2453
|
| 446 |
+
2549,178000,11.7878
|
| 447 |
+
2554,178400,10.175
|
| 448 |
+
2558,178800,12.4336
|
| 449 |
+
2565,179200,6.5908
|
| 450 |
+
2570,179600,6.219
|
| 451 |
+
2577,180000,6.0009
|
| 452 |
+
2582,180400,5.8638
|
| 453 |
+
2588,180800,6.9441
|
| 454 |
+
2592,181200,6.8218
|
| 455 |
+
2598,181600,9.0203
|
| 456 |
+
2604,182000,7.8072
|
| 457 |
+
2609,182400,6.4259
|
| 458 |
+
2616,182800,6.3331
|
| 459 |
+
2622,183200,9.3925
|
| 460 |
+
2631,183600,6.0891
|
| 461 |
+
2639,184000,5.3467
|
| 462 |
+
2645,184400,5.1621
|
| 463 |
+
2652,184800,7.0998
|
| 464 |
+
2656,185200,8.041
|
| 465 |
+
2663,185600,4.7481
|
| 466 |
+
2670,186000,5.0002
|
| 467 |
+
2677,186400,4.4118
|
| 468 |
+
2682,186800,5.4232
|
| 469 |
+
2689,187200,7.2963
|
| 470 |
+
2693,187600,6.6073
|
| 471 |
+
2699,188000,8.326
|
| 472 |
+
2707,188400,5.5025
|
| 473 |
+
2713,188800,5.5795
|
| 474 |
+
2721,189200,3.796
|
| 475 |
+
2728,189600,5.6303
|
| 476 |
+
2734,190000,6.2013
|
| 477 |
+
2740,190400,6.0202
|
| 478 |
+
2746,190800,4.2373
|
| 479 |
+
2751,191200,8.5135
|
| 480 |
+
2758,191600,3.9532
|
| 481 |
+
2763,192000,7.6742
|
| 482 |
+
2770,192400,4.637
|
| 483 |
+
2776,192800,4.4964
|
| 484 |
+
2781,193200,8.2152
|
| 485 |
+
2789,193600,4.0498
|
| 486 |
+
2793,194000,7.944
|
| 487 |
+
2798,194400,6.7629
|
| 488 |
+
2802,194800,6.5532
|
| 489 |
+
2807,195200,7.4252
|
| 490 |
+
2811,195600,8.0144
|
| 491 |
+
2815,196000,7.9792
|
| 492 |
+
2819,196400,7.9971
|
| 493 |
+
2824,196800,5.5573
|
| 494 |
+
2828,197200,7.3163
|
| 495 |
+
2833,197600,8.0791
|
| 496 |
+
2838,198000,5.7746
|
| 497 |
+
2843,198400,7.1306
|
| 498 |
+
2848,198800,7.198
|
| 499 |
+
2852,199200,8.5707
|
| 500 |
+
2858,199600,5.5788
|
| 501 |
+
2862,200000,8.7377
|
code/Lake application/logs/results_2/PDPPO_frozen_lake_log_5.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
22,400,1.2943
|
| 3 |
+
41,800,1.6015
|
| 4 |
+
55,1200,1.9737
|
| 5 |
+
64,1600,3.5487
|
| 6 |
+
73,2000,3.146
|
| 7 |
+
82,2400,3.2991
|
| 8 |
+
92,2800,2.7673
|
| 9 |
+
100,3200,3.8523
|
| 10 |
+
107,3600,3.7979
|
| 11 |
+
115,4000,4.256
|
| 12 |
+
119,4400,6.4793
|
| 13 |
+
124,4800,5.5414
|
| 14 |
+
128,5200,7.3476
|
| 15 |
+
133,5600,5.8006
|
| 16 |
+
138,6000,6.1344
|
| 17 |
+
144,6400,5.8326
|
| 18 |
+
150,6800,4.8366
|
| 19 |
+
154,7200,6.7648
|
| 20 |
+
159,7600,6.5947
|
| 21 |
+
163,8000,7.3957
|
| 22 |
+
167,8400,7.3241
|
| 23 |
+
172,8800,5.9961
|
| 24 |
+
177,9200,6.0296
|
| 25 |
+
182,9600,7.2685
|
| 26 |
+
187,10000,6.637
|
| 27 |
+
192,10400,5.999
|
| 28 |
+
196,10800,6.4386
|
| 29 |
+
201,11200,6.6646
|
| 30 |
+
207,11600,5.4069
|
| 31 |
+
212,12000,6.6252
|
| 32 |
+
216,12400,6.4882
|
| 33 |
+
223,12800,5.5298
|
| 34 |
+
228,13200,6.2035
|
| 35 |
+
233,13600,5.9536
|
| 36 |
+
237,14000,6.9952
|
| 37 |
+
244,14400,4.9771
|
| 38 |
+
249,14800,6.1157
|
| 39 |
+
253,15200,7.8858
|
| 40 |
+
257,15600,7.8068
|
| 41 |
+
261,16000,7.9837
|
| 42 |
+
266,16400,6.6484
|
| 43 |
+
270,16800,8.0041
|
| 44 |
+
274,17200,6.6656
|
| 45 |
+
278,17600,8.0284
|
| 46 |
+
283,18000,7.3533
|
| 47 |
+
290,18400,4.5723
|
| 48 |
+
295,18800,6.8589
|
| 49 |
+
299,19200,8.1765
|
| 50 |
+
303,19600,8.1
|
| 51 |
+
307,20000,8.1549
|
| 52 |
+
312,20400,5.818
|
| 53 |
+
318,20800,6.0381
|
| 54 |
+
323,21200,5.8448
|
| 55 |
+
327,21600,8.4165
|
| 56 |
+
332,22000,6.3011
|
| 57 |
+
336,22400,8.2937
|
| 58 |
+
343,22800,5.503
|
| 59 |
+
347,23200,8.5968
|
| 60 |
+
353,23600,5.1292
|
| 61 |
+
358,24000,7.4338
|
| 62 |
+
363,24400,6.6991
|
| 63 |
+
368,24800,6.5642
|
| 64 |
+
373,25200,6.8225
|
| 65 |
+
377,25600,8.3731
|
| 66 |
+
382,26000,7.4055
|
| 67 |
+
387,26400,7.1132
|
| 68 |
+
393,26800,5.6948
|
| 69 |
+
399,27200,5.2845
|
| 70 |
+
404,27600,5.9669
|
| 71 |
+
408,28000,8.3363
|
| 72 |
+
415,28400,5.6672
|
| 73 |
+
420,28800,5.4611
|
| 74 |
+
425,29200,7.714
|
| 75 |
+
429,29600,7.5512
|
| 76 |
+
434,30000,7.2347
|
| 77 |
+
439,30400,6.5507
|
| 78 |
+
443,30800,8.4242
|
| 79 |
+
449,31200,6.0604
|
| 80 |
+
453,31600,8.2476
|
| 81 |
+
458,32000,8.0065
|
| 82 |
+
462,32400,8.2966
|
| 83 |
+
467,32800,6.7805
|
| 84 |
+
471,33200,9.484
|
| 85 |
+
475,33600,8.7649
|
| 86 |
+
481,34000,8.2474
|
| 87 |
+
485,34400,9.5795
|
| 88 |
+
490,34800,8.6045
|
| 89 |
+
496,35200,7.4124
|
| 90 |
+
501,35600,7.6157
|
| 91 |
+
505,36000,10.9111
|
| 92 |
+
510,36400,8.8556
|
| 93 |
+
514,36800,9.9951
|
| 94 |
+
519,37200,11.3025
|
| 95 |
+
523,37600,11.5043
|
| 96 |
+
527,38000,11.7302
|
| 97 |
+
532,38400,10.3442
|
| 98 |
+
536,38800,12.0948
|
| 99 |
+
541,39200,10.2846
|
| 100 |
+
545,39600,12.2089
|
| 101 |
+
549,40000,11.8314
|
| 102 |
+
554,40400,9.8942
|
| 103 |
+
559,40800,10.0485
|
| 104 |
+
563,41200,12.4629
|
| 105 |
+
567,41600,12.2332
|
| 106 |
+
571,42000,11.1847
|
| 107 |
+
575,42400,12.4524
|
| 108 |
+
580,42800,10.2353
|
| 109 |
+
587,43200,6.303
|
| 110 |
+
591,43600,12.3469
|
| 111 |
+
596,44000,11.1024
|
| 112 |
+
600,44400,12.4106
|
| 113 |
+
604,44800,11.5728
|
| 114 |
+
608,45200,10.9579
|
| 115 |
+
614,45600,9.6399
|
| 116 |
+
618,46000,11.3217
|
| 117 |
+
622,46400,12.529
|
| 118 |
+
628,46800,9.0714
|
| 119 |
+
633,47200,9.9013
|
| 120 |
+
638,47600,10.0751
|
| 121 |
+
643,48000,10.2499
|
| 122 |
+
647,48400,12.5708
|
| 123 |
+
652,48800,8.7569
|
| 124 |
+
656,49200,12.3604
|
| 125 |
+
660,49600,12.5165
|
| 126 |
+
664,50000,12.3671
|
| 127 |
+
668,50400,12.618
|
| 128 |
+
672,50800,12.6543
|
| 129 |
+
677,51200,10.6911
|
| 130 |
+
681,51600,11.8317
|
| 131 |
+
685,52000,12.6955
|
| 132 |
+
690,52400,10.1157
|
| 133 |
+
695,52800,10.0116
|
| 134 |
+
700,53200,10.2901
|
| 135 |
+
705,53600,8.1824
|
| 136 |
+
710,54000,9.8285
|
| 137 |
+
716,54400,8.8717
|
| 138 |
+
721,54800,9.9854
|
| 139 |
+
725,55200,12.9736
|
| 140 |
+
731,55600,8.6973
|
| 141 |
+
737,56000,8.4719
|
| 142 |
+
742,56400,10.3744
|
| 143 |
+
747,56800,8.9466
|
| 144 |
+
752,57200,10.6086
|
| 145 |
+
758,57600,10.0045
|
| 146 |
+
763,58000,9.8052
|
| 147 |
+
768,58400,11.2697
|
| 148 |
+
774,58800,7.2144
|
| 149 |
+
780,59200,9.6594
|
| 150 |
+
784,59600,12.3888
|
| 151 |
+
789,60000,10.4215
|
| 152 |
+
795,60400,8.8573
|
| 153 |
+
800,60800,10.8871
|
| 154 |
+
807,61200,5.8992
|
| 155 |
+
812,61600,10.12
|
| 156 |
+
817,62000,10.8824
|
| 157 |
+
821,62400,13.1221
|
| 158 |
+
826,62800,10.7535
|
| 159 |
+
830,63200,13.1014
|
| 160 |
+
835,63600,10.7345
|
| 161 |
+
839,64000,13.082
|
| 162 |
+
843,64400,13.2581
|
| 163 |
+
847,64800,13.4413
|
| 164 |
+
851,65200,13.5097
|
| 165 |
+
855,65600,13.3641
|
| 166 |
+
859,66000,13.3583
|
| 167 |
+
863,66400,13.1701
|
| 168 |
+
868,66800,11.1134
|
| 169 |
+
872,67200,13.6245
|
| 170 |
+
880,67600,5.9886
|
| 171 |
+
886,68000,10.2475
|
| 172 |
+
890,68400,14.6202
|
| 173 |
+
894,68800,10.0685
|
| 174 |
+
900,69200,11.0053
|
| 175 |
+
904,69600,13.6849
|
| 176 |
+
909,70000,9.6346
|
| 177 |
+
913,70400,13.5021
|
| 178 |
+
917,70800,13.1414
|
| 179 |
+
922,71200,10.8646
|
| 180 |
+
926,71600,13.4908
|
| 181 |
+
932,72000,9.1396
|
| 182 |
+
936,72400,11.6489
|
| 183 |
+
940,72800,13.2407
|
| 184 |
+
944,73200,13.0342
|
| 185 |
+
948,73600,13.5329
|
| 186 |
+
952,74000,13.4403
|
| 187 |
+
956,74400,13.7446
|
| 188 |
+
960,74800,13.1605
|
| 189 |
+
964,75200,13.3433
|
| 190 |
+
969,75600,10.9952
|
| 191 |
+
974,76000,11.5741
|
| 192 |
+
980,76400,9.3918
|
| 193 |
+
985,76800,11.7496
|
| 194 |
+
992,77200,8.3268
|
| 195 |
+
997,77600,10.0094
|
| 196 |
+
1002,78000,10.6563
|
| 197 |
+
1007,78400,11.5348
|
| 198 |
+
1011,78800,15.119
|
| 199 |
+
1017,79200,12.0912
|
| 200 |
+
1023,79600,8.9712
|
| 201 |
+
1027,80000,11.6409
|
| 202 |
+
1031,80400,15.629
|
| 203 |
+
1036,80800,13.3766
|
| 204 |
+
1040,81200,15.2536
|
| 205 |
+
1045,81600,12.2289
|
| 206 |
+
1049,82000,15.4839
|
| 207 |
+
1055,82400,9.1355
|
| 208 |
+
1060,82800,12.202
|
| 209 |
+
1064,83200,14.748
|
| 210 |
+
1068,83600,14.5001
|
| 211 |
+
1072,84000,15.0432
|
| 212 |
+
1080,84400,7.0194
|
| 213 |
+
1085,84800,10.1806
|
| 214 |
+
1090,85200,11.795
|
| 215 |
+
1095,85600,12.7322
|
| 216 |
+
1100,86000,10.2449
|
| 217 |
+
1109,86400,6.4535
|
| 218 |
+
1117,86800,6.893
|
| 219 |
+
1123,87200,8.8297
|
| 220 |
+
1129,87600,9.9537
|
| 221 |
+
1134,88000,9.4762
|
| 222 |
+
1138,88400,15.7652
|
| 223 |
+
1144,88800,12.4334
|
| 224 |
+
1153,89200,6.025
|
| 225 |
+
1157,89600,14.0408
|
| 226 |
+
1162,90000,12.8277
|
| 227 |
+
1167,90400,12.4492
|
| 228 |
+
1173,90800,8.5448
|
| 229 |
+
1180,91200,9.9864
|
| 230 |
+
1184,91600,15.1804
|
| 231 |
+
1190,92000,8.7061
|
| 232 |
+
1196,92400,11.9629
|
| 233 |
+
1202,92800,9.8801
|
| 234 |
+
1207,93200,10.6953
|
| 235 |
+
1211,93600,16.5854
|
| 236 |
+
1221,94000,6.0867
|
| 237 |
+
1229,94400,9.2746
|
| 238 |
+
1234,94800,12.5134
|
| 239 |
+
1240,95200,8.38
|
| 240 |
+
1245,95600,14.4995
|
| 241 |
+
1249,96000,16.0927
|
| 242 |
+
1255,96400,8.763
|
| 243 |
+
1261,96800,11.1095
|
| 244 |
+
1265,97200,15.5608
|
| 245 |
+
1271,97600,12.1043
|
| 246 |
+
1278,98000,9.2772
|
| 247 |
+
1283,98400,10.4808
|
| 248 |
+
1291,98800,9.4265
|
| 249 |
+
1296,99200,13.4881
|
| 250 |
+
1303,99600,8.7073
|
| 251 |
+
1311,100000,7.7693
|
| 252 |
+
1323,100400,5.2588
|
| 253 |
+
1328,100800,12.0091
|
| 254 |
+
1338,101200,6.3879
|
| 255 |
+
1346,101600,9.2148
|
| 256 |
+
1354,102000,6.5712
|
| 257 |
+
1359,102400,15.4022
|
| 258 |
+
1363,102800,17.8751
|
| 259 |
+
1372,103200,5.6517
|
| 260 |
+
1379,103600,11.7892
|
| 261 |
+
1383,104000,13.7202
|
| 262 |
+
1390,104400,12.0935
|
| 263 |
+
1398,104800,8.3031
|
| 264 |
+
1403,105200,11.4273
|
| 265 |
+
1409,105600,13.6581
|
| 266 |
+
1414,106000,12.8389
|
| 267 |
+
1421,106400,10.5737
|
| 268 |
+
1426,106800,14.0672
|
| 269 |
+
1431,107200,15.1905
|
| 270 |
+
1435,107600,16.499
|
| 271 |
+
1441,108000,14.9027
|
| 272 |
+
1447,108400,12.2743
|
| 273 |
+
1453,108800,12.5351
|
| 274 |
+
1458,109200,13.3324
|
| 275 |
+
1464,109600,10.1987
|
| 276 |
+
1470,110000,14.6681
|
| 277 |
+
1480,110400,6.3981
|
| 278 |
+
1484,110800,17.443
|
| 279 |
+
1489,111200,11.7544
|
| 280 |
+
1494,111600,12.3627
|
| 281 |
+
1500,112000,14.2433
|
| 282 |
+
1507,112400,9.9183
|
| 283 |
+
1514,112800,10.1194
|
| 284 |
+
1520,113200,11.6173
|
| 285 |
+
1526,113600,12.6407
|
| 286 |
+
1530,114000,14.9395
|
| 287 |
+
1537,114400,10.6247
|
| 288 |
+
1546,114800,8.2285
|
| 289 |
+
1555,115200,7.4419
|
| 290 |
+
1565,115600,6.9423
|
| 291 |
+
1571,116000,11.8016
|
| 292 |
+
1576,116400,13.4006
|
| 293 |
+
1582,116800,12.1714
|
| 294 |
+
1589,117200,9.5481
|
| 295 |
+
1594,117600,17.1002
|
| 296 |
+
1599,118000,13.3729
|
| 297 |
+
1605,118400,14.4748
|
| 298 |
+
1610,118800,15.9086
|
| 299 |
+
1616,119200,13.515
|
| 300 |
+
1621,119600,13.6216
|
| 301 |
+
1627,120000,13.3638
|
| 302 |
+
1631,120400,15.8724
|
| 303 |
+
1637,120800,13.1813
|
| 304 |
+
1642,121200,16.7492
|
| 305 |
+
1647,121600,16.2201
|
| 306 |
+
1652,122000,14.2471
|
| 307 |
+
1658,122400,11.9562
|
| 308 |
+
1663,122800,14.3429
|
| 309 |
+
1670,123200,8.2829
|
| 310 |
+
1675,123600,12.7689
|
| 311 |
+
1679,124000,17.9555
|
| 312 |
+
1686,124400,10.3992
|
| 313 |
+
1693,124800,9.3774
|
| 314 |
+
1700,125200,10.5229
|
| 315 |
+
1705,125600,13.095
|
| 316 |
+
1709,126000,16.827
|
| 317 |
+
1716,126400,11.5151
|
| 318 |
+
1720,126800,15.0954
|
| 319 |
+
1725,127200,17.1223
|
| 320 |
+
1729,127600,19.2025
|
| 321 |
+
1734,128000,16.3688
|
| 322 |
+
1740,128400,13.0476
|
| 323 |
+
1744,128800,17.5761
|
| 324 |
+
1751,129200,9.9195
|
| 325 |
+
1758,129600,12.3492
|
| 326 |
+
1763,130000,12.9946
|
| 327 |
+
1768,130400,15.0853
|
| 328 |
+
1774,130800,12.7493
|
| 329 |
+
1780,131200,13.7049
|
| 330 |
+
1784,131600,16.4027
|
| 331 |
+
1791,132000,10.02
|
| 332 |
+
1796,132400,13.2953
|
| 333 |
+
1802,132800,13.2571
|
| 334 |
+
1807,133200,16.9227
|
| 335 |
+
1816,133600,7.5362
|
| 336 |
+
1823,134000,9.0337
|
| 337 |
+
1831,134400,8.2112
|
| 338 |
+
1839,134800,9.5222
|
| 339 |
+
1845,135200,10.5068
|
| 340 |
+
1851,135600,15.2168
|
| 341 |
+
1860,136000,7.9646
|
| 342 |
+
1866,136400,9.9186
|
| 343 |
+
1872,136800,11.7983
|
| 344 |
+
1878,137200,15.5265
|
| 345 |
+
1884,137600,11.4403
|
| 346 |
+
1889,138000,14.1125
|
| 347 |
+
1893,138400,18.7814
|
| 348 |
+
1898,138800,15.9716
|
| 349 |
+
1907,139200,6.6898
|
| 350 |
+
1916,139600,8.6765
|
| 351 |
+
1926,140000,6.0023
|
| 352 |
+
1930,140400,18.1008
|
| 353 |
+
1938,140800,8.4123
|
| 354 |
+
1946,141200,8.4666
|
| 355 |
+
1953,141600,9.8084
|
| 356 |
+
1958,142000,12.6225
|
| 357 |
+
1962,142400,16.6253
|
| 358 |
+
1969,142800,11.879
|
| 359 |
+
1979,143200,5.891
|
| 360 |
+
1983,143600,14.6341
|
| 361 |
+
1991,144000,10.6154
|
| 362 |
+
1995,144400,19.3371
|
| 363 |
+
2000,144800,14.6999
|
| 364 |
+
2005,145200,17.3212
|
| 365 |
+
2010,145600,13.511
|
| 366 |
+
2019,146000,8.9176
|
| 367 |
+
2025,146400,11.9234
|
| 368 |
+
2029,146800,18.7508
|
| 369 |
+
2035,147200,11.3652
|
| 370 |
+
2042,147600,10.7875
|
| 371 |
+
2047,148000,17.2345
|
| 372 |
+
2052,148400,13.469
|
| 373 |
+
2057,148800,15.0187
|
| 374 |
+
2062,149200,18.2483
|
| 375 |
+
2067,149600,15.1589
|
| 376 |
+
2072,150000,13.8506
|
| 377 |
+
2078,150400,13.5154
|
| 378 |
+
2083,150800,17.4931
|
| 379 |
+
2088,151200,13.0434
|
| 380 |
+
2094,151600,12.4448
|
| 381 |
+
2098,152000,16.2641
|
| 382 |
+
2103,152400,18.5631
|
| 383 |
+
2109,152800,13.4721
|
| 384 |
+
2114,153200,17.5024
|
| 385 |
+
2118,153600,17.2629
|
| 386 |
+
2123,154000,16.0443
|
| 387 |
+
2130,154400,12.4068
|
| 388 |
+
2134,154800,19.7433
|
| 389 |
+
2141,155200,12.1725
|
| 390 |
+
2145,155600,17.4482
|
| 391 |
+
2151,156000,14.9725
|
| 392 |
+
2158,156400,8.3286
|
| 393 |
+
2163,156800,15.3569
|
| 394 |
+
2168,157200,16.8249
|
| 395 |
+
2176,157600,10.7159
|
| 396 |
+
2181,158000,13.5756
|
| 397 |
+
2186,158400,16.8722
|
| 398 |
+
2191,158800,16.1566
|
| 399 |
+
2196,159200,16.4402
|
| 400 |
+
2200,159600,19.2901
|
| 401 |
+
2206,160000,13.408
|
| 402 |
+
2216,160400,6.5277
|
| 403 |
+
2221,160800,16.5398
|
| 404 |
+
2225,161200,17.9313
|
| 405 |
+
2229,161600,16.0893
|
| 406 |
+
2234,162000,13.5297
|
| 407 |
+
2239,162400,18.5255
|
| 408 |
+
2244,162800,15.412
|
| 409 |
+
2249,163200,16.7656
|
| 410 |
+
2254,163600,12.2883
|
| 411 |
+
2260,164000,12.8931
|
| 412 |
+
2268,164400,9.994
|
| 413 |
+
2273,164800,14.903
|
| 414 |
+
2278,165200,14.9247
|
| 415 |
+
2284,165600,13.8873
|
| 416 |
+
2288,166000,15.893
|
| 417 |
+
2294,166400,16.0733
|
| 418 |
+
2298,166800,20.3023
|
| 419 |
+
2306,167200,8.823
|
| 420 |
+
2311,167600,14.7959
|
| 421 |
+
2316,168000,15.5506
|
| 422 |
+
2320,168400,18.9412
|
| 423 |
+
2328,168800,9.9269
|
| 424 |
+
2337,169200,7.8662
|
| 425 |
+
2342,169600,13.8098
|
| 426 |
+
2347,170000,16.0699
|
| 427 |
+
2352,170400,14.2326
|
| 428 |
+
2357,170800,14.3457
|
| 429 |
+
2365,171200,7.755
|
| 430 |
+
2372,171600,11.1969
|
| 431 |
+
2377,172000,17.7461
|
| 432 |
+
2382,172400,14.5072
|
| 433 |
+
2387,172800,15.6355
|
| 434 |
+
2392,173200,16.9889
|
| 435 |
+
2397,173600,16.7953
|
| 436 |
+
2402,174000,14.2381
|
| 437 |
+
2407,174400,15.2855
|
| 438 |
+
2414,174800,9.9614
|
| 439 |
+
2422,175200,6.716
|
| 440 |
+
2429,175600,9.1914
|
| 441 |
+
2433,176000,15.9262
|
| 442 |
+
2439,176400,13.2029
|
| 443 |
+
2444,176800,12.0649
|
| 444 |
+
2448,177200,11.2086
|
| 445 |
+
2453,177600,17.596
|
| 446 |
+
2458,178000,10.4265
|
| 447 |
+
2464,178400,12.7507
|
| 448 |
+
2470,178800,10.5018
|
| 449 |
+
2477,179200,9.6174
|
| 450 |
+
2484,179600,12.3463
|
| 451 |
+
2493,180000,7.463
|
| 452 |
+
2502,180400,7.0977
|
| 453 |
+
2511,180800,6.566
|
| 454 |
+
2516,181200,12.4002
|
| 455 |
+
2522,181600,12.8125
|
| 456 |
+
2526,182000,16.8705
|
| 457 |
+
2532,182400,15.6621
|
| 458 |
+
2537,182800,15.5005
|
| 459 |
+
2545,183200,7.1256
|
| 460 |
+
2549,183600,19.091
|
| 461 |
+
2553,184000,20.7269
|
| 462 |
+
2558,184400,17.8736
|
| 463 |
+
2562,184800,16.5145
|
| 464 |
+
2568,185200,12.29
|
| 465 |
+
2574,185600,11.165
|
| 466 |
+
2580,186000,14.862
|
| 467 |
+
2585,186400,12.9498
|
| 468 |
+
2590,186800,16.6379
|
| 469 |
+
2597,187200,12.0999
|
| 470 |
+
2601,187600,19.986
|
| 471 |
+
2605,188000,21.4165
|
| 472 |
+
2610,188400,12.3688
|
| 473 |
+
2614,188800,17.9821
|
| 474 |
+
2618,189200,20.5196
|
| 475 |
+
2623,189600,19.5269
|
| 476 |
+
2629,190000,11.2376
|
| 477 |
+
2633,190400,20.1738
|
| 478 |
+
2637,190800,20.5599
|
| 479 |
+
2642,191200,15.8952
|
| 480 |
+
2649,191600,12.2397
|
| 481 |
+
2654,192000,16.6287
|
| 482 |
+
2659,192400,14.966
|
| 483 |
+
2665,192800,13.1073
|
| 484 |
+
2671,193200,11.1274
|
| 485 |
+
2676,193600,15.6753
|
| 486 |
+
2681,194000,15.8626
|
| 487 |
+
2688,194400,9.1965
|
| 488 |
+
2693,194800,15.0967
|
| 489 |
+
2697,195200,19.8348
|
| 490 |
+
2701,195600,14.855
|
| 491 |
+
2706,196000,13.0618
|
| 492 |
+
2715,196400,9.1275
|
| 493 |
+
2721,196800,7.1574
|
| 494 |
+
2726,197200,16.5246
|
| 495 |
+
2732,197600,10.026
|
| 496 |
+
2739,198000,11.8092
|
| 497 |
+
2745,198400,10.2368
|
| 498 |
+
2751,198800,11.7544
|
| 499 |
+
2757,199200,11.6034
|
| 500 |
+
2764,199600,9.3743
|
| 501 |
+
2769,200000,15.3278
|
code/Lake application/logs/results_2/PPO_frozen_lake_log_1.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
15,400,1.9345
|
| 3 |
+
32,800,1.8784
|
| 4 |
+
42,1200,3.0732
|
| 5 |
+
52,1600,2.9835
|
| 6 |
+
65,2000,2.3838
|
| 7 |
+
69,2400,7.2159
|
| 8 |
+
75,2800,4.758
|
| 9 |
+
80,3200,5.6732
|
| 10 |
+
85,3600,7.076
|
| 11 |
+
89,4000,5.77
|
| 12 |
+
95,4400,5.4889
|
| 13 |
+
101,4800,4.8132
|
| 14 |
+
106,5200,7.4812
|
| 15 |
+
115,5600,2.816
|
| 16 |
+
123,6000,4.0168
|
| 17 |
+
129,6400,5.4723
|
| 18 |
+
137,6800,3.675
|
| 19 |
+
142,7200,5.8091
|
| 20 |
+
147,7600,5.6523
|
| 21 |
+
151,8000,7.3976
|
| 22 |
+
158,8400,4.279
|
| 23 |
+
164,8800,4.7596
|
| 24 |
+
168,9200,7.4893
|
| 25 |
+
173,9600,6.204
|
| 26 |
+
177,10000,7.3342
|
| 27 |
+
183,10400,4.9965
|
| 28 |
+
188,10800,6.2773
|
| 29 |
+
193,11200,6.1197
|
| 30 |
+
197,11600,7.2967
|
| 31 |
+
201,12000,7.4756
|
| 32 |
+
205,12400,7.1791
|
| 33 |
+
209,12800,7.3157
|
| 34 |
+
213,13200,7.0592
|
| 35 |
+
218,13600,5.8954
|
| 36 |
+
222,14000,7.1992
|
| 37 |
+
227,14400,6.5493
|
| 38 |
+
232,14800,5.9315
|
| 39 |
+
237,15200,7.0655
|
| 40 |
+
243,15600,5.2935
|
| 41 |
+
251,16000,3.6833
|
| 42 |
+
258,16400,4.511
|
| 43 |
+
267,16800,4.1031
|
| 44 |
+
271,17200,7.4993
|
| 45 |
+
275,17600,9.0943
|
| 46 |
+
279,18000,7.6642
|
| 47 |
+
285,18400,6.6167
|
| 48 |
+
291,18800,5.4259
|
| 49 |
+
295,19200,7.9226
|
| 50 |
+
299,19600,8.2753
|
| 51 |
+
304,20000,6.8015
|
| 52 |
+
308,20400,7.8775
|
| 53 |
+
315,20800,4.5316
|
| 54 |
+
321,21200,5.4028
|
| 55 |
+
326,21600,6.3314
|
| 56 |
+
330,22000,8.1528
|
| 57 |
+
334,22400,7.4973
|
| 58 |
+
339,22800,6.6922
|
| 59 |
+
343,23200,7.7072
|
| 60 |
+
347,23600,7.2187
|
| 61 |
+
351,24000,7.7859
|
| 62 |
+
356,24400,7.0575
|
| 63 |
+
360,24800,7.2281
|
| 64 |
+
366,25200,5.8797
|
| 65 |
+
372,25600,4.8101
|
| 66 |
+
378,26000,5.0378
|
| 67 |
+
384,26400,5.6277
|
| 68 |
+
389,26800,5.2954
|
| 69 |
+
400,27200,2.9769
|
| 70 |
+
406,27600,4.6418
|
| 71 |
+
416,28000,3.4303
|
| 72 |
+
423,28400,4.0932
|
| 73 |
+
430,28800,4.505
|
| 74 |
+
435,29200,5.5546
|
| 75 |
+
439,29600,10.5906
|
| 76 |
+
445,30000,6.1133
|
| 77 |
+
451,30400,5.9823
|
| 78 |
+
455,30800,8.4017
|
| 79 |
+
462,31200,5.0732
|
| 80 |
+
468,31600,5.7932
|
| 81 |
+
474,32000,6.708
|
| 82 |
+
479,32400,6.8853
|
| 83 |
+
485,32800,9.0958
|
| 84 |
+
490,33200,6.4707
|
| 85 |
+
495,33600,7.4192
|
| 86 |
+
501,34000,6.0651
|
| 87 |
+
506,34400,6.7035
|
| 88 |
+
511,34800,6.8316
|
| 89 |
+
518,35200,5.9218
|
| 90 |
+
524,35600,7.2932
|
| 91 |
+
528,36000,8.8676
|
| 92 |
+
534,36400,8.8016
|
| 93 |
+
538,36800,8.7465
|
| 94 |
+
544,37200,8.41
|
| 95 |
+
549,37600,8.3048
|
| 96 |
+
555,38000,8.0676
|
| 97 |
+
561,38400,6.0523
|
| 98 |
+
566,38800,8.8973
|
| 99 |
+
571,39200,8.7871
|
| 100 |
+
577,39600,6.6108
|
| 101 |
+
582,40000,10.0771
|
| 102 |
+
587,40400,7.6992
|
| 103 |
+
594,40800,5.8859
|
| 104 |
+
602,41200,5.2757
|
| 105 |
+
607,41600,7.4604
|
| 106 |
+
614,42000,6.3297
|
| 107 |
+
624,42400,4.3
|
| 108 |
+
630,42800,7.3462
|
| 109 |
+
636,43200,7.7832
|
| 110 |
+
642,43600,7.2278
|
| 111 |
+
649,44000,7.0903
|
| 112 |
+
656,44400,7.2864
|
| 113 |
+
660,44800,11.4337
|
| 114 |
+
664,45200,9.4219
|
| 115 |
+
669,45600,10.1337
|
| 116 |
+
674,46000,10.8808
|
| 117 |
+
679,46400,9.5832
|
| 118 |
+
686,46800,6.8856
|
| 119 |
+
692,47200,7.9853
|
| 120 |
+
699,47600,5.4185
|
| 121 |
+
705,48000,8.51
|
| 122 |
+
711,48400,8.7795
|
| 123 |
+
717,48800,8.1386
|
| 124 |
+
721,49200,12.1496
|
| 125 |
+
727,49600,9.671
|
| 126 |
+
734,50000,4.7004
|
| 127 |
+
742,50400,5.4595
|
| 128 |
+
751,50800,7.1566
|
| 129 |
+
756,51200,11.0405
|
| 130 |
+
763,51600,7.8217
|
| 131 |
+
769,52000,6.9623
|
| 132 |
+
774,52400,9.7564
|
| 133 |
+
780,52800,8.2737
|
| 134 |
+
786,53200,7.7905
|
| 135 |
+
792,53600,8.2016
|
| 136 |
+
798,54000,11.1113
|
| 137 |
+
803,54400,9.71
|
| 138 |
+
811,54800,6.3473
|
| 139 |
+
816,55200,10.3967
|
| 140 |
+
822,55600,8.5624
|
| 141 |
+
829,56000,7.9157
|
| 142 |
+
836,56400,7.3681
|
| 143 |
+
843,56800,7.9999
|
| 144 |
+
848,57200,12.6498
|
| 145 |
+
855,57600,6.7851
|
| 146 |
+
864,58000,5.926
|
| 147 |
+
871,58400,8.3211
|
| 148 |
+
877,58800,8.4122
|
| 149 |
+
885,59200,5.5055
|
| 150 |
+
892,59600,4.8152
|
| 151 |
+
896,60000,10.2298
|
| 152 |
+
903,60400,8.8526
|
| 153 |
+
908,60800,10.4385
|
| 154 |
+
914,61200,6.8265
|
| 155 |
+
920,61600,7.4231
|
| 156 |
+
926,62000,7.7046
|
| 157 |
+
932,62400,8.744
|
| 158 |
+
939,62800,6.815
|
| 159 |
+
944,63200,8.9794
|
| 160 |
+
950,63600,9.0955
|
| 161 |
+
960,64000,3.9011
|
| 162 |
+
966,64400,9.1169
|
| 163 |
+
970,64800,9.8831
|
| 164 |
+
975,65200,10.7317
|
| 165 |
+
983,65600,5.231
|
| 166 |
+
988,66000,9.8312
|
| 167 |
+
993,66400,8.1545
|
| 168 |
+
999,66800,8.3283
|
| 169 |
+
1008,67200,3.3866
|
| 170 |
+
1014,67600,6.2135
|
| 171 |
+
1018,68000,8.4834
|
| 172 |
+
1025,68400,4.2607
|
| 173 |
+
1031,68800,7.755
|
| 174 |
+
1035,69200,6.7764
|
| 175 |
+
1043,69600,4.8484
|
| 176 |
+
1049,70000,6.8767
|
| 177 |
+
1055,70400,7.2876
|
| 178 |
+
1061,70800,6.5337
|
| 179 |
+
1066,71200,8.7095
|
| 180 |
+
1071,71600,8.5315
|
| 181 |
+
1076,72000,8.1106
|
| 182 |
+
1082,72400,6.5303
|
| 183 |
+
1088,72800,5.1858
|
| 184 |
+
1092,73200,8.2166
|
| 185 |
+
1097,73600,7.7642
|
| 186 |
+
1103,74000,5.7689
|
| 187 |
+
1109,74400,6.5443
|
| 188 |
+
1113,74800,6.0629
|
| 189 |
+
1119,75200,6.9554
|
| 190 |
+
1124,75600,6.5107
|
| 191 |
+
1131,76000,5.8779
|
| 192 |
+
1136,76400,7.0196
|
| 193 |
+
1141,76800,8.221
|
| 194 |
+
1146,77200,7.0804
|
| 195 |
+
1150,77600,7.0824
|
| 196 |
+
1158,78000,4.9257
|
| 197 |
+
1165,78400,4.3861
|
| 198 |
+
1171,78800,5.412
|
| 199 |
+
1178,79200,5.8524
|
| 200 |
+
1186,79600,4.6331
|
| 201 |
+
1193,80000,5.5782
|
| 202 |
+
1199,80400,7.2403
|
| 203 |
+
1204,80800,8.5925
|
| 204 |
+
1208,81200,9.0404
|
| 205 |
+
1212,81600,10.5095
|
| 206 |
+
1220,82000,7.0118
|
| 207 |
+
1225,82400,8.1882
|
| 208 |
+
1229,82800,10.863
|
| 209 |
+
1233,83200,10.7497
|
| 210 |
+
1239,83600,6.4536
|
| 211 |
+
1243,84000,11.2907
|
| 212 |
+
1247,84400,11.783
|
| 213 |
+
1252,84800,11.0893
|
| 214 |
+
1257,85200,9.8707
|
| 215 |
+
1261,85600,13.0076
|
| 216 |
+
1265,86000,12.8858
|
| 217 |
+
1269,86400,13.1408
|
| 218 |
+
1273,86800,11.8421
|
| 219 |
+
1277,87200,9.9549
|
| 220 |
+
1284,87600,6.7063
|
| 221 |
+
1288,88000,12.4549
|
| 222 |
+
1292,88400,13.2602
|
| 223 |
+
1299,88800,7.2956
|
| 224 |
+
1303,89200,15.6022
|
| 225 |
+
1308,89600,12.0928
|
| 226 |
+
1317,90000,3.9969
|
| 227 |
+
1323,90400,6.6326
|
| 228 |
+
1331,90800,6.7157
|
| 229 |
+
1337,91200,6.7272
|
| 230 |
+
1346,91600,4.2204
|
| 231 |
+
1351,92000,10.5966
|
| 232 |
+
1357,92400,6.7511
|
| 233 |
+
1364,92800,6.8195
|
| 234 |
+
1371,93200,6.92
|
| 235 |
+
1381,93600,4.6135
|
| 236 |
+
1387,94000,9.0048
|
| 237 |
+
1395,94400,4.8047
|
| 238 |
+
1401,94800,9.5174
|
| 239 |
+
1407,95200,11.6457
|
| 240 |
+
1414,95600,6.9109
|
| 241 |
+
1421,96000,5.7336
|
| 242 |
+
1430,96400,5.3074
|
| 243 |
+
1434,96800,9.2165
|
| 244 |
+
1439,97200,9.5083
|
| 245 |
+
1447,97600,5.2046
|
| 246 |
+
1456,98000,4.3764
|
| 247 |
+
1462,98400,5.923
|
| 248 |
+
1467,98800,8.1644
|
| 249 |
+
1473,99200,6.7967
|
| 250 |
+
1480,99600,7.7647
|
| 251 |
+
1484,100000,12.2281
|
| 252 |
+
1488,100400,9.1569
|
| 253 |
+
1494,100800,8.54
|
| 254 |
+
1499,101200,9.4563
|
| 255 |
+
1503,101600,9.5965
|
| 256 |
+
1509,102000,8.6554
|
| 257 |
+
1514,102400,11.0492
|
| 258 |
+
1518,102800,10.4916
|
| 259 |
+
1525,103200,5.7317
|
| 260 |
+
1534,103600,5.4161
|
| 261 |
+
1539,104000,9.7729
|
| 262 |
+
1546,104400,6.9356
|
| 263 |
+
1552,104800,7.0423
|
| 264 |
+
1556,105200,12.5092
|
| 265 |
+
1563,105600,8.7948
|
| 266 |
+
1567,106000,12.3567
|
| 267 |
+
1573,106400,7.8177
|
| 268 |
+
1577,106800,12.8039
|
| 269 |
+
1584,107200,6.8714
|
| 270 |
+
1589,107600,8.3869
|
| 271 |
+
1593,108000,12.6683
|
| 272 |
+
1601,108400,6.5902
|
| 273 |
+
1607,108800,8.628
|
| 274 |
+
1615,109200,5.4762
|
| 275 |
+
1621,109600,9.3018
|
| 276 |
+
1627,110000,7.4751
|
| 277 |
+
1640,110400,4.139
|
| 278 |
+
1647,110800,7.7327
|
| 279 |
+
1652,111200,10.3567
|
| 280 |
+
1662,111600,4.9881
|
| 281 |
+
1668,112000,8.8983
|
| 282 |
+
1673,112400,9.1326
|
| 283 |
+
1683,112800,3.9693
|
| 284 |
+
1693,113200,4.1639
|
| 285 |
+
1699,113600,8.137
|
| 286 |
+
1705,114000,8.6208
|
| 287 |
+
1710,114400,9.1387
|
| 288 |
+
1717,114800,6.4969
|
| 289 |
+
1724,115200,5.8328
|
| 290 |
+
1729,115600,9.6428
|
| 291 |
+
1736,116000,7.7885
|
| 292 |
+
1742,116400,6.1858
|
| 293 |
+
1751,116800,5.3759
|
| 294 |
+
1760,117200,4.9088
|
| 295 |
+
1764,117600,12.8121
|
| 296 |
+
1773,118000,5.528
|
| 297 |
+
1781,118400,4.8728
|
| 298 |
+
1786,118800,9.4107
|
| 299 |
+
1794,119200,5.6412
|
| 300 |
+
1800,119600,8.3779
|
| 301 |
+
1804,120000,14.4553
|
| 302 |
+
1814,120400,4.32
|
| 303 |
+
1819,120800,12.7509
|
| 304 |
+
1828,121200,6.4023
|
| 305 |
+
1833,121600,15.743
|
| 306 |
+
1841,122000,6.9661
|
| 307 |
+
1847,122400,11.3894
|
| 308 |
+
1852,122800,12.2086
|
| 309 |
+
1857,123200,16.0304
|
| 310 |
+
1862,123600,16.1933
|
| 311 |
+
1870,124000,8.6231
|
| 312 |
+
1875,124400,13.6078
|
| 313 |
+
1880,124800,14.3245
|
| 314 |
+
1889,125200,5.8726
|
| 315 |
+
1896,125600,9.655
|
| 316 |
+
1904,126000,6.397
|
| 317 |
+
1912,126400,5.4703
|
| 318 |
+
1917,126800,13.7367
|
| 319 |
+
1923,127200,10.1322
|
| 320 |
+
1928,127600,12.9031
|
| 321 |
+
1932,128000,11.6047
|
| 322 |
+
1941,128400,8.1528
|
| 323 |
+
1948,128800,10.0696
|
| 324 |
+
1953,129200,12.8557
|
| 325 |
+
1964,129600,6.1629
|
| 326 |
+
1974,130000,4.7264
|
| 327 |
+
1983,130400,5.0263
|
| 328 |
+
1989,130800,13.0645
|
| 329 |
+
2000,131200,4.9296
|
| 330 |
+
2006,131600,9.139
|
| 331 |
+
2016,132000,6.4189
|
| 332 |
+
2025,132400,4.758
|
| 333 |
+
2033,132800,7.2135
|
| 334 |
+
2038,133200,8.544
|
| 335 |
+
2045,133600,9.7969
|
| 336 |
+
2051,134000,7.3074
|
| 337 |
+
2063,134400,4.5675
|
| 338 |
+
2072,134800,4.937
|
| 339 |
+
2080,135200,6.392
|
| 340 |
+
2088,135600,4.6451
|
| 341 |
+
2093,136000,9.6252
|
| 342 |
+
2100,136400,5.984
|
| 343 |
+
2106,136800,7.3098
|
| 344 |
+
2114,137200,6.8205
|
| 345 |
+
2122,137600,5.1513
|
| 346 |
+
2130,138000,6.8115
|
| 347 |
+
2140,138400,4.8287
|
| 348 |
+
2149,138800,3.6011
|
| 349 |
+
2157,139200,5.5075
|
| 350 |
+
2163,139600,9.1481
|
| 351 |
+
2171,140000,6.788
|
| 352 |
+
2178,140400,7.9736
|
| 353 |
+
2186,140800,8.5882
|
| 354 |
+
2191,141200,9.2546
|
| 355 |
+
2196,141600,15.4158
|
| 356 |
+
2206,142000,7.1935
|
| 357 |
+
2211,142400,13.2408
|
| 358 |
+
2223,142800,4.9066
|
| 359 |
+
2235,143200,4.5763
|
| 360 |
+
2241,143600,10.3808
|
| 361 |
+
2246,144000,10.8564
|
| 362 |
+
2254,144400,7.7899
|
| 363 |
+
2261,144800,8.8404
|
| 364 |
+
2268,145200,8.8255
|
| 365 |
+
2276,145600,7.4552
|
| 366 |
+
2282,146000,11.216
|
| 367 |
+
2289,146400,10.299
|
| 368 |
+
2295,146800,9.2089
|
| 369 |
+
2302,147200,8.5813
|
| 370 |
+
2311,147600,5.5125
|
| 371 |
+
2318,148000,7.7001
|
| 372 |
+
2323,148400,11.1793
|
| 373 |
+
2333,148800,3.7442
|
| 374 |
+
2340,149200,10.413
|
| 375 |
+
2345,149600,11.7132
|
| 376 |
+
2350,150000,9.5357
|
| 377 |
+
2357,150400,8.7914
|
| 378 |
+
2363,150800,8.3319
|
| 379 |
+
2370,151200,7.1484
|
| 380 |
+
2376,151600,5.6768
|
| 381 |
+
2385,152000,4.1424
|
| 382 |
+
2390,152400,6.528
|
| 383 |
+
2394,152800,7.7268
|
| 384 |
+
2399,153200,8.4871
|
| 385 |
+
2406,153600,6.8809
|
| 386 |
+
2411,154000,8.3506
|
| 387 |
+
2415,154400,11.0307
|
| 388 |
+
2419,154800,10.397
|
| 389 |
+
2427,155200,5.1522
|
| 390 |
+
2434,155600,5.8036
|
| 391 |
+
2439,156000,10.0986
|
| 392 |
+
2445,156400,7.7494
|
| 393 |
+
2452,156800,8.7844
|
| 394 |
+
2458,157200,9.1953
|
| 395 |
+
2464,157600,9.7125
|
| 396 |
+
2469,158000,14.3923
|
| 397 |
+
2475,158400,10.5696
|
| 398 |
+
2479,158800,19.5742
|
| 399 |
+
2488,159200,6.9904
|
| 400 |
+
2494,159600,12.7756
|
| 401 |
+
2499,160000,12.9402
|
| 402 |
+
2506,160400,11.8509
|
| 403 |
+
2510,160800,14.1441
|
| 404 |
+
2522,161200,5.8916
|
| 405 |
+
2530,161600,8.0032
|
| 406 |
+
2536,162000,11.8227
|
| 407 |
+
2540,162400,14.6518
|
| 408 |
+
2548,162800,9.0002
|
| 409 |
+
2555,163200,5.4754
|
| 410 |
+
2560,163600,15.7747
|
| 411 |
+
2568,164000,9.1886
|
| 412 |
+
2572,164400,18.4005
|
| 413 |
+
2580,164800,6.8589
|
| 414 |
+
2586,165200,13.1998
|
| 415 |
+
2591,165600,12.0732
|
| 416 |
+
2599,166000,7.623
|
| 417 |
+
2608,166400,6.2975
|
| 418 |
+
2614,166800,10.325
|
| 419 |
+
2622,167200,7.3189
|
| 420 |
+
2628,167600,11.0537
|
| 421 |
+
2636,168000,8.6657
|
| 422 |
+
2642,168400,13.2533
|
| 423 |
+
2650,168800,5.9973
|
| 424 |
+
2660,169200,7.9659
|
| 425 |
+
2665,169600,14.8961
|
| 426 |
+
2673,170000,9.1592
|
| 427 |
+
2680,170400,12.4329
|
| 428 |
+
2687,170800,12.6719
|
| 429 |
+
2694,171200,11.0606
|
| 430 |
+
2702,171600,10.5086
|
| 431 |
+
2709,172000,10.1179
|
| 432 |
+
2716,172400,9.1879
|
| 433 |
+
2723,172800,11.2291
|
| 434 |
+
2728,173200,12.3325
|
| 435 |
+
2734,173600,12.1718
|
| 436 |
+
2741,174000,10.1937
|
| 437 |
+
2748,174400,9.3497
|
| 438 |
+
2753,174800,10.5969
|
| 439 |
+
2761,175200,10.1798
|
| 440 |
+
2767,175600,10.9461
|
| 441 |
+
2772,176000,13.9387
|
| 442 |
+
2777,176400,14.2562
|
| 443 |
+
2783,176800,8.5326
|
| 444 |
+
2789,177200,9.8383
|
| 445 |
+
2794,177600,12.2766
|
| 446 |
+
2800,178000,14.2094
|
| 447 |
+
2804,178400,15.156
|
| 448 |
+
2811,178800,6.4663
|
| 449 |
+
2816,179200,15.0295
|
| 450 |
+
2823,179600,10.1727
|
| 451 |
+
2828,180000,11.4667
|
| 452 |
+
2834,180400,11.0373
|
| 453 |
+
2840,180800,11.0221
|
| 454 |
+
2844,181200,18.4976
|
| 455 |
+
2850,181600,9.7416
|
| 456 |
+
2857,182000,8.77
|
| 457 |
+
2863,182400,7.9617
|
| 458 |
+
2867,182800,21.1088
|
| 459 |
+
2872,183200,15.0862
|
| 460 |
+
2880,183600,9.2334
|
| 461 |
+
2885,184000,15.0575
|
| 462 |
+
2891,184400,8.7998
|
| 463 |
+
2898,184800,9.4749
|
| 464 |
+
2903,185200,15.2583
|
| 465 |
+
2908,185600,15.9073
|
| 466 |
+
2913,186000,18.1103
|
| 467 |
+
2920,186400,10.0962
|
| 468 |
+
2925,186800,14.1606
|
| 469 |
+
2931,187200,13.2483
|
| 470 |
+
2936,187600,9.6115
|
| 471 |
+
2942,188000,10.5395
|
| 472 |
+
2948,188400,13.0603
|
| 473 |
+
2952,188800,15.9296
|
| 474 |
+
2958,189200,11.0247
|
| 475 |
+
2964,189600,13.2235
|
| 476 |
+
2973,190000,6.3575
|
| 477 |
+
2978,190400,11.5839
|
| 478 |
+
2983,190800,13.9557
|
| 479 |
+
2989,191200,9.2667
|
| 480 |
+
2995,191600,10.0113
|
| 481 |
+
3000,192000,10.4833
|
| 482 |
+
3006,192400,11.3706
|
| 483 |
+
3012,192800,10.4763
|
| 484 |
+
3018,193200,10.3391
|
| 485 |
+
3022,193600,16.8413
|
| 486 |
+
3030,194000,9.8189
|
| 487 |
+
3042,194400,4.0255
|
| 488 |
+
3049,194800,8.3469
|
| 489 |
+
3058,195200,6.7786
|
| 490 |
+
3066,195600,9.455
|
| 491 |
+
3072,196000,9.361
|
| 492 |
+
3078,196400,11.942
|
| 493 |
+
3086,196800,8.1976
|
| 494 |
+
3093,197200,8.8319
|
| 495 |
+
3098,197600,10.2572
|
| 496 |
+
3105,198000,10.4746
|
| 497 |
+
3111,198400,9.7314
|
| 498 |
+
3117,198800,13.439
|
| 499 |
+
3121,199200,15.2623
|
| 500 |
+
3127,199600,15.3384
|
| 501 |
+
3137,200000,8.1123
|
code/Lake application/logs/results_2/PPO_frozen_lake_log_2.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
19,400,1.6097
|
| 3 |
+
38,800,1.6416
|
| 4 |
+
61,1200,1.4003
|
| 5 |
+
73,1600,2.5901
|
| 6 |
+
81,2000,3.7322
|
| 7 |
+
90,2400,3.1764
|
| 8 |
+
95,2800,6.9871
|
| 9 |
+
102,3200,4.2802
|
| 10 |
+
109,3600,4.7436
|
| 11 |
+
116,4000,4.2818
|
| 12 |
+
120,4400,7.5307
|
| 13 |
+
124,4800,6.3049
|
| 14 |
+
132,5200,4.3085
|
| 15 |
+
137,5600,6.2239
|
| 16 |
+
141,6000,7.0954
|
| 17 |
+
145,6400,6.7039
|
| 18 |
+
150,6800,6.2469
|
| 19 |
+
155,7200,6.36
|
| 20 |
+
160,7600,6.1549
|
| 21 |
+
164,8000,6.9545
|
| 22 |
+
169,8400,7.0148
|
| 23 |
+
175,8800,4.7251
|
| 24 |
+
179,9200,7.9361
|
| 25 |
+
184,9600,6.5351
|
| 26 |
+
189,10000,6.2774
|
| 27 |
+
195,10400,5.7017
|
| 28 |
+
199,10800,7.825
|
| 29 |
+
203,11200,6.8724
|
| 30 |
+
207,11600,7.7336
|
| 31 |
+
211,12000,7.5364
|
| 32 |
+
215,12400,7.8172
|
| 33 |
+
219,12800,7.9367
|
| 34 |
+
223,13200,7.7311
|
| 35 |
+
228,13600,7.2277
|
| 36 |
+
232,14000,7.0049
|
| 37 |
+
238,14400,5.1928
|
| 38 |
+
242,14800,7.6765
|
| 39 |
+
246,15200,7.4842
|
| 40 |
+
250,15600,7.5239
|
| 41 |
+
254,16000,7.1122
|
| 42 |
+
260,16400,5.5361
|
| 43 |
+
265,16800,7.1787
|
| 44 |
+
272,17200,4.4245
|
| 45 |
+
276,17600,6.2406
|
| 46 |
+
280,18000,7.7256
|
| 47 |
+
286,18400,5.6842
|
| 48 |
+
291,18800,6.646
|
| 49 |
+
297,19200,5.5573
|
| 50 |
+
302,19600,4.9828
|
| 51 |
+
308,20000,5.9812
|
| 52 |
+
313,20400,6.4346
|
| 53 |
+
320,20800,4.4117
|
| 54 |
+
324,21200,6.6082
|
| 55 |
+
330,21600,6.2485
|
| 56 |
+
334,22000,7.8137
|
| 57 |
+
339,22400,5.5175
|
| 58 |
+
343,22800,7.8811
|
| 59 |
+
347,23200,7.1971
|
| 60 |
+
353,23600,5.196
|
| 61 |
+
358,24000,6.0231
|
| 62 |
+
364,24400,6.6306
|
| 63 |
+
369,24800,5.7511
|
| 64 |
+
374,25200,6.1309
|
| 65 |
+
378,25600,7.6626
|
| 66 |
+
382,26000,6.8129
|
| 67 |
+
387,26400,6.4101
|
| 68 |
+
391,26800,8.1621
|
| 69 |
+
395,27200,7.6024
|
| 70 |
+
399,27600,7.6139
|
| 71 |
+
404,28000,7.434
|
| 72 |
+
409,28400,7.5108
|
| 73 |
+
413,28800,10.0048
|
| 74 |
+
417,29200,8.9736
|
| 75 |
+
423,29600,5.5721
|
| 76 |
+
428,30000,5.6607
|
| 77 |
+
433,30400,7.3198
|
| 78 |
+
437,30800,7.5887
|
| 79 |
+
441,31200,7.1714
|
| 80 |
+
446,31600,5.5443
|
| 81 |
+
452,32000,6.0095
|
| 82 |
+
456,32400,6.5919
|
| 83 |
+
461,32800,6.9332
|
| 84 |
+
465,33200,5.9742
|
| 85 |
+
472,33600,4.6549
|
| 86 |
+
476,34000,6.7065
|
| 87 |
+
481,34400,6.6351
|
| 88 |
+
485,34800,7.3781
|
| 89 |
+
491,35200,5.3901
|
| 90 |
+
495,35600,7.481
|
| 91 |
+
499,36000,7.3904
|
| 92 |
+
504,36400,5.5203
|
| 93 |
+
508,36800,6.6256
|
| 94 |
+
513,37200,6.2715
|
| 95 |
+
519,37600,5.4467
|
| 96 |
+
523,38000,7.5914
|
| 97 |
+
528,38400,5.8271
|
| 98 |
+
535,38800,4.7497
|
| 99 |
+
539,39200,6.705
|
| 100 |
+
543,39600,7.2193
|
| 101 |
+
547,40000,7.6591
|
| 102 |
+
553,40400,5.4713
|
| 103 |
+
559,40800,5.6278
|
| 104 |
+
563,41200,7.0377
|
| 105 |
+
567,41600,6.5975
|
| 106 |
+
572,42000,6.4589
|
| 107 |
+
577,42400,6.4962
|
| 108 |
+
581,42800,7.5891
|
| 109 |
+
585,43200,7.5598
|
| 110 |
+
593,43600,3.9839
|
| 111 |
+
597,44000,6.7188
|
| 112 |
+
601,44400,6.9495
|
| 113 |
+
606,44800,7.4883
|
| 114 |
+
613,45200,3.7567
|
| 115 |
+
617,45600,7.7218
|
| 116 |
+
622,46000,6.69
|
| 117 |
+
626,46400,7.9262
|
| 118 |
+
630,46800,6.3034
|
| 119 |
+
635,47200,6.0545
|
| 120 |
+
639,47600,7.4195
|
| 121 |
+
644,48000,7.13
|
| 122 |
+
648,48400,7.0398
|
| 123 |
+
652,48800,7.6392
|
| 124 |
+
660,49200,3.7038
|
| 125 |
+
667,49600,4.3578
|
| 126 |
+
672,50000,5.9194
|
| 127 |
+
678,50400,4.8665
|
| 128 |
+
683,50800,6.3008
|
| 129 |
+
691,51200,4.5975
|
| 130 |
+
697,51600,5.1888
|
| 131 |
+
702,52000,5.7734
|
| 132 |
+
707,52400,7.4679
|
| 133 |
+
715,52800,3.3284
|
| 134 |
+
721,53200,6.0641
|
| 135 |
+
727,53600,6.0441
|
| 136 |
+
731,54000,7.6702
|
| 137 |
+
736,54400,7.4219
|
| 138 |
+
740,54800,6.222
|
| 139 |
+
746,55200,6.4839
|
| 140 |
+
751,55600,5.6146
|
| 141 |
+
755,56000,7.5972
|
| 142 |
+
761,56400,5.612
|
| 143 |
+
765,56800,7.8148
|
| 144 |
+
770,57200,5.7253
|
| 145 |
+
774,57600,7.9334
|
| 146 |
+
778,58000,8.3276
|
| 147 |
+
783,58400,6.8913
|
| 148 |
+
787,58800,7.2828
|
| 149 |
+
793,59200,6.9596
|
| 150 |
+
797,59600,8.2329
|
| 151 |
+
803,60000,6.4227
|
| 152 |
+
808,60400,6.7123
|
| 153 |
+
812,60800,7.9976
|
| 154 |
+
819,61200,5.5859
|
| 155 |
+
826,61600,4.4553
|
| 156 |
+
832,62000,6.7451
|
| 157 |
+
837,62400,6.241
|
| 158 |
+
843,62800,6.4673
|
| 159 |
+
848,63200,6.9543
|
| 160 |
+
855,63600,5.0913
|
| 161 |
+
861,64000,7.2159
|
| 162 |
+
865,64400,7.9432
|
| 163 |
+
870,64800,7.8044
|
| 164 |
+
875,65200,7.2243
|
| 165 |
+
880,65600,7.7488
|
| 166 |
+
886,66000,6.3843
|
| 167 |
+
890,66400,9.9507
|
| 168 |
+
894,66800,9.063
|
| 169 |
+
899,67200,7.5112
|
| 170 |
+
903,67600,7.6428
|
| 171 |
+
908,68000,8.2787
|
| 172 |
+
912,68400,8.5889
|
| 173 |
+
918,68800,5.723
|
| 174 |
+
922,69200,8.2975
|
| 175 |
+
926,69600,7.3824
|
| 176 |
+
931,70000,8.2068
|
| 177 |
+
937,70400,7.7218
|
| 178 |
+
941,70800,8.7229
|
| 179 |
+
945,71200,10.2961
|
| 180 |
+
950,71600,7.6121
|
| 181 |
+
955,72000,7.8974
|
| 182 |
+
960,72400,7.8978
|
| 183 |
+
967,72800,5.6801
|
| 184 |
+
979,73200,2.1174
|
| 185 |
+
990,73600,3.7935
|
| 186 |
+
995,74000,10.1194
|
| 187 |
+
1000,74400,6.8977
|
| 188 |
+
1006,74800,7.6098
|
| 189 |
+
1011,75200,7.2811
|
| 190 |
+
1017,75600,7.2474
|
| 191 |
+
1023,76000,8.5673
|
| 192 |
+
1029,76400,6.6197
|
| 193 |
+
1035,76800,8.3262
|
| 194 |
+
1041,77200,7.9771
|
| 195 |
+
1047,77600,7.1542
|
| 196 |
+
1051,78000,11.2335
|
| 197 |
+
1056,78400,8.8322
|
| 198 |
+
1062,78800,7.5779
|
| 199 |
+
1066,79200,13.4042
|
| 200 |
+
1071,79600,10.4175
|
| 201 |
+
1076,80000,13.3333
|
| 202 |
+
1080,80400,12.734
|
| 203 |
+
1086,80800,11.1026
|
| 204 |
+
1090,81200,14.7759
|
| 205 |
+
1096,81600,8.6412
|
| 206 |
+
1104,82000,6.3892
|
| 207 |
+
1110,82400,9.8357
|
| 208 |
+
1118,82800,7.1741
|
| 209 |
+
1125,83200,7.8912
|
| 210 |
+
1131,83600,8.455
|
| 211 |
+
1135,84000,14.1645
|
| 212 |
+
1140,84400,9.4645
|
| 213 |
+
1146,84800,10.1664
|
| 214 |
+
1153,85200,5.8022
|
| 215 |
+
1160,85600,6.215
|
| 216 |
+
1168,86000,4.1062
|
| 217 |
+
1175,86400,6.1067
|
| 218 |
+
1180,86800,6.3874
|
| 219 |
+
1187,87200,6.3452
|
| 220 |
+
1192,87600,6.9666
|
| 221 |
+
1198,88000,5.1382
|
| 222 |
+
1203,88400,6.1001
|
| 223 |
+
1212,88800,4.8099
|
| 224 |
+
1218,89200,7.4769
|
| 225 |
+
1226,89600,7.5724
|
| 226 |
+
1231,90000,10.0412
|
| 227 |
+
1238,90400,8.6996
|
| 228 |
+
1242,90800,13.2195
|
| 229 |
+
1247,91200,11.4526
|
| 230 |
+
1252,91600,12.0908
|
| 231 |
+
1260,92000,6.7688
|
| 232 |
+
1265,92400,10.449
|
| 233 |
+
1270,92800,10.2558
|
| 234 |
+
1277,93200,5.5992
|
| 235 |
+
1284,93600,9.3672
|
| 236 |
+
1292,94000,7.979
|
| 237 |
+
1302,94400,6.4251
|
| 238 |
+
1308,94800,8.6478
|
| 239 |
+
1314,95200,12.1168
|
| 240 |
+
1322,95600,6.8864
|
| 241 |
+
1333,96000,5.5147
|
| 242 |
+
1339,96400,9.0327
|
| 243 |
+
1347,96800,6.0256
|
| 244 |
+
1356,97200,4.7894
|
| 245 |
+
1365,97600,5.5282
|
| 246 |
+
1371,98000,7.7578
|
| 247 |
+
1380,98400,8.3785
|
| 248 |
+
1389,98800,5.239
|
| 249 |
+
1395,99200,8.3681
|
| 250 |
+
1400,99600,9.7949
|
| 251 |
+
1406,100000,9.5216
|
| 252 |
+
1414,100400,5.3299
|
| 253 |
+
1420,100800,7.8871
|
| 254 |
+
1428,101200,6.0441
|
| 255 |
+
1434,101600,6.6909
|
| 256 |
+
1444,102000,5.0341
|
| 257 |
+
1450,102400,7.7649
|
| 258 |
+
1461,102800,4.1021
|
| 259 |
+
1472,103200,2.9832
|
| 260 |
+
1482,103600,4.4576
|
| 261 |
+
1489,104000,7.3442
|
| 262 |
+
1497,104400,5.6498
|
| 263 |
+
1505,104800,6.4131
|
| 264 |
+
1511,105200,10.5803
|
| 265 |
+
1516,105600,11.3073
|
| 266 |
+
1520,106000,13.2675
|
| 267 |
+
1527,106400,10.6688
|
| 268 |
+
1535,106800,5.8265
|
| 269 |
+
1540,107200,10.7056
|
| 270 |
+
1545,107600,7.6742
|
| 271 |
+
1553,108000,5.7264
|
| 272 |
+
1560,108400,6.6821
|
| 273 |
+
1567,108800,5.4216
|
| 274 |
+
1573,109200,8.4766
|
| 275 |
+
1578,109600,8.6739
|
| 276 |
+
1582,110000,11.4666
|
| 277 |
+
1586,110400,9.0832
|
| 278 |
+
1594,110800,6.2276
|
| 279 |
+
1599,111200,8.6866
|
| 280 |
+
1606,111600,6.2615
|
| 281 |
+
1614,112000,5.6982
|
| 282 |
+
1621,112400,5.8051
|
| 283 |
+
1626,112800,7.93
|
| 284 |
+
1631,113200,10.595
|
| 285 |
+
1636,113600,7.6407
|
| 286 |
+
1640,114000,11.1847
|
| 287 |
+
1647,114400,5.6078
|
| 288 |
+
1651,114800,9.1446
|
| 289 |
+
1658,115200,6.8432
|
| 290 |
+
1662,115600,12.9911
|
| 291 |
+
1671,116000,4.613
|
| 292 |
+
1676,116400,11.0305
|
| 293 |
+
1681,116800,8.694
|
| 294 |
+
1688,117200,6.11
|
| 295 |
+
1698,117600,6.4954
|
| 296 |
+
1703,118000,9.7062
|
| 297 |
+
1712,118400,5.8668
|
| 298 |
+
1717,118800,7.5547
|
| 299 |
+
1724,119200,8.3224
|
| 300 |
+
1729,119600,8.329
|
| 301 |
+
1737,120000,7.1094
|
| 302 |
+
1742,120400,9.2663
|
| 303 |
+
1747,120800,9.6127
|
| 304 |
+
1757,121200,5.6769
|
| 305 |
+
1762,121600,8.5658
|
| 306 |
+
1770,122000,7.4468
|
| 307 |
+
1776,122400,8.0238
|
| 308 |
+
1782,122800,8.0927
|
| 309 |
+
1790,123200,5.6296
|
| 310 |
+
1798,123600,6.9246
|
| 311 |
+
1806,124000,6.5561
|
| 312 |
+
1813,124400,7.4058
|
| 313 |
+
1821,124800,6.9491
|
| 314 |
+
1833,125200,4.4584
|
| 315 |
+
1839,125600,8.2732
|
| 316 |
+
1848,126000,6.3892
|
| 317 |
+
1853,126400,10.4589
|
| 318 |
+
1858,126800,12.1972
|
| 319 |
+
1867,127200,5.6719
|
| 320 |
+
1874,127600,7.9428
|
| 321 |
+
1880,128000,7.5333
|
| 322 |
+
1885,128400,12.1847
|
| 323 |
+
1889,128800,11.591
|
| 324 |
+
1895,129200,9.8666
|
| 325 |
+
1902,129600,9.6165
|
| 326 |
+
1910,130000,8.469
|
| 327 |
+
1917,130400,5.6391
|
| 328 |
+
1924,130800,8.5008
|
| 329 |
+
1931,131200,9.7077
|
| 330 |
+
1935,131600,13.5229
|
| 331 |
+
1939,132000,16.4664
|
| 332 |
+
1944,132400,13.0046
|
| 333 |
+
1949,132800,8.4371
|
| 334 |
+
1955,133200,8.8647
|
| 335 |
+
1959,133600,14.3521
|
| 336 |
+
1963,134000,11.7871
|
| 337 |
+
1968,134400,10.4688
|
| 338 |
+
1974,134800,9.3431
|
| 339 |
+
1979,135200,8.226
|
| 340 |
+
1984,135600,10.8513
|
| 341 |
+
1988,136000,10.6682
|
| 342 |
+
1998,136400,4.4273
|
| 343 |
+
2003,136800,12.778
|
| 344 |
+
2011,137200,8.0067
|
| 345 |
+
2017,137600,9.4886
|
| 346 |
+
2027,138000,5.6532
|
| 347 |
+
2033,138400,7.9827
|
| 348 |
+
2039,138800,5.9282
|
| 349 |
+
2045,139200,9.6567
|
| 350 |
+
2053,139600,7.7935
|
| 351 |
+
2059,140000,5.9489
|
| 352 |
+
2064,140400,6.9939
|
| 353 |
+
2070,140800,7.3466
|
| 354 |
+
2075,141200,6.8939
|
| 355 |
+
2079,141600,9.1796
|
| 356 |
+
2085,142000,9.0719
|
| 357 |
+
2089,142400,13.9325
|
| 358 |
+
2094,142800,11.0623
|
| 359 |
+
2099,143200,6.7284
|
| 360 |
+
2107,143600,5.6728
|
| 361 |
+
2115,144000,6.321
|
| 362 |
+
2119,144400,10.3033
|
| 363 |
+
2124,144800,8.3466
|
| 364 |
+
2130,145200,6.2657
|
| 365 |
+
2136,145600,5.7428
|
| 366 |
+
2142,146000,7.4074
|
| 367 |
+
2147,146400,7.8209
|
| 368 |
+
2153,146800,7.3415
|
| 369 |
+
2161,147200,5.2917
|
| 370 |
+
2166,147600,12.5109
|
| 371 |
+
2174,148000,5.0395
|
| 372 |
+
2179,148400,10.3555
|
| 373 |
+
2187,148800,5.349
|
| 374 |
+
2192,149200,10.0253
|
| 375 |
+
2198,149600,7.0379
|
| 376 |
+
2205,150000,7.5125
|
| 377 |
+
2211,150400,7.8038
|
| 378 |
+
2217,150800,7.8167
|
| 379 |
+
2221,151200,11.7039
|
| 380 |
+
2229,151600,5.489
|
| 381 |
+
2236,152000,6.0868
|
| 382 |
+
2243,152400,6.2941
|
| 383 |
+
2249,152800,6.6384
|
| 384 |
+
2254,153200,8.3705
|
| 385 |
+
2259,153600,8.5642
|
| 386 |
+
2265,154000,6.8584
|
| 387 |
+
2272,154400,7.3834
|
| 388 |
+
2278,154800,8.2766
|
| 389 |
+
2286,155200,5.1656
|
| 390 |
+
2290,155600,12.9118
|
| 391 |
+
2294,156000,11.8071
|
| 392 |
+
2299,156400,8.9169
|
| 393 |
+
2303,156800,8.9791
|
| 394 |
+
2308,157200,9.3741
|
| 395 |
+
2312,157600,11.1361
|
| 396 |
+
2316,158000,11.7926
|
| 397 |
+
2320,158400,11.7151
|
| 398 |
+
2326,158800,8.0207
|
| 399 |
+
2333,159200,6.0691
|
| 400 |
+
2338,159600,9.577
|
| 401 |
+
2344,160000,6.4232
|
| 402 |
+
2350,160400,8.8049
|
| 403 |
+
2354,160800,9.1235
|
| 404 |
+
2359,161200,8.3861
|
| 405 |
+
2365,161600,7.0742
|
| 406 |
+
2369,162000,7.6221
|
| 407 |
+
2373,162400,7.9897
|
| 408 |
+
2378,162800,5.8857
|
| 409 |
+
2384,163200,7.0723
|
| 410 |
+
2389,163600,6.654
|
| 411 |
+
2395,164000,7.1041
|
| 412 |
+
2399,164400,8.4728
|
| 413 |
+
2407,164800,4.6772
|
| 414 |
+
2411,165200,7.5167
|
| 415 |
+
2417,165600,7.8597
|
| 416 |
+
2422,166000,9.3692
|
| 417 |
+
2427,166400,8.0704
|
| 418 |
+
2431,166800,10.7773
|
| 419 |
+
2435,167200,9.905
|
| 420 |
+
2440,167600,8.5513
|
| 421 |
+
2444,168000,8.9629
|
| 422 |
+
2450,168400,7.6352
|
| 423 |
+
2457,168800,6.9678
|
| 424 |
+
2463,169200,7.899
|
| 425 |
+
2469,169600,7.6206
|
| 426 |
+
2474,170000,8.0358
|
| 427 |
+
2481,170400,7.7953
|
| 428 |
+
2486,170800,10.2047
|
| 429 |
+
2492,171200,12.0048
|
| 430 |
+
2500,171600,5.3765
|
| 431 |
+
2509,172000,5.1863
|
| 432 |
+
2513,172400,12.2984
|
| 433 |
+
2517,172800,11.7797
|
| 434 |
+
2524,173200,7.875
|
| 435 |
+
2534,173600,3.9569
|
| 436 |
+
2539,174000,11.8781
|
| 437 |
+
2544,174400,10.3608
|
| 438 |
+
2550,174800,8.7275
|
| 439 |
+
2556,175200,8.4987
|
| 440 |
+
2561,175600,10.7181
|
| 441 |
+
2566,176000,8.2619
|
| 442 |
+
2571,176400,8.8454
|
| 443 |
+
2575,176800,8.7559
|
| 444 |
+
2580,177200,8.4039
|
| 445 |
+
2584,177600,11.9876
|
| 446 |
+
2592,178000,6.4634
|
| 447 |
+
2597,178400,7.8467
|
| 448 |
+
2602,178800,6.8881
|
| 449 |
+
2607,179200,7.1522
|
| 450 |
+
2612,179600,7.6053
|
| 451 |
+
2620,180000,5.2739
|
| 452 |
+
2628,180400,4.7562
|
| 453 |
+
2636,180800,3.7301
|
| 454 |
+
2641,181200,10.6831
|
| 455 |
+
2645,181600,8.9943
|
| 456 |
+
2652,182000,5.277
|
| 457 |
+
2661,182400,4.275
|
| 458 |
+
2671,182800,3.453
|
| 459 |
+
2676,183200,8.5581
|
| 460 |
+
2681,183600,7.2929
|
| 461 |
+
2686,184000,12.0382
|
| 462 |
+
2690,184400,12.133
|
| 463 |
+
2694,184800,12.2167
|
| 464 |
+
2701,185200,5.3062
|
| 465 |
+
2706,185600,10.3605
|
| 466 |
+
2714,186000,5.9351
|
| 467 |
+
2718,186400,9.3307
|
| 468 |
+
2723,186800,9.5625
|
| 469 |
+
2730,187200,7.1449
|
| 470 |
+
2735,187600,7.7182
|
| 471 |
+
2741,188000,6.4756
|
| 472 |
+
2745,188400,8.736
|
| 473 |
+
2751,188800,8.123
|
| 474 |
+
2757,189200,8.8964
|
| 475 |
+
2761,189600,10.7888
|
| 476 |
+
2765,190000,12.0779
|
| 477 |
+
2769,190400,10.5991
|
| 478 |
+
2775,190800,8.0868
|
| 479 |
+
2780,191200,8.917
|
| 480 |
+
2785,191600,9.9389
|
| 481 |
+
2789,192000,11.5622
|
| 482 |
+
2795,192400,8.386
|
| 483 |
+
2800,192800,8.3721
|
| 484 |
+
2805,193200,8.8166
|
| 485 |
+
2811,193600,8.1763
|
| 486 |
+
2817,194000,8.3255
|
| 487 |
+
2823,194400,8.8563
|
| 488 |
+
2827,194800,12.6773
|
| 489 |
+
2833,195200,7.9132
|
| 490 |
+
2837,195600,10.4681
|
| 491 |
+
2844,196000,8.2131
|
| 492 |
+
2849,196400,9.2795
|
| 493 |
+
2855,196800,7.094
|
| 494 |
+
2860,197200,10.5194
|
| 495 |
+
2864,197600,12.9269
|
| 496 |
+
2870,198000,7.9327
|
| 497 |
+
2876,198400,7.223
|
| 498 |
+
2881,198800,10.8269
|
| 499 |
+
2885,199200,12.3889
|
| 500 |
+
2892,199600,6.5769
|
| 501 |
+
2897,200000,11.076
|
code/Lake application/logs/results_2/PPO_frozen_lake_log_3.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
15,400,1.8261
|
| 3 |
+
32,800,1.7685
|
| 4 |
+
45,1200,2.4142
|
| 5 |
+
51,1600,4.6704
|
| 6 |
+
61,2000,3.1899
|
| 7 |
+
67,2400,5.3735
|
| 8 |
+
75,2800,4.1154
|
| 9 |
+
82,3200,3.7725
|
| 10 |
+
92,3600,3.7704
|
| 11 |
+
103,4000,2.6905
|
| 12 |
+
110,4400,4.0276
|
| 13 |
+
117,4800,5.8064
|
| 14 |
+
124,5200,4.4713
|
| 15 |
+
132,5600,4.0093
|
| 16 |
+
137,6000,5.9495
|
| 17 |
+
142,6400,5.8014
|
| 18 |
+
148,6800,5.8551
|
| 19 |
+
155,7200,4.6169
|
| 20 |
+
162,7600,4.2494
|
| 21 |
+
170,8000,3.9722
|
| 22 |
+
177,8400,3.9192
|
| 23 |
+
186,8800,3.8894
|
| 24 |
+
192,9200,5.2858
|
| 25 |
+
196,9600,6.1068
|
| 26 |
+
201,10000,7.4978
|
| 27 |
+
206,10400,6.5813
|
| 28 |
+
210,10800,7.0333
|
| 29 |
+
216,11200,5.7853
|
| 30 |
+
220,11600,6.9548
|
| 31 |
+
226,12000,6.9295
|
| 32 |
+
231,12400,8.6606
|
| 33 |
+
235,12800,8.1602
|
| 34 |
+
241,13200,5.5479
|
| 35 |
+
248,13600,5.2991
|
| 36 |
+
253,14000,5.5222
|
| 37 |
+
259,14400,6.6097
|
| 38 |
+
266,14800,5.7681
|
| 39 |
+
272,15200,5.1424
|
| 40 |
+
281,15600,3.9703
|
| 41 |
+
289,16000,5.446
|
| 42 |
+
296,16400,5.6469
|
| 43 |
+
301,16800,8.1023
|
| 44 |
+
309,17200,5.4118
|
| 45 |
+
313,17600,10.0884
|
| 46 |
+
318,18000,7.85
|
| 47 |
+
324,18400,7.1867
|
| 48 |
+
334,18800,4.1973
|
| 49 |
+
341,19200,6.5775
|
| 50 |
+
346,19600,8.8721
|
| 51 |
+
353,20000,8.3356
|
| 52 |
+
358,20400,8.4092
|
| 53 |
+
364,20800,7.0598
|
| 54 |
+
369,21200,9.7722
|
| 55 |
+
375,21600,8.7407
|
| 56 |
+
381,22000,7.1701
|
| 57 |
+
387,22400,6.8198
|
| 58 |
+
391,22800,11.2731
|
| 59 |
+
399,23200,6.9213
|
| 60 |
+
404,23600,7.6675
|
| 61 |
+
408,24000,8.5927
|
| 62 |
+
412,24400,9.5454
|
| 63 |
+
416,24800,11.0606
|
| 64 |
+
421,25200,11.0094
|
| 65 |
+
426,25600,9.2321
|
| 66 |
+
431,26000,7.2778
|
| 67 |
+
436,26400,10.7602
|
| 68 |
+
440,26800,10.2041
|
| 69 |
+
444,27200,10.7586
|
| 70 |
+
448,27600,8.5868
|
| 71 |
+
452,28000,9.4227
|
| 72 |
+
458,28400,8.901
|
| 73 |
+
462,28800,9.059
|
| 74 |
+
467,29200,9.2612
|
| 75 |
+
472,29600,10.802
|
| 76 |
+
478,30000,7.4041
|
| 77 |
+
483,30400,8.8905
|
| 78 |
+
489,30800,6.3011
|
| 79 |
+
493,31200,12.0364
|
| 80 |
+
497,31600,11.5981
|
| 81 |
+
501,32000,11.5024
|
| 82 |
+
505,32400,11.9104
|
| 83 |
+
509,32800,11.9212
|
| 84 |
+
513,33200,12.0185
|
| 85 |
+
518,33600,9.4035
|
| 86 |
+
522,34000,11.5176
|
| 87 |
+
526,34400,11.0239
|
| 88 |
+
534,34800,6.1402
|
| 89 |
+
538,35200,9.7348
|
| 90 |
+
546,35600,6.35
|
| 91 |
+
551,36000,7.8883
|
| 92 |
+
558,36400,7.7455
|
| 93 |
+
563,36800,7.1019
|
| 94 |
+
571,37200,6.6505
|
| 95 |
+
576,37600,7.5348
|
| 96 |
+
581,38000,12.1861
|
| 97 |
+
586,38400,9.1601
|
| 98 |
+
591,38800,8.1292
|
| 99 |
+
596,39200,7.3226
|
| 100 |
+
602,39600,9.3
|
| 101 |
+
608,40000,10.1455
|
| 102 |
+
614,40400,7.404
|
| 103 |
+
620,40800,9.5543
|
| 104 |
+
627,41200,8.0328
|
| 105 |
+
636,41600,5.051
|
| 106 |
+
648,42000,4.3144
|
| 107 |
+
654,42400,8.7103
|
| 108 |
+
661,42800,8.5619
|
| 109 |
+
666,43200,9.0912
|
| 110 |
+
671,43600,12.1562
|
| 111 |
+
679,44000,6.8929
|
| 112 |
+
683,44400,12.4673
|
| 113 |
+
690,44800,7.4547
|
| 114 |
+
700,45200,6.1627
|
| 115 |
+
708,45600,5.2344
|
| 116 |
+
712,46000,14.522
|
| 117 |
+
718,46400,9.7264
|
| 118 |
+
724,46800,9.4083
|
| 119 |
+
731,47200,7.3673
|
| 120 |
+
735,47600,10.918
|
| 121 |
+
741,48000,9.7135
|
| 122 |
+
746,48400,11.6226
|
| 123 |
+
753,48800,6.5335
|
| 124 |
+
760,49200,6.1922
|
| 125 |
+
765,49600,11.59
|
| 126 |
+
772,50000,7.6406
|
| 127 |
+
779,50400,7.3931
|
| 128 |
+
785,50800,8.8649
|
| 129 |
+
790,51200,13.0236
|
| 130 |
+
796,51600,9.1355
|
| 131 |
+
802,52000,9.2798
|
| 132 |
+
812,52400,4.6073
|
| 133 |
+
818,52800,8.5625
|
| 134 |
+
823,53200,8.0732
|
| 135 |
+
829,53600,8.2494
|
| 136 |
+
837,54000,5.0721
|
| 137 |
+
849,54400,3.926
|
| 138 |
+
857,54800,5.9843
|
| 139 |
+
866,55200,5.4496
|
| 140 |
+
872,55600,9.6436
|
| 141 |
+
877,56000,9.8259
|
| 142 |
+
882,56400,12.0831
|
| 143 |
+
886,56800,11.8707
|
| 144 |
+
892,57200,9.3723
|
| 145 |
+
897,57600,8.75
|
| 146 |
+
902,58000,9.1673
|
| 147 |
+
908,58400,8.2213
|
| 148 |
+
919,58800,3.6353
|
| 149 |
+
929,59200,3.9628
|
| 150 |
+
935,59600,6.8984
|
| 151 |
+
942,60000,6.928
|
| 152 |
+
948,60400,8.007
|
| 153 |
+
954,60800,7.1696
|
| 154 |
+
962,61200,6.8068
|
| 155 |
+
970,61600,5.7813
|
| 156 |
+
979,62000,6.7075
|
| 157 |
+
990,62400,4.5979
|
| 158 |
+
995,62800,11.0131
|
| 159 |
+
1001,63200,9.9881
|
| 160 |
+
1007,63600,8.771
|
| 161 |
+
1013,64000,6.4708
|
| 162 |
+
1020,64400,8.4602
|
| 163 |
+
1024,64800,12.5658
|
| 164 |
+
1029,65200,12.6734
|
| 165 |
+
1033,65600,13.9195
|
| 166 |
+
1037,66000,10.7454
|
| 167 |
+
1043,66400,10.9443
|
| 168 |
+
1048,66800,10.9429
|
| 169 |
+
1053,67200,9.4126
|
| 170 |
+
1059,67600,8.414
|
| 171 |
+
1066,68000,6.8977
|
| 172 |
+
1071,68400,8.4342
|
| 173 |
+
1077,68800,6.9781
|
| 174 |
+
1081,69200,9.3134
|
| 175 |
+
1087,69600,8.1705
|
| 176 |
+
1091,70000,8.8618
|
| 177 |
+
1096,70400,10.7669
|
| 178 |
+
1100,70800,10.662
|
| 179 |
+
1104,71200,9.404
|
| 180 |
+
1108,71600,10.9212
|
| 181 |
+
1114,72000,7.7906
|
| 182 |
+
1120,72400,6.9977
|
| 183 |
+
1124,72800,10.0251
|
| 184 |
+
1130,73200,8.4128
|
| 185 |
+
1134,73600,9.9691
|
| 186 |
+
1138,74000,11.4181
|
| 187 |
+
1143,74400,8.6228
|
| 188 |
+
1152,74800,5.3614
|
| 189 |
+
1157,75200,8.1655
|
| 190 |
+
1164,75600,7.1774
|
| 191 |
+
1171,76000,5.9159
|
| 192 |
+
1180,76400,4.0023
|
| 193 |
+
1189,76800,4.7476
|
| 194 |
+
1197,77200,5.5766
|
| 195 |
+
1202,77600,8.0878
|
| 196 |
+
1209,78000,6.6897
|
| 197 |
+
1213,78400,13.7633
|
| 198 |
+
1221,78800,6.482
|
| 199 |
+
1230,79200,6.0141
|
| 200 |
+
1234,79600,12.2026
|
| 201 |
+
1242,80000,5.6537
|
| 202 |
+
1251,80400,4.3695
|
| 203 |
+
1259,80800,7.4921
|
| 204 |
+
1264,81200,9.8077
|
| 205 |
+
1269,81600,10.9606
|
| 206 |
+
1275,82000,9.6273
|
| 207 |
+
1280,82400,12.195
|
| 208 |
+
1287,82800,7.4125
|
| 209 |
+
1292,83200,12.1273
|
| 210 |
+
1296,83600,13.0822
|
| 211 |
+
1303,84000,7.0237
|
| 212 |
+
1308,84400,11.6651
|
| 213 |
+
1313,84800,9.4606
|
| 214 |
+
1318,85200,12.5532
|
| 215 |
+
1324,85600,9.9701
|
| 216 |
+
1329,86000,11.7337
|
| 217 |
+
1333,86400,14.7119
|
| 218 |
+
1339,86800,9.0294
|
| 219 |
+
1345,87200,7.5184
|
| 220 |
+
1353,87600,7.3597
|
| 221 |
+
1359,88000,10.1495
|
| 222 |
+
1365,88400,8.7107
|
| 223 |
+
1375,88800,4.0411
|
| 224 |
+
1383,89200,5.6476
|
| 225 |
+
1389,89600,6.5996
|
| 226 |
+
1397,90000,8.0321
|
| 227 |
+
1404,90400,8.9973
|
| 228 |
+
1410,90800,8.9065
|
| 229 |
+
1417,91200,5.712
|
| 230 |
+
1427,91600,4.6027
|
| 231 |
+
1436,92000,5.296
|
| 232 |
+
1441,92400,7.6209
|
| 233 |
+
1449,92800,7.4688
|
| 234 |
+
1456,93200,5.0893
|
| 235 |
+
1464,93600,6.9208
|
| 236 |
+
1472,94000,7.4646
|
| 237 |
+
1479,94400,8.2095
|
| 238 |
+
1484,94800,9.7461
|
| 239 |
+
1490,95200,8.781
|
| 240 |
+
1494,95600,8.3202
|
| 241 |
+
1499,96000,9.0412
|
| 242 |
+
1506,96400,6.9789
|
| 243 |
+
1511,96800,7.9199
|
| 244 |
+
1517,97200,10.6694
|
| 245 |
+
1524,97600,6.481
|
| 246 |
+
1532,98000,6.7254
|
| 247 |
+
1541,98400,6.2726
|
| 248 |
+
1547,98800,7.5851
|
| 249 |
+
1555,99200,5.3696
|
| 250 |
+
1560,99600,10.4877
|
| 251 |
+
1567,100000,8.8012
|
| 252 |
+
1577,100400,5.5168
|
| 253 |
+
1584,100800,10.155
|
| 254 |
+
1588,101200,8.5114
|
| 255 |
+
1595,101600,6.3359
|
| 256 |
+
1602,102000,6.6452
|
| 257 |
+
1608,102400,6.8052
|
| 258 |
+
1612,102800,8.0109
|
| 259 |
+
1617,103200,5.8693
|
| 260 |
+
1621,103600,8.5857
|
| 261 |
+
1626,104000,9.6799
|
| 262 |
+
1634,104400,5.6426
|
| 263 |
+
1639,104800,6.5151
|
| 264 |
+
1644,105200,7.8849
|
| 265 |
+
1648,105600,8.9338
|
| 266 |
+
1654,106000,5.6678
|
| 267 |
+
1659,106400,6.3244
|
| 268 |
+
1665,106800,5.6218
|
| 269 |
+
1669,107200,9.5578
|
| 270 |
+
1676,107600,5.3338
|
| 271 |
+
1686,108000,4.6969
|
| 272 |
+
1693,108400,5.4701
|
| 273 |
+
1698,108800,10.4455
|
| 274 |
+
1704,109200,8.6891
|
| 275 |
+
1710,109600,10.0138
|
| 276 |
+
1717,110000,7.5448
|
| 277 |
+
1726,110400,7.8171
|
| 278 |
+
1731,110800,10.4446
|
| 279 |
+
1739,111200,8.1385
|
| 280 |
+
1748,111600,6.0488
|
| 281 |
+
1756,112000,7.1346
|
| 282 |
+
1761,112400,9.6157
|
| 283 |
+
1767,112800,9.1395
|
| 284 |
+
1774,113200,8.8882
|
| 285 |
+
1782,113600,8.0369
|
| 286 |
+
1789,114000,9.7018
|
| 287 |
+
1795,114400,8.5519
|
| 288 |
+
1802,114800,12.4735
|
| 289 |
+
1810,115200,6.0752
|
| 290 |
+
1815,115600,11.7469
|
| 291 |
+
1824,116000,5.4505
|
| 292 |
+
1829,116400,9.3351
|
| 293 |
+
1835,116800,10.7987
|
| 294 |
+
1840,117200,15.1342
|
| 295 |
+
1846,117600,14.5398
|
| 296 |
+
1853,118000,10.6334
|
| 297 |
+
1861,118400,11.3101
|
| 298 |
+
1866,118800,14.8907
|
| 299 |
+
1873,119200,9.6076
|
| 300 |
+
1882,119600,7.7126
|
| 301 |
+
1893,120000,4.7907
|
| 302 |
+
1901,120400,6.0066
|
| 303 |
+
1906,120800,15.4955
|
| 304 |
+
1911,121200,13.3978
|
| 305 |
+
1919,121600,9.6642
|
| 306 |
+
1924,122000,15.7393
|
| 307 |
+
1928,122400,18.9361
|
| 308 |
+
1932,122800,19.4331
|
| 309 |
+
1940,123200,8.6073
|
| 310 |
+
1947,123600,10.161
|
| 311 |
+
1954,124000,8.8061
|
| 312 |
+
1959,124400,13.5384
|
| 313 |
+
1966,124800,9.9289
|
| 314 |
+
1981,125200,4.1506
|
| 315 |
+
1991,125600,8.5539
|
| 316 |
+
1995,126000,20.6911
|
| 317 |
+
2002,126400,11.9305
|
| 318 |
+
2007,126800,16.2045
|
| 319 |
+
2016,127200,6.5353
|
| 320 |
+
2021,127600,17.3839
|
| 321 |
+
2027,128000,13.1571
|
| 322 |
+
2035,128400,11.3393
|
| 323 |
+
2039,128800,22.3028
|
| 324 |
+
2047,129200,12.8433
|
| 325 |
+
2055,129600,12.7583
|
| 326 |
+
2066,130000,6.9837
|
| 327 |
+
2071,130400,18.0303
|
| 328 |
+
2076,130800,18.2896
|
| 329 |
+
2084,131200,8.7988
|
| 330 |
+
2093,131600,8.7713
|
| 331 |
+
2098,132000,11.0877
|
| 332 |
+
2109,132400,7.8183
|
| 333 |
+
2115,132800,12.4679
|
| 334 |
+
2124,133200,10.5047
|
| 335 |
+
2137,133600,6.17
|
| 336 |
+
2142,134000,20.3565
|
| 337 |
+
2148,134400,10.4538
|
| 338 |
+
2154,134800,12.7331
|
| 339 |
+
2161,135200,12.5367
|
| 340 |
+
2171,135600,5.7754
|
| 341 |
+
2178,136000,13.3435
|
| 342 |
+
2182,136400,13.3376
|
| 343 |
+
2195,136800,5.0278
|
| 344 |
+
2203,137200,8.039
|
| 345 |
+
2215,137600,5.0622
|
| 346 |
+
2225,138000,7.6281
|
| 347 |
+
2232,138400,12.4199
|
| 348 |
+
2243,138800,5.7324
|
| 349 |
+
2249,139200,14.5818
|
| 350 |
+
2255,139600,14.0929
|
| 351 |
+
2262,140000,13.6329
|
| 352 |
+
2267,140400,18.3515
|
| 353 |
+
2272,140800,18.0695
|
| 354 |
+
2280,141200,12.0349
|
| 355 |
+
2287,141600,13.6652
|
| 356 |
+
2296,142000,9.2929
|
| 357 |
+
2305,142400,10.1985
|
| 358 |
+
2312,142800,12.7522
|
| 359 |
+
2323,143200,7.2459
|
| 360 |
+
2331,143600,8.9751
|
| 361 |
+
2338,144000,11.4881
|
| 362 |
+
2344,144400,15.2227
|
| 363 |
+
2351,144800,12.8927
|
| 364 |
+
2358,145200,10.6543
|
| 365 |
+
2362,145600,22.496
|
| 366 |
+
2368,146000,13.9616
|
| 367 |
+
2373,146400,18.1932
|
| 368 |
+
2378,146800,16.1787
|
| 369 |
+
2382,147200,21.2142
|
| 370 |
+
2386,147600,22.1002
|
| 371 |
+
2396,148000,8.9528
|
| 372 |
+
2401,148400,15.8869
|
| 373 |
+
2408,148800,13.7149
|
| 374 |
+
2413,149200,13.7033
|
| 375 |
+
2419,149600,17.2193
|
| 376 |
+
2425,150000,11.3894
|
| 377 |
+
2432,150400,13.8544
|
| 378 |
+
2437,150800,17.5939
|
| 379 |
+
2444,151200,12.8075
|
| 380 |
+
2449,151600,12.1515
|
| 381 |
+
2457,152000,10.4033
|
| 382 |
+
2465,152400,11.4859
|
| 383 |
+
2470,152800,14.4762
|
| 384 |
+
2477,153200,12.3627
|
| 385 |
+
2483,153600,14.8347
|
| 386 |
+
2488,154000,18.2382
|
| 387 |
+
2497,154400,9.2311
|
| 388 |
+
2501,154800,19.7235
|
| 389 |
+
2509,155200,13.3697
|
| 390 |
+
2515,155600,11.9598
|
| 391 |
+
2525,156000,7.2526
|
| 392 |
+
2534,156400,9.3025
|
| 393 |
+
2545,156800,8.9835
|
| 394 |
+
2551,157200,12.1765
|
| 395 |
+
2558,157600,14.0303
|
| 396 |
+
2564,158000,13.4739
|
| 397 |
+
2573,158400,9.8322
|
| 398 |
+
2578,158800,19.6338
|
| 399 |
+
2584,159200,15.7125
|
| 400 |
+
2588,159600,17.0086
|
| 401 |
+
2594,160000,14.7127
|
| 402 |
+
2598,160400,23.1588
|
| 403 |
+
2607,160800,11.0373
|
| 404 |
+
2615,161200,9.348
|
| 405 |
+
2619,161600,21.6514
|
| 406 |
+
2624,162000,12.9316
|
| 407 |
+
2631,162400,12.1088
|
| 408 |
+
2636,162800,20.0918
|
| 409 |
+
2640,163200,18.6887
|
| 410 |
+
2644,163600,19.3577
|
| 411 |
+
2653,164000,8.5057
|
| 412 |
+
2662,164400,8.0083
|
| 413 |
+
2668,164800,15.0007
|
| 414 |
+
2676,165200,8.8861
|
| 415 |
+
2682,165600,15.3621
|
| 416 |
+
2689,166000,13.6995
|
| 417 |
+
2696,166400,10.5381
|
| 418 |
+
2701,166800,19.4263
|
| 419 |
+
2708,167200,12.0695
|
| 420 |
+
2713,167600,11.9025
|
| 421 |
+
2719,168000,10.0897
|
| 422 |
+
2725,168400,15.0383
|
| 423 |
+
2731,168800,14.8992
|
| 424 |
+
2735,169200,14.9242
|
| 425 |
+
2739,169600,20.4302
|
| 426 |
+
2745,170000,15.2987
|
| 427 |
+
2750,170400,16.7812
|
| 428 |
+
2754,170800,16.0345
|
| 429 |
+
2759,171200,16.4285
|
| 430 |
+
2768,171600,9.1208
|
| 431 |
+
2780,172000,5.583
|
| 432 |
+
2787,172400,8.2014
|
| 433 |
+
2793,172800,10.1961
|
| 434 |
+
2798,173200,17.1725
|
| 435 |
+
2806,173600,9.115
|
| 436 |
+
2814,174000,9.2754
|
| 437 |
+
2821,174400,10.8946
|
| 438 |
+
2827,174800,11.5879
|
| 439 |
+
2834,175200,10.3869
|
| 440 |
+
2840,175600,13.8918
|
| 441 |
+
2845,176000,12.5769
|
| 442 |
+
2854,176400,10.322
|
| 443 |
+
2863,176800,6.8967
|
| 444 |
+
2869,177200,17.4846
|
| 445 |
+
2874,177600,19.6151
|
| 446 |
+
2881,178000,12.9361
|
| 447 |
+
2886,178400,18.2368
|
| 448 |
+
2892,178800,12.8876
|
| 449 |
+
2898,179200,12.3181
|
| 450 |
+
2903,179600,17.6907
|
| 451 |
+
2908,180000,15.7174
|
| 452 |
+
2915,180400,11.7662
|
| 453 |
+
2920,180800,17.438
|
| 454 |
+
2925,181200,14.2649
|
| 455 |
+
2931,181600,12.5882
|
| 456 |
+
2936,182000,17.2888
|
| 457 |
+
2942,182400,15.7864
|
| 458 |
+
2946,182800,19.7236
|
| 459 |
+
2952,183200,14.7757
|
| 460 |
+
2957,183600,13.2554
|
| 461 |
+
2962,184000,16.9161
|
| 462 |
+
2966,184400,19.4477
|
| 463 |
+
2971,184800,14.806
|
| 464 |
+
2976,185200,14.7174
|
| 465 |
+
2981,185600,14.6584
|
| 466 |
+
2985,186000,13.1555
|
| 467 |
+
2993,186400,8.2998
|
| 468 |
+
2999,186800,10.4079
|
| 469 |
+
3004,187200,14.5865
|
| 470 |
+
3011,187600,9.0036
|
| 471 |
+
3015,188000,13.7298
|
| 472 |
+
3022,188400,8.8899
|
| 473 |
+
3026,188800,15.7034
|
| 474 |
+
3032,189200,11.7676
|
| 475 |
+
3036,189600,17.0897
|
| 476 |
+
3044,190000,10.1182
|
| 477 |
+
3049,190400,13.9028
|
| 478 |
+
3054,190800,16.7113
|
| 479 |
+
3059,191200,16.4022
|
| 480 |
+
3064,191600,18.3592
|
| 481 |
+
3069,192000,17.6439
|
| 482 |
+
3074,192400,15.5535
|
| 483 |
+
3079,192800,15.6137
|
| 484 |
+
3085,193200,14.7975
|
| 485 |
+
3091,193600,11.9988
|
| 486 |
+
3099,194000,10.8644
|
| 487 |
+
3106,194400,14.6474
|
| 488 |
+
3113,194800,12.833
|
| 489 |
+
3117,195200,22.6677
|
| 490 |
+
3121,195600,21.1913
|
| 491 |
+
3126,196000,17.2308
|
| 492 |
+
3131,196400,14.7737
|
| 493 |
+
3140,196800,6.5658
|
| 494 |
+
3147,197200,8.1251
|
| 495 |
+
3155,197600,8.3248
|
| 496 |
+
3162,198000,10.0096
|
| 497 |
+
3168,198400,9.34
|
| 498 |
+
3175,198800,9.7054
|
| 499 |
+
3181,199200,9.9748
|
| 500 |
+
3186,199600,11.6185
|
| 501 |
+
3191,200000,10.6864
|
code/Lake application/logs/results_2/PPO_frozen_lake_log_4.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
30,400,0.8897
|
| 3 |
+
47,800,1.6985
|
| 4 |
+
70,1200,1.5344
|
| 5 |
+
81,1600,2.8837
|
| 6 |
+
93,2000,2.7853
|
| 7 |
+
99,2400,4.9228
|
| 8 |
+
105,2800,4.8879
|
| 9 |
+
109,3200,6.7205
|
| 10 |
+
116,3600,5.1037
|
| 11 |
+
121,4000,6.8188
|
| 12 |
+
126,4400,5.7358
|
| 13 |
+
130,4800,7.5711
|
| 14 |
+
136,5200,5.2419
|
| 15 |
+
141,5600,5.8543
|
| 16 |
+
147,6000,5.2331
|
| 17 |
+
152,6400,4.9293
|
| 18 |
+
161,6800,4.0985
|
| 19 |
+
166,7200,5.2082
|
| 20 |
+
172,7600,5.3103
|
| 21 |
+
177,8000,6.0557
|
| 22 |
+
181,8400,6.9039
|
| 23 |
+
188,8800,4.5342
|
| 24 |
+
192,9200,9.1478
|
| 25 |
+
199,9600,5.3605
|
| 26 |
+
206,10000,5.1273
|
| 27 |
+
211,10400,7.8887
|
| 28 |
+
216,10800,6.2693
|
| 29 |
+
220,11200,7.3964
|
| 30 |
+
224,11600,7.1054
|
| 31 |
+
230,12000,5.4118
|
| 32 |
+
234,12400,7.4287
|
| 33 |
+
238,12800,7.4478
|
| 34 |
+
245,13200,4.4117
|
| 35 |
+
249,13600,7.2105
|
| 36 |
+
253,14000,6.7015
|
| 37 |
+
257,14400,7.4679
|
| 38 |
+
262,14800,6.5292
|
| 39 |
+
267,15200,6.025
|
| 40 |
+
271,15600,7.6356
|
| 41 |
+
275,16000,7.6501
|
| 42 |
+
280,16400,7.0316
|
| 43 |
+
284,16800,8.6843
|
| 44 |
+
289,17200,6.7495
|
| 45 |
+
294,17600,6.023
|
| 46 |
+
299,18000,6.415
|
| 47 |
+
305,18400,5.4163
|
| 48 |
+
311,18800,5.4882
|
| 49 |
+
316,19200,6.2097
|
| 50 |
+
320,19600,8.0329
|
| 51 |
+
324,20000,7.5882
|
| 52 |
+
329,20400,7.3095
|
| 53 |
+
333,20800,7.5687
|
| 54 |
+
339,21200,6.6201
|
| 55 |
+
343,21600,9.5695
|
| 56 |
+
349,22000,6.358
|
| 57 |
+
354,22400,7.729
|
| 58 |
+
362,22800,3.8919
|
| 59 |
+
367,23200,6.2263
|
| 60 |
+
372,23600,8.9296
|
| 61 |
+
377,24000,8.9107
|
| 62 |
+
383,24400,6.8109
|
| 63 |
+
391,24800,4.3733
|
| 64 |
+
397,25200,7.8942
|
| 65 |
+
402,25600,8.1473
|
| 66 |
+
408,26000,6.342
|
| 67 |
+
414,26400,6.6643
|
| 68 |
+
419,26800,8.5145
|
| 69 |
+
423,27200,9.8487
|
| 70 |
+
427,27600,8.3884
|
| 71 |
+
432,28000,8.1417
|
| 72 |
+
438,28400,6.5363
|
| 73 |
+
442,28800,8.3084
|
| 74 |
+
447,29200,7.5203
|
| 75 |
+
451,29600,8.0109
|
| 76 |
+
455,30000,7.1375
|
| 77 |
+
459,30400,6.972
|
| 78 |
+
464,30800,7.2792
|
| 79 |
+
468,31200,8.3772
|
| 80 |
+
472,31600,8.6912
|
| 81 |
+
476,32000,7.6424
|
| 82 |
+
480,32400,8.1047
|
| 83 |
+
484,32800,6.6454
|
| 84 |
+
489,33200,7.4736
|
| 85 |
+
493,33600,8.1904
|
| 86 |
+
497,34000,7.1456
|
| 87 |
+
503,34400,6.2541
|
| 88 |
+
507,34800,7.7885
|
| 89 |
+
511,35200,7.3507
|
| 90 |
+
515,35600,8.0471
|
| 91 |
+
520,36000,8.5436
|
| 92 |
+
524,36400,6.8725
|
| 93 |
+
529,36800,8.4028
|
| 94 |
+
535,37200,5.2433
|
| 95 |
+
542,37600,4.7139
|
| 96 |
+
546,38000,7.3213
|
| 97 |
+
555,38400,3.8831
|
| 98 |
+
561,38800,5.6601
|
| 99 |
+
568,39200,4.7948
|
| 100 |
+
576,39600,4.6981
|
| 101 |
+
584,40000,4.3181
|
| 102 |
+
589,40400,7.5472
|
| 103 |
+
593,40800,9.3392
|
| 104 |
+
602,41200,3.9924
|
| 105 |
+
609,41600,7.1339
|
| 106 |
+
615,42000,6.7132
|
| 107 |
+
620,42400,6.7015
|
| 108 |
+
628,42800,5.4925
|
| 109 |
+
636,43200,3.7468
|
| 110 |
+
644,43600,4.3569
|
| 111 |
+
651,44000,5.8671
|
| 112 |
+
655,44400,8.3115
|
| 113 |
+
660,44800,9.1009
|
| 114 |
+
665,45200,7.2625
|
| 115 |
+
672,45600,5.378
|
| 116 |
+
678,46000,5.686
|
| 117 |
+
684,46400,5.8378
|
| 118 |
+
688,46800,7.674
|
| 119 |
+
693,47200,7.7574
|
| 120 |
+
697,47600,9.4904
|
| 121 |
+
706,48000,4.0155
|
| 122 |
+
712,48400,9.193
|
| 123 |
+
718,48800,6.0672
|
| 124 |
+
723,49200,8.703
|
| 125 |
+
729,49600,6.4219
|
| 126 |
+
737,50000,5.2146
|
| 127 |
+
742,50400,7.6968
|
| 128 |
+
747,50800,9.559
|
| 129 |
+
753,51200,6.7111
|
| 130 |
+
758,51600,9.7201
|
| 131 |
+
764,52000,7.5954
|
| 132 |
+
770,52400,8.0675
|
| 133 |
+
775,52800,7.1163
|
| 134 |
+
782,53200,5.3886
|
| 135 |
+
786,53600,10.9581
|
| 136 |
+
791,54000,9.5825
|
| 137 |
+
800,54400,4.9313
|
| 138 |
+
808,54800,3.2748
|
| 139 |
+
813,55200,9.4975
|
| 140 |
+
819,55600,8.5919
|
| 141 |
+
828,56000,4.0659
|
| 142 |
+
834,56400,6.4677
|
| 143 |
+
839,56800,8.6157
|
| 144 |
+
847,57200,7.6231
|
| 145 |
+
854,57600,6.1867
|
| 146 |
+
864,58000,5.138
|
| 147 |
+
875,58400,4.1107
|
| 148 |
+
884,58800,4.6541
|
| 149 |
+
890,59200,8.6775
|
| 150 |
+
898,59600,4.5193
|
| 151 |
+
903,60000,10.8015
|
| 152 |
+
909,60400,7.2792
|
| 153 |
+
916,60800,6.9898
|
| 154 |
+
920,61200,9.2429
|
| 155 |
+
926,61600,7.8279
|
| 156 |
+
930,62000,9.559
|
| 157 |
+
938,62400,6.2201
|
| 158 |
+
942,62800,12.4695
|
| 159 |
+
949,63200,6.0011
|
| 160 |
+
955,63600,7.5678
|
| 161 |
+
960,64000,8.5841
|
| 162 |
+
965,64400,8.8059
|
| 163 |
+
969,64800,9.559
|
| 164 |
+
974,65200,8.137
|
| 165 |
+
979,65600,6.2258
|
| 166 |
+
985,66000,6.0418
|
| 167 |
+
990,66400,7.6972
|
| 168 |
+
994,66800,10.6031
|
| 169 |
+
999,67200,6.4527
|
| 170 |
+
1004,67600,7.6003
|
| 171 |
+
1009,68000,8.036
|
| 172 |
+
1014,68400,11.559
|
| 173 |
+
1018,68800,9.9028
|
| 174 |
+
1024,69200,8.8209
|
| 175 |
+
1030,69600,6.8682
|
| 176 |
+
1034,70000,9.1513
|
| 177 |
+
1039,70400,8.8808
|
| 178 |
+
1045,70800,6.2892
|
| 179 |
+
1052,71200,6.6137
|
| 180 |
+
1056,71600,9.1258
|
| 181 |
+
1061,72000,8.2712
|
| 182 |
+
1069,72400,5.1346
|
| 183 |
+
1073,72800,9.3301
|
| 184 |
+
1079,73200,8.6006
|
| 185 |
+
1083,73600,9.7199
|
| 186 |
+
1088,74000,8.5393
|
| 187 |
+
1093,74400,9.4136
|
| 188 |
+
1098,74800,9.2309
|
| 189 |
+
1104,75200,9.9483
|
| 190 |
+
1111,75600,6.868
|
| 191 |
+
1117,76000,7.3642
|
| 192 |
+
1123,76400,8.7512
|
| 193 |
+
1128,76800,7.7363
|
| 194 |
+
1133,77200,11.2048
|
| 195 |
+
1138,77600,8.7672
|
| 196 |
+
1142,78000,11.2022
|
| 197 |
+
1148,78400,8.6267
|
| 198 |
+
1156,78800,5.7085
|
| 199 |
+
1165,79200,5.2502
|
| 200 |
+
1171,79600,9.1847
|
| 201 |
+
1181,80000,3.8257
|
| 202 |
+
1187,80400,9.0944
|
| 203 |
+
1193,80800,7.8396
|
| 204 |
+
1204,81200,4.6099
|
| 205 |
+
1214,81600,4.2423
|
| 206 |
+
1223,82000,4.2463
|
| 207 |
+
1231,82400,6.0866
|
| 208 |
+
1236,82800,9.3631
|
| 209 |
+
1244,83200,6.0139
|
| 210 |
+
1250,83600,9.3664
|
| 211 |
+
1258,84000,5.5445
|
| 212 |
+
1263,84400,9.2419
|
| 213 |
+
1270,84800,6.5126
|
| 214 |
+
1277,85200,5.3284
|
| 215 |
+
1284,85600,4.7442
|
| 216 |
+
1289,86000,6.6238
|
| 217 |
+
1295,86400,5.9787
|
| 218 |
+
1300,86800,6.273
|
| 219 |
+
1304,87200,7.7552
|
| 220 |
+
1310,87600,6.0625
|
| 221 |
+
1315,88000,6.2442
|
| 222 |
+
1320,88400,7.2193
|
| 223 |
+
1326,88800,5.8607
|
| 224 |
+
1333,89200,5.8177
|
| 225 |
+
1338,89600,5.6992
|
| 226 |
+
1344,90000,6.309
|
| 227 |
+
1350,90400,7.4904
|
| 228 |
+
1357,90800,5.3341
|
| 229 |
+
1362,91200,9.1276
|
| 230 |
+
1370,91600,5.6335
|
| 231 |
+
1377,92000,5.5404
|
| 232 |
+
1382,92400,10.4014
|
| 233 |
+
1387,92800,8.972
|
| 234 |
+
1393,93200,7.6199
|
| 235 |
+
1400,93600,7.0028
|
| 236 |
+
1408,94000,6.7953
|
| 237 |
+
1417,94400,4.607
|
| 238 |
+
1425,94800,6.7686
|
| 239 |
+
1431,95200,6.3672
|
| 240 |
+
1437,95600,7.3133
|
| 241 |
+
1442,96000,5.5286
|
| 242 |
+
1449,96400,8.1326
|
| 243 |
+
1454,96800,6.6459
|
| 244 |
+
1459,97200,9.6138
|
| 245 |
+
1465,97600,8.3167
|
| 246 |
+
1470,98000,11.7781
|
| 247 |
+
1474,98400,17.2643
|
| 248 |
+
1481,98800,9.2478
|
| 249 |
+
1490,99200,8.6222
|
| 250 |
+
1497,99600,10.0905
|
| 251 |
+
1504,100000,7.0319
|
| 252 |
+
1510,100400,11.7434
|
| 253 |
+
1515,100800,12.7016
|
| 254 |
+
1519,101200,14.5775
|
| 255 |
+
1525,101600,7.4347
|
| 256 |
+
1531,102000,17.0148
|
| 257 |
+
1535,102400,21.5497
|
| 258 |
+
1541,102800,13.3595
|
| 259 |
+
1546,103200,20.2214
|
| 260 |
+
1553,103600,12.2627
|
| 261 |
+
1562,104000,10.1528
|
| 262 |
+
1572,104400,7.0324
|
| 263 |
+
1577,104800,13.988
|
| 264 |
+
1587,105200,9.7205
|
| 265 |
+
1591,105600,22.2021
|
| 266 |
+
1599,106000,12.8697
|
| 267 |
+
1611,106400,6.7423
|
| 268 |
+
1618,106800,9.1311
|
| 269 |
+
1624,107200,13.5325
|
| 270 |
+
1630,107600,15.3574
|
| 271 |
+
1637,108000,11.4605
|
| 272 |
+
1647,108400,8.1593
|
| 273 |
+
1657,108800,7.3692
|
| 274 |
+
1664,109200,12.4204
|
| 275 |
+
1671,109600,16.0635
|
| 276 |
+
1677,110000,11.0747
|
| 277 |
+
1687,110400,10.0776
|
| 278 |
+
1695,110800,9.713
|
| 279 |
+
1704,111200,6.6402
|
| 280 |
+
1709,111600,16.4947
|
| 281 |
+
1714,112000,12.0573
|
| 282 |
+
1720,112400,12.4928
|
| 283 |
+
1726,112800,16.9818
|
| 284 |
+
1731,113200,16.4082
|
| 285 |
+
1735,113600,19.4684
|
| 286 |
+
1741,114000,17.2942
|
| 287 |
+
1746,114400,17.3803
|
| 288 |
+
1752,114800,14.3429
|
| 289 |
+
1759,115200,15.4686
|
| 290 |
+
1764,115600,18.3797
|
| 291 |
+
1771,116000,10.6607
|
| 292 |
+
1778,116400,12.9278
|
| 293 |
+
1783,116800,21.2477
|
| 294 |
+
1789,117200,12.1737
|
| 295 |
+
1795,117600,15.1248
|
| 296 |
+
1801,118000,11.4594
|
| 297 |
+
1808,118400,11.8572
|
| 298 |
+
1816,118800,8.6953
|
| 299 |
+
1822,119200,12.9991
|
| 300 |
+
1830,119600,8.346
|
| 301 |
+
1835,120000,16.5775
|
| 302 |
+
1840,120400,18.7012
|
| 303 |
+
1845,120800,18.5211
|
| 304 |
+
1852,121200,12.3151
|
| 305 |
+
1860,121600,11.191
|
| 306 |
+
1868,122000,10.6305
|
| 307 |
+
1875,122400,12.1361
|
| 308 |
+
1883,122800,12.0561
|
| 309 |
+
1887,123200,21.1206
|
| 310 |
+
1895,123600,10.3102
|
| 311 |
+
1901,124000,15.5468
|
| 312 |
+
1905,124400,21.1214
|
| 313 |
+
1913,124800,12.598
|
| 314 |
+
1919,125200,8.6702
|
| 315 |
+
1923,125600,19.5976
|
| 316 |
+
1928,126000,17.347
|
| 317 |
+
1936,126400,12.0519
|
| 318 |
+
1944,126800,6.2953
|
| 319 |
+
1949,127200,13.6435
|
| 320 |
+
1956,127600,9.3424
|
| 321 |
+
1960,128000,22.6692
|
| 322 |
+
1966,128400,12.2863
|
| 323 |
+
1973,128800,15.4013
|
| 324 |
+
1978,129200,17.9858
|
| 325 |
+
1988,129600,7.2154
|
| 326 |
+
1996,130000,10.964
|
| 327 |
+
2004,130400,10.9658
|
| 328 |
+
2009,130800,16.1921
|
| 329 |
+
2015,131200,19.8994
|
| 330 |
+
2020,131600,12.5598
|
| 331 |
+
2026,132000,18.5603
|
| 332 |
+
2034,132400,8.9442
|
| 333 |
+
2039,132800,15.7247
|
| 334 |
+
2044,133200,19.6043
|
| 335 |
+
2048,133600,22.708
|
| 336 |
+
2055,134000,12.1769
|
| 337 |
+
2059,134400,30.2886
|
| 338 |
+
2064,134800,19.3976
|
| 339 |
+
2069,135200,24.011
|
| 340 |
+
2075,135600,22.3232
|
| 341 |
+
2079,136000,22.4054
|
| 342 |
+
2087,136400,14.8207
|
| 343 |
+
2095,136800,14.1154
|
| 344 |
+
2102,137200,13.3378
|
| 345 |
+
2106,137600,22.9892
|
| 346 |
+
2112,138000,19.1975
|
| 347 |
+
2119,138400,16.2562
|
| 348 |
+
2125,138800,16.5325
|
| 349 |
+
2134,139200,9.7804
|
| 350 |
+
2143,139600,12.9261
|
| 351 |
+
2149,140000,15.1729
|
| 352 |
+
2157,140400,11.4505
|
| 353 |
+
2163,140800,16.225
|
| 354 |
+
2168,141200,15.0464
|
| 355 |
+
2175,141600,12.2286
|
| 356 |
+
2181,142000,14.5324
|
| 357 |
+
2187,142400,17.9193
|
| 358 |
+
2192,142800,21.9792
|
| 359 |
+
2202,143200,7.5693
|
| 360 |
+
2214,143600,7.0395
|
| 361 |
+
2219,144000,20.2988
|
| 362 |
+
2230,144400,8.1503
|
| 363 |
+
2237,144800,12.8959
|
| 364 |
+
2246,145200,11.8272
|
| 365 |
+
2254,145600,15.8534
|
| 366 |
+
2259,146000,20.5079
|
| 367 |
+
2266,146400,10.7379
|
| 368 |
+
2271,146800,21.599
|
| 369 |
+
2279,147200,12.679
|
| 370 |
+
2284,147600,14.8514
|
| 371 |
+
2291,148000,8.6118
|
| 372 |
+
2297,148400,7.5502
|
| 373 |
+
2306,148800,5.1645
|
| 374 |
+
2313,149200,10.6152
|
| 375 |
+
2319,149600,15.1497
|
| 376 |
+
2324,150000,13.6594
|
| 377 |
+
2331,150400,10.1251
|
| 378 |
+
2337,150800,10.9294
|
| 379 |
+
2345,151200,5.0712
|
| 380 |
+
2350,151600,13.3293
|
| 381 |
+
2359,152000,6.394
|
| 382 |
+
2370,152400,4.9969
|
| 383 |
+
2379,152800,7.9595
|
| 384 |
+
2391,153200,4.0272
|
| 385 |
+
2398,153600,7.3762
|
| 386 |
+
2407,154000,7.3333
|
| 387 |
+
2413,154400,10.8586
|
| 388 |
+
2423,154800,9.7345
|
| 389 |
+
2432,155200,7.9822
|
| 390 |
+
2439,155600,10.3486
|
| 391 |
+
2450,156000,6.3284
|
| 392 |
+
2458,156400,9.4372
|
| 393 |
+
2473,156800,3.132
|
| 394 |
+
2481,157200,10.3754
|
| 395 |
+
2487,157600,10.447
|
| 396 |
+
2493,158000,15.9101
|
| 397 |
+
2503,158400,8.3842
|
| 398 |
+
2510,158800,15.4866
|
| 399 |
+
2518,159200,11.682
|
| 400 |
+
2526,159600,11.0361
|
| 401 |
+
2537,160000,8.7871
|
| 402 |
+
2545,160400,11.1971
|
| 403 |
+
2555,160800,4.4022
|
| 404 |
+
2562,161200,13.1779
|
| 405 |
+
2568,161600,12.9045
|
| 406 |
+
2579,162000,8.0949
|
| 407 |
+
2587,162400,11.4998
|
| 408 |
+
2597,162800,7.2315
|
| 409 |
+
2604,163200,14.4484
|
| 410 |
+
2609,163600,15.2563
|
| 411 |
+
2622,164000,7.1052
|
| 412 |
+
2631,164400,8.0708
|
| 413 |
+
2636,164800,15.3412
|
| 414 |
+
2646,165200,7.0698
|
| 415 |
+
2654,165600,10.9479
|
| 416 |
+
2661,166000,12.6783
|
| 417 |
+
2666,166400,16.1794
|
| 418 |
+
2671,166800,18.6718
|
| 419 |
+
2676,167200,13.5239
|
| 420 |
+
2681,167600,12.3507
|
| 421 |
+
2691,168000,7.2502
|
| 422 |
+
2700,168400,6.2099
|
| 423 |
+
2707,168800,13.9091
|
| 424 |
+
2712,169200,16.7988
|
| 425 |
+
2717,169600,22.2866
|
| 426 |
+
2728,170000,8.0224
|
| 427 |
+
2738,170400,11.7132
|
| 428 |
+
2743,170800,22.9338
|
| 429 |
+
2750,171200,15.1354
|
| 430 |
+
2762,171600,8.9432
|
| 431 |
+
2768,172000,14.02
|
| 432 |
+
2773,172400,19.0923
|
| 433 |
+
2780,172800,14.6205
|
| 434 |
+
2787,173200,12.9528
|
| 435 |
+
2796,173600,11.7497
|
| 436 |
+
2803,174000,14.0874
|
| 437 |
+
2808,174400,21.8773
|
| 438 |
+
2813,174800,16.507
|
| 439 |
+
2819,175200,19.8032
|
| 440 |
+
2827,175600,13.8242
|
| 441 |
+
2833,176000,19.0166
|
| 442 |
+
2838,176400,27.172
|
| 443 |
+
2846,176800,11.3364
|
| 444 |
+
2851,177200,19.2692
|
| 445 |
+
2857,177600,21.4003
|
| 446 |
+
2865,178000,13.1762
|
| 447 |
+
2871,178400,20.7159
|
| 448 |
+
2878,178800,12.615
|
| 449 |
+
2882,179200,17.292
|
| 450 |
+
2886,179600,22.37
|
| 451 |
+
2893,180000,16.9824
|
| 452 |
+
2903,180400,7.4276
|
| 453 |
+
2912,180800,12.7024
|
| 454 |
+
2918,181200,14.4444
|
| 455 |
+
2926,181600,14.2302
|
| 456 |
+
2932,182000,18.124
|
| 457 |
+
2938,182400,13.5542
|
| 458 |
+
2943,182800,33.0073
|
| 459 |
+
2950,183200,12.0173
|
| 460 |
+
2954,183600,22.766
|
| 461 |
+
2959,184000,16.2093
|
| 462 |
+
2965,184400,15.9019
|
| 463 |
+
2972,184800,13.7782
|
| 464 |
+
2976,185200,26.2554
|
| 465 |
+
2985,185600,10.2687
|
| 466 |
+
2991,186000,14.767
|
| 467 |
+
2998,186400,12.8508
|
| 468 |
+
3002,186800,19.9929
|
| 469 |
+
3008,187200,16.2693
|
| 470 |
+
3016,187600,14.6283
|
| 471 |
+
3026,188000,9.5896
|
| 472 |
+
3034,188400,11.7475
|
| 473 |
+
3041,188800,12.2039
|
| 474 |
+
3045,189200,23.8742
|
| 475 |
+
3056,189600,8.3229
|
| 476 |
+
3063,190000,11.436
|
| 477 |
+
3070,190400,10.1774
|
| 478 |
+
3077,190800,10.9202
|
| 479 |
+
3082,191200,16.139
|
| 480 |
+
3087,191600,17.0197
|
| 481 |
+
3096,192000,6.3408
|
| 482 |
+
3103,192400,10.6383
|
| 483 |
+
3108,192800,14.7467
|
| 484 |
+
3113,193200,16.4579
|
| 485 |
+
3121,193600,9.5428
|
| 486 |
+
3125,194000,22.9954
|
| 487 |
+
3129,194400,11.4853
|
| 488 |
+
3136,194800,8.377
|
| 489 |
+
3141,195200,13.0133
|
| 490 |
+
3145,195600,14.8357
|
| 491 |
+
3150,196000,20.832
|
| 492 |
+
3159,196400,8.4116
|
| 493 |
+
3166,196800,16.5597
|
| 494 |
+
3178,197200,6.8153
|
| 495 |
+
3186,197600,11.2551
|
| 496 |
+
3197,198000,9.8681
|
| 497 |
+
3202,198400,16.0912
|
| 498 |
+
3210,198800,13.4439
|
| 499 |
+
3216,199200,11.4534
|
| 500 |
+
3222,199600,16.3593
|
| 501 |
+
3229,200000,11.4777
|
code/Lake application/logs/results_2/PPO_frozen_lake_log_5.csv
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
episode,timestep,reward
|
| 2 |
+
25,400,1.1394
|
| 3 |
+
48,800,1.3002
|
| 4 |
+
55,1200,4.2174
|
| 5 |
+
65,1600,3.1185
|
| 6 |
+
72,2000,4.6995
|
| 7 |
+
77,2400,5.4912
|
| 8 |
+
83,2800,5.3572
|
| 9 |
+
87,3200,6.4659
|
| 10 |
+
93,3600,5.7893
|
| 11 |
+
97,4000,7.5009
|
| 12 |
+
101,4400,7.0671
|
| 13 |
+
105,4800,6.9336
|
| 14 |
+
109,5200,7.0397
|
| 15 |
+
114,5600,6.2202
|
| 16 |
+
120,6000,4.966
|
| 17 |
+
126,6400,5.4569
|
| 18 |
+
131,6800,5.7776
|
| 19 |
+
135,7200,7.3573
|
| 20 |
+
139,7600,6.3645
|
| 21 |
+
144,8000,6.1568
|
| 22 |
+
148,8400,7.3496
|
| 23 |
+
152,8800,6.4834
|
| 24 |
+
157,9200,6.0306
|
| 25 |
+
162,9600,6.5141
|
| 26 |
+
167,10000,6.2576
|
| 27 |
+
172,10400,5.8399
|
| 28 |
+
176,10800,5.7246
|
| 29 |
+
182,11200,5.6336
|
| 30 |
+
186,11600,6.3594
|
| 31 |
+
192,12000,5.9243
|
| 32 |
+
196,12400,7.3959
|
| 33 |
+
200,12800,7.3087
|
| 34 |
+
204,13200,7.4288
|
| 35 |
+
208,13600,6.0092
|
| 36 |
+
212,14000,7.4051
|
| 37 |
+
219,14400,5.0751
|
| 38 |
+
223,14800,6.1337
|
| 39 |
+
228,15200,6.0328
|
| 40 |
+
233,15600,6.2323
|
| 41 |
+
237,16000,7.2947
|
| 42 |
+
242,16400,6.21
|
| 43 |
+
247,16800,5.2467
|
| 44 |
+
251,17200,7.301
|
| 45 |
+
256,17600,6.7575
|
| 46 |
+
260,18000,7.3051
|
| 47 |
+
264,18400,7.3659
|
| 48 |
+
270,18800,4.9364
|
| 49 |
+
275,19200,4.7764
|
| 50 |
+
280,19600,6.6525
|
| 51 |
+
286,20000,5.1666
|
| 52 |
+
290,20400,7.305
|
| 53 |
+
295,20800,5.3332
|
| 54 |
+
301,21200,5.8009
|
| 55 |
+
306,21600,6.3444
|
| 56 |
+
311,22000,5.38
|
| 57 |
+
316,22400,5.8886
|
| 58 |
+
321,22800,5.9808
|
| 59 |
+
326,23200,6.1518
|
| 60 |
+
331,23600,6.2596
|
| 61 |
+
335,24000,7.4699
|
| 62 |
+
340,24400,5.0459
|
| 63 |
+
344,24800,7.1979
|
| 64 |
+
348,25200,7.1906
|
| 65 |
+
354,25600,5.5289
|
| 66 |
+
358,26000,7.4249
|
| 67 |
+
362,26400,7.3209
|
| 68 |
+
366,26800,6.8339
|
| 69 |
+
371,27200,6.2896
|
| 70 |
+
375,27600,5.8839
|
| 71 |
+
382,28000,4.5678
|
| 72 |
+
387,28400,6.4948
|
| 73 |
+
391,28800,7.3334
|
| 74 |
+
395,29200,6.8561
|
| 75 |
+
400,29600,5.7921
|
| 76 |
+
405,30000,5.775
|
| 77 |
+
411,30400,5.2429
|
| 78 |
+
416,30800,6.3733
|
| 79 |
+
420,31200,6.9394
|
| 80 |
+
424,31600,7.4063
|
| 81 |
+
430,32000,5.6989
|
| 82 |
+
434,32400,6.6094
|
| 83 |
+
439,32800,6.4591
|
| 84 |
+
443,33200,7.5158
|
| 85 |
+
448,33600,5.1683
|
| 86 |
+
457,34000,3.2012
|
| 87 |
+
462,34400,6.4385
|
| 88 |
+
466,34800,7.6264
|
| 89 |
+
471,35200,6.8552
|
| 90 |
+
475,35600,6.6808
|
| 91 |
+
479,36000,7.5093
|
| 92 |
+
485,36400,6.0369
|
| 93 |
+
491,36800,5.9012
|
| 94 |
+
497,37200,4.7016
|
| 95 |
+
507,37600,3.1521
|
| 96 |
+
513,38000,5.874
|
| 97 |
+
517,38400,7.713
|
| 98 |
+
522,38800,6.4352
|
| 99 |
+
526,39200,7.6985
|
| 100 |
+
530,39600,7.8557
|
| 101 |
+
540,40000,3.9398
|
| 102 |
+
546,40400,5.5794
|
| 103 |
+
551,40800,6.7952
|
| 104 |
+
555,41200,7.4607
|
| 105 |
+
561,41600,6.2385
|
| 106 |
+
565,42000,6.9421
|
| 107 |
+
571,42400,5.5255
|
| 108 |
+
577,42800,7.0349
|
| 109 |
+
583,43200,7.038
|
| 110 |
+
587,43600,8.1308
|
| 111 |
+
592,44000,6.715
|
| 112 |
+
598,44400,6.6102
|
| 113 |
+
603,44800,5.7423
|
| 114 |
+
609,45200,5.7222
|
| 115 |
+
616,45600,4.6904
|
| 116 |
+
621,46000,6.6802
|
| 117 |
+
627,46400,5.1794
|
| 118 |
+
631,46800,7.8428
|
| 119 |
+
636,47200,6.6569
|
| 120 |
+
640,47600,6.0637
|
| 121 |
+
645,48000,6.6306
|
| 122 |
+
649,48400,7.8827
|
| 123 |
+
654,48800,7.2829
|
| 124 |
+
660,49200,5.3484
|
| 125 |
+
666,49600,4.965
|
| 126 |
+
671,50000,6.2847
|
| 127 |
+
675,50400,7.2723
|
| 128 |
+
681,50800,5.6506
|
| 129 |
+
689,51200,4.2316
|
| 130 |
+
693,51600,6.588
|
| 131 |
+
699,52000,5.8609
|
| 132 |
+
704,52400,6.7229
|
| 133 |
+
710,52800,5.2104
|
| 134 |
+
716,53200,5.5105
|
| 135 |
+
720,53600,6.7428
|
| 136 |
+
728,54000,4.2067
|
| 137 |
+
733,54400,6.1901
|
| 138 |
+
737,54800,6.1446
|
| 139 |
+
743,55200,6.4328
|
| 140 |
+
747,55600,7.5352
|
| 141 |
+
753,56000,4.6297
|
| 142 |
+
758,56400,6.7488
|
| 143 |
+
765,56800,4.4836
|
| 144 |
+
769,57200,7.332
|
| 145 |
+
773,57600,7.3174
|
| 146 |
+
778,58000,7.5865
|
| 147 |
+
784,58400,5.1354
|
| 148 |
+
788,58800,7.5499
|
| 149 |
+
792,59200,7.6182
|
| 150 |
+
802,59600,2.9436
|
| 151 |
+
807,60000,6.7174
|
| 152 |
+
812,60400,6.5712
|
| 153 |
+
820,60800,3.8267
|
| 154 |
+
827,61200,5.0311
|
| 155 |
+
831,61600,7.6775
|
| 156 |
+
835,62000,6.2429
|
| 157 |
+
842,62400,5.1519
|
| 158 |
+
847,62800,5.1346
|
| 159 |
+
858,63200,3.2845
|
| 160 |
+
862,63600,6.7612
|
| 161 |
+
868,64000,5.4555
|
| 162 |
+
874,64400,5.6836
|
| 163 |
+
879,64800,5.4058
|
| 164 |
+
883,65200,7.5245
|
| 165 |
+
887,65600,7.5208
|
| 166 |
+
892,66000,7.063
|
| 167 |
+
897,66400,6.6028
|
| 168 |
+
903,66800,6.9216
|
| 169 |
+
908,67200,7.874
|
| 170 |
+
912,67600,6.5846
|
| 171 |
+
918,68000,6.0015
|
| 172 |
+
923,68400,6.5042
|
| 173 |
+
927,68800,6.4518
|
| 174 |
+
932,69200,7.5216
|
| 175 |
+
937,69600,7.0083
|
| 176 |
+
942,70000,6.8853
|
| 177 |
+
948,70400,5.5392
|
| 178 |
+
954,70800,8.848
|
| 179 |
+
960,71200,11.4058
|
| 180 |
+
967,71600,7.5975
|
| 181 |
+
972,72000,8.9093
|
| 182 |
+
979,72400,9.266
|
| 183 |
+
985,72800,9.023
|
| 184 |
+
991,73200,11.5379
|
| 185 |
+
1000,73600,7.4839
|
| 186 |
+
1006,74000,10.8982
|
| 187 |
+
1011,74400,9.6794
|
| 188 |
+
1016,74800,11.5398
|
| 189 |
+
1022,75200,12.7577
|
| 190 |
+
1032,75600,7.5257
|
| 191 |
+
1040,76000,10.313
|
| 192 |
+
1052,76400,4.9592
|
| 193 |
+
1057,76800,9.929
|
| 194 |
+
1065,77200,9.0269
|
| 195 |
+
1074,77600,10.0283
|
| 196 |
+
1080,78000,9.1994
|
| 197 |
+
1084,78400,17.318
|
| 198 |
+
1090,78800,8.6919
|
| 199 |
+
1099,79200,8.3069
|
| 200 |
+
1104,79600,14.1304
|
| 201 |
+
1110,80000,13.6171
|
| 202 |
+
1114,80400,16.2281
|
| 203 |
+
1119,80800,13.7672
|
| 204 |
+
1127,81200,9.2344
|
| 205 |
+
1133,81600,12.9287
|
| 206 |
+
1139,82000,9.0991
|
| 207 |
+
1146,82400,10.4085
|
| 208 |
+
1154,82800,6.4297
|
| 209 |
+
1161,83200,11.2657
|
| 210 |
+
1170,83600,8.6181
|
| 211 |
+
1176,84000,10.1173
|
| 212 |
+
1182,84400,10.5116
|
| 213 |
+
1189,84800,7.5418
|
| 214 |
+
1197,85200,7.8979
|
| 215 |
+
1204,85600,10.4355
|
| 216 |
+
1214,86000,5.9039
|
| 217 |
+
1228,86400,4.1987
|
| 218 |
+
1238,86800,6.374
|
| 219 |
+
1246,87200,5.9424
|
| 220 |
+
1251,87600,15.9749
|
| 221 |
+
1257,88000,14.0111
|
| 222 |
+
1261,88400,19.8135
|
| 223 |
+
1270,88800,7.7016
|
| 224 |
+
1276,89200,10.2966
|
| 225 |
+
1281,89600,12.6069
|
| 226 |
+
1288,90000,10.6588
|
| 227 |
+
1293,90400,17.1633
|
| 228 |
+
1300,90800,9.8388
|
| 229 |
+
1308,91200,9.1061
|
| 230 |
+
1314,91600,10.2858
|
| 231 |
+
1319,92000,15.5991
|
| 232 |
+
1323,92400,19.9744
|
| 233 |
+
1329,92800,13.7349
|
| 234 |
+
1333,93200,16.2973
|
| 235 |
+
1340,93600,12.1433
|
| 236 |
+
1346,94000,15.6216
|
| 237 |
+
1350,94400,16.604
|
| 238 |
+
1356,94800,17.9473
|
| 239 |
+
1360,95200,22.7261
|
| 240 |
+
1367,95600,9.7798
|
| 241 |
+
1372,96000,18.8177
|
| 242 |
+
1376,96400,23.3835
|
| 243 |
+
1380,96800,22.7676
|
| 244 |
+
1385,97200,17.9196
|
| 245 |
+
1391,97600,11.0045
|
| 246 |
+
1398,98000,12.9032
|
| 247 |
+
1403,98400,19.9428
|
| 248 |
+
1411,98800,10.0649
|
| 249 |
+
1415,99200,23.4105
|
| 250 |
+
1421,99600,14.4659
|
| 251 |
+
1427,100000,12.6086
|
| 252 |
+
1434,100400,11.2016
|
| 253 |
+
1440,100800,16.4195
|
| 254 |
+
1445,101200,14.8528
|
| 255 |
+
1451,101600,13.3423
|
| 256 |
+
1457,102000,15.6651
|
| 257 |
+
1462,102400,19.3589
|
| 258 |
+
1467,102800,19.2814
|
| 259 |
+
1472,103200,17.4048
|
| 260 |
+
1479,103600,10.9156
|
| 261 |
+
1484,104000,19.1347
|
| 262 |
+
1489,104400,16.7974
|
| 263 |
+
1500,104800,7.7077
|
| 264 |
+
1508,105200,8.3873
|
| 265 |
+
1515,105600,11.8502
|
| 266 |
+
1522,106000,12.4617
|
| 267 |
+
1529,106400,10.997
|
| 268 |
+
1534,106800,18.1086
|
| 269 |
+
1538,107200,21.5753
|
| 270 |
+
1542,107600,18.1229
|
| 271 |
+
1548,108000,19.0807
|
| 272 |
+
1553,108400,19.9151
|
| 273 |
+
1557,108800,24.3347
|
| 274 |
+
1565,109200,11.5838
|
| 275 |
+
1571,109600,10.4892
|
| 276 |
+
1576,110000,18.4124
|
| 277 |
+
1583,110400,9.6659
|
| 278 |
+
1589,110800,15.3845
|
| 279 |
+
1594,111200,19.4332
|
| 280 |
+
1603,111600,9.1848
|
| 281 |
+
1608,112000,19.8579
|
| 282 |
+
1614,112400,14.6327
|
| 283 |
+
1620,112800,15.4716
|
| 284 |
+
1628,113200,7.6968
|
| 285 |
+
1633,113600,14.4689
|
| 286 |
+
1637,114000,19.6793
|
| 287 |
+
1642,114400,20.0721
|
| 288 |
+
1647,114800,15.0668
|
| 289 |
+
1652,115200,17.4454
|
| 290 |
+
1657,115600,19.6026
|
| 291 |
+
1663,116000,14.572
|
| 292 |
+
1669,116400,12.3857
|
| 293 |
+
1675,116800,15.0434
|
| 294 |
+
1679,117200,23.0521
|
| 295 |
+
1685,117600,15.9115
|
| 296 |
+
1691,118000,16.4641
|
| 297 |
+
1695,118400,18.5005
|
| 298 |
+
1701,118800,13.3055
|
| 299 |
+
1705,119200,20.5855
|
| 300 |
+
1711,119600,15.2568
|
| 301 |
+
1716,120000,17.1653
|
| 302 |
+
1721,120400,16.2964
|
| 303 |
+
1726,120800,17.3911
|
| 304 |
+
1731,121200,18.9176
|
| 305 |
+
1735,121600,20.2643
|
| 306 |
+
1741,122000,21.9711
|
| 307 |
+
1748,122400,15.1474
|
| 308 |
+
1752,122800,21.0002
|
| 309 |
+
1756,123200,27.017
|
| 310 |
+
1761,123600,21.0847
|
| 311 |
+
1768,124000,16.9835
|
| 312 |
+
1774,124400,16.988
|
| 313 |
+
1780,124800,17.102
|
| 314 |
+
1784,125200,27.0598
|
| 315 |
+
1790,125600,18.8929
|
| 316 |
+
1795,126000,18.4346
|
| 317 |
+
1799,126400,27.4704
|
| 318 |
+
1803,126800,20.451
|
| 319 |
+
1812,127200,12.5548
|
| 320 |
+
1816,127600,24.9355
|
| 321 |
+
1821,128000,14.002
|
| 322 |
+
1826,128400,16.9177
|
| 323 |
+
1834,128800,9.9734
|
| 324 |
+
1847,129200,4.5403
|
| 325 |
+
1853,129600,10.6147
|
| 326 |
+
1865,130000,6.26
|
| 327 |
+
1872,130400,12.1423
|
| 328 |
+
1877,130800,16.8818
|
| 329 |
+
1882,131200,14.1034
|
| 330 |
+
1887,131600,19.5902
|
| 331 |
+
1894,132000,12.8515
|
| 332 |
+
1899,132400,16.2843
|
| 333 |
+
1904,132800,15.5745
|
| 334 |
+
1914,133200,8.3905
|
| 335 |
+
1922,133600,13.0687
|
| 336 |
+
1929,134000,13.9548
|
| 337 |
+
1935,134400,11.7435
|
| 338 |
+
1944,134800,12.2644
|
| 339 |
+
1949,135200,18.9015
|
| 340 |
+
1957,135600,10.5449
|
| 341 |
+
1968,136000,6.614
|
| 342 |
+
1979,136400,7.9006
|
| 343 |
+
1988,136800,8.6919
|
| 344 |
+
1993,137200,19.6558
|
| 345 |
+
1999,137600,13.7705
|
| 346 |
+
2004,138000,19.7431
|
| 347 |
+
2010,138400,16.1015
|
| 348 |
+
2018,138800,7.969
|
| 349 |
+
2024,139200,10.7627
|
| 350 |
+
2033,139600,9.8075
|
| 351 |
+
2038,140000,15.1353
|
| 352 |
+
2044,140400,14.33
|
| 353 |
+
2051,140800,13.0915
|
| 354 |
+
2059,141200,11.0496
|
| 355 |
+
2067,141600,8.5425
|
| 356 |
+
2074,142000,12.6574
|
| 357 |
+
2079,142400,18.6865
|
| 358 |
+
2083,142800,18.9614
|
| 359 |
+
2091,143200,7.6956
|
| 360 |
+
2097,143600,19.3319
|
| 361 |
+
2106,144000,6.1586
|
| 362 |
+
2112,144400,11.7879
|
| 363 |
+
2117,144800,14.6574
|
| 364 |
+
2124,145200,9.78
|
| 365 |
+
2131,145600,8.172
|
| 366 |
+
2138,146000,9.3161
|
| 367 |
+
2145,146400,10.1464
|
| 368 |
+
2151,146800,13.3546
|
| 369 |
+
2158,147200,10.2643
|
| 370 |
+
2162,147600,17.7297
|
| 371 |
+
2167,148000,12.2066
|
| 372 |
+
2174,148400,11.723
|
| 373 |
+
2181,148800,12.61
|
| 374 |
+
2185,149200,20.9512
|
| 375 |
+
2192,149600,9.257
|
| 376 |
+
2200,150000,13.0471
|
| 377 |
+
2206,150400,10.6689
|
| 378 |
+
2212,150800,16.0447
|
| 379 |
+
2219,151200,13.6559
|
| 380 |
+
2225,151600,13.2487
|
| 381 |
+
2235,152000,7.2764
|
| 382 |
+
2242,152400,11.6686
|
| 383 |
+
2248,152800,12.3615
|
| 384 |
+
2255,153200,13.5621
|
| 385 |
+
2263,153600,9.6251
|
| 386 |
+
2269,154000,9.0672
|
| 387 |
+
2276,154400,13.0372
|
| 388 |
+
2281,154800,16.5969
|
| 389 |
+
2286,155200,18.0225
|
| 390 |
+
2292,155600,14.2052
|
| 391 |
+
2298,156000,11.6988
|
| 392 |
+
2304,156400,9.5336
|
| 393 |
+
2312,156800,8.0191
|
| 394 |
+
2324,157200,5.3825
|
| 395 |
+
2330,157600,8.1571
|
| 396 |
+
2337,158000,10.3493
|
| 397 |
+
2344,158400,10.4621
|
| 398 |
+
2350,158800,10.5959
|
| 399 |
+
2356,159200,7.2691
|
| 400 |
+
2364,159600,6.3992
|
| 401 |
+
2372,160000,7.9295
|
| 402 |
+
2377,160400,7.4555
|
| 403 |
+
2384,160800,7.9996
|
| 404 |
+
2389,161200,13.726
|
| 405 |
+
2395,161600,7.7046
|
| 406 |
+
2399,162000,16.8889
|
| 407 |
+
2410,162400,5.528
|
| 408 |
+
2422,162800,4.9575
|
| 409 |
+
2429,163200,8.7608
|
| 410 |
+
2438,163600,7.2575
|
| 411 |
+
2446,164000,7.0835
|
| 412 |
+
2452,164400,10.8246
|
| 413 |
+
2459,164800,7.831
|
| 414 |
+
2467,165200,6.133
|
| 415 |
+
2476,165600,7.8923
|
| 416 |
+
2483,166000,8.0733
|
| 417 |
+
2489,166400,11.0754
|
| 418 |
+
2493,166800,20.1624
|
| 419 |
+
2500,167200,12.4293
|
| 420 |
+
2504,167600,15.0355
|
| 421 |
+
2510,168000,13.0286
|
| 422 |
+
2518,168400,7.4877
|
| 423 |
+
2523,168800,12.2261
|
| 424 |
+
2531,169200,7.3993
|
| 425 |
+
2536,169600,8.9622
|
| 426 |
+
2542,170000,10.8549
|
| 427 |
+
2547,170400,11.6566
|
| 428 |
+
2555,170800,8.8997
|
| 429 |
+
2562,171200,8.861
|
| 430 |
+
2568,171600,13.2091
|
| 431 |
+
2574,172000,10.3659
|
| 432 |
+
2580,172400,11.7853
|
| 433 |
+
2585,172800,14.792
|
| 434 |
+
2592,173200,10.6782
|
| 435 |
+
2602,173600,6.9546
|
| 436 |
+
2609,174000,9.9301
|
| 437 |
+
2614,174400,17.7772
|
| 438 |
+
2623,174800,6.2142
|
| 439 |
+
2630,175200,12.9292
|
| 440 |
+
2637,175600,10.1204
|
| 441 |
+
2645,176000,9.0597
|
| 442 |
+
2651,176400,15.3755
|
| 443 |
+
2657,176800,13.128
|
| 444 |
+
2662,177200,19.3868
|
| 445 |
+
2670,177600,10.7437
|
| 446 |
+
2679,178000,6.904
|
| 447 |
+
2686,178400,9.2907
|
| 448 |
+
2696,178800,6.4837
|
| 449 |
+
2705,179200,8.2248
|
| 450 |
+
2711,179600,12.1069
|
| 451 |
+
2720,180000,8.3973
|
| 452 |
+
2724,180400,22.0167
|
| 453 |
+
2729,180800,15.3768
|
| 454 |
+
2734,181200,15.6707
|
| 455 |
+
2738,181600,21.4503
|
| 456 |
+
2744,182000,14.5199
|
| 457 |
+
2750,182400,16.9138
|
| 458 |
+
2756,182800,12.2078
|
| 459 |
+
2762,183200,15.9948
|
| 460 |
+
2769,183600,12.4933
|
| 461 |
+
2775,184000,14.7625
|
| 462 |
+
2781,184400,16.4597
|
| 463 |
+
2786,184800,12.6036
|
| 464 |
+
2793,185200,11.1748
|
| 465 |
+
2799,185600,13.5976
|
| 466 |
+
2805,186000,13.175
|
| 467 |
+
2811,186400,14.25
|
| 468 |
+
2816,186800,22.0337
|
| 469 |
+
2822,187200,17.4297
|
| 470 |
+
2827,187600,17.1395
|
| 471 |
+
2832,188000,18.1786
|
| 472 |
+
2837,188400,16.0257
|
| 473 |
+
2844,188800,11.8928
|
| 474 |
+
2850,189200,16.6968
|
| 475 |
+
2855,189600,19.1383
|
| 476 |
+
2860,190000,21.8792
|
| 477 |
+
2864,190400,27.2875
|
| 478 |
+
2868,190800,25.2937
|
| 479 |
+
2873,191200,20.9754
|
| 480 |
+
2882,191600,12.5236
|
| 481 |
+
2886,192000,26.9158
|
| 482 |
+
2896,192400,9.8619
|
| 483 |
+
2912,192800,5.0885
|
| 484 |
+
2923,193200,4.6341
|
| 485 |
+
2930,193600,13.8767
|
| 486 |
+
2937,194000,11.1766
|
| 487 |
+
2944,194400,14.2145
|
| 488 |
+
2952,194800,7.6092
|
| 489 |
+
2961,195200,9.0705
|
| 490 |
+
2968,195600,10.5332
|
| 491 |
+
2973,196000,13.6747
|
| 492 |
+
2979,196400,17.6262
|
| 493 |
+
2986,196800,12.0028
|
| 494 |
+
2997,197200,5.1463
|
| 495 |
+
3008,197600,9.2429
|
| 496 |
+
3015,198000,11.3805
|
| 497 |
+
3022,198400,7.9289
|
| 498 |
+
3029,198800,12.3534
|
| 499 |
+
3033,199200,26.1072
|
| 500 |
+
3041,199600,13.3515
|
| 501 |
+
3048,200000,11.5205
|
code/Lake application/plot_figure.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Mon Mar 6 16:30:32 2023
|
| 4 |
+
|
| 5 |
+
@author: leona
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import seaborn as sns
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _load_smoothed_runs(log_dir, algo_name, env_name, rolling_window):
    """Load every run log of one algorithm and smooth its rewards.

    Log files are expected to be named ``<algo_name>_<env_name>_log_<k>.csv``
    and to contain at least the columns ``timestep`` and ``reward``.

    Args:
        log_dir: Directory holding the CSV logs of this algorithm.
        algo_name: File-name prefix of the algorithm (e.g. ``'PPO'``).
        env_name: Environment name embedded in the log file names.
        rolling_window: Size of the triangular rolling window.

    Returns:
        A single DataFrame with all runs stacked, carrying an extra
        ``reward_mean`` column with the smoothed reward.

    Raises:
        FileNotFoundError: If ``log_dir`` does not exist or holds no
            matching log file.
    """
    prefix = f"{algo_name}_{env_name}_log_"
    # Select logs by name instead of assuming "all files minus one": the
    # directory may also contain checkpoints or other artefacts.
    log_names = sorted(
        name for name in os.listdir(log_dir)
        if name.startswith(prefix) and name.endswith(".csv")
    )
    if not log_names:
        raise FileNotFoundError(
            f"no '{prefix}*.csv' log files found in {log_dir}"
        )

    runs = []
    for name in log_names:
        log_path = os.path.join(log_dir, name)
        print("loading data from : " + log_path)
        run = pd.read_csv(log_path)
        print("data shape : ", run.shape)

        # Smooth each run on its own so that a window never mixes the end of
        # one run with the beginning of the next one.
        run['reward_mean'] = run['reward'].rolling(
            window=rolling_window, win_type='triang', min_periods=1
        ).mean()
        runs.append(run.dropna())
        print("--------------------------------------------------------------------------------------------")

    return pd.concat(runs, ignore_index=True)


def _plot_algorithm(ax, runs, label, rolling_window):
    """Draw the across-run mean reward of one algorithm with a +/-1 std band."""
    per_timestep = runs.groupby('timestep')['reward_mean']
    # Skip the first `rolling_window` timesteps: their windows are only
    # partially filled, so the smoothed values there are not comparable.
    reward_mean = per_timestep.mean().iloc[rolling_window:]
    reward_std = per_timestep.std().iloc[rolling_window:]

    sns.lineplot(x=reward_mean.index, y=reward_mean, ax=ax, label=label)
    ax.fill_between(reward_mean.index,
                    reward_mean - reward_std,
                    reward_mean + reward_std,
                    alpha=0.2)


def save_graph(experiment_name='frozen_lake', rolling_window=10, base_dir=None):
    """Plot PPO against PDPPO training rewards and save the figure as a PDF.

    For each algorithm, all run logs found in
    ``<base_dir>/logs/<experiment_name>_<ALGO>/`` are loaded, smoothed with a
    triangular rolling mean and averaged per timestep; the mean curve is drawn
    with a +/-1 standard deviation band. The figure is written to
    ``<base_dir>/results/<experiment_name>_PPO/<experiment_name>.pdf``.

    Args:
        experiment_name: Name of the experiment/environment; it selects the
            log directories and names the output file.
        rolling_window: Window size of the rolling mean; the same number of
            leading timesteps is left out of the plot.
        base_dir: Directory that contains ``logs`` and receives ``results``.
            Defaults to the directory of this script, or to the current
            working directory when ``__file__`` is not defined (for example
            in an interactive session).

    Raises:
        FileNotFoundError: If a log directory is missing or contains no log
            file for the requested experiment.
    """
    print("============================================================================================")

    env_name = experiment_name

    if base_dir is None:
        try:
            base_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ does not exist when the code is run cell by cell.
            base_dir = os.getcwd()

    # Directory for saving figures (kept under the historical '<env>_PPO'
    # name even though the figure shows both algorithms).
    figures_dir = os.path.join(base_dir, 'results', env_name + '_PPO')
    os.makedirs(figures_dir, exist_ok=True)

    logs_root = os.path.join(base_dir, 'logs')

    # Set up plot using seaborn
    sns.set_style("whitegrid")
    fig, ax = plt.subplots(figsize=(15, 4))

    for algo_name in ('PPO', 'PDPPO'):
        log_dir = os.path.join(logs_root, env_name + '_' + algo_name)
        runs = _load_smoothed_runs(log_dir, algo_name, env_name, rolling_window)
        _plot_algorithm(ax, runs, algo_name, rolling_window)

    ax.legend()
    ax.grid(color='gray', linestyle='-', linewidth=1, alpha=0.2)
    ax.set_xlabel("Timesteps", fontsize=12)
    ax.set_ylabel("Rewards", fontsize=12)

    print("============================================================================================")
    fig.savefig(os.path.join(figures_dir, f'{experiment_name}.pdf'), dpi=300, bbox_inches='tight')
    print("figure saved at : ", figures_dir)
    print("============================================================================================")
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# Script entry point: build and save the comparison figure only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    save_graph()
|
| 157 |
+
|