leokana committed on
Commit
4b36c77
·
1 Parent(s): febb285

include modifications to test dual critic ppo

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. code/Lot-sizing/agents/PDPPO.py +0 -2
  2. code/Lot-sizing/agents/PDPPOAgent.py +1 -1
  3. code/Lot-sizing/agents/PDPPOAgent_one_critic.py +14 -14
  4. code/Lot-sizing/agents/PDPPO_v0.py +0 -328
  5. code/Lot-sizing/agents/{PDPPO one critic.py → PDPPOonecritic.py} +1 -1
  6. code/Lot-sizing/agents/PPO.py +1 -3
  7. code/Lot-sizing/agents/PPOAgent.py +1 -3
  8. code/Lot-sizing/agents/PPOAgent_two_critics.py +385 -0
  9. code/Lot-sizing/agents/{PDPPO_one_critic.py → PPOtwocritics.py} +37 -67
  10. code/Lot-sizing/agents/__init__.py +8 -27
  11. code/Lot-sizing/agents/__pycache__/PDPPO.cpython-38.pyc +0 -0
  12. code/Lot-sizing/agents/__pycache__/PDPPOAgent.cpython-38.pyc +0 -0
  13. code/Lot-sizing/agents/__pycache__/PDPPOAgent_one_critic.cpython-38.pyc +0 -0
  14. code/Lot-sizing/agents/__pycache__/PDPPO_one_critic.cpython-38.pyc +0 -0
  15. code/Lot-sizing/agents/__pycache__/PDPPOonecritic.cpython-38.pyc +0 -0
  16. code/Lot-sizing/agents/__pycache__/PPO.cpython-38.pyc +0 -0
  17. code/Lot-sizing/agents/__pycache__/PPOAgent.cpython-38.pyc +0 -0
  18. code/Lot-sizing/agents/__pycache__/PPOAgent_two_critics.cpython-38.pyc +0 -0
  19. code/Lot-sizing/agents/__pycache__/PPOtwocritics.cpython-38.pyc +0 -0
  20. code/Lot-sizing/agents/__pycache__/__init__.cpython-38.pyc +0 -0
  21. code/Lot-sizing/agents/__pycache__/perfectInfoAgent.cpython-38.pyc +0 -0
  22. code/Lot-sizing/agents/__pycache__/stableBaselineAgents.cpython-38.pyc +0 -0
  23. code/Lot-sizing/agents/perfectInfoAgent.py +18 -0
  24. code/Lot-sizing/agents/stableBaselineAgents.py +320 -0
  25. code/Lot-sizing/envs/__pycache__/__init__.cpython-38.pyc +0 -0
  26. code/Lot-sizing/envs/__pycache__/simplePlant.cpython-38.pyc +0 -0
  27. code/Lot-sizing/envs/__pycache__/singleSequenceDependentMachinePlant.cpython-38.pyc +0 -0
  28. code/Lot-sizing/experiments.py +77 -25
  29. code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_0_0.pth +3 -0
  30. code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_log_0.csv +126 -0
  31. code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_log_2.csv +126 -0
  32. code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_log_3.csv +126 -0
  33. code/Lot-sizing/logs/15items_5machines_i100_PPO/PPO_15items_5machines_i100_0_0.pth +3 -0
  34. code/Lot-sizing/logs/15items_5machines_i100_PPO/PPO_15items_5machines_i100_log_0.csv +251 -0
  35. code/Lot-sizing/logs/15items_5machines_i100_PPO/PPO_15items_5machines_i100_log_2.csv +216 -0
  36. code/Lot-sizing/logs/best_A2C_15items_5machines_i100_0/best_model.zip +3 -0
  37. code/Lot-sizing/logs/evaluations.npz +0 -0
  38. code/Lot-sizing/models/__pycache__/__init__.cpython-38.pyc +0 -0
  39. code/Lot-sizing/models/__pycache__/multistageOptimization.cpython-38.pyc +0 -0
  40. code/Lot-sizing/models/__pycache__/optimizationProblemInstance.cpython-38.pyc +0 -0
  41. code/Lot-sizing/models/__pycache__/perfectInfoOptimization.cpython-38.pyc +0 -0
  42. code/Lot-sizing/results/PDPPO_15items_5machines_i100_actions_test.npy +3 -0
  43. code/Lot-sizing/results/PDPPO_15items_5machines_i100_costs_test.npy +3 -0
  44. code/Lot-sizing/results/PDPPO_15items_5machines_i100_demands_test.npy +3 -0
  45. code/Lot-sizing/results/PDPPO_15items_5machines_i100_holding_costs_test.npy +3 -0
  46. code/Lot-sizing/results/PDPPO_15items_5machines_i100_lost_sales_test.npy +3 -0
  47. code/Lot-sizing/results/PDPPO_15items_5machines_i100_observations_test.npy +3 -0
  48. code/Lot-sizing/results/PDPPO_15items_5machines_i100_setup_costs_test.npy +3 -0
  49. code/Lot-sizing/results/PPO_15items_5machines_i100_actions_test.npy +3 -0
  50. code/Lot-sizing/results/PPO_15items_5machines_i100_costs_test.npy +3 -0
code/Lot-sizing/agents/PDPPO.py CHANGED
@@ -309,8 +309,6 @@ class PDPPO:
309
  # final loss of clipped objective PDPPO
310
  loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(torch.min(state_values,state_values_post.squeeze()), rewards) - 0.012 * dist_entropy
311
 
312
- loss_numpy = loss.detach().cpu().numpy()
313
-
314
  # take gradient step
315
  self.optimizer.zero_grad()
316
  loss.mean().backward()
 
309
  # final loss of clipped objective PDPPO
310
  loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(torch.min(state_values,state_values_post.squeeze()), rewards) - 0.012 * dist_entropy
311
 
 
 
312
  # take gradient step
313
  self.optimizer.zero_grad()
314
  loss.mean().backward()
code/Lot-sizing/agents/PDPPOAgent.py CHANGED
@@ -14,7 +14,7 @@ BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
  AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
  sys.path.append(AGENTS_DIR)
16
  from agents.PDPPO import PDPPO
17
- from envs import *
18
  import copy
19
 
20
 
 
14
  AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
  sys.path.append(AGENTS_DIR)
16
  from agents.PDPPO import PDPPO
17
+ from envs import SimplePlant
18
  import copy
19
 
20
 
code/Lot-sizing/agents/PDPPOAgent_one_critic.py CHANGED
@@ -13,8 +13,8 @@ import matplotlib.patches as mpatches # Provides a way of adding a colored patch
13
  BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
  AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
  sys.path.append(AGENTS_DIR)
16
- from agents.PDPPO import PDPPO
17
- from envs import *
18
  import copy
19
 
20
 
@@ -101,7 +101,7 @@ class SimplePlantSB(SimplePlant):
101
  return obs
102
 
103
 
104
- class PDPPOAgent():
105
  def __init__(self, env: SimplePlant, settings: dict):
106
  self.env = SimplePlantSB(env.settings, env.stoch_model)
107
  self.last_inventory = env.inventory_level
@@ -142,7 +142,7 @@ class PDPPOAgent():
142
 
143
  ## Note : print/log frequencies should be > than self.max_ep_len
144
 
145
- ################ PDPPO hyperparameters ################
146
  self.update_timestep = self.max_ep_len * 4 # update policy every n timesteps
147
  self.K_epochs = 60 # update policy for K epochs in one PDPPO update
148
 
@@ -169,7 +169,7 @@ class PDPPOAgent():
169
  else:
170
  self.action_dim = self.env.action_space
171
 
172
- self.pdppo_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space,self.tau, self.action_std)
173
 
174
 
175
  ################################### Training ###################################
@@ -187,7 +187,7 @@ class PDPPOAgent():
187
  if not os.path.exists(log_dir):
188
  os.makedirs(log_dir)
189
 
190
- log_dir = log_dir + '/' + self.experiment_name + '_PDPPO/'
191
  if not os.path.exists(log_dir):
192
  os.makedirs(log_dir)
193
 
@@ -197,7 +197,7 @@ class PDPPOAgent():
197
  run_num = len(current_num_files)
198
 
199
  #### create new log file for each run
200
- log_f_name = log_dir + '/PDPPO_' + self.experiment_name + "_log_" + str(run_num) + ".csv"
201
 
202
  print("current logging run number for " + self.experiment_name + " : ", run_num)
203
  print("logging at : " + log_f_name)
@@ -215,7 +215,7 @@ class PDPPOAgent():
215
  os.makedirs(directory)
216
 
217
 
218
- checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
219
  print("save checkpoint path : " + checkpoint_path)
220
  #####################################################
221
 
@@ -241,9 +241,9 @@ class PDPPOAgent():
241
  else:
242
  print("Initializing a discrete action space policy")
243
  print("--------------------------------------------------------------------------------------------")
244
- print("PDPPO update frequency : " + str(self.update_timestep) + " timesteps")
245
- print("PDPPO K epochs : ", self.K_epochs)
246
- print("PDPPO epsilon clip : ", self.eps_clip)
247
  print("discount factor (self.gamma) : ", self.gamma)
248
  print("--------------------------------------------------------------------------------------------")
249
  print("optimizer learning rate actor : ", self.lr_actor)
@@ -259,7 +259,7 @@ class PDPPOAgent():
259
  ################# training procedure ################
260
 
261
  # initialize a PDPPO agent
262
- self.PDPPO_agent = PDPPO(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space, self.action_std)
263
 
264
  # track total training time
265
  start_time = datetime.now().replace(microsecond=0)
@@ -388,7 +388,7 @@ class PDPPOAgent():
388
  def load_agent(self,path):
389
  #directory = "PDPPO_preTrained" + '/' + env_name + '/'
390
  directory = self.LOG_DIR
391
- directory = directory + '/' + self.experiment_name + '_PDPPO' + '/'
392
- checkpoint_path = directory + "PDPPO_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
393
  print("loading network from : " + checkpoint_path)
394
  self.pdppo_agent.load(checkpoint_path)
 
13
  BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
  AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
  sys.path.append(AGENTS_DIR)
16
+ from agents.PDPPOonecritic import PDPPOonecritic
17
+ from envs import SimplePlant
18
  import copy
19
 
20
 
 
101
  return obs
102
 
103
 
104
+ class PDPPOAgent_one_critic():
105
  def __init__(self, env: SimplePlant, settings: dict):
106
  self.env = SimplePlantSB(env.settings, env.stoch_model)
107
  self.last_inventory = env.inventory_level
 
142
 
143
  ## Note : print/log frequencies should be > than self.max_ep_len
144
 
145
+ ################ PDPPO_one_critic hyperparameters ################
146
  self.update_timestep = self.max_ep_len * 4 # update policy every n timesteps
147
  self.K_epochs = 60 # update policy for K epochs in one PDPPO update
148
 
 
169
  else:
170
  self.action_dim = self.env.action_space
171
 
172
+ self.pdppo_agent = PDPPO_one_critic(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space,self.tau, self.action_std)
173
 
174
 
175
  ################################### Training ###################################
 
187
  if not os.path.exists(log_dir):
188
  os.makedirs(log_dir)
189
 
190
+ log_dir = log_dir + '/' + self.experiment_name + '_PDPPO_one_critic/'
191
  if not os.path.exists(log_dir):
192
  os.makedirs(log_dir)
193
 
 
197
  run_num = len(current_num_files)
198
 
199
  #### create new log file for each run
200
+ log_f_name = log_dir + '/PDPPO_one_critic_' + self.experiment_name + "_log_" + str(run_num) + ".csv"
201
 
202
  print("current logging run number for " + self.experiment_name + " : ", run_num)
203
  print("logging at : " + log_f_name)
 
215
  os.makedirs(directory)
216
 
217
 
218
+ checkpoint_path = directory + "PDPPO_one_critic_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
219
  print("save checkpoint path : " + checkpoint_path)
220
  #####################################################
221
 
 
241
  else:
242
  print("Initializing a discrete action space policy")
243
  print("--------------------------------------------------------------------------------------------")
244
+ print("PDPPO_one_critic update frequency : " + str(self.update_timestep) + " timesteps")
245
+ print("PDPPO_one_critic K epochs : ", self.K_epochs)
246
+ print("PDPPO_one_critic epsilon clip : ", self.eps_clip)
247
  print("discount factor (self.gamma) : ", self.gamma)
248
  print("--------------------------------------------------------------------------------------------")
249
  print("optimizer learning rate actor : ", self.lr_actor)
 
259
  ################# training procedure ################
260
 
261
  # initialize a PDPPO agent
262
+ self.PDPPO_agent = PDPPO_one_critic(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, copy.copy(self.env), self.has_continuous_action_space, self.action_std)
263
 
264
  # track total training time
265
  start_time = datetime.now().replace(microsecond=0)
 
388
  def load_agent(self,path):
389
  #directory = "PDPPO_preTrained" + '/' + env_name + '/'
390
  directory = self.LOG_DIR
391
+ directory = directory + '/' + self.experiment_name + '_PDPPO_one_critic' + '/'
392
+ checkpoint_path = directory + "PDPPO_one_critic_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
393
  print("loading network from : " + checkpoint_path)
394
  self.pdppo_agent.load(checkpoint_path)
code/Lot-sizing/agents/PDPPO_v0.py DELETED
@@ -1,328 +0,0 @@
1
- import os
2
- import copy
3
- import numpy as np
4
- import torch
5
- import torch.nn as nn
6
- import torch.optim as optim
7
- import torch.nn.functional as F
8
- from torch.distributions import Categorical
9
- from envs import *
10
- import gym
11
-
12
-
13
-
14
- class SimplePlantSB(SimplePlant):
15
- def __init__(self, settings, stoch_model):
16
- super().__init__(settings, stoch_model)
17
- try:self.dict_obs = settings['dict_obs']
18
- except:self.dict_obs = False
19
- self.last_inventory = copy.copy(self.inventory_level)
20
- self.action_space = gym.spaces.MultiDiscrete(
21
- [self.n_items+1] * self.n_machines
22
- )
23
-
24
- if self.dict_obs:
25
- self.observation_space = gym.spaces.Dict({
26
- 'inventory_level': gym.spaces.Box(low = np.zeros(self.n_items),high = np.ones(self.n_items)*(settings['max_inventory_level'][0]+1)*self.n_items),
27
- 'machine_setup': gym.spaces.MultiDiscrete([self.n_items+1] * self.n_machines)
28
- })
29
- else:
30
- self.observation_space = gym.spaces.Box(
31
- low=np.zeros(self.n_items+self.n_machines),# high for the inventory level
32
- high=np.concatenate(
33
- [
34
- np.array(self.max_inventory_level),
35
- np.ones(self.n_machines) * (self.n_items+1), #high for the machine setups
36
- ]),
37
- dtype=np.int32
38
- )
39
-
40
- def step(self, action):
41
- """
42
- Step method: Execute one time step within the environment
43
-
44
- Parameters
45
- ----------
46
- action : action given by the agent
47
-
48
- Returns
49
- -------
50
- obs : Observation of the state give the method _next_observation
51
- reward : Cost given by the _reward method
52
- done : returns True or False given by the _done method
53
- dict : possible information for control to environment monitoring
54
-
55
- """
56
- self.last_inventory = copy.copy(self.inventory_level)
57
-
58
- self.total_cost = self._take_action(action, self.machine_setup, self.inventory_level, self.demand)
59
-
60
- # self.total_cost['setup_costs'] = 0
61
- # self.total_cost['holding_costs'] = 0
62
-
63
- reward = -sum([ele for key, ele in self.total_cost.items()])
64
- #reward = -self.total_cost['lost_sales']
65
-
66
- #reward = np.abs(action)
67
-
68
- self.current_step += 1
69
- done = self.current_step == self.T
70
- obs = self._next_observation()
71
-
72
- return obs, reward, done, self.total_cost
73
-
74
- def _next_observation(self):
75
- """
76
- Returns the next demand
77
- """
78
- obs = SimplePlant._next_observation(self)
79
- #obs['last_inventory_level'] = copy.copy(self.last_inventory)
80
- if isinstance(obs, dict):
81
- if not self.dict_obs:
82
- obs = np.concatenate(
83
- (
84
- obs['inventory_level'], # n_items size
85
- obs['machine_setup'], # n_machine size
86
- #obs['last_inventory_level']# n_items size
87
- )
88
- )
89
- else:
90
- if self.dict_obs:
91
- raise('Change dict_obst to False')
92
- return obs
93
-
94
- # Define the policy network
95
- class Policy(nn.Module):
96
- def __init__(self, input_size, output_shape):
97
- super(Policy, self).__init__()
98
- self.fc1 = nn.Linear(input_size, 128)
99
- self.fc_list = nn.ModuleList([nn.Linear(128, output_shape[0]) for list(output_shape)[1] in range(0,output_shape[1])])
100
-
101
- def forward(self, x):
102
- x = F.relu(self.fc1(x)).requires_grad_()
103
- outputs = [F.softmax(fc(x), dim=1)for fc in self.fc_list]
104
- return outputs
105
-
106
- # Define the value network for deterministic components
107
- class Value(nn.Module):
108
- def __init__(self,input_size,output_size):
109
- super(Value, self).__init__()
110
- self.fc1 = nn.Linear(input_size, 128)
111
- self.fc2 = nn.Linear(128, output_size)
112
-
113
- def forward(self, x):
114
- x = F.relu(self.fc1(x)).requires_grad_()
115
- x = self.fc2(x)
116
- return x
117
-
118
- # Define the value network for stochastic components
119
- class ValueStochastic(nn.Module):
120
- def __init__(self,input_size,output_size):
121
- super(ValueStochastic, self).__init__()
122
- self.fc1 = nn.Linear(input_size, 128)
123
- self.fc2 = nn.Linear(128, output_size)
124
-
125
- def forward(self, x):
126
- x = F.relu(self.fc1(x)).requires_grad_()
127
- x = F.softmax(self.fc2(x), dim=1)
128
- return x
129
-
130
- # Define the PPO agent
131
- class PDPPO:
132
- def __init__(self, env: SimplePlant, settings: dict):
133
-
134
- self.env = SimplePlantSB(env.settings, env.stoch_model)
135
- self.last_inventory = env.inventory_level
136
- self.experiment_name = settings['experiment_name']
137
- try:self.dict_obs = settings['dict_obs']
138
- except:self.dict_obs = False
139
-
140
- self.POSSIBLE_STATES = self.env.n_items + 1
141
- self.env.cost_to_reward = True
142
- self.epsilon = 0
143
-
144
- BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
145
- # Use the logs file in the root path of the main.
146
- self.LOG_DIR = os.path.join(BASE_DIR,'logs')
147
-
148
-
149
- if self.dict_obs == False:
150
- input_size = self.env.observation_space.shape[0]
151
- output_size_policy = (self.env.n_items+1, self.env.action_space.shape[0]) # we add 1 for the idle state
152
- output_size_value = self.env.action_space.shape[0]
153
- self.policy = Policy(input_size,output_size_policy)
154
- self.value = Value(input_size,output_size_value)
155
- self.value_post = ValueStochastic(input_size,output_size_value)
156
- self.optimizer_policy = optim.Adam(self.policy.parameters(), lr=1e-3)
157
- self.optimizer_value = optim.Adam(self.value.parameters(), lr=1e-3)
158
- self.optimizer_value_post = optim.Adam(self.value_post.parameters(), lr=1e-3)
159
- self.eps_clip = 0.2
160
- self.gamma = 0.99
161
- self.lmbda = 0.95
162
-
163
- def get_post_state(self, action, machine_setup, inventory_level):
164
- setup_loss = np.zeros(self.env.n_machines, dtype=int)
165
- setup_costs = np.zeros(self.env.n_machines)
166
- # if we are just changing the setup, we use the setup cost matrix with the corresponding position given by the actual setup and the new setup
167
- for m in range(self.env.n_machines):
168
- if action[m] != 0: # if the machine is not iddle
169
- # 1. IF NEEDED CHANGE SETUP
170
- if machine_setup[m] != action[m] and action[m] != 0:
171
- setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
172
- setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
173
- machine_setup[m] = action[m]
174
- # 2. PRODUCTION
175
- production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
176
- inventory_level[action[m] - 1] += production
177
- else:
178
- machine_setup[m] = 0
179
- # return the new machine_setup_inventory_level and the setup_cost
180
- return machine_setup, inventory_level, setup_costs
181
-
182
- def get_action(self, state):
183
- state = torch.from_numpy(state).float().unsqueeze(0)
184
- probs = self.policy(state)
185
- probs_concat = torch.stack(probs, dim=1)
186
- m = Categorical(probs_concat)
187
- action = m.sample()
188
- value = self.value(state)
189
- machine_setup, inventory_level, setup_cost = self.get_post_state(action.numpy()[0], state[0][self.env.n_items:self.env.n_items+self.env.n_machines].numpy(), state[0][0:self.env.n_items].numpy())
190
- value_post = self.value_post(state)
191
-
192
- return action, m.log_prob(action), probs_concat, value, value_post
193
-
194
-
195
- def update(self, rewards, rewards_pre_state, rewards_post_state, states, post_states, actions, probs, next_states):
196
- # Update deterministic value function
197
- for epoch in range(10):
198
- for i in range(len(actions)):
199
- state = torch.from_numpy(states[i]).float().unsqueeze(0)
200
- value = self.value(state)
201
- next_state = torch.from_numpy(next_states[i]).float().unsqueeze(0)
202
- next_value = self.value(next_state)
203
- target = rewards_pre_state[i] + self.gamma * next_value
204
- advantage = target - value
205
- loss = advantage.pow(2).mean()
206
- self.optimizer_value.zero_grad()
207
- loss.backward()
208
- self.optimizer_value.step()
209
-
210
- # Update stochastic value function
211
- for epoch in range(10):
212
- for i in range(len(actions)):
213
- state = torch.from_numpy(states[i]).float().unsqueeze(0)
214
- value = self.value_post(state)
215
- post_state = torch.from_numpy(post_states[i]).float().unsqueeze(0)
216
- value_post = self.value_post(post_state)
217
- target = rewards_post_state[i] + self.gamma * value_post
218
- advantage = target - value
219
- loss = advantage.pow(2).mean()
220
- self.optimizer_value_post.zero_grad()
221
- loss.backward()
222
- self.optimizer_value_post.step()
223
-
224
- # Update policy network
225
- states = torch.from_numpy(np.vstack(states)).float()
226
- actions = torch.cat(actions).unsqueeze(1)
227
- old_probs = torch.cat(probs)
228
- old_probs = torch.gather(old_probs.clone(),2, actions)
229
-
230
- policy_epochs = 10
231
- for epoch in range(policy_epochs):
232
- probs = self.policy(states)
233
- probs = torch.stack(probs, dim=1).clone()
234
- m = Categorical(probs)
235
- action = m.sample()
236
- probs = torch.gather(probs, 2, actions)
237
- kl_div = (old_probs * (torch.log(old_probs) - torch.log(probs))).sum()
238
-
239
- for state,post_state, action, old_prob, prob, next_state, reward_pre_state, reward_post_state in zip(states,post_states, actions, old_probs, probs, next_states,rewards_pre_state,rewards_post_state):
240
- state = state.unsqueeze(0)
241
- next_state = torch.from_numpy(next_state).unsqueeze(0).float()
242
- post_state = torch.from_numpy(post_state).unsqueeze(0).float()
243
- action = action.unsqueeze(0)
244
- old_prob = old_prob.unsqueeze(0)
245
- prob = prob.unsqueeze(0)
246
- value = self.value(state)
247
- value_post = self.value_post(post_state)
248
- advantage = reward_pre_state + self.gamma * self.value(next_state) - self.value(state)
249
- advantage_post = reward_post_state + self.gamma * self.value_post(post_state) - self.value_post(state)
250
-
251
- ratio = (prob / old_prob)
252
- surr1 = ratio * advantage
253
- surr2 = torch.clamp(ratio, 1 - self.eps_clip, 1 + self.eps_clip) * advantage
254
- policy_loss = -torch.min(surr1, surr2) - 0.01 * m.entropy()
255
-
256
- ratio_post = ratio
257
- surr1_post = ratio_post * advantage_post
258
- surr2_post = torch.clamp(ratio_post, 1 - self.eps_clip, 1 + self.eps_clip) * advantage_post
259
- policy_loss_post = -torch.min(surr1_post, surr2_post) - 0.01 * m.entropy()
260
-
261
- self.optimizer_policy.zero_grad()
262
- (policy_loss.pow(2).mean() + policy_loss_post.pow(2).mean() + 0.5 * value.pow(2).mean() + 0.5 * value_post.pow(2).mean()).backward(retain_graph=True)
263
- self.optimizer_policy.step()
264
-
265
- def learn(self, n_episodes=1000, save_interval=100):
266
- # Train the agent
267
- for episode in range(n_episodes):
268
- state = self.env.reset()
269
- rewards = []
270
- rewards_pre_state = []
271
- rewards_post_state = []
272
- states = []
273
- next_states = []
274
- actions = []
275
- probs = []
276
- post_states = []
277
- # next_post_states = []
278
- done = False
279
- while not done:
280
- action, log_prob, prob, value, value_post = self.get_action(state)
281
- next_state, reward, done, info = self.env.step(action[0].detach().numpy())
282
- machine_setup, inventory_level, setup_cost = self.get_post_state(action[0].detach().numpy(), state[self.env.n_items:self.env.n_items+self.env.n_machines], state[0:self.env.n_items])
283
- post_state = state.copy()
284
- post_state[self.env.n_items:self.env.n_items+self.env.n_machines] = machine_setup
285
- post_state[0:self.env.n_items] = inventory_level
286
- post_states.append(post_state)
287
- post_state = torch.from_numpy(post_state).float().unsqueeze(0)
288
- rewards.append(reward)
289
- reward_pre_state = -(self.env.total_cost['holding_costs'] + self.env.total_cost['lost_sales'])
290
- reward_post_state = -setup_cost.sum()
291
- rewards_pre_state.append(reward_pre_state)
292
- rewards_post_state.append(reward_post_state)
293
- states.append(state)
294
- next_states.append(next_state)
295
- actions.append(action)
296
- probs.append(prob)
297
-
298
- state = next_state
299
- if done:
300
- self.update(rewards, rewards_pre_state, rewards_post_state, states, post_states, actions, probs, next_states)
301
- print('Episode:', episode, 'Reward:', sum(rewards))
302
- if episode % save_interval == 0:
303
- self.save(f'policy_{episode}.pt')
304
- self.save(self.LOG_DIR)
305
-
306
-
307
- def save(self, filepath):
308
- torch.save({
309
- 'policy_state_dict': self.policy.state_dict(),
310
- 'value_state_dict': self.value.state_dict(),
311
- 'value_post_state_dict': self.value_post.state_dict(),
312
- 'optimizer_policy_state_dict': self.optimizer_policy.state_dict(),
313
- 'optimizer_value_state_dict': self.optimizer_value.state_dict(),
314
- 'optimizer_value_post_state_dict': self.optimizer_value_post.state_dict()
315
- }, filepath)
316
-
317
-
318
-
319
- def load(self, filepath):
320
- checkpoint = torch.load(filepath)
321
- self.policy.load_state_dict(checkpoint['policy_state_dict'])
322
- self.value.load_state_dict(checkpoint['value_state_dict'])
323
- self.value_post.load_state_dict(checkpoint['value_post_state_dict'])
324
- self.optimizer_policy.load_state_dict(checkpoint['optimizer_policy_state_dict'])
325
- self.optimizer_value.load_state_dict(checkpoint['optimizer_value_state_dict'])
326
- self.optimizer_value_post.load_state_dict(checkpoint['optimizer_value_post_state_dict'])
327
-
328
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
code/Lot-sizing/agents/{PDPPO one critic.py → PDPPOonecritic.py} RENAMED
@@ -144,7 +144,7 @@ class ActorCritic(nn.Module):
144
  return action_logprobs, state_values, dist_entropy
145
 
146
 
147
- class PDPPO:
148
  def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):
149
 
150
  self.has_continuous_action_space = has_continuous_action_space
 
144
  return action_logprobs, state_values, dist_entropy
145
 
146
 
147
+ class PDPPOonecritic:
148
  def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):
149
 
150
  self.has_continuous_action_space = has_continuous_action_space
code/Lot-sizing/agents/PPO.py CHANGED
@@ -214,7 +214,7 @@ class PPO:
214
  self.buffer.logprobs.append(action_logprob)
215
  self.buffer.state_values.append(state_val)
216
 
217
- return action.numpy()
218
 
219
  def update(self):
220
  # Monte Carlo estimate of returns
@@ -258,8 +258,6 @@ class PPO:
258
  # final loss of clipped objective PPO
259
  loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(state_values, rewards) - 0.012 * dist_entropy
260
 
261
- loss_numpy = loss.detach().numpy()
262
-
263
  # take gradient step
264
  self.optimizer.zero_grad()
265
  loss.mean().backward()
 
214
  self.buffer.logprobs.append(action_logprob)
215
  self.buffer.state_values.append(state_val)
216
 
217
+ return action.cpu().numpy()
218
 
219
  def update(self):
220
  # Monte Carlo estimate of returns
 
258
  # final loss of clipped objective PPO
259
  loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(state_values, rewards) - 0.012 * dist_entropy
260
 
 
 
261
  # take gradient step
262
  self.optimizer.zero_grad()
263
  loss.mean().backward()
code/Lot-sizing/agents/PPOAgent.py CHANGED
@@ -14,7 +14,7 @@ BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
  AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
  sys.path.append(AGENTS_DIR)
16
  from agents.PPO import PPO
17
- from envs import *
18
 
19
 
20
  class SimplePlantSB(SimplePlant):
@@ -155,8 +155,6 @@ class PPOAgent():
155
 
156
  print("training environment name : " + self.experiment_name + '_PPO')
157
 
158
-
159
-
160
  # state space dimension
161
  self.state_dim = self.env.observation_space.shape[0]
162
 
 
14
  AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
  sys.path.append(AGENTS_DIR)
16
  from agents.PPO import PPO
17
+ from envs import SimplePlant
18
 
19
 
20
  class SimplePlantSB(SimplePlant):
 
155
 
156
  print("training environment name : " + self.experiment_name + '_PPO')
157
 
 
 
158
  # state space dimension
159
  self.state_dim = self.env.observation_space.shape[0]
160
 
code/Lot-sizing/agents/PPOAgent_two_critics.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os # Provides a way of interacting with the file system
2
+ import sys
3
+ import glob # Helps find all the pathnames matching a specified pattern according to the rules used by the Unix shell
4
+ import time # Provides various time-related functions
5
+ from datetime import datetime # Module that supplies classes for working with dates and times
6
+
7
+ import numpy as np # A library for the Python programming language, adding support for large, multi-dimensional arrays and matrices
8
+ import gym # Provides a collection of test problems — environments — that you can use to work out your reinforcement learning algorithms
9
+ import torch # A machine learning framework that provides tensor computation (like NumPy) with strong acceleration on GPUs
10
+ import copy # Provides a module for shallow and deep copying operations
11
+ import matplotlib.pyplot as plt # A plotting library for the Python programming language and its numerical mathematics extension NumPy
12
+ import matplotlib.patches as mpatches # Provides a way of adding a colored patch to the plot, for example to create a legend
13
+ BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
14
+ AGENTS_DIR = os.path.join(BASE_DIR,'agents')
15
+ sys.path.append(AGENTS_DIR)
16
+ from agents.PPOtwocritics import PPOtwocritics
17
+ from envs import SimplePlant
18
+
19
+
20
class SimplePlantSB(SimplePlant):
    """SimplePlant wrapper exposing gym-style action/observation spaces.

    The action space is a ``MultiDiscrete`` with ``n_items + 1`` choices per
    machine. Observations are either a dict (``dict_obs=True``) or a flat
    array made of the inventory levels followed by the machine setups.
    """

    def __init__(self, settings, stoch_model):
        """Build the plant and declare its gym spaces.

        Parameters
        ----------
        settings : mapping of environment settings; the optional key
            ``'dict_obs'`` selects dict observations (defaults to False).
        stoch_model : forwarded unchanged to ``SimplePlant.__init__``.
        """
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional: fall back to flat observations when it is
        # missing (or when settings is not subscriptable).
        try:
            self.dict_obs = settings['dict_obs']
        except (KeyError, TypeError):
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items + 1] * self.n_machines
        )

        if self.dict_obs:
            inventory_high = (
                np.ones(self.n_items)
                * (settings['max_inventory_level'][0] + 1)
                * self.n_items
            )
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items),
                    high=inventory_high,
                ),
                'machine_setup': gym.spaces.MultiDiscrete(
                    [self.n_items + 1] * self.n_machines
                ),
            })
        else:
            self.observation_space = gym.spaces.Box(
                low=np.zeros(self.n_items + self.n_machines),
                high=np.concatenate(
                    [
                        # upper bound for the inventory levels
                        np.array(self.max_inventory_level),
                        # upper bound for the machine setups
                        np.ones(self.n_machines) * (self.n_items + 1),
                    ]),
                dtype=np.int32
            )

    def step(self, action):
        """Execute one time step within the environment.

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : observation produced by ``_next_observation``
        reward : negative sum of all entries of the cost dict returned by
            ``_take_action``
        done : True once ``current_step`` reaches ``T``
        dict : the cost dict itself, for monitoring
        """
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(
            action, self.machine_setup, self.inventory_level, self.demand
        )

        # The reward is the negated total of every cost component.
        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """Return the parent's observation, flattened unless ``dict_obs``.

        Raises
        ------
        ValueError
            If the parent returns a non-dict observation while ``dict_obs``
            is enabled.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items size
                        obs['machine_setup'],    # n_machine size
                    )
                )
        elif self.dict_obs:
            # The original raised a bare string, which itself fails with
            # "TypeError: exceptions must derive from BaseException".
            raise ValueError('Change dict_obst to False')
        return obs
101
+
102
+
103
class PPOAgent_two_critics():
    """Training/inference wrapper around the ``PPOtwocritics`` algorithm.

    Wraps the given plant in ``SimplePlantSB``, stores the hyperparameters
    as attributes and exposes ``learn``, ``get_action`` and ``load_agent``.
    """

    def __init__(self, env: "SimplePlant", settings: dict):
        """Create the wrapped environment, hyperparameters and PPO agent.

        Parameters
        ----------
        env : plant whose ``settings`` and ``stoch_model`` are used to build
            the internal ``SimplePlantSB`` environment.
        settings : must contain ``'model_name'``, ``'experiment_name'`` and
            ``'parallelization'``; ``'dict_obs'`` is optional (default False).
        """
        self.env = SimplePlantSB(env.settings, env.stoch_model)
        self.last_inventory = env.inventory_level
        self.model_name = settings['model_name']
        self.experiment_name = settings['experiment_name']
        self.parallelization = settings['parallelization']
        # 'dict_obs' is optional; default to flat observations.
        try:
            self.dict_obs = settings['dict_obs']
        except (KeyError, TypeError):
            self.dict_obs = False

        self.POSSIBLE_STATES = self.env.n_items + 1
        self.env.cost_to_reward = True
        self.epsilon = 0

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # Use the logs folder one level above this module's directory.
        self.LOG_DIR = os.path.join(BASE_DIR, 'logs')

        print("============================================================================================")

        ####### initialize environment hyperparameters ######
        self.has_continuous_action_space = False  # continuous action space; else discrete

        self.max_ep_len = 1000                    # max timesteps in one episode

        self.print_freq = self.max_ep_len * 10    # print avg reward in the interval (in num timesteps)
        self.log_freq = self.max_ep_len * 2       # log avg reward in the interval (in num timesteps)
        self.save_model_freq = int(4999)          # save model frequency (in num timesteps)

        self.action_std = 0.6                     # starting std for action distribution (Multivariate Normal)
        self.action_std_decay_rate = 0.05         # linear decay step applied to action_std
        self.min_action_std = 0.1                 # minimum action_std (decay stops at this value)
        self.action_std_decay_freq = int(2.5e5)   # action_std decay frequency (in num timesteps)
        #####################################################

        ## Note : print/log frequencies should be > than self.max_ep_len

        ################ PPO_two_critics hyperparameters ################
        self.update_timestep = self.max_ep_len * 4  # update policy every n timesteps
        self.K_epochs = 60                          # update policy for K epochs in one update

        self.eps_clip = 0.2                         # clip parameter
        self.gamma = 0.99                           # discount factor

        self.lr_actor = 0.00055                     # learning rate for actor network
        self.lr_critic = 0.001                      # learning rate for critic network

        self.random_seed = 0                        # set random seed if required (0 = no random seed)
        #####################################################
        self.run_num_pretrained = 0  # change this to prevent overwriting weights in same experiment folder

        print("training environment name : " + self.experiment_name + '_PPO_two_critics')

        # state space dimension
        self.state_dim = self.env.observation_space.shape[0]

        # action space dimension (the whole space object in the discrete case)
        if self.has_continuous_action_space:
            self.action_dim = self.env.action_space.shape[0]
        else:
            self.action_dim = self.env.action_space

        self.ppo_agent = PPOtwocritics(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, self.has_continuous_action_space, self.action_std)

    ################################### Training ###################################
    def learn(self, n_episodes=100000):
        """Train a freshly initialised agent and log/checkpoint progress.

        Parameters
        ----------
        n_episodes : despite its name, this is used as the maximum number of
            training *timesteps*; the loop stops once it is exceeded.
        """
        ###################### logging ######################
        self.max_training_timesteps = n_episodes  # break training loop if timesteps > this value

        env = self.env

        #### log files for multiple runs are NOT overwritten
        log_dir = self.LOG_DIR
        os.makedirs(log_dir, exist_ok=True)

        log_dir = log_dir + '/' + self.experiment_name + '_PPO_two_critics/'
        os.makedirs(log_dir, exist_ok=True)

        #### the run number is the count of files already in the log directory
        run_num = len(next(os.walk(log_dir))[2])

        #### create new log file for each run
        log_f_name = log_dir + '/PPO_two_critics_' + self.experiment_name + "_log_" + str(run_num) + ".csv"

        print("current logging run number for " + self.experiment_name + " : ", run_num)
        print("logging at : " + log_f_name)
        #####################################################

        ################### checkpointing ###################
        directory = self.LOG_DIR
        os.makedirs(directory, exist_ok=True)

        directory = directory + '/' + self.experiment_name + '_PPO_two_critics' + '/'
        os.makedirs(directory, exist_ok=True)

        checkpoint_path = directory + "PPO_two_critics_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
        print("save checkpoint path : " + checkpoint_path)
        #####################################################

        ############# print all hyperparameters #############
        print("--------------------------------------------------------------------------------------------")
        print("max training timesteps : ", self.max_training_timesteps)
        print("max timesteps per episode : ", self.max_ep_len)
        print("model saving frequency : " + str(self.save_model_freq) + " timesteps")
        print("log frequency : " + str(self.log_freq) + " timesteps")
        print("printing average reward over episodes in last : " + str(self.print_freq) + " timesteps")
        print("--------------------------------------------------------------------------------------------")
        print("state space dimension : ", self.state_dim)
        print("action space dimension : ", self.action_dim)
        print("--------------------------------------------------------------------------------------------")
        if self.has_continuous_action_space:
            print("Initializing a continuous action space policy")
            print("--------------------------------------------------------------------------------------------")
            print("starting std of action distribution : ", self.action_std)
            print("decay rate of std of action distribution : ", self.action_std_decay_rate)
            # was `min_self.action_std`, an undefined name
            print("minimum std of action distribution : ", self.min_action_std)
            print("decay frequency of std of action distribution : " + str(self.action_std_decay_freq) + " timesteps")
        else:
            print("Initializing a discrete action space policy")
        print("--------------------------------------------------------------------------------------------")
        print("PPO_two_critics update frequency : " + str(self.update_timestep) + " timesteps")
        print("PPO_two_critics K epochs : ", self.K_epochs)
        print("PPO_two_critics epsilon clip : ", self.eps_clip)
        print("discount factor (self.gamma) : ", self.gamma)
        print("--------------------------------------------------------------------------------------------")
        print("optimizer learning rate actor : ", self.lr_actor)
        print("optimizer learning rate critic : ", self.lr_critic)
        if self.random_seed:
            print("--------------------------------------------------------------------------------------------")
            print("setting random seed to ", self.random_seed)
        #####################################################

        print("============================================================================================")

        ################# training procedure ################

        # (re)initialize the PPO agent so every call to learn starts fresh
        self.ppo_agent = PPOtwocritics(self.state_dim, self.action_dim, self.lr_actor, self.lr_critic, self.gamma, self.K_epochs, self.eps_clip, self.has_continuous_action_space, self.action_std)

        # track total training time
        start_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)

        print("============================================================================================")

        # printing and logging variables
        print_running_reward = 0
        print_running_episodes = 0

        log_running_reward = 0
        log_running_episodes = 0

        time_step = 0
        i_episode = 0

        # The context manager closes the log file even if training raises.
        with open(log_f_name, "w+") as log_f:
            log_f.write('episode,timestep,reward\n')

            # training loop
            while time_step <= self.max_training_timesteps:

                state = env.reset()
                current_ep_reward = 0

                for t in range(1, self.max_ep_len + 1):

                    # select action with policy
                    action = self.ppo_agent.select_action(state)
                    state, reward, done, _ = env.step(action)

                    # saving reward and is_terminals
                    self.ppo_agent.buffer.rewards.append(reward)
                    self.ppo_agent.buffer.is_terminals.append(done)

                    time_step += 1
                    current_ep_reward += reward

                    # update PPO_two_critics agent
                    if time_step % self.update_timestep == 0:
                        self.ppo_agent.update()

                    # if continuous action space; then decay action std of output action distribution
                    if self.has_continuous_action_space and time_step % self.action_std_decay_freq == 0:
                        # was `decay_self.action_std(rate, self.action_std)`:
                        # wrong attribute name and wrong second argument.
                        self.ppo_agent.decay_action_std(self.action_std_decay_rate, self.min_action_std)

                    # log in logging file
                    if time_step % self.log_freq == 0:
                        # average reward over the episodes finished since the
                        # last log; guard against no finished episode
                        log_avg_reward = log_running_reward / max(log_running_episodes, 1)
                        log_avg_reward = round(log_avg_reward, 4)

                        log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
                        log_f.flush()

                        log_running_reward = 0
                        log_running_episodes = 0

                    # printing average reward
                    if time_step % self.print_freq == 0:
                        print_avg_reward = print_running_reward / max(print_running_episodes, 1)
                        print_avg_reward = round(print_avg_reward, 2)

                        print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))

                        print_running_reward = 0
                        print_running_episodes = 0

                    # save model weights
                    if time_step % self.save_model_freq == 0:
                        print("--------------------------------------------------------------------------------------------")
                        print("saving model at : " + checkpoint_path)
                        self.ppo_agent.save(checkpoint_path)
                        print("model saved")
                        print("Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time)
                        print("--------------------------------------------------------------------------------------------")

                    # break; if the episode is over
                    if done:
                        break

                print_running_reward += current_ep_reward
                print_running_episodes += 1

                log_running_reward += current_ep_reward
                log_running_episodes += 1

                i_episode += 1

        # print total training time
        print("============================================================================================")
        end_time = datetime.now().replace(microsecond=0)
        print("Started training at (GMT) : ", start_time)
        print("Finished training at (GMT) : ", end_time)
        print("Total training time : ", end_time - start_time)
        print("============================================================================================")

    def get_action(self, state):
        """Return the policy's action for ``state``.

        Dict states are flattened (inventory levels, then machine setups)
        unless ``dict_obs`` is enabled.

        Raises
        ------
        ValueError
            If ``state`` is not a dict while ``dict_obs`` is enabled.
        """
        if isinstance(state, dict):
            if not self.dict_obs:
                state = np.concatenate(
                    (
                        state['inventory_level'],  # n_items size
                        state['machine_setup'],    # n_machine size
                    )
                )
        elif self.dict_obs:
            # Raising a plain string is itself a TypeError; raise a real
            # exception carrying the same message.
            raise ValueError('Change dict_obst to False')
        return self.ppo_agent.select_action(state)

    def load_agent(self, path):
        """Load the checkpoint written by ``learn`` for this experiment.

        NOTE(review): ``path`` is accepted but not used; the checkpoint
        location is rebuilt from ``LOG_DIR``, ``experiment_name``,
        ``random_seed`` and ``run_num_pretrained``.
        """
        directory = self.LOG_DIR
        directory = directory + '/' + self.experiment_name + '_PPO_two_critics' + '/'
        checkpoint_path = directory + "PPO_two_critics_{}_{}_{}.pth".format(self.experiment_name, self.random_seed, self.run_num_pretrained)
        print("loading network from : " + checkpoint_path)
        self.ppo_agent.load(checkpoint_path)
code/Lot-sizing/agents/{PDPPO_one_critic.py → PPOtwocritics.py} RENAMED
@@ -5,10 +5,8 @@ Created on Wed Mar 1 00:43:49 2023
5
  @author: leona
6
  """
7
 
8
- import numpy as np
9
  import torch
10
  import torch.nn as nn
11
- import torch.nn.init as init
12
  from torch.distributions import MultivariateNormal
13
  from torch.distributions import Categorical
14
 
@@ -25,26 +23,22 @@ else:
25
  print("============================================================================================")
26
 
27
 
28
- ################################## PDPPO Policy ##################################
29
  class RolloutBuffer:
30
  def __init__(self):
31
  self.actions = []
32
  self.states = []
33
- self.post_states = []
34
  self.logprobs = []
35
  self.rewards = []
36
  self.state_values = []
37
- self.state_values_post = []
38
  self.is_terminals = []
39
 
40
  def clear(self):
41
  del self.actions[:]
42
  del self.states[:]
43
- del self.post_states[:]
44
  del self.logprobs[:]
45
  del self.rewards[:]
46
  del self.state_values[:]
47
- del self.state_values_post[:]
48
  del self.is_terminals[:]
49
 
50
 
@@ -74,7 +68,6 @@ class ActorCritic(nn.Module):
74
  self.fc2 = nn.Linear(128, 128)
75
  self.actor = nn.Linear(128, self.action_dim.nvec.sum())
76
 
77
-
78
  # critic
79
  self.critic = nn.Sequential(
80
  nn.Linear(state_dim, 128),
@@ -84,6 +77,13 @@ class ActorCritic(nn.Module):
84
  nn.Linear(128, 1)
85
  )
86
 
 
 
 
 
 
 
 
87
 
88
  def forward(self, state):
89
  raise NotImplementedError
@@ -100,24 +100,28 @@ class ActorCritic(nn.Module):
100
 
101
 
102
 
103
- def act(self, state,tau):
104
 
105
  if self.has_continuous_action_space:
106
  action_mean = self.actor(state)
107
  cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
108
  dist = MultivariateNormal(action_mean, cov_mat)
109
  else:
 
110
  x = nn.functional.relu(self.fc2(nn.functional.relu(self.fc1(state))))
111
  logits = self.actor(x)
112
  action_probs = nn.functional.softmax(logits, dim=-1)
113
  dist = Categorical(action_probs.view(len(self.action_dim.nvec),-1))
 
 
114
 
115
  action = dist.sample()
116
  action_logprob = dist.log_prob(action)
117
-
118
- return action.detach(), action_logprob.detach()
 
119
 
120
- def evaluate(self, state,post_state, action,tau):
121
 
122
  if self.has_continuous_action_space:
123
  action_mean = self.actor(state)
@@ -130,30 +134,29 @@ class ActorCritic(nn.Module):
130
  if self.action_dim == 1:
131
  action = action.reshape(-1, self.action_dim)
132
  else:
 
133
  x = nn.functional.relu(self.fc2(nn.functional.relu(self.fc1(state))))
134
  logits = self.actor(x)
135
  action_probs = nn.functional.softmax(logits, dim=-1)
136
-
137
  dist = Categorical(action_probs.view(state.shape[0],len(self.action_dim.nvec),-1))
138
  # action_probs = self.actor(state)
139
  # dist = Categorical(action_probs)
140
  action_logprobs = dist.log_prob(action)
141
  dist_entropy = dist.entropy()
142
- state_values = self.critic(post_state)
 
143
 
144
- return action_logprobs, state_values, dist_entropy
145
 
146
 
147
- class PDPPO:
148
- def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, env, has_continuous_action_space, tau, action_std_init=0.6):
149
 
150
  self.has_continuous_action_space = has_continuous_action_space
151
 
152
  if has_continuous_action_space:
153
  self.action_std = action_std_init
154
-
155
- self.tau = tau
156
- self.env = env
157
  self.gamma = gamma
158
  self.eps_clip = eps_clip
159
  self.K_epochs = K_epochs
@@ -164,7 +167,7 @@ class PDPPO:
164
  self.optimizer = torch.optim.Adam([
165
  {'params': self.policy.actor.parameters(), 'lr': lr_actor},
166
  {'params': self.policy.critic.parameters(), 'lr': lr_critic}
167
- ], weight_decay=0.001)
168
 
169
  self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
170
  self.policy_old.load_state_dict(self.policy.state_dict())
@@ -178,7 +181,7 @@ class PDPPO:
178
  self.policy_old.set_action_std(new_action_std)
179
  else:
180
  print("--------------------------------------------------------------------------------------------")
181
- print("WARNING : Calling PDPPO::set_action_std() on discrete action space policy")
182
  print("--------------------------------------------------------------------------------------------")
183
 
184
  def decay_action_std(self, action_std_decay_rate, min_action_std):
@@ -194,64 +197,33 @@ class PDPPO:
194
  self.set_action_std(self.action_std)
195
 
196
  else:
197
- print("WARNING : Calling PDPPO::decay_action_std() on discrete action space policy")
198
  print("--------------------------------------------------------------------------------------------")
199
-
200
- def get_post_state(self, action, machine_setup, inventory_level):
201
- setup_loss = np.zeros(self.env.n_machines, dtype=int)
202
- setup_costs = np.zeros(self.env.n_machines)
203
- # if we are just changing the setup, we use the setup cost matrix with the corresponding position given by the actual setup and the new setup
204
- for m in range(self.env.n_machines):
205
- if action[m] != 0: # if the machine is not iddle
206
- # 1. IF NEEDED CHANGE SETUP
207
- if machine_setup[m] != action[m] and action[m] != 0:
208
- setup_costs[m] = self.env.setup_costs[m][action[m] - 1]
209
- setup_loss[m] = self.env.setup_loss[m][action[m] - 1]
210
- machine_setup[m] = action[m]
211
- # 2. PRODUCTION
212
- production = self.env.machine_production_matrix[m][action[m] - 1] - setup_loss[m]
213
- inventory_level[action[m] - 1] += production
214
- else:
215
- machine_setup[m] = 0
216
- # return the new machine_setup_inventory_level and the setup_cost
217
- return machine_setup, inventory_level, setup_costs
218
-
219
- def select_action(self, state,tau):
220
 
221
  if self.has_continuous_action_space:
222
  with torch.no_grad():
223
  state = torch.FloatTensor(state).to(device)
224
- action, action_logprob, state_val = self.policy_old.act(state,tau)
225
 
226
  self.buffer.states.append(state)
227
  self.buffer.actions.append(action)
228
  self.buffer.logprobs.append(action_logprob)
229
  self.buffer.state_values.append(state_val)
 
230
 
231
  return action.detach().cpu().numpy().flatten()
232
  else:
233
  with torch.no_grad():
234
  state = torch.FloatTensor(state).to(device)
235
- action, action_logprob = self.policy_old.act(state,tau)
236
-
237
-
238
- machine_setup, inventory_level, setup_cost = self.get_post_state(action, state[self.env.n_items:self.env.n_items+self.env.n_machines].clone(), state[0:self.env.n_items].clone())
239
-
240
- post_state = state.clone()
241
- post_state[self.env.n_items:self.env.n_items+self.env.n_machines] = machine_setup.clone()
242
- post_state[0:self.env.n_items] = inventory_level.clone()
243
- post_state = torch.FloatTensor(post_state).to(device)
244
 
245
  self.buffer.states.append(state)
246
- self.buffer.post_states.append(post_state)
247
  self.buffer.actions.append(action)
248
  self.buffer.logprobs.append(action_logprob)
249
-
250
- with torch.no_grad():
251
- #post_state = torch.cat([post_state.clone(),state.clone()])
252
- state_val = self.policy_old.critic(post_state)
253
-
254
  self.buffer.state_values.append(state_val)
 
255
 
256
  return action.numpy()
257
 
@@ -271,19 +243,19 @@ class PDPPO:
271
 
272
  # convert list to tensor
273
  old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
274
- old_post_states = torch.squeeze(torch.stack(self.buffer.post_states, dim=0)).detach().to(device)
275
  old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
276
  old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
277
  old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
 
278
 
279
  # calculate advantages
280
- advantages = rewards.detach() - old_state_values.detach()
281
 
282
  # Optimize policy for K epochs
283
  for _ in range(self.K_epochs):
284
 
285
  # Evaluating old actions and values
286
- logprobs, state_values, dist_entropy = self.policy.evaluate(old_states,old_post_states, old_actions,self.tau)
287
 
288
  # match state_values tensor dimensions with rewards tensor
289
  state_values = torch.squeeze(state_values)
@@ -295,19 +267,17 @@ class PDPPO:
295
  surr1 = ratios * advantages.unsqueeze(1)
296
  surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages.unsqueeze(1)
297
 
298
- # final loss of clipped objective PDPPO
299
- loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(state_values, rewards) - 0.012 * dist_entropy
300
 
301
  loss_numpy = loss.detach().numpy()
302
 
303
  # take gradient step
304
  self.optimizer.zero_grad()
305
  loss.mean().backward()
306
- torch.nn.utils.clip_grad_norm_(self.policy.parameters(), max_norm=1)
307
  self.optimizer.step()
308
 
309
  # Copy new weights into old policy
310
-
311
  self.policy_old.load_state_dict(self.policy.state_dict())
312
 
313
  # clear buffer
 
5
  @author: leona
6
  """
7
 
 
8
  import torch
9
  import torch.nn as nn
 
10
  from torch.distributions import MultivariateNormal
11
  from torch.distributions import Categorical
12
 
 
23
  print("============================================================================================")
24
 
25
 
26
+ ################################## PPO_two_critics Policy ##################################
27
  class RolloutBuffer:
28
  def __init__(self):
29
  self.actions = []
30
  self.states = []
 
31
  self.logprobs = []
32
  self.rewards = []
33
  self.state_values = []
 
34
  self.is_terminals = []
35
 
36
  def clear(self):
37
  del self.actions[:]
38
  del self.states[:]
 
39
  del self.logprobs[:]
40
  del self.rewards[:]
41
  del self.state_values[:]
 
42
  del self.is_terminals[:]
43
 
44
 
 
68
  self.fc2 = nn.Linear(128, 128)
69
  self.actor = nn.Linear(128, self.action_dim.nvec.sum())
70
 
 
71
  # critic
72
  self.critic = nn.Sequential(
73
  nn.Linear(state_dim, 128),
 
77
  nn.Linear(128, 1)
78
  )
79
 
80
+ self.critic_2 = nn.Sequential(
81
+ nn.Linear(state_dim, 128),
82
+ nn.Tanh(),
83
+ nn.Linear(128, 128),
84
+ nn.Tanh(),
85
+ nn.Linear(128, 1)
86
+ )
87
 
88
  def forward(self, state):
89
  raise NotImplementedError
 
100
 
101
 
102
 
103
def act(self, state):
    """Sample an action and evaluate both critics for ``state``.

    Returns
    -------
    tuple
        ``(action, action_logprob, state_val, state_val_2)``, all detached
        from the graph; ``action`` is additionally moved to the CPU.
        ``select_action`` unpacks four values, so the second critic's
        estimate is returned alongside the first.
    """
    if self.has_continuous_action_space:
        action_mean = self.actor(state)
        cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
        dist = MultivariateNormal(action_mean, cov_mat)
    else:
        x = nn.functional.relu(self.fc2(nn.functional.relu(self.fc1(state))))
        logits = self.actor(x)
        action_probs = nn.functional.softmax(logits, dim=-1)
        # One categorical distribution per entry of action_dim.nvec.
        dist = Categorical(action_probs.view(len(self.action_dim.nvec), -1))

    action = dist.sample()
    action_logprob = dist.log_prob(action)
    state_val = self.critic(state)
    state_val_2 = self.critic_2(state)

    return (
        action.cpu().detach(),
        action_logprob.detach(),
        state_val.detach(),
        state_val_2.detach(),
    )
123
 
124
+ def evaluate(self, state, action):
125
 
126
  if self.has_continuous_action_space:
127
  action_mean = self.actor(state)
 
134
  if self.action_dim == 1:
135
  action = action.reshape(-1, self.action_dim)
136
  else:
137
+ #x = nn.functional.relu(self.fc(state))
138
  x = nn.functional.relu(self.fc2(nn.functional.relu(self.fc1(state))))
139
  logits = self.actor(x)
140
  action_probs = nn.functional.softmax(logits, dim=-1)
 
141
  dist = Categorical(action_probs.view(state.shape[0],len(self.action_dim.nvec),-1))
142
  # action_probs = self.actor(state)
143
  # dist = Categorical(action_probs)
144
  action_logprobs = dist.log_prob(action)
145
  dist_entropy = dist.entropy()
146
+ state_values = self.critic(state)
147
+ state_values_2 = self.critic_2(state)
148
 
149
+ return action_logprobs, state_values, state_values_2, dist_entropy
150
 
151
 
152
+ class PPOtwocritics:
153
+ def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std_init=0.6):
154
 
155
  self.has_continuous_action_space = has_continuous_action_space
156
 
157
  if has_continuous_action_space:
158
  self.action_std = action_std_init
159
+
 
 
160
  self.gamma = gamma
161
  self.eps_clip = eps_clip
162
  self.K_epochs = K_epochs
 
167
  self.optimizer = torch.optim.Adam([
168
  {'params': self.policy.actor.parameters(), 'lr': lr_actor},
169
  {'params': self.policy.critic.parameters(), 'lr': lr_critic}
170
+ ])
171
 
172
  self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
173
  self.policy_old.load_state_dict(self.policy.state_dict())
 
181
  self.policy_old.set_action_std(new_action_std)
182
  else:
183
  print("--------------------------------------------------------------------------------------------")
184
+ print("WARNING : Calling PPO_two_critics::set_action_std() on discrete action space policy")
185
  print("--------------------------------------------------------------------------------------------")
186
 
187
  def decay_action_std(self, action_std_decay_rate, min_action_std):
 
197
  self.set_action_std(self.action_std)
198
 
199
  else:
200
+ print("WARNING : Calling PPO_two_critics::decay_action_std() on discrete action space policy")
201
  print("--------------------------------------------------------------------------------------------")
202
+
203
def select_action(self, state):
    """Query the old policy for an action and record the step in the buffer.

    The state tensor, action, log-probability and both critic values are
    appended to ``self.buffer`` (assumes the buffer exposes a
    ``state_values_2`` list — verify against the buffer class). The action
    is returned as a NumPy array, flattened in the continuous case.
    """
    with torch.no_grad():
        state = torch.FloatTensor(state).to(device)
        action, action_logprob, state_val, state_val_2 = self.policy_old.act(state)

    rollout = self.buffer
    rollout.states.append(state)
    rollout.actions.append(action)
    rollout.logprobs.append(action_logprob)
    rollout.state_values.append(state_val)
    rollout.state_values_2.append(state_val_2)

    # Only the conversion of the returned action differs between modes.
    if self.has_continuous_action_space:
        return action.detach().cpu().numpy().flatten()
    return action.numpy()
229
 
 
243
 
244
  # convert list to tensor
245
  old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
 
246
  old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
247
  old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)
248
  old_state_values = torch.squeeze(torch.stack(self.buffer.state_values, dim=0)).detach().to(device)
249
+ old_state_values_2 = torch.squeeze(torch.stack(self.buffer.state_values_2, dim=0)).detach().to(device)
250
 
251
  # calculate advantages
252
+ advantages = rewards.detach() - torch.min(old_state_values.detach(), old_state_values_2.detach()).detach()
253
 
254
  # Optimize policy for K epochs
255
  for _ in range(self.K_epochs):
256
 
257
  # Evaluating old actions and values
258
+ logprobs, state_values, state_values_2, dist_entropy = self.policy.evaluate(old_states, old_actions, self.tau)
259
 
260
  # match state_values tensor dimensions with rewards tensor
261
  state_values = torch.squeeze(state_values)
 
267
  surr1 = ratios * advantages.unsqueeze(1)
268
  surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages.unsqueeze(1)
269
 
270
+ # final loss of clipped objective PPO_two_critics
271
+ loss = -torch.min(surr1, surr2) + 0.5 * self.MseLoss(torch.min(state_values,state_values_2.squeeze()), rewards) - 0.012 * dist_entropy
272
 
273
  loss_numpy = loss.detach().numpy()
274
 
275
  # take gradient step
276
  self.optimizer.zero_grad()
277
  loss.mean().backward()
 
278
  self.optimizer.step()
279
 
280
  # Copy new weights into old policy
 
281
  self.policy_old.load_state_dict(self.policy.state_dict())
282
 
283
  # clear buffer
code/Lot-sizing/agents/__init__.py CHANGED
@@ -1,36 +1,17 @@
1
- from .dummyAgent import DummyAgent
2
- from .qLearningAgent import QLearningAgent
3
- from .stochasticProgrammingAgent import StochasticProgrammingAgent
4
- from .valueIteration import ValueIteration
5
- from .approximateValueIterationMC import ValueIterationMC
6
  from .stableBaselineAgents import StableBaselineAgent
7
- from .regressionTreeApproximation import RegressionTreeApproximation
8
- from .PSOAgent import PSOagent
9
- from .adpAgentHD import AdpAgentHD
10
- from .adpAgentHD1 import AdpAgentHD1
11
- from .adpAgentHD3 import AdpAgentHD3
12
- from .multiAgentRL import MultiAgentRL
13
  from .perfectInfoAgent import PerfectInfoAgent
14
- from .ensembleAgent import EnsembleAgent
15
- from .PPOAgent import PPOAgent
16
- from .PDPPOAgent_one_critic import PDPPOAgent
17
-
18
 
19
  __all__ = [
20
  "DummyAgent",
21
- "QLearningAgent",
22
- "StochasticProgrammingAgent",
23
- "ValueIteration",
24
- "ValueIterationMC",
25
- "RegressionTreeApproximation",
26
  "StableBaselineAgent",
27
- "PSOagent",
28
- "AdpAgentHD",
29
- "AdpAgentHD1",
30
- "AdpAgentHD3",
31
- "MultiAgentRL",
32
- "PerfectInfoAgent",
33
- "EnsembleAgent",
34
  "PPOAgent",
 
 
35
  "PDPPOAgent_one_critic"
36
  ]
 
1
+ from .PPOAgent import PPOAgent
2
+ from .PDPPOAgent import PDPPOAgent
3
+ from .PPOAgent_two_critics import PPOAgent_two_critics
4
+ from .PDPPOAgent_one_critic import PDPPOAgent_one_critic
5
+ from .stableBaselineAgents import StableBaselineAgent
6
  from .stableBaselineAgents import StableBaselineAgent
 
 
 
 
 
 
7
  from .perfectInfoAgent import PerfectInfoAgent
 
 
 
 
8
 
9
# Public names exported by `from agents import *`.
# Every entry must be bound by one of the imports at the top of this module:
# a name listed here without a matching import makes the star-import raise
# AttributeError. "DummyAgent" was dropped because its import was removed.
__all__ = [
    "PerfectInfoAgent",
    "StableBaselineAgent",
    "PPOAgent",
    "PPOAgent_two_critics",
    "PDPPOAgent",
    "PDPPOAgent_one_critic",
]
code/Lot-sizing/agents/__pycache__/PDPPO.cpython-38.pyc ADDED
Binary file (8.99 kB). View file
 
code/Lot-sizing/agents/__pycache__/PDPPOAgent.cpython-38.pyc ADDED
Binary file (8.68 kB). View file
 
code/Lot-sizing/agents/__pycache__/PDPPOAgent_one_critic.cpython-38.pyc ADDED
Binary file (8.86 kB). View file
 
code/Lot-sizing/agents/__pycache__/PDPPO_one_critic.cpython-38.pyc ADDED
Binary file (8.73 kB). View file
 
code/Lot-sizing/agents/__pycache__/PDPPOonecritic.cpython-38.pyc ADDED
Binary file (8.82 kB). View file
 
code/Lot-sizing/agents/__pycache__/PPO.cpython-38.pyc ADDED
Binary file (7.5 kB). View file
 
code/Lot-sizing/agents/__pycache__/PPOAgent.cpython-38.pyc ADDED
Binary file (8.48 kB). View file
 
code/Lot-sizing/agents/__pycache__/PPOAgent_two_critics.cpython-38.pyc ADDED
Binary file (8.65 kB). View file
 
code/Lot-sizing/agents/__pycache__/PPOtwocritics.cpython-38.pyc ADDED
Binary file (7.93 kB). View file
 
code/Lot-sizing/agents/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (548 Bytes). View file
 
code/Lot-sizing/agents/__pycache__/perfectInfoAgent.cpython-38.pyc ADDED
Binary file (1.04 kB). View file
 
code/Lot-sizing/agents/__pycache__/stableBaselineAgents.cpython-38.pyc ADDED
Binary file (11 kB). View file
 
code/Lot-sizing/agents/perfectInfoAgent.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ from models import *
3
+ from envs import *
4
+
5
+
6
class PerfectInfoAgent():
    """Agent that replays a plan computed once by ``PerfectInfoOptimization``.

    The optimisation is solved in the constructor; afterwards every call to
    ``get_action`` just reads the column of the stored plan that matches the
    environment's current step.
    """

    def __init__(self, env, settings):
        """Solve the perfect-information problem for ``env`` and keep the plan.

        Parameters
        ----------
        env : environment handed to ``PerfectInfoOptimization``; its
            ``current_step`` attribute is read later by ``get_action``.
        settings : accepted for interface parity with the other agents; it is
            not read here.
        """
        super().__init__()
        self.env = env
        self.solver = PerfectInfoOptimization(env)
        # solve() yields three values; only the middle one (the plan) is kept.
        _unused_head, plan, _unused_tail = self.solver.solve()
        self.sol = plan
        self.sol = self.sol.astype(int)

    def learn(self, epochs = 1000):
        """No-op: the plan is already computed in the constructor."""
        return None

    def get_action(self, obs):
        """Return the planned action for the environment's current step.

        ``obs`` is ignored; the action is column ``env.current_step`` of the
        stored plan, converted to a list.
        """
        step_index = self.env.current_step
        planned_column = self.sol[:, step_index]
        return list(planned_column)
code/Lot-sizing/agents/stableBaselineAgents.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import os
3
+ import time
4
+ import gym
5
+ import torch
6
+ import numpy as np
7
+ import copy
8
+ from envs import SimplePlant
9
+ import matplotlib.pyplot as plt
10
+ import matplotlib.patches as mpatches
11
+ from stable_baselines3 import PPO,A2C,DQN,SAC,DDPG
12
+ from stable_baselines3.common.monitor import Monitor
13
+ from stable_baselines3.common.vec_env import SubprocVecEnv
14
+ from stable_baselines3.common.callbacks import EvalCallback
15
+
16
+
17
class SimplePlantSB(SimplePlant):
    """SimplePlant exposing Gym action/observation spaces for Stable-Baselines3.

    On top of the parent's observation, the environment reports the inventory
    level recorded just before the latest action (``last_inventory_level``).
    Observations are either a dict (when ``settings['dict_obs']`` is truthy)
    or a flat vector ordered as: inventory level, machine setup, last
    inventory level.
    """

    def __init__(self, settings, stoch_model):
        """Build the plant and declare its Gym spaces.

        Parameters
        ----------
        settings : mapping of environment settings; ``'dict_obs'`` is
            optional, ``'max_inventory_level'`` is read for dict observations.
        stoch_model : forwarded unchanged to ``SimplePlant.__init__``.
        """
        super().__init__(settings, stoch_model)
        # 'dict_obs' is optional: a missing key means flat observations.
        try:
            self.dict_obs = settings['dict_obs']
        except KeyError:
            self.dict_obs = False
        self.last_inventory = copy.copy(self.inventory_level)
        # One discrete choice per machine: an item index or the extra value.
        self.action_space = gym.spaces.MultiDiscrete(
            [self.n_items+1] * self.n_machines
        )

        if self.dict_obs:
            self.observation_space = gym.spaces.Dict({
                'inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items),
                    high=np.ones(self.n_items)*(settings['max_inventory_level'][0]+1)*self.n_items
                ),
                'machine_setup': gym.spaces.MultiDiscrete([self.n_items+1] * self.n_machines),
                'last_inventory_level': gym.spaces.Box(
                    low=np.zeros(self.n_items),
                    high=np.ones(self.n_items)*(settings['max_inventory_level'][0]+1)*self.n_items
                )
            })
        else:
            self.observation_space = gym.spaces.Box(
                low=np.zeros(2*self.n_items+self.n_machines),
                high=np.concatenate(
                    [
                        np.array(self.max_inventory_level),  # high for the inventory level
                        np.ones(self.n_machines) * (self.n_items+1),  # high for the machine setups
                        np.array(self.max_inventory_level)  # high for the last inventory level
                    ]),
                dtype=np.int32
            )

    def step(self, action):
        """
        Step method: Execute one time step within the environment

        Parameters
        ----------
        action : action given by the agent

        Returns
        -------
        obs : Observation of the state given by the method _next_observation
        reward : Negative of the summed cost components returned by _take_action
        done : True once current_step reaches the horizon T
        dict : the cost components, exposed for monitoring

        """
        # Remember the inventory before acting; it is part of the next observation.
        self.last_inventory = copy.copy(self.inventory_level)

        self.total_cost = self._take_action(action, self.machine_setup, self.inventory_level, self.demand)

        # Costs are turned into a reward by negating their sum.
        reward = -sum(self.total_cost.values())

        self.current_step += 1
        done = self.current_step == self.T
        obs = self._next_observation()

        return obs, reward, done, self.total_cost

    def _next_observation(self):
        """
        Returns the parent observation extended with the last inventory level.

        A dict observation is returned as a dict when ``dict_obs`` is set and
        flattened (inventory level, machine setup, last inventory level)
        otherwise. A non-dict parent observation is returned untouched.

        Raises
        ------
        ValueError : if the parent observation is not a dict while
            ``dict_obs`` is set.
        """
        obs = SimplePlant._next_observation(self)
        if isinstance(obs, dict):
            # Only a dict can carry the extra key; do this after the type check.
            obs['last_inventory_level'] = copy.copy(self.last_inventory)
            if not self.dict_obs:
                obs = np.concatenate(
                    (
                        obs['inventory_level'],  # n_items size
                        obs['machine_setup'],  # n_machine size
                        obs['last_inventory_level']  # n_items size
                    )
                )
        elif self.dict_obs:
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError('Change dict_obs to False')
        return obs
98
+
99
+
100
class StableBaselineAgent():
    """
    Agent wrapping a Stable-Baselines3 model (PPO, A2C, DQN, SAC or DDPG).

    The env is adapted to Stable-Baselines3 requirements: unless
    ``settings['multiagent']`` is set, the given plant is rebuilt as a
    ``SimplePlantSB``, which provides its own _next_observation together
    with the observation space.
    """
    def __init__(self, env: SimplePlant, settings: dict):
        """Create the training environment, the evaluation callback and the model.

        Parameters
        ----------
        env : plant to learn on; its ``settings``, ``stoch_model`` and
            ``inventory_level`` attributes are read.
        settings : requires the keys 'multiagent', 'model_name',
            'experiment_name', 'parallelization' and 'run'; 'dict_obs' is
            optional and defaults to False.

        Notes
        -----
        ``self.model`` is only created when 'model_name' is one of
        'PPO', 'A2C', 'DQN', 'SAC' or 'DDPG'.
        """
        super(StableBaselineAgent, self).__init__()

        if settings['multiagent']:
            self.env = env
        else:
            self.env = SimplePlantSB(env.settings, env.stoch_model)
        self.last_inventory = env.inventory_level
        self.model_name = settings['model_name']
        self.experiment_name = settings['experiment_name']
        self.parallelization = settings['parallelization']
        self.run = settings['run']
        self.dict_obs = settings.get('dict_obs', False)

        self.POSSIBLE_STATES = self.env.n_items + 1
        self.env.cost_to_reward = True
        self.epsilon = 0

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        # Use the logs file in the root path of the main.
        self.LOG_DIR = os.path.join(BASE_DIR,'logs')

        if self.parallelization:
            # For cpu parallelization in StableBaseline learning
            def make_env(seed):
                def _init():
                    env = self.env
                    env = Monitor(
                        env,
                        os.path.join(f'{self.LOG_DIR}','monitor',f'{self.model_name}_{self.experiment_name}_{seed}_{self.run}'),
                        allow_early_resets=True
                    )
                    return env
                return _init
            num_cpu = 5
            env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
        else:
            env = Monitor(
                self.env,
                os.path.join(f'{self.LOG_DIR}','monitor',f'{self.model_name}_{self.experiment_name}_{self.run}')
            )
        # The callback evaluates periodically and stores the best model found.
        self.eval_callback = EvalCallback(
            env,
            best_model_save_path=os.path.join(f'{self.LOG_DIR}',f'best_{self.model_name}_{self.experiment_name}_{self.run}'),
            log_path=f'{self.LOG_DIR}/',
            eval_freq=100,
            deterministic=True,
            verbose=0,
            render=False
        )
        if self.dict_obs:
            policy = 'MultiInputPolicy'
        else:
            policy = 'MlpPolicy'
        if self.model_name == 'PPO':
            self.model = PPO(
                policy,
                env,
                verbose=0,
                batch_size=256,
                n_steps=256,
                gamma=0.96,
                gae_lambda=0.9,
                n_epochs=20,
                ent_coef=0.0,
                max_grad_norm=0.5,
                vf_coef=0.5,
                learning_rate=5e-3,
                use_sde=False,
                clip_range=0.4,
                policy_kwargs=dict(log_std_init=-2,ortho_init=False,activation_fn=torch.nn.ReLU,net_arch=[dict(pi=[300, 300], vf=[300, 300])])
            )
        elif self.model_name == 'A2C':
            self.model = A2C(
                policy,
                env,
                verbose=0,
                learning_rate=0.002,
                n_steps=100,
                gamma=0.95,
                vf_coef=0.7,
                policy_kwargs=dict(net_arch=[300, 300]),
                seed=None
            )
        elif self.model_name == 'DQN':
            self.model = DQN(
                policy,
                env,
                verbose=0,
                learning_rate=2.3e-3,
                buffer_size=100000,
                learning_starts=1000,
                batch_size=32,
                tau=1.0,
                gamma=0.99,
                target_update_interval=10,
                train_freq=256,
                gradient_steps=128,
                exploration_fraction=0.16,
                exploration_initial_eps=0.04,
                policy_kwargs=dict(net_arch=[300, 300]),
                seed=None
            )
        elif self.model_name == 'SAC':
            self.model = SAC(
                policy,
                env,
                verbose=0,
                learning_rate=0.0003,
                buffer_size=1000000,
                learning_starts=1000,
                batch_size=256,
                tau=0.005,
                gamma=0.99,
                train_freq=1,
                gradient_steps=1,
                seed=None,
                action_noise=None,
                replay_buffer_class=None,
                replay_buffer_kwargs=None,
                optimize_memory_usage=False,
                ent_coef='auto',
                target_update_interval=1,
                target_entropy='auto',
                use_sde=False,
                sde_sample_freq=-1,
                use_sde_at_warmup=False,
                tensorboard_log=None,
                create_eval_env=False,
                policy_kwargs=dict(activation_fn=torch.nn.ReLU,net_arch=[dict(pi=[300, 300], vf=[300, 300])])
            )
        elif self.model_name == 'DDPG':
            self.model = DDPG(
                policy,
                env,
                verbose=0,
                learning_rate=0.0003,
                buffer_size=1000000,
                learning_starts=1000,
                batch_size=256
            )

    def get_action(self, obs):
        """Return the model's deterministic action for ``obs``.

        A dict observation is extended in place with the key
        'last_inventory_level' (the inventory remembered from the previous
        call). It is then either passed as a dict (``dict_obs``) or flattened
        by concatenating its values in key order. A non-dict observation is
        passed to the model unchanged.

        Raises
        ------
        ValueError : if ``obs`` is not a dict while ``dict_obs`` is set.
        """
        if isinstance(obs, dict):
            # Key access is only valid for dict observations, so it lives
            # inside this branch rather than before the type check.
            obs['last_inventory_level'] = copy.copy(self.last_inventory)
            if self.dict_obs:
                act = self.model.predict(obs,deterministic=True)[0]
            else:
                obs_ = np.array(np.concatenate(list(obs.values())))
                act = self.model.predict(obs_,deterministic=True)[0]
            self.last_inventory = copy.copy(obs['inventory_level'])
        else:
            if self.dict_obs:
                # Raising a plain string is itself a TypeError in Python 3.
                raise ValueError('Change the policy to dictionary observations')
            act = self.model.predict(obs,deterministic=True)[0]
        return act

    def learn(self, epochs=1000):
        """Train the model for ``epochs`` episodes of ``env.T`` steps each.

        The evaluation callback built in the constructor is passed to the
        model, and the environment is closed once training ends.
        """
        print(f"{self.model_name} learning...")
        start_time = time.time()

        # Here the model learns using the provided environment in the Stable baseline Agent definition
        # We multiply the number of epochs by the number of time periods to give the number of training steps
        self.model.learn(
            epochs*self.env.T,
            callback=self.eval_callback,
        )

        self.env.close()

        time_duration = time.time() - start_time
        print(f"Finished Learning {time_duration:.2f} s")

    def load_agent(self, path):
        """Replace ``self.model`` with the model stored at ``path``.

        The loader is chosen from ``self.model_name``; an unknown name leaves
        the current model untouched.
        """
        # DDPG checkpoints must be read by DDPG itself, not by SAC.
        loaders = {'PPO': PPO, 'A2C': A2C, 'DQN': DQN, 'SAC': SAC, 'DDPG': DDPG}
        algorithm = loaders.get(self.model_name)
        if algorithm is not None:
            self.model = algorithm.load(path)

    def plot_policy(self, seed=1):
        """Plot the greedy action per (inventory 1, inventory 2, setup) cell.

        The table is stored in ``self.policy`` and the figure is saved as a
        PDF under 'results'.
        """
        # ONLY WORKING FOR 2 ITEMS 1 MACHINE
        cmap = plt.cm.get_cmap('viridis', 3)
        policy_map = np.zeros((self.env.max_inventory_level[0]+1,self.env.max_inventory_level[1]+1,self.env.n_items+1))
        for i in range(self.env.max_inventory_level[0]+1):
            for j in range(self.env.max_inventory_level[1]+1):
                for k in range(self.env.n_items+1):
                    obs = np.expand_dims(np.array([i,j,k]), axis = 0)
                    # Predict once; the action may be nested one level deeper
                    # depending on the action space of the model.
                    predicted = self.model.predict(obs,deterministic=True)[0]
                    try:
                        action = predicted[0][0]
                    except (IndexError, TypeError):
                        action = predicted[0]
                    policy_map[i,j,k] = action
        self.policy = policy_map

        fig, axs = plt.subplots(1, self.POSSIBLE_STATES)
        fig.suptitle('Found Policy')
        for i, ax in enumerate(axs):
            ax.set_title(f'Setup {i}')
            im = ax.pcolormesh(
                self.policy[:,:,i], cmap = cmap, edgecolors='k', linewidth=2
            )
            im.set_clim(0, self.POSSIBLE_STATES - 1)
            ax.set_xlabel('I2')
            if i == 0:
                ax.set_ylabel('I1')

        # COLOR BAR: one legend patch per action, attached to the last axis
        bound = [0,1,2]
        fig.subplots_adjust(bottom=0.2)
        ax.legend(
            [mpatches.Patch(color=cmap(b)) for b in bound],
            ['{}'.format(i) for i in range(3)],
            loc='upper center', bbox_to_anchor=(-0.8,-0.13),
            fancybox=True, shadow=True, ncol=3
        )
        fig.savefig(os.path.join('results', f'policy_function_{self.model_name}_{self.experiment_name}_{seed}.pdf'), bbox_inches='tight')
        plt.close()

    def plot_value_function(self, seed):
        """Plot the value of each state, averaged over the candidate actions.

        The table is stored in ``self.value_function`` and the figure is
        saved as a PDF under 'results'. The figure shows the negated values.
        """
        # ONLY WORKING FOR 2 ITEMS 1 MACHINE
        value_map = np.zeros((self.env.max_inventory_level[0]+1,self.env.max_inventory_level[1]+1,self.env.n_items+1))
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Loop ranges follow the axes of value_map, so differing maximum
        # inventory levels no longer index out of bounds.
        for i in range(self.env.max_inventory_level[0]+1):
            for j in range(self.env.max_inventory_level[1]+1):
                for k in range(self.env.n_items+1):
                    value_list = []
                    for candidate in range(self.env.n_items+1):
                        obs = np.expand_dims(np.array([i,j,k]), axis = 0)
                        action = np.array([[candidate]])
                        obs = torch.from_numpy(obs).to(torch.float).to(device=device)
                        action = torch.from_numpy(action).to(torch.float).to(device=device)
                        try:
                            value,prob,dist_entropy = self.model.policy.evaluate_actions(obs,action)
                        except AttributeError:
                            # Policies without evaluate_actions: read the value from q_net.
                            value = self.model.policy.q_net(obs)[0][int(action.item())]
                        value_list.append(value.item())

                    value_map[i,j,k] = np.array(value_list).mean()

        self.value_function = value_map
        # Plotting:
        fig, axs = plt.subplots(nrows=1, ncols=self.POSSIBLE_STATES)
        fig.suptitle('Value Function')
        for i, ax in enumerate(axs):
            ax.set_title(f'Setup {i}')
            im = ax.imshow(
                -self.value_function[:,:,i],
                aspect='auto', cmap='viridis'
            )
            if i == 0:
                ax.set_ylabel('I1')

            ax.set_xlabel('I2')
            ax.invert_yaxis()
        fig.subplots_adjust(right=0.85)
        cbar_ax = fig.add_axes([0.88, 0.15, 0.04, 0.7])

        fig.colorbar(im, cax=cbar_ax)
        fig.savefig(os.path.join('results',f'value_function_{self.model_name}_{self.experiment_name}_{self.run}_{seed}.pdf'))
        plt.close()
code/Lot-sizing/envs/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (337 Bytes). View file
 
code/Lot-sizing/envs/__pycache__/simplePlant.cpython-38.pyc ADDED
Binary file (6.66 kB). View file
 
code/Lot-sizing/envs/__pycache__/singleSequenceDependentMachinePlant.cpython-38.pyc ADDED
Binary file (2.66 kB). View file
 
code/Lot-sizing/experiments.py CHANGED
@@ -12,15 +12,16 @@ sys.path.append(AGENTS_DIR)
12
  from agents.PPO import PPO
13
  from agents.PDPPO import PDPPO
14
 
 
 
15
  from agents.PDPPOAgent import PDPPOAgent
16
  from agents.PPOAgent import PPOAgent
 
17
 
 
 
18
  import numpy as np
19
- from envs import *
20
- from agents import *
21
- from agents import StochasticProgrammingAgent, AdpAgentHD3
22
- from agents import StableBaselineAgent, MultiAgentRL, EnsembleAgent, PerfectInfoAgent,PSOagent,AdpAgentHD, PPOAgent
23
- from test_functions import *
24
  from scenarioManager.stochasticDemandModel import StochasticDemandModel
25
 
26
 
@@ -29,13 +30,11 @@ from scenarioManager.stochasticDemandModel import StochasticDemandModel
29
  if __name__ == '__main__':
30
  experiments = ['15items_5machines_i100','20items_10machines','25items_10machines']
31
  for experiment_name in experiments:
32
- for i in range(0,5):
33
  # Setting the seeds
34
  np.random.seed(1)
35
  random.seed(10)
36
  # Environment setup load:
37
- # experiment_name = '15items_5machines_i100' # we set the experiment using the available files in cfg
38
- # experiment_name = '25items_10machines' # we set the experiment using the available files in cfg
39
  file_path = os.path.abspath(f"./cfg_env/setting_{experiment_name}.json")
40
  fp = open(file_path, 'r')
41
  settings = json.load(fp)
@@ -57,37 +56,37 @@ if __name__ == '__main__':
57
  # Parameters for the ADPHS:
58
  setting_sol_method['regressor_name'] = 'plain_matrix_I2xM1'
59
  setting_sol_method['discount_rate'] = 0.9
 
 
 
60
  agents = []
61
  # Parameters for the RL:
62
 
63
- training_epochs_RL = 30000
64
- training_epochs_multiagent = 2000
65
-
66
-
67
- setting_sol_method['parallelization'] = False
68
  env = SimplePlant(settings, stoch_model)
69
 
70
  # Number of test execution (number of complet environment iterations)
71
  nreps = 100
72
 
73
  ###########################################################################
74
- # #PPO
75
  ###########################################################################
76
 
77
- # base_model_name = 'PPO'
78
- # ppo_agent = PPOAgent(
79
- # env,
80
- # setting_sol_method
81
- # )
82
- # ppo_agent.learn(n_episodes=training_epochs_RL*settings['time_horizon'] ) # Each ep with 200 steps
83
 
84
- # #load best agent before appending in the test list
85
- # BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
86
- # ppo_agent.load_agent(BEST_MODEL_DIR) # For training purposes
87
- # agents.append(("PPO", ppo_agent))
88
 
89
  ###########################################################################
90
- # Post-decision PPO
91
  ###########################################################################
92
 
93
  base_model_name = 'PDPPO'
@@ -101,7 +100,60 @@ if __name__ == '__main__':
101
  BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
102
  pdppo_agent.load_agent(BEST_MODEL_DIR) # For training purposes
103
  agents.append(("PDPPO", pdppo_agent))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
 
 
 
 
 
105
 
106
  ###########################################################################
107
  #TESTING
 
12
  from agents.PPO import PPO
13
  from agents.PDPPO import PDPPO
14
 
15
+ from agents.PDPPOAgent_one_critic import PDPPOAgent_one_critic
16
+ from agents.PPOAgent_two_critics import PPOAgent_two_critics
17
  from agents.PDPPOAgent import PDPPOAgent
18
  from agents.PPOAgent import PPOAgent
19
+ from agents.stableBaselineAgents import StableBaselineAgent
20
 
21
+
22
+ from test_functions import test_agents
23
  import numpy as np
24
+ from envs import SimplePlant
 
 
 
 
25
  from scenarioManager.stochasticDemandModel import StochasticDemandModel
26
 
27
 
 
30
  if __name__ == '__main__':
31
  experiments = ['15items_5machines_i100','20items_10machines','25items_10machines']
32
  for experiment_name in experiments:
33
+ for i in range(0,10):
34
  # Setting the seeds
35
  np.random.seed(1)
36
  random.seed(10)
37
  # Environment setup load:
 
 
38
  file_path = os.path.abspath(f"./cfg_env/setting_{experiment_name}.json")
39
  fp = open(file_path, 'r')
40
  settings = json.load(fp)
 
56
  # Parameters for the ADPHS:
57
  setting_sol_method['regressor_name'] = 'plain_matrix_I2xM1'
58
  setting_sol_method['discount_rate'] = 0.9
59
+ setting_sol_method['multiagent'] = False
60
+ setting_sol_method['parallelization'] = True
61
+ setting_sol_method['run'] = i
62
  agents = []
63
  # Parameters for the RL:
64
 
65
+ training_epochs_RL = 5000 # 30000
66
+
 
 
 
67
  env = SimplePlant(settings, stoch_model)
68
 
69
  # Number of test execution (number of complet environment iterations)
70
  nreps = 100
71
 
72
  ###########################################################################
73
+ # PPO
74
  ###########################################################################
75
 
76
+ base_model_name = 'PPO'
77
+ ppo_agent = PPOAgent(
78
+ env,
79
+ setting_sol_method
80
+ )
81
+ ppo_agent.learn(n_episodes=training_epochs_RL*settings['time_horizon'] ) # Each ep with 200 steps
82
 
83
+ #load best agent before appending in the test list
84
+ BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
85
+ ppo_agent.load_agent(BEST_MODEL_DIR) # For training purposes
86
+ agents.append(("PPO", ppo_agent))
87
 
88
  ###########################################################################
89
+ # Post-decision PPO - Dual critic
90
  ###########################################################################
91
 
92
  base_model_name = 'PDPPO'
 
100
  BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
101
  pdppo_agent.load_agent(BEST_MODEL_DIR) # For training purposes
102
  agents.append(("PDPPO", pdppo_agent))
103
+
104
+
105
+ ###########################################################################
106
+ # Post-decision PPO - One critic
107
+ ###########################################################################
108
+
109
+ base_model_name = 'PDPPO_one_critic'
110
+ pdppo_agent_one_critic = PDPPOAgent(
111
+ env,
112
+ setting_sol_method
113
+ )
114
+ pdppo_agent_one_critic.learn(n_episodes=training_epochs_RL*settings['time_horizon'] ) # Each ep with 200 steps
115
+
116
+ #load best agent before appending in the test list
117
+ BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
118
+ pdppo_agent_one_critic.load_agent(BEST_MODEL_DIR) # For training purposes
119
+ agents.append(("PDPPO", pdppo_agent_one_critic))
120
+
121
+
122
+ ###########################################################################
123
+ # PPO - Two critics
124
+ ###########################################################################
125
+
126
+ base_model_name = 'PPO_two_critics'
127
+ ppo_agent_two_critics = PDPPOAgent(
128
+ env,
129
+ setting_sol_method
130
+ )
131
+ ppo_agent_two_critics.learn(n_episodes=training_epochs_RL*settings['time_horizon'] ) # Each ep with 200 steps
132
+
133
+ #load best agent before appending in the test list
134
+ BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
135
+ ppo_agent_two_critics.load_agent(BEST_MODEL_DIR) # For training purposes
136
+ agents.append(("PDPPO", ppo_agent_two_critics))
137
+
138
+ ###########################################################################
139
+ # RL A2C
140
+ ###########################################################################
141
+
142
+ # base_model_name = 'A2C'
143
+ # env = SimplePlant(settings, stoch_model)
144
+ # setting_sol_method['model_name'] = base_model_name
145
+ # rl_agent = StableBaselineAgent(
146
+ # env,
147
+ # setting_sol_method
148
+ # )
149
+
150
+ # rl_agent.learn(epochs=training_epochs_RL) # Each ep with 200 steps
151
 
152
+ # #load best agent before appending in the test list
153
+ # BEST_MODEL_DIR = os.path.join(os.path.dirname(os.path.abspath('__file__')),'logs',f'best_{base_model_name}_{experiment_name}','best_model')
154
+ # rl_agent.load_agent(BEST_MODEL_DIR)
155
+ # agents.append(("A2C", rl_agent))
156
+
157
 
158
  ###########################################################################
159
  #TESTING
code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_0_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a42ab9cb3c0ecd6aee7da1dbd709f9ce11e1f7855dd8b4ca263ed02aa4f106
3
+ size 279139
code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_log_0.csv ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 39,4000,-2141.4023
3
+ 79,8000,-2186.1688
4
+ 119,12000,-2176.6288
5
+ 159,16000,-2191.1093
6
+ 199,20000,-2189.4693
7
+ 239,24000,-2187.8145
8
+ 279,28000,-2196.4592
9
+ 319,32000,-2186.1425
10
+ 359,36000,-2170.4952
11
+ 399,40000,-2184.8005
12
+ 439,44000,-2156.964
13
+ 479,48000,-2159.1175
14
+ 519,52000,-2177.2202
15
+ 559,56000,-2184.189
16
+ 599,60000,-2173.9785
17
+ 639,64000,-2185.696
18
+ 679,68000,-2187.422
19
+ 719,72000,-2188.3395
20
+ 759,76000,-2200.6052
21
+ 799,80000,-2206.662
22
+ 839,84000,-2198.9285
23
+ 879,88000,-2212.6385
24
+ 919,92000,-2214.2012
25
+ 959,96000,-2210.518
26
+ 999,100000,-2212.269
27
+ 1039,104000,-2202.792
28
+ 1079,108000,-2230.6483
29
+ 1119,112000,-2230.9285
30
+ 1159,116000,-2234.553
31
+ 1199,120000,-2231.0472
32
+ 1239,124000,-2205.4692
33
+ 1279,128000,-2224.4608
34
+ 1319,132000,-2222.5775
35
+ 1359,136000,-2220.6603
36
+ 1399,140000,-2217.5998
37
+ 1439,144000,-2206.9042
38
+ 1479,148000,-2219.398
39
+ 1519,152000,-2219.977
40
+ 1559,156000,-2208.6932
41
+ 1599,160000,-2199.11
42
+ 1639,164000,-2216.3345
43
+ 1679,168000,-2195.7275
44
+ 1719,172000,-2208.9112
45
+ 1759,176000,-2196.7148
46
+ 1799,180000,-2179.6018
47
+ 1839,184000,-2185.97
48
+ 1879,188000,-2190.0938
49
+ 1919,192000,-2182.9055
50
+ 1959,196000,-2212.575
51
+ 1999,200000,-2217.4317
52
+ 2039,204000,-2207.0677
53
+ 2079,208000,-2201.1058
54
+ 2119,212000,-2210.03
55
+ 2159,216000,-2196.9463
56
+ 2199,220000,-2186.4732
57
+ 2239,224000,-2196.7148
58
+ 2279,228000,-2180.514
59
+ 2319,232000,-2182.2113
60
+ 2359,236000,-2177.5078
61
+ 2399,240000,-2177.2448
62
+ 2439,244000,-2176.5475
63
+ 2479,248000,-2176.4643
64
+ 2519,252000,-2166.5628
65
+ 2559,256000,-2181.9908
66
+ 2599,260000,-2201.7215
67
+ 2639,264000,-2189.474
68
+ 2679,268000,-2170.9755
69
+ 2719,272000,-2169.075
70
+ 2759,276000,-2183.726
71
+ 2799,280000,-2165.4742
72
+ 2839,284000,-2187.9715
73
+ 2879,288000,-2179.0172
74
+ 2919,292000,-2161.0182
75
+ 2959,296000,-2168.9047
76
+ 2999,300000,-2165.532
77
+ 3039,304000,-2168.5285
78
+ 3079,308000,-2159.3415
79
+ 3119,312000,-2168.1608
80
+ 3159,316000,-2177.5103
81
+ 3199,320000,-2185.0758
82
+ 3239,324000,-2176.5248
83
+ 3279,328000,-2173.1685
84
+ 3319,332000,-2190.4757
85
+ 3359,336000,-2219.6503
86
+ 3399,340000,-2207.4892
87
+ 3439,344000,-2199.8988
88
+ 3479,348000,-2211.5325
89
+ 3519,352000,-2201.5668
90
+ 3559,356000,-2202.0843
91
+ 3599,360000,-2196.885
92
+ 3639,364000,-2199.742
93
+ 3679,368000,-2219.324
94
+ 3719,372000,-2224.5802
95
+ 3759,376000,-2213.3832
96
+ 3799,380000,-2191.889
97
+ 3839,384000,-2220.2653
98
+ 3879,388000,-2206.6353
99
+ 3919,392000,-2193.6993
100
+ 3959,396000,-2173.2148
101
+ 3999,400000,-2168.8942
102
+ 4039,404000,-2182.1583
103
+ 4079,408000,-2170.8605
104
+ 4119,412000,-2179.9363
105
+ 4159,416000,-2177.2738
106
+ 4199,420000,-2186.921
107
+ 4239,424000,-2176.2058
108
+ 4279,428000,-2178.0973
109
+ 4319,432000,-2179.0505
110
+ 4359,436000,-2183.782
111
+ 4399,440000,-2189.763
112
+ 4439,444000,-2191.9625
113
+ 4479,448000,-2190.0078
114
+ 4519,452000,-2208.7985
115
+ 4559,456000,-2196.431
116
+ 4599,460000,-2204.601
117
+ 4639,464000,-2198.331
118
+ 4679,468000,-2197.519
119
+ 4719,472000,-2195.665
120
+ 4759,476000,-2179.7755
121
+ 4799,480000,-2201.7112
122
+ 4839,484000,-2187.7942
123
+ 4879,488000,-2177.4918
124
+ 4919,492000,-2188.3555
125
+ 4959,496000,-2178.8962
126
+ 4999,500000,-2193.1755
code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_log_2.csv ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 39,4000,-2335.3474
3
+ 79,8000,-2244.5998
4
+ 119,12000,-2238.709
5
+ 159,16000,-2290.7835
6
+ 199,20000,-2266.6182
7
+ 239,24000,-2236.9325
8
+ 279,28000,-2260.8245
9
+ 319,32000,-2270.0922
10
+ 359,36000,-2267.02
11
+ 399,40000,-2260.1368
12
+ 439,44000,-2246.2922
13
+ 479,48000,-2251.6645
14
+ 519,52000,-2270.2068
15
+ 559,56000,-2271.094
16
+ 599,60000,-2270.1955
17
+ 639,64000,-2251.9755
18
+ 679,68000,-2283.5295
19
+ 719,72000,-2265.69
20
+ 759,76000,-2307.0708
21
+ 799,80000,-2293.882
22
+ 839,84000,-2290.8482
23
+ 879,88000,-2277.416
24
+ 919,92000,-2250.59
25
+ 959,96000,-2234.1988
26
+ 999,100000,-2249.0185
27
+ 1039,104000,-2247.6215
28
+ 1079,108000,-2216.7135
29
+ 1119,112000,-2213.6995
30
+ 1159,116000,-2224.0747
31
+ 1199,120000,-2228.468
32
+ 1239,124000,-2255.0583
33
+ 1279,128000,-2226.098
34
+ 1319,132000,-2217.657
35
+ 1359,136000,-2243.1698
36
+ 1399,140000,-2232.1338
37
+ 1439,144000,-2250.4618
38
+ 1479,148000,-2235.0085
39
+ 1519,152000,-2249.4723
40
+ 1559,156000,-2216.2995
41
+ 1599,160000,-2233.6805
42
+ 1639,164000,-2247.4035
43
+ 1679,168000,-2229.968
44
+ 1719,172000,-2218.4828
45
+ 1759,176000,-2223.798
46
+ 1799,180000,-2213.273
47
+ 1839,184000,-2219.179
48
+ 1879,188000,-2205.1017
49
+ 1919,192000,-2207.6708
50
+ 1959,196000,-2200.982
51
+ 1999,200000,-2218.6955
52
+ 2039,204000,-2200.056
53
+ 2079,208000,-2218.9955
54
+ 2119,212000,-2214.628
55
+ 2159,216000,-2230.9135
56
+ 2199,220000,-2212.2112
57
+ 2239,224000,-2228.0432
58
+ 2279,228000,-2228.0378
59
+ 2319,232000,-2218.216
60
+ 2359,236000,-2237.9682
61
+ 2399,240000,-2218.8503
62
+ 2439,244000,-2201.6265
63
+ 2479,248000,-2216.5263
64
+ 2519,252000,-2209.0173
65
+ 2559,256000,-2210.7017
66
+ 2599,260000,-2192.9838
67
+ 2639,264000,-2206.9902
68
+ 2679,268000,-2196.276
69
+ 2719,272000,-2187.5165
70
+ 2759,276000,-2201.5815
71
+ 2799,280000,-2197.6468
72
+ 2839,284000,-2181.081
73
+ 2879,288000,-2191.911
74
+ 2919,292000,-2210.5108
75
+ 2959,296000,-2191.4668
76
+ 2999,300000,-2207.3622
77
+ 3039,304000,-2188.681
78
+ 3079,308000,-2205.789
79
+ 3119,312000,-2189.5567
80
+ 3159,316000,-2171.3155
81
+ 3199,320000,-2170.6315
82
+ 3239,324000,-2170.7322
83
+ 3279,328000,-2174.4193
84
+ 3319,332000,-2175.8538
85
+ 3359,336000,-2154.2035
86
+ 3399,340000,-2185.9618
87
+ 3439,344000,-2178.553
88
+ 3479,348000,-2170.287
89
+ 3519,352000,-2159.5517
90
+ 3559,356000,-2172.067
91
+ 3599,360000,-2159.2972
92
+ 3639,364000,-2177.8195
93
+ 3679,368000,-2156.6698
94
+ 3719,372000,-2168.946
95
+ 3759,376000,-2182.2233
96
+ 3799,380000,-2170.65
97
+ 3839,384000,-2158.5868
98
+ 3879,388000,-2162.828
99
+ 3919,392000,-2148.9192
100
+ 3959,396000,-2152.153
101
+ 3999,400000,-2169.9372
102
+ 4039,404000,-2169.7798
103
+ 4079,408000,-2162.5945
104
+ 4119,412000,-2148.3235
105
+ 4159,416000,-2157.2015
106
+ 4199,420000,-2171.1243
107
+ 4239,424000,-2154.7868
108
+ 4279,428000,-2164.997
109
+ 4319,432000,-2162.2733
110
+ 4359,436000,-2167.9713
111
+ 4399,440000,-2163.9672
112
+ 4439,444000,-2152.2753
113
+ 4479,448000,-2149.6665
114
+ 4519,452000,-2160.5565
115
+ 4559,456000,-2157.0198
116
+ 4599,460000,-2158.6238
117
+ 4639,464000,-2153.1465
118
+ 4679,468000,-2161.9365
119
+ 4719,472000,-2147.464
120
+ 4759,476000,-2157.8608
121
+ 4799,480000,-2163.0485
122
+ 4839,484000,-2170.2235
123
+ 4879,488000,-2165.6525
124
+ 4919,492000,-2161.917
125
+ 4959,496000,-2157.1193
126
+ 4999,500000,-2146.3092
code/Lot-sizing/logs/15items_5machines_i100_PDPPO/PDPPO_15items_5machines_i100_log_3.csv ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 39,4000,-2240.3108
3
+ 79,8000,-2288.825
4
+ 119,12000,-2320.0678
5
+ 159,16000,-2266.3545
6
+ 199,20000,-2254.704
7
+ 239,24000,-2284.7492
8
+ 279,28000,-2275.9468
9
+ 319,32000,-2268.6098
10
+ 359,36000,-2288.8185
11
+ 399,40000,-2272.3042
12
+ 439,44000,-2261.6483
13
+ 479,48000,-2266.3038
14
+ 519,52000,-2270.6518
15
+ 559,56000,-2244.104
16
+ 599,60000,-2245.5072
17
+ 639,64000,-2238.031
18
+ 679,68000,-2220.696
19
+ 719,72000,-2220.8338
20
+ 759,76000,-2215.0553
21
+ 799,80000,-2206.708
22
+ 839,84000,-2227.824
23
+ 879,88000,-2214.947
24
+ 919,92000,-2217.5067
25
+ 959,96000,-2224.1628
26
+ 999,100000,-2221.2443
27
+ 1039,104000,-2244.4968
28
+ 1079,108000,-2239.8208
29
+ 1119,112000,-2232.748
30
+ 1159,116000,-2223.8978
31
+ 1199,120000,-2218.0928
32
+ 1239,124000,-2203.4095
33
+ 1279,128000,-2204.5672
34
+ 1319,132000,-2238.5695
35
+ 1359,136000,-2203.7973
36
+ 1399,140000,-2217.6258
37
+ 1439,144000,-2213.7642
38
+ 1479,148000,-2207.4387
39
+ 1519,152000,-2215.8908
40
+ 1559,156000,-2191.1362
41
+ 1599,160000,-2224.398
42
+ 1639,164000,-2201.3452
43
+ 1679,168000,-2189.2553
44
+ 1719,172000,-2209.868
45
+ 1759,176000,-2169.1652
46
+ 1799,180000,-2191.6032
47
+ 1839,184000,-2192.4662
48
+ 1879,188000,-2173.139
49
+ 1919,192000,-2180.778
50
+ 1959,196000,-2193.01
51
+ 1999,200000,-2196.909
52
+ 2039,204000,-2203.634
53
+ 2079,208000,-2203.3062
54
+ 2119,212000,-2205.7118
55
+ 2159,216000,-2221.6275
56
+ 2199,220000,-2207.0085
57
+ 2239,224000,-2205.649
58
+ 2279,228000,-2229.8532
59
+ 2319,232000,-2198.7525
60
+ 2359,236000,-2180.7215
61
+ 2399,240000,-2173.0688
62
+ 2439,244000,-2191.3938
63
+ 2479,248000,-2194.5465
64
+ 2519,252000,-2200.4895
65
+ 2559,256000,-2215.1643
66
+ 2599,260000,-2196.0888
67
+ 2639,264000,-2205.88
68
+ 2679,268000,-2186.5843
69
+ 2719,272000,-2189.5945
70
+ 2759,276000,-2177.4112
71
+ 2799,280000,-2185.7688
72
+ 2839,284000,-2180.7005
73
+ 2879,288000,-2194.3678
74
+ 2919,292000,-2183.5812
75
+ 2959,296000,-2188.0495
76
+ 2999,300000,-2185.9692
77
+ 3039,304000,-2178.563
78
+ 3079,308000,-2184.8002
79
+ 3119,312000,-2210.264
80
+ 3159,316000,-2190.2037
81
+ 3199,320000,-2198.2853
82
+ 3239,324000,-2206.658
83
+ 3279,328000,-2197.803
84
+ 3319,332000,-2206.5752
85
+ 3359,336000,-2210.574
86
+ 3399,340000,-2207.2495
87
+ 3439,344000,-2222.5217
88
+ 3479,348000,-2208.8218
89
+ 3519,352000,-2214.9137
90
+ 3559,356000,-2223.4288
91
+ 3599,360000,-2226.1332
92
+ 3639,364000,-2227.895
93
+ 3679,368000,-2213.1972
94
+ 3719,372000,-2217.1715
95
+ 3759,376000,-2229.5115
96
+ 3799,380000,-2232.2263
97
+ 3839,384000,-2250.712
98
+ 3879,388000,-2237.0413
99
+ 3919,392000,-2237.8288
100
+ 3959,396000,-2242.2087
101
+ 3999,400000,-2242.8518
102
+ 4039,404000,-2242.582
103
+ 4079,408000,-2247.5048
104
+ 4119,412000,-2219.5345
105
+ 4159,416000,-2219.813
106
+ 4199,420000,-2206.089
107
+ 4239,424000,-2229.2065
108
+ 4279,428000,-2232.5973
109
+ 4319,432000,-2220.915
110
+ 4359,436000,-2213.3003
111
+ 4399,440000,-2225.92
112
+ 4439,444000,-2229.2655
113
+ 4479,448000,-2223.2977
114
+ 4519,452000,-2222.3368
115
+ 4559,456000,-2217.945
116
+ 4599,460000,-2209.8247
117
+ 4639,464000,-2203.7908
118
+ 4679,468000,-2222.4963
119
+ 4719,472000,-2213.5595
120
+ 4759,476000,-2207.0573
121
+ 4799,480000,-2224.0718
122
+ 4839,484000,-2192.7728
123
+ 4879,488000,-2211.8895
124
+ 4919,492000,-2209.2267
125
+ 4959,496000,-2208.4648
126
+ 4999,500000,-2238.9572
code/Lot-sizing/logs/15items_5machines_i100_PPO/PPO_15items_5machines_i100_0_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34225abaea5568390f9cbf0a8d2382fa6d67e11addda79738a756604f1550ac6
3
+ size 199811
code/Lot-sizing/logs/15items_5machines_i100_PPO/PPO_15items_5machines_i100_log_0.csv ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 19,2000,-2135.3463
3
+ 39,4000,-2149.332
4
+ 59,6000,-2116.58
5
+ 79,8000,-2147.7215
6
+ 99,10000,-2145.4525
7
+ 119,12000,-2129.431
8
+ 139,14000,-2166.079
9
+ 159,16000,-2163.6345
10
+ 179,18000,-2139.9555
11
+ 199,20000,-2163.143
12
+ 219,22000,-2153.983
13
+ 239,24000,-2155.218
14
+ 259,26000,-2188.591
15
+ 279,28000,-2189.6865
16
+ 299,30000,-2163.1825
17
+ 319,32000,-2164.4245
18
+ 339,34000,-2179.1755
19
+ 359,36000,-2196.876
20
+ 379,38000,-2174.789
21
+ 399,40000,-2177.7325
22
+ 419,42000,-2171.5065
23
+ 439,44000,-2180.978
24
+ 459,46000,-2177.033
25
+ 479,48000,-2166.894
26
+ 499,50000,-2174.373
27
+ 519,52000,-2160.9975
28
+ 539,54000,-2139.3125
29
+ 559,56000,-2153.796
30
+ 579,58000,-2160.5355
31
+ 599,60000,-2147.8125
32
+ 619,62000,-2160.4915
33
+ 639,64000,-2156.354
34
+ 659,66000,-2120.872
35
+ 679,68000,-2144.24
36
+ 699,70000,-2132.259
37
+ 719,72000,-2161.746
38
+ 739,74000,-2157.3845
39
+ 759,76000,-2152.7245
40
+ 779,78000,-2172.0235
41
+ 799,80000,-2142.163
42
+ 819,82000,-2139.4385
43
+ 839,84000,-2144.8855
44
+ 859,86000,-2168.6705
45
+ 879,88000,-2151.953
46
+ 899,90000,-2163.172
47
+ 919,92000,-2146.0855
48
+ 939,94000,-2164.2995
49
+ 959,96000,-2136.362
50
+ 979,98000,-2144.5915
51
+ 999,100000,-2140.8605
52
+ 1019,102000,-2145.3225
53
+ 1039,104000,-2158.671
54
+ 1059,106000,-2143.01
55
+ 1079,108000,-2139.721
56
+ 1099,110000,-2116.822
57
+ 1119,112000,-2114.3315
58
+ 1139,114000,-2124.026
59
+ 1159,116000,-2142.8155
60
+ 1179,118000,-2147.8685
61
+ 1199,120000,-2143.8945
62
+ 1219,122000,-2146.832
63
+ 1239,124000,-2124.1695
64
+ 1259,126000,-2090.169
65
+ 1279,128000,-2146.8415
66
+ 1299,130000,-2121.292
67
+ 1319,132000,-2121.0195
68
+ 1339,134000,-2123.3185
69
+ 1359,136000,-2140.0235
70
+ 1379,138000,-2099.58
71
+ 1399,140000,-2110.5595
72
+ 1419,142000,-2113.717
73
+ 1439,144000,-2115.8905
74
+ 1459,146000,-2095.1055
75
+ 1479,148000,-2106.1685
76
+ 1499,150000,-2109.4955
77
+ 1519,152000,-2111.4375
78
+ 1539,154000,-2106.307
79
+ 1559,156000,-2130.6555
80
+ 1579,158000,-2136.0985
81
+ 1599,160000,-2121.9925
82
+ 1619,162000,-2109.5255
83
+ 1639,164000,-2128.574
84
+ 1659,166000,-2124.366
85
+ 1679,168000,-2139.8685
86
+ 1699,170000,-2116.211
87
+ 1719,172000,-2126.852
88
+ 1739,174000,-2117.076
89
+ 1759,176000,-2135.6755
90
+ 1779,178000,-2117.5595
91
+ 1799,180000,-2131.8435
92
+ 1819,182000,-2141.5565
93
+ 1839,184000,-2150.929
94
+ 1859,186000,-2139.8145
95
+ 1879,188000,-2129.5425
96
+ 1899,190000,-2126.8315
97
+ 1919,192000,-2133.958
98
+ 1939,194000,-2141.4045
99
+ 1959,196000,-2133.663
100
+ 1979,198000,-2141.9005
101
+ 1999,200000,-2148.833
102
+ 2019,202000,-2131.9035
103
+ 2039,204000,-2174.289
104
+ 2059,206000,-2160.0245
105
+ 2079,208000,-2165.5785
106
+ 2099,210000,-2147.701
107
+ 2119,212000,-2154.0045
108
+ 2139,214000,-2124.077
109
+ 2159,216000,-2166.3835
110
+ 2179,218000,-2168.514
111
+ 2199,220000,-2152.2125
112
+ 2219,222000,-2162.136
113
+ 2239,224000,-2154.616
114
+ 2259,226000,-2148.246
115
+ 2279,228000,-2146.5235
116
+ 2299,230000,-2143.4965
117
+ 2319,232000,-2133.6155
118
+ 2339,234000,-2144.0695
119
+ 2359,236000,-2139.9
120
+ 2379,238000,-2137.7725
121
+ 2399,240000,-2151.268
122
+ 2419,242000,-2175.9375
123
+ 2439,244000,-2157.845
124
+ 2459,246000,-2153.9685
125
+ 2479,248000,-2175.319
126
+ 2499,250000,-2140.522
127
+ 2519,252000,-2154.707
128
+ 2539,254000,-2133.141
129
+ 2559,256000,-2122.6885
130
+ 2579,258000,-2136.63
131
+ 2599,260000,-2141.906
132
+ 2619,262000,-2136.6815
133
+ 2639,264000,-2109.2965
134
+ 2659,266000,-2122.899
135
+ 2679,268000,-2149.3255
136
+ 2699,270000,-2118.7445
137
+ 2719,272000,-2131.16
138
+ 2739,274000,-2119.327
139
+ 2759,276000,-2127.0115
140
+ 2779,278000,-2165.839
141
+ 2799,280000,-2163.743
142
+ 2819,282000,-2135.451
143
+ 2839,284000,-2144.345
144
+ 2859,286000,-2129.195
145
+ 2879,288000,-2143.4665
146
+ 2899,290000,-2130.941
147
+ 2919,292000,-2147.0725
148
+ 2939,294000,-2125.8355
149
+ 2959,296000,-2126.066
150
+ 2979,298000,-2146.799
151
+ 2999,300000,-2147.949
152
+ 3019,302000,-2100.444
153
+ 3039,304000,-2116.093
154
+ 3059,306000,-2122.09
155
+ 3079,308000,-2136.446
156
+ 3099,310000,-2106.498
157
+ 3119,312000,-2101.977
158
+ 3139,314000,-2102.1295
159
+ 3159,316000,-2092.621
160
+ 3179,318000,-2112.0175
161
+ 3199,320000,-2102.532
162
+ 3219,322000,-2100.1165
163
+ 3239,324000,-2108.5405
164
+ 3259,326000,-2117.316
165
+ 3279,328000,-2113.263
166
+ 3299,330000,-2095.814
167
+ 3319,332000,-2097.3245
168
+ 3339,334000,-2091.1245
169
+ 3359,336000,-2112.114
170
+ 3379,338000,-2107.756
171
+ 3399,340000,-2105.6305
172
+ 3419,342000,-2106.4435
173
+ 3439,344000,-2093.697
174
+ 3459,346000,-2101.936
175
+ 3479,348000,-2087.019
176
+ 3499,350000,-2094.8375
177
+ 3519,352000,-2091.358
178
+ 3539,354000,-2114.3615
179
+ 3559,356000,-2131.719
180
+ 3579,358000,-2116.838
181
+ 3599,360000,-2128.923
182
+ 3619,362000,-2104.5615
183
+ 3639,364000,-2109.625
184
+ 3659,366000,-2106.293
185
+ 3679,368000,-2124.0315
186
+ 3699,370000,-2116.146
187
+ 3719,372000,-2121.4415
188
+ 3739,374000,-2084.2695
189
+ 3759,376000,-2104.179
190
+ 3779,378000,-2111.046
191
+ 3799,380000,-2108.5605
192
+ 3819,382000,-2092.0465
193
+ 3839,384000,-2107.194
194
+ 3859,386000,-2095.3865
195
+ 3879,388000,-2082.453
196
+ 3899,390000,-2119.981
197
+ 3919,392000,-2104.4325
198
+ 3939,394000,-2100.127
199
+ 3959,396000,-2103.365
200
+ 3979,398000,-2108.799
201
+ 3999,400000,-2087.373
202
+ 4019,402000,-2089.962
203
+ 4039,404000,-2113.7635
204
+ 4059,406000,-2127.984
205
+ 4079,408000,-2087.538
206
+ 4099,410000,-2071.391
207
+ 4119,412000,-2103.1025
208
+ 4139,414000,-2092.2085
209
+ 4159,416000,-2088.2855
210
+ 4179,418000,-2094.342
211
+ 4199,420000,-2089.6075
212
+ 4219,422000,-2088.1145
213
+ 4239,424000,-2101.0985
214
+ 4259,426000,-2107.1365
215
+ 4279,428000,-2093.734
216
+ 4299,430000,-2090.7895
217
+ 4319,432000,-2079.56
218
+ 4339,434000,-2083.1335
219
+ 4359,436000,-2087.81
220
+ 4379,438000,-2096.6135
221
+ 4399,440000,-2089.9545
222
+ 4419,442000,-2074.709
223
+ 4439,444000,-2080.6065
224
+ 4459,446000,-2078.952
225
+ 4479,448000,-2059.433
226
+ 4499,450000,-2049.38
227
+ 4519,452000,-2065.312
228
+ 4539,454000,-2057.3825
229
+ 4559,456000,-2085.955
230
+ 4579,458000,-2092.071
231
+ 4599,460000,-2073.4495
232
+ 4619,462000,-2082.937
233
+ 4639,464000,-2077.3055
234
+ 4659,466000,-2078.2065
235
+ 4679,468000,-2062.653
236
+ 4699,470000,-2054.374
237
+ 4719,472000,-2074.2705
238
+ 4739,474000,-2066.9925
239
+ 4759,476000,-2049.2215
240
+ 4779,478000,-2071.545
241
+ 4799,480000,-2057.4975
242
+ 4819,482000,-2045.2775
243
+ 4839,484000,-2059.3195
244
+ 4859,486000,-2054.074
245
+ 4879,488000,-2069.4245
246
+ 4899,490000,-2069.116
247
+ 4919,492000,-2038.679
248
+ 4939,494000,-2068.0445
249
+ 4959,496000,-2039.354
250
+ 4979,498000,-2032.349
251
+ 4999,500000,-2026.1585
code/Lot-sizing/logs/15items_5machines_i100_PPO/PPO_15items_5machines_i100_log_2.csv ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ episode,timestep,reward
2
+ 19,2000,-2380.8889
3
+ 39,4000,-2373.828
4
+ 59,6000,-2335.17
5
+ 79,8000,-2358.4425
6
+ 99,10000,-2358.743
7
+ 119,12000,-2359.581
8
+ 139,14000,-2374.202
9
+ 159,16000,-2350.303
10
+ 179,18000,-2351.3965
11
+ 199,20000,-2369.7695
12
+ 219,22000,-2333.9435
13
+ 239,24000,-2349.8265
14
+ 259,26000,-2369.537
15
+ 279,28000,-2370.489
16
+ 299,30000,-2325.0625
17
+ 319,32000,-2364.1575
18
+ 339,34000,-2315.2425
19
+ 359,36000,-2347.791
20
+ 379,38000,-2333.213
21
+ 399,40000,-2345.041
22
+ 419,42000,-2324.087
23
+ 439,44000,-2327.946
24
+ 459,46000,-2290.026
25
+ 479,48000,-2297.604
26
+ 499,50000,-2318.3805
27
+ 519,52000,-2304.4445
28
+ 539,54000,-2278.6925
29
+ 559,56000,-2279.398
30
+ 579,58000,-2275.2595
31
+ 599,60000,-2292.0225
32
+ 619,62000,-2292.0715
33
+ 639,64000,-2304.709
34
+ 659,66000,-2277.029
35
+ 679,68000,-2258.1065
36
+ 699,70000,-2263.7255
37
+ 719,72000,-2265.5755
38
+ 739,74000,-2246.222
39
+ 759,76000,-2234.866
40
+ 779,78000,-2254.3855
41
+ 799,80000,-2239.978
42
+ 819,82000,-2205.3095
43
+ 839,84000,-2230.3575
44
+ 859,86000,-2225.6055
45
+ 879,88000,-2229.913
46
+ 899,90000,-2244.0605
47
+ 919,92000,-2229.112
48
+ 939,94000,-2233.0065
49
+ 959,96000,-2221.533
50
+ 979,98000,-2199.2975
51
+ 999,100000,-2218.0545
52
+ 1019,102000,-2219.6245
53
+ 1039,104000,-2212.3515
54
+ 1059,106000,-2228.366
55
+ 1079,108000,-2213.773
56
+ 1099,110000,-2214.438
57
+ 1119,112000,-2218.1
58
+ 1139,114000,-2195.482
59
+ 1159,116000,-2225.5825
60
+ 1179,118000,-2213.362
61
+ 1199,120000,-2211.5005
62
+ 1219,122000,-2208.066
63
+ 1239,124000,-2191.501
64
+ 1259,126000,-2235.9985
65
+ 1279,128000,-2211.5905
66
+ 1299,130000,-2202.716
67
+ 1319,132000,-2212.6015
68
+ 1339,134000,-2216.1535
69
+ 1359,136000,-2215.3695
70
+ 1379,138000,-2210.9315
71
+ 1399,140000,-2219.104
72
+ 1419,142000,-2223.478
73
+ 1439,144000,-2222.4635
74
+ 1459,146000,-2221.686
75
+ 1479,148000,-2211.6465
76
+ 1499,150000,-2208.096
77
+ 1519,152000,-2209.976
78
+ 1539,154000,-2199.5775
79
+ 1559,156000,-2213.538
80
+ 1579,158000,-2196.544
81
+ 1599,160000,-2191.9365
82
+ 1619,162000,-2202.8655
83
+ 1639,164000,-2195.785
84
+ 1659,166000,-2197.826
85
+ 1679,168000,-2198.4345
86
+ 1699,170000,-2192.2155
87
+ 1719,172000,-2183.3555
88
+ 1739,174000,-2215.12
89
+ 1759,176000,-2183.842
90
+ 1779,178000,-2185.168
91
+ 1799,180000,-2173.7945
92
+ 1819,182000,-2172.845
93
+ 1839,184000,-2176.132
94
+ 1859,186000,-2188.4535
95
+ 1879,188000,-2156.692
96
+ 1899,190000,-2169.1765
97
+ 1919,192000,-2150.046
98
+ 1939,194000,-2169.566
99
+ 1959,196000,-2159.7815
100
+ 1979,198000,-2167.8865
101
+ 1999,200000,-2188.2145
102
+ 2019,202000,-2145.591
103
+ 2039,204000,-2156.559
104
+ 2059,206000,-2164.4925
105
+ 2079,208000,-2162.0795
106
+ 2099,210000,-2157.1775
107
+ 2119,212000,-2145.232
108
+ 2139,214000,-2147.627
109
+ 2159,216000,-2154.1195
110
+ 2179,218000,-2155.8565
111
+ 2199,220000,-2134.7075
112
+ 2219,222000,-2127.8285
113
+ 2239,224000,-2168.0365
114
+ 2259,226000,-2142.8975
115
+ 2279,228000,-2140.589
116
+ 2299,230000,-2149.4825
117
+ 2319,232000,-2140.3645
118
+ 2339,234000,-2143.029
119
+ 2359,236000,-2126.4945
120
+ 2379,238000,-2135.033
121
+ 2399,240000,-2124.0925
122
+ 2419,242000,-2118.4725
123
+ 2439,244000,-2134.5425
124
+ 2459,246000,-2102.6725
125
+ 2479,248000,-2117.8615
126
+ 2499,250000,-2123.2265
127
+ 2519,252000,-2099.3605
128
+ 2539,254000,-2113.7965
129
+ 2559,256000,-2126.2285
130
+ 2579,258000,-2124.4015
131
+ 2599,260000,-2099.2445
132
+ 2619,262000,-2131.927
133
+ 2639,264000,-2128.738
134
+ 2659,266000,-2120.8685
135
+ 2679,268000,-2121.595
136
+ 2699,270000,-2140.079
137
+ 2719,272000,-2115.872
138
+ 2739,274000,-2105.3305
139
+ 2759,276000,-2133.0435
140
+ 2779,278000,-2117.732
141
+ 2799,280000,-2143.8175
142
+ 2819,282000,-2076.888
143
+ 2839,284000,-2106.048
144
+ 2859,286000,-2105.761
145
+ 2879,288000,-2102.9825
146
+ 2899,290000,-2118.6665
147
+ 2919,292000,-2122.7975
148
+ 2939,294000,-2121.764
149
+ 2959,296000,-2128.1515
150
+ 2979,298000,-2113.3235
151
+ 2999,300000,-2126.751
152
+ 3019,302000,-2111.186
153
+ 3039,304000,-2112.5405
154
+ 3059,306000,-2110.0095
155
+ 3079,308000,-2118.0815
156
+ 3099,310000,-2100.6005
157
+ 3119,312000,-2106.429
158
+ 3139,314000,-2092.304
159
+ 3159,316000,-2105.092
160
+ 3179,318000,-2085.4645
161
+ 3199,320000,-2107.1535
162
+ 3219,322000,-2107.04
163
+ 3239,324000,-2092.5935
164
+ 3259,326000,-2096.9715
165
+ 3279,328000,-2103.3905
166
+ 3299,330000,-2105.1935
167
+ 3319,332000,-2108.05
168
+ 3339,334000,-2100.4505
169
+ 3359,336000,-2087.976
170
+ 3379,338000,-2093.996
171
+ 3399,340000,-2103.395
172
+ 3419,342000,-2075.1395
173
+ 3439,344000,-2100.193
174
+ 3459,346000,-2097.6485
175
+ 3479,348000,-2103.601
176
+ 3499,350000,-2109.8605
177
+ 3519,352000,-2087.653
178
+ 3539,354000,-2126.2165
179
+ 3559,356000,-2117.9495
180
+ 3579,358000,-2112.5835
181
+ 3599,360000,-2117.1415
182
+ 3619,362000,-2108.5045
183
+ 3639,364000,-2103.0745
184
+ 3659,366000,-2111.068
185
+ 3679,368000,-2126.239
186
+ 3699,370000,-2104.904
187
+ 3719,372000,-2085.1685
188
+ 3739,374000,-2093.3945
189
+ 3759,376000,-2101.3165
190
+ 3779,378000,-2103.0655
191
+ 3799,380000,-2101.006
192
+ 3819,382000,-2103.158
193
+ 3839,384000,-2102.5225
194
+ 3859,386000,-2107.4555
195
+ 3879,388000,-2095.627
196
+ 3899,390000,-2114.5905
197
+ 3919,392000,-2112.6065
198
+ 3939,394000,-2103.3065
199
+ 3959,396000,-2111.277
200
+ 3979,398000,-2106.088
201
+ 3999,400000,-2106.7605
202
+ 4019,402000,-2084.668
203
+ 4039,404000,-2104.5425
204
+ 4059,406000,-2105.0865
205
+ 4079,408000,-2086.7805
206
+ 4099,410000,-2116.368
207
+ 4119,412000,-2100.076
208
+ 4139,414000,-2115.2785
209
+ 4159,416000,-2111.847
210
+ 4179,418000,-2075.2525
211
+ 4199,420000,-2089.003
212
+ 4219,422000,-2101.154
213
+ 4239,424000,-2099.7625
214
+ 4259,426000,-2118.5795
215
+ 4279,428000,-2108.951
216
+ 4299,430000,-2099.8935
code/Lot-sizing/logs/best_A2C_15items_5machines_i100_0/best_model.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b750cb2d2aac2c7897b0e0f6d1495b4e6a4cd6a5dc5938b3359734024810bc
3
+ size 1022438
code/Lot-sizing/logs/evaluations.npz ADDED
Binary file (40.2 kB). View file
 
code/Lot-sizing/models/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (419 Bytes). View file
 
code/Lot-sizing/models/__pycache__/multistageOptimization.cpython-38.pyc ADDED
Binary file (6.56 kB). View file
 
code/Lot-sizing/models/__pycache__/optimizationProblemInstance.cpython-38.pyc ADDED
Binary file (877 Bytes). View file
 
code/Lot-sizing/models/__pycache__/perfectInfoOptimization.cpython-38.pyc ADDED
Binary file (5.42 kB). View file
 
code/Lot-sizing/results/PDPPO_15items_5machines_i100_actions_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e173a4ff1f1af7ec376b991a97d5d971a9358e0ee59e78d1dac0c5706dce0a3
3
+ size 1200128
code/Lot-sizing/results/PDPPO_15items_5machines_i100_costs_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f6e5d4c9a9837667f1df2a98f3556b3b67f700011695efcd5503099bfd39b7
3
+ size 136
code/Lot-sizing/results/PDPPO_15items_5machines_i100_demands_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8427469f56af868c1a5fed54b3b235af670398cb091217c84c3d710f8c02ea5
3
+ size 1800128
code/Lot-sizing/results/PDPPO_15items_5machines_i100_holding_costs_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433d5d5321c151cfdc6fb420c90de1c55c6ba45888445f942fe3880c56b7d947
3
+ size 240128
code/Lot-sizing/results/PDPPO_15items_5machines_i100_lost_sales_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85741d269fea615ac96e4de74211ce0aa144e1d69d471fcce17cc133c6c0a88b
3
+ size 240128
code/Lot-sizing/results/PDPPO_15items_5machines_i100_observations_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49242c880236d956378b24445e0d0c653d8af1f474e5fd61113e125ed244dfba
3
+ size 744805
code/Lot-sizing/results/PDPPO_15items_5machines_i100_setup_costs_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0680577feff47cc4071a0ac12f0ba5b4f6c3a1240e33a5cfb799916317c6f9
3
+ size 240128
code/Lot-sizing/results/PPO_15items_5machines_i100_actions_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de66cf942bdc3f335699c6c75de45a952811465f0a0d19dfdb4121c2baa1314f
3
+ size 400128
code/Lot-sizing/results/PPO_15items_5machines_i100_costs_test.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6056e23a628ef0b32f9b252ff401ac9aadb26324986af77c7e80e2d0bb201cc
3
+ size 136