leokana committed on
Commit
b1e0fd3
·
1 Parent(s): ff58990

update the frozenlake environment

Browse files
.vscode/launch.json CHANGED
@@ -11,7 +11,7 @@
11
  "request": "launch",
12
  "program": "${file}",
13
  "console": "integratedTerminal",
14
- "cwd": "C:\\Users\\leona\\Meu Drive\\USP\\Doutorado\\PoliTO\\pdppo\\code\\Lake application\\"
15
 
16
  }
17
  ]
 
11
  "request": "launch",
12
  "program": "${file}",
13
  "console": "integratedTerminal",
14
+ "cwd": "C:\\Users\\leona\\Meu Drive\\USP\\Doutorado\\PoliTO\\pdppo\\code\\Lot-sizing\\"
15
 
16
  }
17
  ]
code/Lake application/envs/frozen_lake.py CHANGED
@@ -130,6 +130,7 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
130
  break
131
  if goal_position:
132
  break
 
133
 
134
  def proximity_reward(current_row, current_col):
135
  goal_row, goal_col = goal_position
@@ -140,92 +141,19 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
140
  newrow, newcol = inc(row, col, a)
141
  newstate = to_s(newrow, newcol)
142
  newletter = desc[newrow, newcol]
143
- terminated = bytes(newletter) in b"GH"
144
  reward = float(newletter == b"G")
145
  if not terminated:
146
- reward = proximity_reward(newrow, newcol)
147
  return newstate, reward, terminated
148
 
149
- # def update_probability_matrix(row, col, action):
150
- # newrow, newcol = inc(row, col, action)
151
- # newstate = to_s(newrow, newcol)
152
- # newletter = desc[newrow, newcol]
153
- # done = bytes(newletter) in b"GH"
154
- # reward = float(newletter == b"G")
155
- # return newstate, reward, done
156
-
157
- # for row in range(nrow):
158
- # for col in range(ncol):
159
- # s = to_s(row, col)
160
- # for a in range(4):
161
- # li = P[s][a]
162
- # letter = desc[row, col]
163
- # if letter in b"GH":
164
- # li.append((1.0, s, 0, True))
165
- # else:
166
- # if is_slippery:
167
- # for b in [(a - 1) % 4, a, (a + 1) % 4]:
168
- # li.append(
169
- # (1.0 / 3.0, *update_probability_matrix(row, col, b))
170
- # )
171
- # else:
172
- # li.append((1.0, *update_probability_matrix(row, col, a)))
173
-
174
  np.random.seed(42) # Set a seed for reproducibility
175
  tile_probabilities = np.random.dirichlet(np.ones(4), size=(nrow, ncol))
176
 
177
  def to_row_col(s):
178
  return divmod(s, ncol)
179
 
180
- # for row in range(nrow):
181
- # for col in range(ncol):
182
- # s = to_s(row, col)
183
- # for a in range(4):
184
- # li = P[s][a]
185
- # letter = desc[row, col]
186
- # if letter in b"GH":
187
- # li.append((1.0, s, 0, True))
188
- # else:
189
- # if is_slippery:
190
- # # First, the agent moves in the desired direction
191
- # newstate, reward, terminated = update_probability_matrix(row, col, a)
192
- # if terminated:
193
- # li.append((1.0, newstate, reward, terminated))
194
- # else:
195
- # # After the first move, slippery condition causes a random additional movement
196
- # row2, col2 = to_row_col(newstate)
197
- # for b in range(4):
198
- # li.append(
199
- # (1.0 / 4.0, *update_probability_matrix(row2, col2, b))
200
- # )
201
- # else:
202
- # li.append((1.0, *update_probability_matrix(row, col, a)))
203
-
204
- # for row in range(nrow):
205
- # for col in range(ncol):
206
- # s = to_s(row, col)
207
- # for a in range(4):
208
- # li = P[s][a]
209
- # letter = desc[row, col]
210
- # if letter in b"GH":
211
- # li.append((1.0, s, 0, True))
212
- # else:
213
- # if is_slippery:
214
- # # First, the agent moves in the desired direction
215
- # newstate, reward, terminated = update_probability_matrix(row, col, a)
216
- # if terminated:
217
- # li.append((1.0, newstate, reward, terminated))
218
- # else:
219
- # # After the first move, slippery condition causes an additional movement
220
- # row2, col2 = to_row_col(newstate)
221
- # for b, prob in enumerate(tile_probabilities[row2, col2]):
222
- # li.append(
223
- # (prob, *update_probability_matrix(row2, col2, b))
224
- # )
225
- # else:
226
- # li.append((1.0, *update_probability_matrix(row, col, a)))
227
-
228
- base_slip_prob=0.3
229
 
230
  for row in range(nrow):
231
  for col in range(ncol):
@@ -245,40 +173,48 @@ class FrozenLakeEnv(discrete.DiscreteEnv):
245
  # After the first move, slippery condition causes an additional movement
246
  row2, col2 = to_row_col(newstate)
247
  for b, prob in enumerate(tile_probabilities[row2, col2]):
 
248
  li.append(
249
- (base_slip_prob * prob, *update_probability_matrix(row2, col2, b))
250
  )
251
  # Add the remaining probability for staying at the newstate
252
  li.append((1.0 - base_slip_prob, newstate, reward, False))
253
  else:
254
  li.append((1.0, *update_probability_matrix(row, col, a)))
 
 
255
  super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
256
 
257
  def get_post_decision_state(self, s, a):
258
- def inc(row, col, a):
259
- if a == LEFT:
260
- col = max(col - 1, 0)
261
- elif a == DOWN:
262
- row = min(row + 1, self.nrow - 1)
263
- elif a == RIGHT:
264
- col = min(col + 1, self.ncol - 1)
265
- elif a == UP:
266
- row = max(row - 1, 0)
267
- return (row, col)
268
-
269
- def to_s(row, col):
270
- return row * self.ncol + col
271
-
272
- def to_row_col(s):
273
- row = s // self.ncol
274
- col = s % self.ncol
275
- return row, col
276
-
277
- row, col = to_row_col(s)
278
- next_row, next_col = inc(row, col, a)
279
- next_s = to_s(next_row, next_col)
280
- post_reward = self.proximity_reward(next_row, next_col)
281
- return next_s, post_reward
 
 
 
 
 
282
 
283
  def render(self, mode="human"):
284
  outfile = StringIO() if mode == "ansi" else sys.stdout
 
130
  break
131
  if goal_position:
132
  break
133
+ self.goal_position = goal_position
134
 
135
  def proximity_reward(current_row, current_col):
136
  goal_row, goal_col = goal_position
 
141
  newrow, newcol = inc(row, col, a)
142
  newstate = to_s(newrow, newcol)
143
  newletter = desc[newrow, newcol]
144
+ terminated = bytes(newletter) in b"G"
145
  reward = float(newletter == b"G")
146
  if not terminated:
147
+ reward = proximity_reward(newrow, newcol) + float(newletter == b"H")* -(1/(nrow+ncol))
148
  return newstate, reward, terminated
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  np.random.seed(42) # Set a seed for reproducibility
151
  tile_probabilities = np.random.dirichlet(np.ones(4), size=(nrow, ncol))
152
 
153
  def to_row_col(s):
154
  return divmod(s, ncol)
155
 
156
+ base_slip_prob= 0.50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  for row in range(nrow):
159
  for col in range(ncol):
 
173
  # After the first move, slippery condition causes an additional movement
174
  row2, col2 = to_row_col(newstate)
175
  for b, prob in enumerate(tile_probabilities[row2, col2]):
176
+ newstate_post, reward_pos, terminated_post = update_probability_matrix(row2, col2, b)
177
  li.append(
178
+ (base_slip_prob * prob, newstate_post, reward_pos + reward, terminated_post)
179
  )
180
  # Add the remaining probability for staying at the newstate
181
  li.append((1.0 - base_slip_prob, newstate, reward, False))
182
  else:
183
  li.append((1.0, *update_probability_matrix(row, col, a)))
184
+
185
+ self.P = P
186
  super(FrozenLakeEnv, self).__init__(nS, nA, P, isd)
187
 
188
  def get_post_decision_state(self, s, a):
189
+ def proximity_reward(current_row, current_col):
190
+ goal_row, goal_col = self.goal_position
191
+ distance = abs(goal_row - current_row) + abs(goal_col - current_col)
192
+ return 1.0 / (1.0 + distance)
193
+
194
+ def inc(row, col, a):
195
+ if a == LEFT:
196
+ col = max(col - 1, 0)
197
+ elif a == DOWN:
198
+ row = min(row + 1, self.nrow - 1)
199
+ elif a == RIGHT:
200
+ col = min(col + 1, self.ncol - 1)
201
+ elif a == UP:
202
+ row = max(row - 1, 0)
203
+ return (row, col)
204
+
205
+ def to_s(row, col):
206
+ return row * self.ncol + col
207
+
208
+ def to_row_col(s):
209
+ row = s // self.ncol
210
+ col = s % self.ncol
211
+ return row, col
212
+
213
+ row, col = to_row_col(s)
214
+ next_row, next_col = inc(row, col, a)
215
+ next_s = to_s(next_row, next_col)
216
+ next_r = proximity_reward(next_row, next_col)
217
+ return next_s, next_r
218
 
219
  def render(self, mode="human"):
220
  outfile = StringIO() if mode == "ansi" else sys.stdout