hirann committed on
Commit
ff4146a
·
verified ·
1 Parent(s): 925ffc2

Fix: Add missing _calculate_iterative_reward method

Browse files
Files changed (1) hide show
  1. env/core.py +53 -1
env/core.py CHANGED
@@ -177,7 +177,7 @@ class CloudOpsEnvironment:
177
  "latency": latency,
178
  })
179
 
180
- reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
181
 
182
  done = (
183
  self._ep.steps >= self._max_steps or
@@ -221,6 +221,58 @@ class CloudOpsEnvironment:
221
 
222
  return latency, error_rate, avg_utilization
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  def _build_observation(self, message: str) -> ObsModel:
225
  if self._ep is None:
226
  return self._error_obs()
 
177
  "latency": latency,
178
  })
179
 
180
+ reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency, utilization)
181
 
182
  done = (
183
  self._ep.steps >= self._max_steps or
 
221
 
222
  return latency, error_rate, avg_utilization
223
 
224
+ def _calculate_iterative_reward(
225
+ self,
226
+ latency: float,
227
+ error_rate: float,
228
+ new_cost: float,
229
+ prev_cost: float,
230
+ prev_latency: float,
231
+ utilization: float
232
+ ) -> RewModel:
233
+ task = self._ep.task_config
234
+ budget = task.sla["max_budget"]
235
+ max_latency = task.sla["max_latency_ms"]
236
+
237
+ cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
238
+ latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
239
+
240
+ change_bonus = min(self._ep.changes_made * 0.06, 0.3)
241
+
242
+ cost_ratio = new_cost / budget
243
+ cost_reward = 0.3 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
244
+
245
+ lat_ratio = latency / max_latency
246
+ perf_reward = 0.3 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
247
+
248
+ improvement_bonus = 0.0
249
+ if cost_improvement > 0:
250
+ improvement_bonus += min(cost_improvement * 0.15, 0.1)
251
+ if latency_improvement > 0:
252
+ improvement_bonus += min(latency_improvement * 0.15, 0.1)
253
+
254
+ base_reward = cost_reward + perf_reward
255
+ total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
256
+
257
+ if error_rate > 0.2:
258
+ total_reward *= (1.0 - error_rate)
259
+
260
+ exploration_bonus = min(self._ep.steps * 0.03, 0.15)
261
+ if self._ep.last_action_success:
262
+ total_reward = min(1.0, total_reward + exploration_bonus)
263
+
264
+ initial_latency = self._ep.initial_latency
265
+ initial_cost = self._ep.initial_cost
266
+ cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
267
+ lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
268
+
269
+ return RewModel(
270
+ value=min(1.0, max(0.0, total_reward)),
271
+ reason=f"Changes: {self._ep.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
272
+ cost_change_pct=cost_change,
273
+ latency_change_pct=lat_change,
274
+ )
275
+
276
  def _build_observation(self, message: str) -> ObsModel:
277
  if self._ep is None:
278
  return self._error_obs()