hirann committed on
Commit
925ffc2
·
verified ·
1 Parent(s): a40b65a

Fix: Easy task with overscaled instances

Browse files
Files changed (1) hide show
  1. env/core.py +15 -63
env/core.py CHANGED
@@ -42,11 +42,11 @@ TASKS = {
42
  difficulty="easy",
43
  description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
44
  initial_resources=[
45
- {"id": "srv-1", "type": "m5.large", "cpu_usage": 20.0, "mem_usage": 15.0, "monthly_cost": 70.0},
46
- {"id": "srv-2", "type": "m5.large", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 70.0},
47
- {"id": "srv-3", "type": "m5.large", "cpu_usage": 22.0, "mem_usage": 16.0, "monthly_cost": 70.0},
48
  ],
49
- sla={"max_latency_ms": 120.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
50
  load=30.0
51
  ),
52
  "medium": TaskConfig(
@@ -164,7 +164,7 @@ class CloudOpsEnvironment:
164
  self._ep.resources
165
  )
166
 
167
- if utilization > 1.3:
168
  self._ep.crashed = True
169
  obs = self._build_observation("SYSTEM CRASH: Resource exhaustion!")
170
  reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
@@ -212,79 +212,31 @@ class CloudOpsEnvironment:
212
 
213
  def _calculate_metrics(self, load: float, resources: list) -> Tuple[float, float, float]:
214
  total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in resources)
215
- utilization = load / (total_cap + 1e-6)
216
 
217
- latency = 50 * (1 + math.exp(utilization * 2 - 2))
218
- error_rate = 0.0 if utilization < 0.9 else (utilization - 0.9) * 2.0
219
 
220
- return latency, error_rate, utilization
221
-
222
- def _calculate_iterative_reward(
223
- self,
224
- latency: float,
225
- error_rate: float,
226
- new_cost: float,
227
- prev_cost: float,
228
- prev_latency: float
229
- ) -> RewModel:
230
- task = self._ep.task_config
231
- budget = task.sla["max_budget"]
232
- max_latency = task.sla["max_latency_ms"]
233
-
234
- cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
235
- latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
236
-
237
- change_bonus = min(self._ep.changes_made * 0.06, 0.3)
238
-
239
- cost_ratio = new_cost / budget
240
- cost_reward = 0.3 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
241
-
242
- lat_ratio = latency / max_latency
243
- perf_reward = 0.3 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
244
-
245
- improvement_bonus = 0.0
246
- if cost_improvement > 0:
247
- improvement_bonus += min(cost_improvement * 0.15, 0.1)
248
- if latency_improvement > 0:
249
- improvement_bonus += min(latency_improvement * 0.15, 0.1)
250
 
251
- base_reward = cost_reward + perf_reward
252
- total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
253
-
254
- if error_rate > 0.1:
255
- total_reward *= (1.0 - error_rate)
256
-
257
- exploration_bonus = min(self._ep.steps * 0.03, 0.15)
258
- if self._ep.last_action_success:
259
- total_reward = min(1.0, total_reward + exploration_bonus)
260
-
261
- initial_latency = self._ep.initial_latency
262
- initial_cost = self._ep.initial_cost
263
- cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
264
- lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
265
-
266
- return RewModel(
267
- value=min(1.0, max(0.0, total_reward)),
268
- reason=f"Changes: {self._ep.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
269
- cost_change_pct=cost_change,
270
- latency_change_pct=lat_change,
271
- )
272
 
273
  def _build_observation(self, message: str) -> ObsModel:
274
  if self._ep is None:
275
  return self._error_obs()
276
 
277
- latency, error_rate, _ = self._calculate_metrics(
278
  self._ep.current_load,
279
  self._ep.resources
280
  )
281
 
282
- total_capacity = sum(INSTANCE_DATA[r.type]["capacity"] for r in self._ep.resources)
283
 
284
  for r in self._ep.resources:
285
  cap = INSTANCE_DATA[r.type]["capacity"]
286
- r.cpu_usage = min(100.0, self._ep.current_load / total_capacity / cap * 100)
287
- r.mem_usage = min(100.0, r.cpu_usage * 0.9)
 
288
 
289
  metrics = Metrics(
290
  avg_latency_ms=latency,
 
42
  difficulty="easy",
43
  description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
44
  initial_resources=[
45
+ {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 10.0, "mem_usage": 8.0, "monthly_cost": 140.0},
46
+ {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 8.0, "mem_usage": 6.0, "monthly_cost": 140.0},
47
+ {"id": "srv-3", "type": "m5.xlarge", "cpu_usage": 12.0, "mem_usage": 9.0, "monthly_cost": 140.0},
48
  ],
49
+ sla={"max_latency_ms": 120.0, "max_budget": 100.0, "min_uptime_pct": 99.0},
50
  load=30.0
51
  ),
52
  "medium": TaskConfig(
 
164
  self._ep.resources
165
  )
166
 
167
+ if utilization > 1.5:
168
  self._ep.crashed = True
169
  obs = self._build_observation("SYSTEM CRASH: Resource exhaustion!")
170
  reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
 
212
 
213
  def _calculate_metrics(self, load: float, resources: list) -> Tuple[float, float, float]:
214
  total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in resources)
215
+ avg_utilization = load / total_cap if total_cap > 0 else 0
216
 
217
+ utilization = min(avg_utilization, 1.5)
 
218
 
219
+ latency = 30 + 70 * (avg_utilization ** 2)
220
+ error_rate = max(0, (avg_utilization - 0.85) * 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
+ return latency, error_rate, avg_utilization
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  def _build_observation(self, message: str) -> ObsModel:
225
  if self._ep is None:
226
  return self._error_obs()
227
 
228
+ latency, error_rate, utilization = self._calculate_metrics(
229
  self._ep.current_load,
230
  self._ep.resources
231
  )
232
 
233
+ total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in self._ep.resources)
234
 
235
  for r in self._ep.resources:
236
  cap = INSTANCE_DATA[r.type]["capacity"]
237
+ share = cap / total_cap if total_cap > 0 else 0
238
+ r.cpu_usage = min(100.0, self._ep.current_load * share / cap * 100)
239
+ r.mem_usage = min(100.0, r.cpu_usage * 0.85)
240
 
241
  metrics = Metrics(
242
  avg_latency_ms=latency,