hirann committed on
Commit
400199f
·
verified ·
1 Parent(s): c8c0f98

Enhance: Multi-step iterative tasks

Browse files
Files changed (1) hide show
  1. env/core.py +81 -27
env/core.py CHANGED
@@ -40,35 +40,42 @@ TASKS = {
40
  task_id="easy_right_sizing",
41
  name="Right-Sizing",
42
  difficulty="easy",
43
- description="Reduce an overpriced server without breaking the SLA",
44
  initial_resources=[
45
- {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 2.0, "mem_usage": 2.0, "monthly_cost": 140.0}
 
 
46
  ],
47
- sla={"max_latency_ms": 200.0, "max_budget": 30.0, "min_uptime_pct": 99.0},
48
- load=2.0
49
  ),
50
  "medium": TaskConfig(
51
  task_id="medium_latency_fix",
52
  name="Latency Fix",
53
  difficulty="medium",
54
- description="Resolve performance bottleneck while staying under budget",
55
  initial_resources=[
56
- {"id": "srv-1", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 90.0, "monthly_cost": 3.6}
 
 
57
  ],
58
- sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
59
- load=12.0
60
  ),
61
  "hard": TaskConfig(
62
  task_id="hard_balance",
63
  name="Balance Optimization",
64
  difficulty="hard",
65
- description="Optimize a mixed cluster under tight budget constraints",
66
  initial_resources=[
67
- {"id": "srv-1", "type": "m5.large", "cpu_usage": 40.0, "mem_usage": 30.0, "monthly_cost": 70.0},
68
- {"id": "srv-2", "type": "t3.nano", "cpu_usage": 90.0, "mem_usage": 80.0, "monthly_cost": 3.6}
 
 
 
69
  ],
70
- sla={"max_latency_ms": 150.0, "max_budget": 35.0, "min_uptime_pct": 99.9},
71
- load=25.0
72
  ),
73
  }
74
 
@@ -82,6 +89,9 @@ class EpisodeState:
82
  initial_latency: float
83
  steps: int = 0
84
  crashed: bool = False
 
 
 
85
  episode_id: str = field(default_factory=lambda: str(uuid4()))
86
 
87
 
@@ -126,6 +136,9 @@ class CloudOpsEnvironment:
126
  initial_latency=initial_latency,
127
  steps=0,
128
  crashed=False,
 
 
 
129
  episode_id=episode_id or str(uuid4()),
130
  )
131
 
@@ -138,7 +151,12 @@ class CloudOpsEnvironment:
138
  self._ep.steps += 1
139
  msg = action.message.lower()
140
 
 
 
 
141
  message = self._parse_and_execute(msg)
 
 
142
  latency, error_rate, utilization = self._calculate_metrics(
143
  self._ep.current_load,
144
  self._ep.resources
@@ -150,15 +168,22 @@ class CloudOpsEnvironment:
150
  reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
151
  return obs, reward, True, {"reason": "crash"}
152
 
153
- reward = self._calculate_reward(latency, error_rate)
 
 
 
 
 
 
 
154
 
155
  done = (
156
- reward.value >= 0.98 or
157
  self._ep.steps >= self._max_steps
158
  )
159
 
160
  obs = self._build_observation(message)
161
- return obs, reward, done, {}
162
 
163
  def _parse_and_execute(self, msg: str) -> str:
164
  match = re.search(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)", msg)
@@ -169,9 +194,12 @@ class CloudOpsEnvironment:
169
 
170
  for r in self._ep.resources:
171
  if r.id == res_id:
 
172
  r.type = new_type
173
  r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
174
- return f"Changed {res_id} to {new_type}"
 
 
175
 
176
  return f"Error: Resource '{res_id}' not found"
177
 
@@ -189,26 +217,52 @@ class CloudOpsEnvironment:
189
 
190
  return latency, error_rate, utilization
191
 
192
- def _calculate_reward(self, latency: float, error_rate: float) -> RewModel:
193
- total_cost = sum(r.monthly_cost for r in self._ep.resources)
194
- budget = self._ep.task_config.sla["max_latency_ms"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- cost_ratio = total_cost / budget
197
- cost_reward = 0.5 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
198
 
199
- lat_ratio = latency / budget
200
- perf_reward = 0.5 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
201
 
202
- total_reward = cost_reward + perf_reward
 
203
 
204
  initial_latency = self._ep.initial_latency
205
  initial_cost = self._ep.initial_cost
206
- cost_change = ((total_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
207
  lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
208
 
209
  return RewModel(
210
  value=min(1.0, max(0.0, total_reward)),
211
- reason=f"Cost: ${total_cost:.1f}/mo, Latency: {latency:.1f}ms",
212
  cost_change_pct=cost_change,
213
  latency_change_pct=lat_change,
214
  )
 
40
  task_id="easy_right_sizing",
41
  name="Right-Sizing",
42
  difficulty="easy",
43
+ description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
44
  initial_resources=[
45
+ {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 15.0, "mem_usage": 10.0, "monthly_cost": 140.0},
46
+ {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 12.0, "mem_usage": 8.0, "monthly_cost": 140.0},
47
+ {"id": "srv-3", "type": "m5.xlarge", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 140.0},
48
  ],
49
+ sla={"max_latency_ms": 100.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
50
+ load=45.0
51
  ),
52
  "medium": TaskConfig(
53
  task_id="medium_latency_fix",
54
  name="Latency Fix",
55
  difficulty="medium",
56
+ description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
57
  initial_resources=[
58
+ {"id": "srv-1", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 85.0, "monthly_cost": 3.6},
59
+ {"id": "srv-2", "type": "t3.nano", "cpu_usage": 88.0, "mem_usage": 80.0, "monthly_cost": 3.6},
60
+ {"id": "srv-3", "type": "t3.nano", "cpu_usage": 92.0, "mem_usage": 83.0, "monthly_cost": 3.6},
61
  ],
62
+ sla={"max_latency_ms": 80.0, "max_budget": 100.0, "min_uptime_pct": 99.9},
63
+ load=30.0
64
  ),
65
  "hard": TaskConfig(
66
  task_id="hard_balance",
67
  name="Balance Optimization",
68
  difficulty="hard",
69
+ description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
70
  initial_resources=[
71
+ {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 8.0, "mem_usage": 6.0, "monthly_cost": 140.0},
72
+ {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 10.0, "mem_usage": 8.0, "monthly_cost": 140.0},
73
+ {"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
74
+ {"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
75
+ {"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
76
  ],
77
+ sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
78
+ load=50.0
79
  ),
80
  }
81
 
 
89
  initial_latency: float
90
  steps: int = 0
91
  crashed: bool = False
92
+ changes_made: int = 0
93
+ last_action_success: bool = False
94
+ exploration_history: list = field(default_factory=list)
95
  episode_id: str = field(default_factory=lambda: str(uuid4()))
96
 
97
 
 
136
  initial_latency=initial_latency,
137
  steps=0,
138
  crashed=False,
139
+ changes_made=0,
140
+ last_action_success=False,
141
+ exploration_history=[],
142
  episode_id=episode_id or str(uuid4()),
143
  )
144
 
 
151
  self._ep.steps += 1
152
  msg = action.message.lower()
153
 
154
+ prev_cost = sum(r.monthly_cost for r in self._ep.resources)
155
+ prev_latency, _, _ = self._calculate_metrics(self._ep.current_load, self._ep.resources)
156
+
157
  message = self._parse_and_execute(msg)
158
+
159
+ new_cost = sum(r.monthly_cost for r in self._ep.resources)
160
  latency, error_rate, utilization = self._calculate_metrics(
161
  self._ep.current_load,
162
  self._ep.resources
 
168
  reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
169
  return obs, reward, True, {"reason": "crash"}
170
 
171
+ self._ep.exploration_history.append({
172
+ "step": self._ep.steps,
173
+ "action": msg[:50],
174
+ "cost": new_cost,
175
+ "latency": latency,
176
+ })
177
+
178
+ reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
179
 
180
  done = (
181
+ reward.value >= 0.98 and self._ep.changes_made >= 3 or
182
  self._ep.steps >= self._max_steps
183
  )
184
 
185
  obs = self._build_observation(message)
186
+ return obs, reward, done, {"changes_made": self._ep.changes_made}
187
 
188
  def _parse_and_execute(self, msg: str) -> str:
189
  match = re.search(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)", msg)
 
194
 
195
  for r in self._ep.resources:
196
  if r.id == res_id:
197
+ old_type = r.type
198
  r.type = new_type
199
  r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
200
+ self._ep.changes_made += 1
201
+ self._ep.last_action_success = True
202
+ return f"Changed {res_id} from {old_type} to {new_type} (change #{self._ep.changes_made})"
203
 
204
  return f"Error: Resource '{res_id}' not found"
205
 
 
217
 
218
  return latency, error_rate, utilization
219
 
220
def _calculate_iterative_reward(
    self,
    latency: float,
    error_rate: float,
    new_cost: float,
    prev_cost: float,
    prev_latency: float,
) -> RewModel:
    """Score the current step of a multi-step optimization episode.

    The reward is built from these parts, in this order:

    1. SLA components: up to 0.25 for cost against ``sla["max_budget"]``
       and up to 0.25 for latency against ``sla["max_latency_ms"]``.
       Each is 0.25 at or under the limit and shrinks as
       ``1 / (1 + overshoot)`` above it.
    2. Change bonus: 0.08 per change counted in ``self._ep.changes_made``,
       capped at 0.4.
    3. Improvement bonus: 20% of the fractional step-over-step reduction
       in cost and in latency, each capped at 0.15. Regressions earn
       nothing.
    4. The sum is capped at 1.0, then scaled by ``1 - error_rate`` when
       ``error_rate`` exceeds 0.1.
    5. Completion bonus: +0.1 once at least 3 changes were made and both
       SLA components exceed 0.2. It is added after the error-rate
       scaling.

    Args:
        latency: Latency after the action was applied.
        error_rate: Error rate after the action was applied.
        new_cost: Total monthly cost after the action.
        prev_cost: Total monthly cost before the action.
        prev_latency: Latency before the action.

    Returns:
        A ``RewModel`` whose ``value`` is clamped to ``[0.0, 1.0]``, with
        cost and latency change percentages relative to the episode's
        initial cost and latency (0 when the initial value is not positive).
    """

    def _sla_component(observed: float, limit: float) -> float:
        # A zero limit would make the ratio undefined. Treat any positive
        # observed value as an unbounded overshoot (score 0.0) instead of
        # raising ZeroDivisionError.
        if limit:
            overshoot = max(0, observed / limit - 1)
        else:
            overshoot = float("inf") if observed > 0 else 0
        return 0.25 * (1.0 / (1.0 + overshoot))

    task = self._ep.task_config
    budget = task.sla["max_budget"]
    max_latency = task.sla["max_latency_ms"]

    # Fractional step-over-step deltas. The epsilon keeps the division
    # defined when the previous value is 0.
    cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
    latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)

    change_bonus = min(self._ep.changes_made * 0.08, 0.4)

    cost_reward = _sla_component(new_cost, budget)
    perf_reward = _sla_component(latency, max_latency)

    improvement_bonus = 0.0
    if cost_improvement > 0:
        improvement_bonus += min(cost_improvement * 0.2, 0.15)
    if latency_improvement > 0:
        improvement_bonus += min(latency_improvement * 0.2, 0.15)

    base_reward = cost_reward + perf_reward
    total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)

    # Only error rates above 10% are penalized.
    if error_rate > 0.1:
        total_reward *= (1.0 - error_rate)

    # Completion bonus; deliberately added after the error-rate scaling.
    if self._ep.changes_made >= 3 and cost_reward > 0.2 and perf_reward > 0.2:
        total_reward = min(1.0, total_reward + 0.1)

    initial_latency = self._ep.initial_latency
    initial_cost = self._ep.initial_cost
    cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
    lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0

    return RewModel(
        value=min(1.0, max(0.0, total_reward)),
        reason=f"Changes: {self._ep.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
        cost_change_pct=cost_change,
        latency_change_pct=lat_change,
    )