Spaces:
Sleeping
Sleeping
Fix: Easy task with overscaled instances
Browse files- env/core.py +15 -63
env/core.py
CHANGED
|
@@ -42,11 +42,11 @@ TASKS = {
|
|
| 42 |
difficulty="easy",
|
| 43 |
description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
|
| 44 |
initial_resources=[
|
| 45 |
-
{"id": "srv-1", "type": "m5.
|
| 46 |
-
{"id": "srv-2", "type": "m5.
|
| 47 |
-
{"id": "srv-3", "type": "m5.
|
| 48 |
],
|
| 49 |
-
sla={"max_latency_ms": 120.0, "max_budget":
|
| 50 |
load=30.0
|
| 51 |
),
|
| 52 |
"medium": TaskConfig(
|
|
@@ -164,7 +164,7 @@ class CloudOpsEnvironment:
|
|
| 164 |
self._ep.resources
|
| 165 |
)
|
| 166 |
|
| 167 |
-
if utilization > 1.
|
| 168 |
self._ep.crashed = True
|
| 169 |
obs = self._build_observation("SYSTEM CRASH: Resource exhaustion!")
|
| 170 |
reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
|
|
@@ -212,79 +212,31 @@ class CloudOpsEnvironment:
|
|
| 212 |
|
| 213 |
def _calculate_metrics(self, load: float, resources: list) -> Tuple[float, float, float]:
|
| 214 |
total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in resources)
|
| 215 |
-
|
| 216 |
|
| 217 |
-
|
| 218 |
-
error_rate = 0.0 if utilization < 0.9 else (utilization - 0.9) * 2.0
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
def _calculate_iterative_reward(
|
| 223 |
-
self,
|
| 224 |
-
latency: float,
|
| 225 |
-
error_rate: float,
|
| 226 |
-
new_cost: float,
|
| 227 |
-
prev_cost: float,
|
| 228 |
-
prev_latency: float
|
| 229 |
-
) -> RewModel:
|
| 230 |
-
task = self._ep.task_config
|
| 231 |
-
budget = task.sla["max_budget"]
|
| 232 |
-
max_latency = task.sla["max_latency_ms"]
|
| 233 |
-
|
| 234 |
-
cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
|
| 235 |
-
latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
|
| 236 |
-
|
| 237 |
-
change_bonus = min(self._ep.changes_made * 0.06, 0.3)
|
| 238 |
-
|
| 239 |
-
cost_ratio = new_cost / budget
|
| 240 |
-
cost_reward = 0.3 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
|
| 241 |
-
|
| 242 |
-
lat_ratio = latency / max_latency
|
| 243 |
-
perf_reward = 0.3 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
|
| 244 |
-
|
| 245 |
-
improvement_bonus = 0.0
|
| 246 |
-
if cost_improvement > 0:
|
| 247 |
-
improvement_bonus += min(cost_improvement * 0.15, 0.1)
|
| 248 |
-
if latency_improvement > 0:
|
| 249 |
-
improvement_bonus += min(latency_improvement * 0.15, 0.1)
|
| 250 |
|
| 251 |
-
|
| 252 |
-
total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
|
| 253 |
-
|
| 254 |
-
if error_rate > 0.1:
|
| 255 |
-
total_reward *= (1.0 - error_rate)
|
| 256 |
-
|
| 257 |
-
exploration_bonus = min(self._ep.steps * 0.03, 0.15)
|
| 258 |
-
if self._ep.last_action_success:
|
| 259 |
-
total_reward = min(1.0, total_reward + exploration_bonus)
|
| 260 |
-
|
| 261 |
-
initial_latency = self._ep.initial_latency
|
| 262 |
-
initial_cost = self._ep.initial_cost
|
| 263 |
-
cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
|
| 264 |
-
lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
|
| 265 |
-
|
| 266 |
-
return RewModel(
|
| 267 |
-
value=min(1.0, max(0.0, total_reward)),
|
| 268 |
-
reason=f"Changes: {self._ep.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
|
| 269 |
-
cost_change_pct=cost_change,
|
| 270 |
-
latency_change_pct=lat_change,
|
| 271 |
-
)
|
| 272 |
|
| 273 |
def _build_observation(self, message: str) -> ObsModel:
|
| 274 |
if self._ep is None:
|
| 275 |
return self._error_obs()
|
| 276 |
|
| 277 |
-
latency, error_rate,
|
| 278 |
self._ep.current_load,
|
| 279 |
self._ep.resources
|
| 280 |
)
|
| 281 |
|
| 282 |
-
|
| 283 |
|
| 284 |
for r in self._ep.resources:
|
| 285 |
cap = INSTANCE_DATA[r.type]["capacity"]
|
| 286 |
-
|
| 287 |
-
r.
|
|
|
|
| 288 |
|
| 289 |
metrics = Metrics(
|
| 290 |
avg_latency_ms=latency,
|
|
|
|
| 42 |
difficulty="easy",
|
| 43 |
description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
|
| 44 |
initial_resources=[
|
| 45 |
+
{"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 10.0, "mem_usage": 8.0, "monthly_cost": 140.0},
|
| 46 |
+
{"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 8.0, "mem_usage": 6.0, "monthly_cost": 140.0},
|
| 47 |
+
{"id": "srv-3", "type": "m5.xlarge", "cpu_usage": 12.0, "mem_usage": 9.0, "monthly_cost": 140.0},
|
| 48 |
],
|
| 49 |
+
sla={"max_latency_ms": 120.0, "max_budget": 100.0, "min_uptime_pct": 99.0},
|
| 50 |
load=30.0
|
| 51 |
),
|
| 52 |
"medium": TaskConfig(
|
|
|
|
| 164 |
self._ep.resources
|
| 165 |
)
|
| 166 |
|
| 167 |
+
if utilization > 1.5:
|
| 168 |
self._ep.crashed = True
|
| 169 |
obs = self._build_observation("SYSTEM CRASH: Resource exhaustion!")
|
| 170 |
reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
|
|
|
|
| 212 |
|
| 213 |
def _calculate_metrics(self, load: float, resources: list) -> Tuple[float, float, float]:
|
| 214 |
total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in resources)
|
| 215 |
+
avg_utilization = load / total_cap if total_cap > 0 else 0
|
| 216 |
|
| 217 |
+
utilization = min(avg_utilization, 1.5)
|
|
|
|
| 218 |
|
| 219 |
+
latency = 30 + 70 * (avg_utilization ** 2)
|
| 220 |
+
error_rate = max(0, (avg_utilization - 0.85) * 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
+
return latency, error_rate, avg_utilization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
def _build_observation(self, message: str) -> ObsModel:
|
| 225 |
if self._ep is None:
|
| 226 |
return self._error_obs()
|
| 227 |
|
| 228 |
+
latency, error_rate, utilization = self._calculate_metrics(
|
| 229 |
self._ep.current_load,
|
| 230 |
self._ep.resources
|
| 231 |
)
|
| 232 |
|
| 233 |
+
total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in self._ep.resources)
|
| 234 |
|
| 235 |
for r in self._ep.resources:
|
| 236 |
cap = INSTANCE_DATA[r.type]["capacity"]
|
| 237 |
+
share = cap / total_cap if total_cap > 0 else 0
|
| 238 |
+
r.cpu_usage = min(100.0, self._ep.current_load * share / cap * 100)
|
| 239 |
+
r.mem_usage = min(100.0, r.cpu_usage * 0.85)
|
| 240 |
|
| 241 |
metrics = Metrics(
|
| 242 |
avg_latency_ms=latency,
|