Spaces:
Sleeping
Sleeping
Enhance: Multi-step iterative tasks
Browse files- env/core.py +81 -27
env/core.py
CHANGED
|
@@ -40,35 +40,42 @@ TASKS = {
|
|
| 40 |
task_id="easy_right_sizing",
|
| 41 |
name="Right-Sizing",
|
| 42 |
difficulty="easy",
|
| 43 |
-
description="
|
| 44 |
initial_resources=[
|
| 45 |
-
{"id": "srv-1", "type": "m5.xlarge", "cpu_usage":
|
|
|
|
|
|
|
| 46 |
],
|
| 47 |
-
sla={"max_latency_ms":
|
| 48 |
-
load=
|
| 49 |
),
|
| 50 |
"medium": TaskConfig(
|
| 51 |
task_id="medium_latency_fix",
|
| 52 |
name="Latency Fix",
|
| 53 |
difficulty="medium",
|
| 54 |
-
description="
|
| 55 |
initial_resources=[
|
| 56 |
-
{"id": "srv-1", "type": "t3.nano", "cpu_usage":
|
|
|
|
|
|
|
| 57 |
],
|
| 58 |
-
sla={"max_latency_ms":
|
| 59 |
-
load=
|
| 60 |
),
|
| 61 |
"hard": TaskConfig(
|
| 62 |
task_id="hard_balance",
|
| 63 |
name="Balance Optimization",
|
| 64 |
difficulty="hard",
|
| 65 |
-
description="Optimize a mixed cluster
|
| 66 |
initial_resources=[
|
| 67 |
-
{"id": "srv-1", "type": "m5.
|
| 68 |
-
{"id": "srv-2", "type": "
|
|
|
|
|
|
|
|
|
|
| 69 |
],
|
| 70 |
-
sla={"max_latency_ms":
|
| 71 |
-
load=
|
| 72 |
),
|
| 73 |
}
|
| 74 |
|
|
@@ -82,6 +89,9 @@ class EpisodeState:
|
|
| 82 |
initial_latency: float
|
| 83 |
steps: int = 0
|
| 84 |
crashed: bool = False
|
|
|
|
|
|
|
|
|
|
| 85 |
episode_id: str = field(default_factory=lambda: str(uuid4()))
|
| 86 |
|
| 87 |
|
|
@@ -126,6 +136,9 @@ class CloudOpsEnvironment:
|
|
| 126 |
initial_latency=initial_latency,
|
| 127 |
steps=0,
|
| 128 |
crashed=False,
|
|
|
|
|
|
|
|
|
|
| 129 |
episode_id=episode_id or str(uuid4()),
|
| 130 |
)
|
| 131 |
|
|
@@ -138,7 +151,12 @@ class CloudOpsEnvironment:
|
|
| 138 |
self._ep.steps += 1
|
| 139 |
msg = action.message.lower()
|
| 140 |
|
|
|
|
|
|
|
|
|
|
| 141 |
message = self._parse_and_execute(msg)
|
|
|
|
|
|
|
| 142 |
latency, error_rate, utilization = self._calculate_metrics(
|
| 143 |
self._ep.current_load,
|
| 144 |
self._ep.resources
|
|
@@ -150,15 +168,22 @@ class CloudOpsEnvironment:
|
|
| 150 |
reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
|
| 151 |
return obs, reward, True, {"reason": "crash"}
|
| 152 |
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
done = (
|
| 156 |
-
reward.value >= 0.98
|
| 157 |
self._ep.steps >= self._max_steps
|
| 158 |
)
|
| 159 |
|
| 160 |
obs = self._build_observation(message)
|
| 161 |
-
return obs, reward, done, {}
|
| 162 |
|
| 163 |
def _parse_and_execute(self, msg: str) -> str:
|
| 164 |
match = re.search(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)", msg)
|
|
@@ -169,9 +194,12 @@ class CloudOpsEnvironment:
|
|
| 169 |
|
| 170 |
for r in self._ep.resources:
|
| 171 |
if r.id == res_id:
|
|
|
|
| 172 |
r.type = new_type
|
| 173 |
r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
|
| 174 |
-
|
|
|
|
|
|
|
| 175 |
|
| 176 |
return f"Error: Resource '{res_id}' not found"
|
| 177 |
|
|
@@ -189,26 +217,52 @@ class CloudOpsEnvironment:
|
|
| 189 |
|
| 190 |
return latency, error_rate, utilization
|
| 191 |
|
| 192 |
-
def
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
|
| 202 |
-
|
|
|
|
| 203 |
|
| 204 |
initial_latency = self._ep.initial_latency
|
| 205 |
initial_cost = self._ep.initial_cost
|
| 206 |
-
cost_change = ((
|
| 207 |
lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
|
| 208 |
|
| 209 |
return RewModel(
|
| 210 |
value=min(1.0, max(0.0, total_reward)),
|
| 211 |
-
reason=f"Cost: ${
|
| 212 |
cost_change_pct=cost_change,
|
| 213 |
latency_change_pct=lat_change,
|
| 214 |
)
|
|
|
|
| 40 |
task_id="easy_right_sizing",
|
| 41 |
name="Right-Sizing",
|
| 42 |
difficulty="easy",
|
| 43 |
+
description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
|
| 44 |
initial_resources=[
|
| 45 |
+
{"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 15.0, "mem_usage": 10.0, "monthly_cost": 140.0},
|
| 46 |
+
{"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 12.0, "mem_usage": 8.0, "monthly_cost": 140.0},
|
| 47 |
+
{"id": "srv-3", "type": "m5.xlarge", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 140.0},
|
| 48 |
],
|
| 49 |
+
sla={"max_latency_ms": 100.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
|
| 50 |
+
load=45.0
|
| 51 |
),
|
| 52 |
"medium": TaskConfig(
|
| 53 |
task_id="medium_latency_fix",
|
| 54 |
name="Latency Fix",
|
| 55 |
difficulty="medium",
|
| 56 |
+
description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
|
| 57 |
initial_resources=[
|
| 58 |
+
{"id": "srv-1", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 85.0, "monthly_cost": 3.6},
|
| 59 |
+
{"id": "srv-2", "type": "t3.nano", "cpu_usage": 88.0, "mem_usage": 80.0, "monthly_cost": 3.6},
|
| 60 |
+
{"id": "srv-3", "type": "t3.nano", "cpu_usage": 92.0, "mem_usage": 83.0, "monthly_cost": 3.6},
|
| 61 |
],
|
| 62 |
+
sla={"max_latency_ms": 80.0, "max_budget": 100.0, "min_uptime_pct": 99.9},
|
| 63 |
+
load=30.0
|
| 64 |
),
|
| 65 |
"hard": TaskConfig(
|
| 66 |
task_id="hard_balance",
|
| 67 |
name="Balance Optimization",
|
| 68 |
difficulty="hard",
|
| 69 |
+
description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
|
| 70 |
initial_resources=[
|
| 71 |
+
{"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 8.0, "mem_usage": 6.0, "monthly_cost": 140.0},
|
| 72 |
+
{"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 10.0, "mem_usage": 8.0, "monthly_cost": 140.0},
|
| 73 |
+
{"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
|
| 74 |
+
{"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
|
| 75 |
+
{"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
|
| 76 |
],
|
| 77 |
+
sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
|
| 78 |
+
load=50.0
|
| 79 |
),
|
| 80 |
}
|
| 81 |
|
|
|
|
| 89 |
initial_latency: float
|
| 90 |
steps: int = 0
|
| 91 |
crashed: bool = False
|
| 92 |
+
changes_made: int = 0
|
| 93 |
+
last_action_success: bool = False
|
| 94 |
+
exploration_history: list = field(default_factory=list)
|
| 95 |
episode_id: str = field(default_factory=lambda: str(uuid4()))
|
| 96 |
|
| 97 |
|
|
|
|
| 136 |
initial_latency=initial_latency,
|
| 137 |
steps=0,
|
| 138 |
crashed=False,
|
| 139 |
+
changes_made=0,
|
| 140 |
+
last_action_success=False,
|
| 141 |
+
exploration_history=[],
|
| 142 |
episode_id=episode_id or str(uuid4()),
|
| 143 |
)
|
| 144 |
|
|
|
|
| 151 |
self._ep.steps += 1
|
| 152 |
msg = action.message.lower()
|
| 153 |
|
| 154 |
+
prev_cost = sum(r.monthly_cost for r in self._ep.resources)
|
| 155 |
+
prev_latency, _, _ = self._calculate_metrics(self._ep.current_load, self._ep.resources)
|
| 156 |
+
|
| 157 |
message = self._parse_and_execute(msg)
|
| 158 |
+
|
| 159 |
+
new_cost = sum(r.monthly_cost for r in self._ep.resources)
|
| 160 |
latency, error_rate, utilization = self._calculate_metrics(
|
| 161 |
self._ep.current_load,
|
| 162 |
self._ep.resources
|
|
|
|
| 168 |
reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
|
| 169 |
return obs, reward, True, {"reason": "crash"}
|
| 170 |
|
| 171 |
+
self._ep.exploration_history.append({
|
| 172 |
+
"step": self._ep.steps,
|
| 173 |
+
"action": msg[:50],
|
| 174 |
+
"cost": new_cost,
|
| 175 |
+
"latency": latency,
|
| 176 |
+
})
|
| 177 |
+
|
| 178 |
+
reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
|
| 179 |
|
| 180 |
done = (
|
| 181 |
+
reward.value >= 0.98 and self._ep.changes_made >= 3 or
|
| 182 |
self._ep.steps >= self._max_steps
|
| 183 |
)
|
| 184 |
|
| 185 |
obs = self._build_observation(message)
|
| 186 |
+
return obs, reward, done, {"changes_made": self._ep.changes_made}
|
| 187 |
|
| 188 |
def _parse_and_execute(self, msg: str) -> str:
|
| 189 |
match = re.search(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)", msg)
|
|
|
|
| 194 |
|
| 195 |
for r in self._ep.resources:
|
| 196 |
if r.id == res_id:
|
| 197 |
+
old_type = r.type
|
| 198 |
r.type = new_type
|
| 199 |
r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
|
| 200 |
+
self._ep.changes_made += 1
|
| 201 |
+
self._ep.last_action_success = True
|
| 202 |
+
return f"Changed {res_id} from {old_type} to {new_type} (change #{self._ep.changes_made})"
|
| 203 |
|
| 204 |
return f"Error: Resource '{res_id}' not found"
|
| 205 |
|
|
|
|
| 217 |
|
| 218 |
return latency, error_rate, utilization
|
| 219 |
|
| 220 |
+
def _calculate_iterative_reward(
    self,
    latency: float,
    error_rate: float,
    new_cost: float,
    prev_cost: float,
    prev_latency: float,
) -> RewModel:
    """Score the current step of a multi-step optimisation episode.

    The reward is the sum of four parts, capped at 1.0:

    * a cost score (max 0.25) that is full while ``new_cost`` is within
      the task's ``max_budget`` and decays as the budget is exceeded;
    * a performance score (max 0.25) computed the same way from
      ``latency`` against ``max_latency_ms``;
    * a change bonus of 0.08 per change recorded in
      ``self._ep.changes_made``, capped at 0.4;
    * an improvement bonus (max 0.15 each) for reducing cost and/or
      latency relative to the previous step.

    If ``error_rate`` exceeds 0.1 the total is scaled by
    ``1 - error_rate``.  Once at least three changes have been made and
    both SLA scores are above 0.2, a further 0.1 is added.  The final
    value is clamped to ``[0.0, 1.0]``.

    Args:
        latency: Latency measured after the action.
        error_rate: Error rate measured after the action.
        new_cost: Total monthly cost after the action.
        prev_cost: Total monthly cost before the action.
        prev_latency: Latency measured before the action.

    Returns:
        A ``RewModel`` carrying the clamped reward, a human-readable
        reason, and the percentage change of cost and latency relative
        to the episode's initial values.
    """

    def _sla_score(value: float, limit: float) -> float:
        # A non-positive limit would make ``value / limit`` raise
        # ZeroDivisionError (or flip sign).  Treat it as "no headroom":
        # only a non-positive value satisfies it.
        if limit <= 0:
            return 0.25 if value <= 0 else 0.0
        overshoot = max(0, value / limit - 1)
        return 0.25 * (1.0 / (1.0 + overshoot))

    sla = self._ep.task_config.sla
    changes = self._ep.changes_made

    cost_reward = _sla_score(new_cost, sla["max_budget"])
    perf_reward = _sla_score(latency, sla["max_latency_ms"])

    # Step-over-step relative improvement; the epsilon keeps the
    # division defined when the previous value was zero.
    cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
    latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)

    improvement_bonus = 0.0
    if cost_improvement > 0:
        improvement_bonus += min(cost_improvement * 0.2, 0.15)
    if latency_improvement > 0:
        improvement_bonus += min(latency_improvement * 0.2, 0.15)

    change_bonus = min(changes * 0.08, 0.4)

    base_reward = cost_reward + perf_reward
    total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)

    # Penalise unreliable configurations in proportion to the error rate.
    if error_rate > 0.1:
        total_reward *= (1.0 - error_rate)

    # Completion bonus: enough changes made and both SLAs (nearly) met.
    if changes >= 3 and cost_reward > 0.2 and perf_reward > 0.2:
        total_reward = min(1.0, total_reward + 0.1)

    initial_latency = self._ep.initial_latency
    initial_cost = self._ep.initial_cost
    cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
    lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0

    return RewModel(
        value=min(1.0, max(0.0, total_reward)),
        reason=f"Changes: {changes}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
        cost_change_pct=cost_change,
        latency_change_pct=lat_change,
    )
|