Spaces: Sleeping
Fix: Balance task difficulty
Browse files
- env/core.py +25 -22
env/core.py
CHANGED
|
@@ -42,12 +42,12 @@ TASKS = {
|
|
| 42 |
difficulty="easy",
|
| 43 |
description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
|
| 44 |
initial_resources=[
|
| 45 |
-
{"id": "srv-1", "type": "m5.
|
| 46 |
-
{"id": "srv-2", "type": "m5.
|
| 47 |
-
{"id": "srv-3", "type": "m5.
|
| 48 |
],
|
| 49 |
-
sla={"max_latency_ms":
|
| 50 |
-
load=
|
| 51 |
),
|
| 52 |
"medium": TaskConfig(
|
| 53 |
task_id="medium_latency_fix",
|
|
@@ -55,12 +55,12 @@ TASKS = {
|
|
| 55 |
difficulty="medium",
|
| 56 |
description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
|
| 57 |
initial_resources=[
|
| 58 |
-
{"id": "srv-1", "type": "t3.
|
| 59 |
-
{"id": "srv-2", "type": "t3.
|
| 60 |
-
{"id": "srv-3", "type": "t3.
|
| 61 |
],
|
| 62 |
-
sla={"max_latency_ms":
|
| 63 |
-
load=
|
| 64 |
),
|
| 65 |
"hard": TaskConfig(
|
| 66 |
task_id="hard_balance",
|
|
@@ -68,14 +68,14 @@ TASKS = {
|
|
| 68 |
difficulty="hard",
|
| 69 |
description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
|
| 70 |
initial_resources=[
|
| 71 |
-
{"id": "srv-1", "type": "m5.
|
| 72 |
-
{"id": "srv-2", "type": "m5.
|
| 73 |
{"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
|
| 74 |
{"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
|
| 75 |
{"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
|
| 76 |
],
|
| 77 |
sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
|
| 78 |
-
load=
|
| 79 |
),
|
| 80 |
}
|
| 81 |
|
|
@@ -156,6 +156,8 @@ class CloudOpsEnvironment:
|
|
| 156 |
|
| 157 |
message = self._parse_and_execute(msg)
|
| 158 |
|
|
|
|
|
|
|
| 159 |
new_cost = sum(r.monthly_cost for r in self._ep.resources)
|
| 160 |
latency, error_rate, utilization = self._calculate_metrics(
|
| 161 |
self._ep.current_load,
|
|
@@ -178,8 +180,8 @@ class CloudOpsEnvironment:
|
|
| 178 |
reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
|
| 179 |
|
| 180 |
done = (
|
| 181 |
-
|
| 182 |
-
self._ep.
|
| 183 |
)
|
| 184 |
|
| 185 |
obs = self._build_observation(message)
|
|
@@ -232,19 +234,19 @@ class CloudOpsEnvironment:
|
|
| 232 |
cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
|
| 233 |
latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
|
| 234 |
|
| 235 |
-
change_bonus = min(self._ep.changes_made * 0.
|
| 236 |
|
| 237 |
cost_ratio = new_cost / budget
|
| 238 |
-
cost_reward = 0.
|
| 239 |
|
| 240 |
lat_ratio = latency / max_latency
|
| 241 |
-
perf_reward = 0.
|
| 242 |
|
| 243 |
improvement_bonus = 0.0
|
| 244 |
if cost_improvement > 0:
|
| 245 |
-
improvement_bonus += min(cost_improvement * 0.
|
| 246 |
if latency_improvement > 0:
|
| 247 |
-
improvement_bonus += min(latency_improvement * 0.
|
| 248 |
|
| 249 |
base_reward = cost_reward + perf_reward
|
| 250 |
total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
|
|
@@ -252,8 +254,9 @@ class CloudOpsEnvironment:
|
|
| 252 |
if error_rate > 0.1:
|
| 253 |
total_reward *= (1.0 - error_rate)
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
|
|
|
| 257 |
|
| 258 |
initial_latency = self._ep.initial_latency
|
| 259 |
initial_cost = self._ep.initial_cost
|
|
|
|
| 42 |
difficulty="easy",
|
| 43 |
description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
|
| 44 |
initial_resources=[
|
| 45 |
+
{"id": "srv-1", "type": "m5.large", "cpu_usage": 20.0, "mem_usage": 15.0, "monthly_cost": 70.0},
|
| 46 |
+
{"id": "srv-2", "type": "m5.large", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 70.0},
|
| 47 |
+
{"id": "srv-3", "type": "m5.large", "cpu_usage": 22.0, "mem_usage": 16.0, "monthly_cost": 70.0},
|
| 48 |
],
|
| 49 |
+
sla={"max_latency_ms": 120.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
|
| 50 |
+
load=30.0
|
| 51 |
),
|
| 52 |
"medium": TaskConfig(
|
| 53 |
task_id="medium_latency_fix",
|
|
|
|
| 55 |
difficulty="medium",
|
| 56 |
description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
|
| 57 |
initial_resources=[
|
| 58 |
+
{"id": "srv-1", "type": "t3.small", "cpu_usage": 90.0, "mem_usage": 80.0, "monthly_cost": 11.5},
|
| 59 |
+
{"id": "srv-2", "type": "t3.small", "cpu_usage": 85.0, "mem_usage": 75.0, "monthly_cost": 11.5},
|
| 60 |
+
{"id": "srv-3", "type": "t3.small", "cpu_usage": 88.0, "mem_usage": 78.0, "monthly_cost": 11.5},
|
| 61 |
],
|
| 62 |
+
sla={"max_latency_ms": 100.0, "max_budget": 100.0, "min_uptime_pct": 99.9},
|
| 63 |
+
load=15.0
|
| 64 |
),
|
| 65 |
"hard": TaskConfig(
|
| 66 |
task_id="hard_balance",
|
|
|
|
| 68 |
difficulty="hard",
|
| 69 |
description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
|
| 70 |
initial_resources=[
|
| 71 |
+
{"id": "srv-1", "type": "m5.large", "cpu_usage": 15.0, "mem_usage": 10.0, "monthly_cost": 70.0},
|
| 72 |
+
{"id": "srv-2", "type": "m5.large", "cpu_usage": 12.0, "mem_usage": 8.0, "monthly_cost": 70.0},
|
| 73 |
{"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
|
| 74 |
{"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
|
| 75 |
{"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
|
| 76 |
],
|
| 77 |
sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
|
| 78 |
+
load=35.0
|
| 79 |
),
|
| 80 |
}
|
| 81 |
|
|
|
|
| 156 |
|
| 157 |
message = self._parse_and_execute(msg)
|
| 158 |
|
| 159 |
+
self._ep.last_action_success = message.startswith("Changed")
|
| 160 |
+
|
| 161 |
new_cost = sum(r.monthly_cost for r in self._ep.resources)
|
| 162 |
latency, error_rate, utilization = self._calculate_metrics(
|
| 163 |
self._ep.current_load,
|
|
|
|
| 180 |
reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
|
| 181 |
|
| 182 |
done = (
|
| 183 |
+
self._ep.steps >= self._max_steps or
|
| 184 |
+
(self._ep.changes_made >= 3 and reward.value >= 0.95)
|
| 185 |
)
|
| 186 |
|
| 187 |
obs = self._build_observation(message)
|
|
|
|
| 234 |
cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
|
| 235 |
latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
|
| 236 |
|
| 237 |
+
change_bonus = min(self._ep.changes_made * 0.06, 0.3)
|
| 238 |
|
| 239 |
cost_ratio = new_cost / budget
|
| 240 |
+
cost_reward = 0.3 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
|
| 241 |
|
| 242 |
lat_ratio = latency / max_latency
|
| 243 |
+
perf_reward = 0.3 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
|
| 244 |
|
| 245 |
improvement_bonus = 0.0
|
| 246 |
if cost_improvement > 0:
|
| 247 |
+
improvement_bonus += min(cost_improvement * 0.15, 0.1)
|
| 248 |
if latency_improvement > 0:
|
| 249 |
+
improvement_bonus += min(latency_improvement * 0.15, 0.1)
|
| 250 |
|
| 251 |
base_reward = cost_reward + perf_reward
|
| 252 |
total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
|
|
|
|
| 254 |
if error_rate > 0.1:
|
| 255 |
total_reward *= (1.0 - error_rate)
|
| 256 |
|
| 257 |
+
exploration_bonus = min(self._ep.steps * 0.03, 0.15)
|
| 258 |
+
if self._ep.last_action_success:
|
| 259 |
+
total_reward = min(1.0, total_reward + exploration_bonus)
|
| 260 |
|
| 261 |
initial_latency = self._ep.initial_latency
|
| 262 |
initial_cost = self._ep.initial_cost
|