hirann committed on
Commit
e0ab109
·
verified ·
1 Parent(s): 739386c

Fix: Balance task difficulty

Browse files
Files changed (1) hide show
  1. env/core.py +25 -22
env/core.py CHANGED
@@ -42,12 +42,12 @@ TASKS = {
42
  difficulty="easy",
43
  description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
44
  initial_resources=[
45
- {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 15.0, "mem_usage": 10.0, "monthly_cost": 140.0},
46
- {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 12.0, "mem_usage": 8.0, "monthly_cost": 140.0},
47
- {"id": "srv-3", "type": "m5.xlarge", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 140.0},
48
  ],
49
- sla={"max_latency_ms": 100.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
50
- load=45.0
51
  ),
52
  "medium": TaskConfig(
53
  task_id="medium_latency_fix",
@@ -55,12 +55,12 @@ TASKS = {
55
  difficulty="medium",
56
  description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
57
  initial_resources=[
58
- {"id": "srv-1", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 85.0, "monthly_cost": 3.6},
59
- {"id": "srv-2", "type": "t3.nano", "cpu_usage": 88.0, "mem_usage": 80.0, "monthly_cost": 3.6},
60
- {"id": "srv-3", "type": "t3.nano", "cpu_usage": 92.0, "mem_usage": 83.0, "monthly_cost": 3.6},
61
  ],
62
- sla={"max_latency_ms": 80.0, "max_budget": 100.0, "min_uptime_pct": 99.9},
63
- load=30.0
64
  ),
65
  "hard": TaskConfig(
66
  task_id="hard_balance",
@@ -68,14 +68,14 @@ TASKS = {
68
  difficulty="hard",
69
  description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
70
  initial_resources=[
71
- {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 8.0, "mem_usage": 6.0, "monthly_cost": 140.0},
72
- {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 10.0, "mem_usage": 8.0, "monthly_cost": 140.0},
73
  {"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
74
  {"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
75
  {"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
76
  ],
77
  sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
78
- load=50.0
79
  ),
80
  }
81
 
@@ -156,6 +156,8 @@ class CloudOpsEnvironment:
156
 
157
  message = self._parse_and_execute(msg)
158
 
 
 
159
  new_cost = sum(r.monthly_cost for r in self._ep.resources)
160
  latency, error_rate, utilization = self._calculate_metrics(
161
  self._ep.current_load,
@@ -178,8 +180,8 @@ class CloudOpsEnvironment:
178
  reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
179
 
180
  done = (
181
- reward.value >= 0.98 and self._ep.changes_made >= 3 or
182
- self._ep.steps >= self._max_steps
183
  )
184
 
185
  obs = self._build_observation(message)
@@ -232,19 +234,19 @@ class CloudOpsEnvironment:
232
  cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
233
  latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
234
 
235
- change_bonus = min(self._ep.changes_made * 0.08, 0.4)
236
 
237
  cost_ratio = new_cost / budget
238
- cost_reward = 0.25 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
239
 
240
  lat_ratio = latency / max_latency
241
- perf_reward = 0.25 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
242
 
243
  improvement_bonus = 0.0
244
  if cost_improvement > 0:
245
- improvement_bonus += min(cost_improvement * 0.2, 0.15)
246
  if latency_improvement > 0:
247
- improvement_bonus += min(latency_improvement * 0.2, 0.15)
248
 
249
  base_reward = cost_reward + perf_reward
250
  total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
@@ -252,8 +254,9 @@ class CloudOpsEnvironment:
252
  if error_rate > 0.1:
253
  total_reward *= (1.0 - error_rate)
254
 
255
- if self._ep.changes_made >= 3 and cost_reward > 0.2 and perf_reward > 0.2:
256
- total_reward = min(1.0, total_reward + 0.1)
 
257
 
258
  initial_latency = self._ep.initial_latency
259
  initial_cost = self._ep.initial_cost
 
42
  difficulty="easy",
43
  description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
44
  initial_resources=[
45
+ {"id": "srv-1", "type": "m5.large", "cpu_usage": 20.0, "mem_usage": 15.0, "monthly_cost": 70.0},
46
+ {"id": "srv-2", "type": "m5.large", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 70.0},
47
+ {"id": "srv-3", "type": "m5.large", "cpu_usage": 22.0, "mem_usage": 16.0, "monthly_cost": 70.0},
48
  ],
49
+ sla={"max_latency_ms": 120.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
50
+ load=30.0
51
  ),
52
  "medium": TaskConfig(
53
  task_id="medium_latency_fix",
 
55
  difficulty="medium",
56
  description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
57
  initial_resources=[
58
+ {"id": "srv-1", "type": "t3.small", "cpu_usage": 90.0, "mem_usage": 80.0, "monthly_cost": 11.5},
59
+ {"id": "srv-2", "type": "t3.small", "cpu_usage": 85.0, "mem_usage": 75.0, "monthly_cost": 11.5},
60
+ {"id": "srv-3", "type": "t3.small", "cpu_usage": 88.0, "mem_usage": 78.0, "monthly_cost": 11.5},
61
  ],
62
+ sla={"max_latency_ms": 100.0, "max_budget": 100.0, "min_uptime_pct": 99.9},
63
+ load=15.0
64
  ),
65
  "hard": TaskConfig(
66
  task_id="hard_balance",
 
68
  difficulty="hard",
69
  description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
70
  initial_resources=[
71
+ {"id": "srv-1", "type": "m5.large", "cpu_usage": 15.0, "mem_usage": 10.0, "monthly_cost": 70.0},
72
+ {"id": "srv-2", "type": "m5.large", "cpu_usage": 12.0, "mem_usage": 8.0, "monthly_cost": 70.0},
73
  {"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
74
  {"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
75
  {"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
76
  ],
77
  sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
78
+ load=35.0
79
  ),
80
  }
81
 
 
156
 
157
  message = self._parse_and_execute(msg)
158
 
159
+ self._ep.last_action_success = message.startswith("Changed")
160
+
161
  new_cost = sum(r.monthly_cost for r in self._ep.resources)
162
  latency, error_rate, utilization = self._calculate_metrics(
163
  self._ep.current_load,
 
180
  reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
181
 
182
  done = (
183
+ self._ep.steps >= self._max_steps or
184
+ (self._ep.changes_made >= 3 and reward.value >= 0.95)
185
  )
186
 
187
  obs = self._build_observation(message)
 
234
  cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
235
  latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
236
 
237
+ change_bonus = min(self._ep.changes_made * 0.06, 0.3)
238
 
239
  cost_ratio = new_cost / budget
240
+ cost_reward = 0.3 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
241
 
242
  lat_ratio = latency / max_latency
243
+ perf_reward = 0.3 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
244
 
245
  improvement_bonus = 0.0
246
  if cost_improvement > 0:
247
+ improvement_bonus += min(cost_improvement * 0.15, 0.1)
248
  if latency_improvement > 0:
249
+ improvement_bonus += min(latency_improvement * 0.15, 0.1)
250
 
251
  base_reward = cost_reward + perf_reward
252
  total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
 
254
  if error_rate > 0.1:
255
  total_reward *= (1.0 - error_rate)
256
 
257
+ exploration_bonus = min(self._ep.steps * 0.03, 0.15)
258
+ if self._ep.last_action_success:
259
+ total_reward = min(1.0, total_reward + exploration_bonus)
260
 
261
  initial_latency = self._ep.initial_latency
262
  initial_cost = self._ep.initial_cost