Spaces:

hirann
/

cloud-ops-optimizer

Sleeping

App Files Files Community

hirann committed on Apr 5

Commit

400199f

verified ·

1 Parent(s): c8c0f98

Enhance: Multi-step iterative tasks

Browse files

Files changed (1) hide show

env/core.py +81 -27

env/core.py CHANGED Viewed

@@ -40,35 +40,42 @@ TASKS = {
         task_id="easy_right_sizing",
         name="Right-Sizing",
         difficulty="easy",
-        description="Reduce an overpriced server without breaking the SLA",
         initial_resources=[
-            {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 2.0, "mem_usage": 2.0, "monthly_cost": 140.0}
         ],
-        sla={"max_latency_ms": 200.0, "max_budget": 30.0, "min_uptime_pct": 99.0},
-        load=2.0
     ),
     "medium": TaskConfig(
         task_id="medium_latency_fix",
         name="Latency Fix",
         difficulty="medium",
-        description="Resolve performance bottleneck while staying under budget",
         initial_resources=[
-            {"id": "srv-1", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 90.0, "monthly_cost": 3.6}
         ],
-        sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
-        load=12.0
     ),
     "hard": TaskConfig(
         task_id="hard_balance",
         name="Balance Optimization",
         difficulty="hard",
-        description="Optimize a mixed cluster under tight budget constraints",
         initial_resources=[
-            {"id": "srv-1", "type": "m5.large", "cpu_usage": 40.0, "mem_usage": 30.0, "monthly_cost": 70.0},
-            {"id": "srv-2", "type": "t3.nano", "cpu_usage": 90.0, "mem_usage": 80.0, "monthly_cost": 3.6}
         ],
-        sla={"max_latency_ms": 150.0, "max_budget": 35.0, "min_uptime_pct": 99.9},
-        load=25.0
     ),
 }
@@ -82,6 +89,9 @@ class EpisodeState:
     initial_latency: float
     steps: int = 0
     crashed: bool = False
     episode_id: str = field(default_factory=lambda: str(uuid4()))
@@ -126,6 +136,9 @@ class CloudOpsEnvironment:
             initial_latency=initial_latency,
             steps=0,
             crashed=False,
             episode_id=episode_id or str(uuid4()),
         )
@@ -138,7 +151,12 @@ class CloudOpsEnvironment:
         self._ep.steps += 1
         msg = action.message.lower()
         message = self._parse_and_execute(msg)
         latency, error_rate, utilization = self._calculate_metrics(
             self._ep.current_load,
             self._ep.resources
@@ -150,15 +168,22 @@ class CloudOpsEnvironment:
             reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
             return obs, reward, True, {"reason": "crash"}
-        reward = self._calculate_reward(latency, error_rate)
         done = (
-            reward.value >= 0.98 or
             self._ep.steps >= self._max_steps
         )
         obs = self._build_observation(message)
-        return obs, reward, done, {}
     def _parse_and_execute(self, msg: str) -> str:
         match = re.search(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)", msg)
@@ -169,9 +194,12 @@ class CloudOpsEnvironment:
             for r in self._ep.resources:
                 if r.id == res_id:
                     r.type = new_type
                     r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
-                    return f"Changed {res_id} to {new_type}"
             return f"Error: Resource '{res_id}' not found"
@@ -189,26 +217,52 @@ class CloudOpsEnvironment:
         return latency, error_rate, utilization
-    def _calculate_reward(self, latency: float, error_rate: float) -> RewModel:
-        total_cost = sum(r.monthly_cost for r in self._ep.resources)
-        budget = self._ep.task_config.sla["max_latency_ms"]
-        cost_ratio = total_cost / budget
-        cost_reward = 0.5 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
-        lat_ratio = latency / budget
-        perf_reward = 0.5 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
-        total_reward = cost_reward + perf_reward
         initial_latency = self._ep.initial_latency
         initial_cost = self._ep.initial_cost
-        cost_change = ((total_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
         lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
         return RewModel(
             value=min(1.0, max(0.0, total_reward)),
-            reason=f"Cost: ${total_cost:.1f}/mo, Latency: {latency:.1f}ms",
             cost_change_pct=cost_change,
             latency_change_pct=lat_change,
         )

         task_id="easy_right_sizing",
         name="Right-Sizing",
         difficulty="easy",
+        description="Optimize this 3-server cluster. Start by analyzing load patterns, then iteratively adjust each server. Final reward requires ALL servers properly sized.",
         initial_resources=[
+            {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 15.0, "mem_usage": 10.0, "monthly_cost": 140.0},
+            {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 12.0, "mem_usage": 8.0, "monthly_cost": 140.0},
+            {"id": "srv-3", "type": "m5.xlarge", "cpu_usage": 18.0, "mem_usage": 12.0, "monthly_cost": 140.0},
         ],
+        sla={"max_latency_ms": 100.0, "max_budget": 80.0, "min_uptime_pct": 99.0},
+        load=45.0
     ),
     "medium": TaskConfig(
         task_id="medium_latency_fix",
         name="Latency Fix",
         difficulty="medium",
+        description="Performance bottleneck! This cluster is struggling. Analyze each server's load, then iteratively upgrade undersized servers. Requires 4+ successful changes for max reward.",
         initial_resources=[
+            {"id": "srv-1", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 85.0, "monthly_cost": 3.6},
+            {"id": "srv-2", "type": "t3.nano", "cpu_usage": 88.0, "mem_usage": 80.0, "monthly_cost": 3.6},
+            {"id": "srv-3", "type": "t3.nano", "cpu_usage": 92.0, "mem_usage": 83.0, "monthly_cost": 3.6},
         ],
+        sla={"max_latency_ms": 80.0, "max_budget": 100.0, "min_uptime_pct": 99.9},
+        load=30.0
     ),
     "hard": TaskConfig(
         task_id="hard_balance",
         name="Balance Optimization",
         difficulty="hard",
+        description="Tight budget constraint! Optimize a mixed 5-server cluster. Must achieve optimal cost-efficiency while maintaining performance. Requires 5+ iterative changes, exploring different configurations.",
         initial_resources=[
+            {"id": "srv-1", "type": "m5.xlarge", "cpu_usage": 8.0, "mem_usage": 6.0, "monthly_cost": 140.0},
+            {"id": "srv-2", "type": "m5.xlarge", "cpu_usage": 10.0, "mem_usage": 8.0, "monthly_cost": 140.0},
+            {"id": "srv-3", "type": "t3.nano", "cpu_usage": 95.0, "mem_usage": 90.0, "monthly_cost": 3.6},
+            {"id": "srv-4", "type": "t3.nano", "cpu_usage": 98.0, "mem_usage": 92.0, "monthly_cost": 3.6},
+            {"id": "srv-5", "type": "t3.medium", "cpu_usage": 45.0, "mem_usage": 40.0, "monthly_cost": 23.0},
         ],
+        sla={"max_latency_ms": 100.0, "max_budget": 60.0, "min_uptime_pct": 99.9},
+        load=50.0
     ),
 }
     initial_latency: float
     steps: int = 0
     crashed: bool = False
+    changes_made: int = 0
+    last_action_success: bool = False
+    exploration_history: list = field(default_factory=list)
     episode_id: str = field(default_factory=lambda: str(uuid4()))
             initial_latency=initial_latency,
             steps=0,
             crashed=False,
+            changes_made=0,
+            last_action_success=False,
+            exploration_history=[],
             episode_id=episode_id or str(uuid4()),
         )
         self._ep.steps += 1
         msg = action.message.lower()
+        prev_cost = sum(r.monthly_cost for r in self._ep.resources)
+        prev_latency, _, _ = self._calculate_metrics(self._ep.current_load, self._ep.resources)
         message = self._parse_and_execute(msg)
+        new_cost = sum(r.monthly_cost for r in self._ep.resources)
         latency, error_rate, utilization = self._calculate_metrics(
             self._ep.current_load,
             self._ep.resources
             reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
             return obs, reward, True, {"reason": "crash"}
+        self._ep.exploration_history.append({
+            "step": self._ep.steps,
+            "action": msg[:50],
+            "cost": new_cost,
+            "latency": latency,
+        })
+        reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
         done = (
+            reward.value >= 0.98 and self._ep.changes_made >= 3 or
             self._ep.steps >= self._max_steps
         )
         obs = self._build_observation(message)
+        return obs, reward, done, {"changes_made": self._ep.changes_made}
     def _parse_and_execute(self, msg: str) -> str:
         match = re.search(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)", msg)
             for r in self._ep.resources:
                 if r.id == res_id:
+                    old_type = r.type
                     r.type = new_type
                     r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
+                    self._ep.changes_made += 1
+                    self._ep.last_action_success = True
+                    return f"Changed {res_id} from {old_type} to {new_type} (change #{self._ep.changes_made})"
             return f"Error: Resource '{res_id}' not found"
         return latency, error_rate, utilization
+    def _calculate_iterative_reward(
+        self,
+        latency: float,
+        error_rate: float,
+        new_cost: float,
+        prev_cost: float,
+        prev_latency: float
+    ) -> RewModel:
+        task = self._ep.task_config
+        budget = task.sla["max_budget"]
+        max_latency = task.sla["max_latency_ms"]
+        cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
+        latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)
+        change_bonus = min(self._ep.changes_made * 0.08, 0.4)
+        cost_ratio = new_cost / budget
+        cost_reward = 0.25 * (1.0 / (1.0 + max(0, cost_ratio - 1)))
+        lat_ratio = latency / max_latency
+        perf_reward = 0.25 * (1.0 / (1.0 + max(0, lat_ratio - 1)))
+        improvement_bonus = 0.0
+        if cost_improvement > 0:
+            improvement_bonus += min(cost_improvement * 0.2, 0.15)
+        if latency_improvement > 0:
+            improvement_bonus += min(latency_improvement * 0.2, 0.15)
+        base_reward = cost_reward + perf_reward
+        total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)
+        if error_rate > 0.1:
+            total_reward *= (1.0 - error_rate)
+        if self._ep.changes_made >= 3 and cost_reward > 0.2 and perf_reward > 0.2:
+            total_reward = min(1.0, total_reward + 0.1)
         initial_latency = self._ep.initial_latency
         initial_cost = self._ep.initial_cost
+        cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
         lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0
         return RewModel(
             value=min(1.0, max(0.0, total_reward)),
+            reason=f"Changes: {self._ep.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
             cost_change_pct=cost_change,
             latency_change_pct=lat_change,
         )