Spaces:
Sleeping
Sleeping
| import math | |
| import random | |
| import re | |
| from typing import Any, Dict, Optional, Tuple | |
| from uuid import uuid4 | |
| from dataclasses import dataclass, field | |
| from models import ( | |
| Observation as ObsModel, | |
| Action as ActModel, | |
| Reward as RewModel, | |
| Resource, | |
| Metrics, | |
| SLA, | |
| ) | |
# Catalogue of instance types the environment accepts in a
# "change <resource_id> to <instance_type>" command.
#   cost     -- written to a resource's ``monthly_cost`` when it is switched to
#               this type.
#   capacity -- summed over all resources and divided into the task load to
#               obtain cluster utilization.
INSTANCE_DATA = {
    type_name: {"cost": monthly_cost, "capacity": capacity_units}
    for type_name, monthly_cost, capacity_units in (
        ("t3.nano", 3.6, 1.0),
        ("t3.small", 11.5, 2.0),
        ("t3.medium", 23.0, 4.0),
        ("m5.large", 70.0, 8.0),
        ("m5.xlarge", 140.0, 16.0),
    )
}
@dataclass
class TaskConfig:
    """Static definition of one optimization task.

    Instances are built with keyword arguments, so the class must be a
    dataclass: bare annotations alone do not generate an ``__init__``.
    """

    task_id: str              # long identifier reported in observations and state()
    name: str                 # human-readable task title
    difficulty: str           # difficulty label, e.g. "easy" / "medium" / "hard"
    description: str          # free-text instructions describing the task
    initial_resources: list   # list of dicts, each expanded into Resource(**d) on reset
    sla: dict                 # read via "max_latency_ms" / "max_budget"; expanded into SLA(**sla)
    load: float               # demand divided by total instance capacity to get utilization
# Task catalogue. The keys ("easy", "medium", "hard") are the short names that
# reset() looks up; each value is the immutable starting point of an episode.
TASKS = {
    # Three identical large servers under a light load.
    "easy": TaskConfig(
        task_id="easy_right_sizing",
        name="Right-Sizing",
        difficulty="easy",
        description=(
            "Optimize this 3-server cluster. "
            "Start by analyzing load patterns, then iteratively adjust each server. "
            "Final reward requires ALL servers properly sized."
        ),
        initial_resources=[
            dict(id="srv-1", type="m5.xlarge", cpu_usage=10.0, mem_usage=8.0, monthly_cost=140.0),
            dict(id="srv-2", type="m5.xlarge", cpu_usage=8.0, mem_usage=6.0, monthly_cost=140.0),
            dict(id="srv-3", type="m5.xlarge", cpu_usage=12.0, mem_usage=9.0, monthly_cost=140.0),
        ],
        sla=dict(max_latency_ms=120.0, max_budget=100.0, min_uptime_pct=99.0),
        load=30.0,
    ),
    # Three small servers; the description frames it as a latency problem.
    "medium": TaskConfig(
        task_id="medium_latency_fix",
        name="Latency Fix",
        difficulty="medium",
        description=(
            "Performance bottleneck! This cluster is struggling. "
            "Analyze each server's load, then iteratively upgrade undersized servers. "
            "Requires 4+ successful changes for max reward."
        ),
        initial_resources=[
            dict(id="srv-1", type="t3.small", cpu_usage=40.0, mem_usage=30.0, monthly_cost=11.5),
            dict(id="srv-2", type="t3.small", cpu_usage=38.0, mem_usage=28.0, monthly_cost=11.5),
            dict(id="srv-3", type="t3.small", cpu_usage=42.0, mem_usage=32.0, monthly_cost=11.5),
        ],
        sla=dict(max_latency_ms=100.0, max_budget=80.0, min_uptime_pct=99.9),
        load=4.5,
    ),
    # Five servers of mixed types.
    "hard": TaskConfig(
        task_id="hard_balance",
        name="Balance Optimization",
        difficulty="hard",
        description=(
            "Tight budget constraint! Optimize a mixed 5-server cluster. "
            "Must achieve optimal cost-efficiency while maintaining performance. "
            "Requires 5+ iterative changes, exploring different configurations."
        ),
        initial_resources=[
            dict(id="srv-1", type="m5.large", cpu_usage=15.0, mem_usage=10.0, monthly_cost=70.0),
            dict(id="srv-2", type="m5.large", cpu_usage=12.0, mem_usage=8.0, monthly_cost=70.0),
            dict(id="srv-3", type="t3.small", cpu_usage=50.0, mem_usage=40.0, monthly_cost=11.5),
            dict(id="srv-4", type="t3.small", cpu_usage=55.0, mem_usage=45.0, monthly_cost=11.5),
            dict(id="srv-5", type="t3.medium", cpu_usage=35.0, mem_usage=30.0, monthly_cost=23.0),
        ],
        sla=dict(max_latency_ms=100.0, max_budget=80.0, min_uptime_pct=99.9),
        load=15.0,
    ),
}
@dataclass
class EpisodeState:
    """Mutable bookkeeping for a single running episode.

    Must be a dataclass: the ``field(default_factory=...)`` defaults below only
    take effect when ``@dataclass`` processes the class, and the environment
    constructs instances with keyword arguments.
    """

    task_config: "TaskConfig"   # the task this episode was started from
    resources: list             # live Resource objects; mutated in place by change commands
    current_load: float         # demand used for every metrics calculation
    initial_cost: float         # summed monthly cost at reset, baseline for cost_change_pct
    initial_latency: float      # latency at reset, baseline for latency_change_pct
    steps: int = 0              # number of step() calls so far
    crashed: bool = False       # set once utilization exceeds the crash threshold
    changes_made: int = 0       # count of successfully applied change commands
    last_action_success: bool = False  # whether the most recent command changed a resource
    # One entry per non-crashing step: step number, truncated action, cost, latency.
    exploration_history: list = field(default_factory=list)
    # Fresh random identifier per instance unless the caller supplies one.
    episode_id: str = field(default_factory=lambda: str(uuid4()))
class CloudOpsEnvironment:
    """Cloud Infrastructure Optimization Environment.

    The agent acts as a Cloud SRE optimizing cost and performance.

    An episode is started with ``reset`` and advanced with ``step``. Each step
    carries one free-text command of the form
    ``change <resource_id> to <instance_type>``.
    """

    # Grammar of the (already lower-cased) command handled by _parse_and_execute.
    _CHANGE_PATTERN = re.compile(r"change\s+([a-z0-9-]+)\s+to\s+([a-z0-9.]+)")

    def __init__(self, max_steps: int = 12):
        """Create an environment whose episodes end after ``max_steps`` steps."""
        self._max_steps = max_steps
        self._ep: Optional[EpisodeState] = None

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        task_id: Optional[str] = None,
        **kwargs: Any,
    ) -> ObsModel:
        """Start a new episode and return its first observation.

        Args:
            seed: When given, seeds the global ``random`` module before a task
                is picked.
            episode_id: Identifier to store on the episode; a random UUID
                string is generated when omitted.
            task_id: Either a key of ``TASKS`` ("easy", "medium", "hard") or a
                task's long ``task_id`` as reported in observations and
                ``state()``. A random task is chosen when omitted; unknown
                values fall back to "easy".
            **kwargs: Accepted and ignored.

        Returns:
            The initial observation for the selected task.
        """
        if seed is not None:
            random.seed(seed)

        task_key = task_id or random.choice(["easy", "medium", "hard"])
        if task_key not in TASKS:
            # Observations and state() expose the long identifier (for example
            # "hard_balance"), so accept it here too rather than silently
            # starting the "easy" task.
            task_key = next(
                (key for key, cfg in TASKS.items() if cfg.task_id == task_key),
                "easy",
            )
        task = TASKS[task_key]

        # Fresh Resource objects per episode, so the task template stays untouched.
        resources = [Resource(**r) for r in task.initial_resources]
        initial_cost = sum(r.monthly_cost for r in resources)
        initial_latency, _, _ = self._calculate_metrics(task.load, resources)

        self._ep = EpisodeState(
            task_config=task,
            resources=resources,
            current_load=task.load,
            initial_cost=initial_cost,
            initial_latency=initial_latency,
            steps=0,
            crashed=False,
            changes_made=0,
            last_action_success=False,
            exploration_history=[],
            episode_id=episode_id or str(uuid4()),
        )
        return self._build_observation("Environment ready. Analyze and optimize.")

    def step(self, action: ActModel, **kwargs: Any) -> Tuple[ObsModel, RewModel, bool, Dict]:
        """Apply one agent command and advance the episode by one step.

        Args:
            action: Action whose ``message`` holds the command text. It is
                lower-cased before parsing.
            **kwargs: Accepted and ignored.

        Returns:
            ``(observation, reward, done, info)``. ``done`` is true when the
            step limit is reached, when at least three changes have been made
            and the reward is >= 0.95, when the cluster crashes, or when the
            environment has not been reset.
        """
        if self._ep is None:
            # Keep the declared 4-tuple shape so callers that unpack the result
            # do not fail; done=True signals that reset() is required.
            obs = self._error_obs("Environment not reset")
            reward = RewModel(value=0.0, reason="Environment not reset")
            return obs, reward, True, {"reason": "not_reset"}

        self._ep.steps += 1
        msg = action.message.lower()

        # Snapshot before the command so the reward can credit improvement.
        prev_cost = sum(r.monthly_cost for r in self._ep.resources)
        prev_latency, _, _ = self._calculate_metrics(self._ep.current_load, self._ep.resources)

        message = self._parse_and_execute(msg)
        # Only a successful change produces a message starting with "Changed".
        self._ep.last_action_success = message.startswith("Changed")

        new_cost = sum(r.monthly_cost for r in self._ep.resources)
        latency, error_rate, utilization = self._calculate_metrics(
            self._ep.current_load,
            self._ep.resources,
        )

        # Load above 150% of total capacity ends the episode with zero reward.
        if utilization > 1.5:
            self._ep.crashed = True
            obs = self._build_observation("SYSTEM CRASH: Resource exhaustion!")
            reward = RewModel(value=0.0, reason="System crashed due to resource exhaustion")
            return obs, reward, True, {"reason": "crash"}

        self._ep.exploration_history.append({
            "step": self._ep.steps,
            "action": msg[:50],
            "cost": new_cost,
            "latency": latency,
        })

        reward = self._calculate_iterative_reward(
            latency, error_rate, new_cost, prev_cost, prev_latency, utilization
        )

        done = (
            self._ep.steps >= self._max_steps
            or (self._ep.changes_made >= 3 and reward.value >= 0.95)
        )

        obs = self._build_observation(message)
        return obs, reward, done, {"changes_made": self._ep.changes_made}

    def _parse_and_execute(self, msg: str) -> str:
        """Parse a lower-cased command and apply it to the episode's resources.

        Returns a status message. It starts with "Changed" only when a
        resource's type was actually updated; every other outcome is an error
        or usage hint.
        """
        match = self._CHANGE_PATTERN.search(msg)
        if match:
            res_id, new_type = match.groups()
            # The type pattern admits '.', so sentence punctuation such as
            # "change srv-1 to t3.small." is captured with the trailing period.
            # No instance type ends in '.', so stripping it is always safe.
            new_type = new_type.rstrip(".")
            if new_type not in INSTANCE_DATA:
                return f"Error: Unknown instance type '{new_type}'. Available: {', '.join(INSTANCE_DATA.keys())}"

            for r in self._ep.resources:
                if r.id == res_id:
                    old_type = r.type
                    r.type = new_type
                    r.monthly_cost = INSTANCE_DATA[new_type]["cost"]
                    # NOTE: a change to the same type still counts as a change.
                    self._ep.changes_made += 1
                    self._ep.last_action_success = True
                    return f"Changed {res_id} from {old_type} to {new_type} (change #{self._ep.changes_made})"

            return f"Error: Resource '{res_id}' not found"

        if "resize" in msg or "scale" in msg or "upgrade" in msg or "downgrade" in msg:
            return "Use format: 'change [resource_id] to [instance_type]'"

        return "Command not recognized. Use 'change [resource_id] to [instance_type]'"

    def _calculate_metrics(self, load: float, resources: list) -> Tuple[float, float, float]:
        """Derive cluster-wide metrics from the load and the instance capacities.

        Returns:
            ``(latency, error_rate, utilization)`` where ``utilization`` is
            ``load / total_capacity`` (0 when there is no capacity) and is not
            clamped, ``latency`` is ``30 + 70 * utilization ** 2`` and
            ``error_rate`` is ``2 * (utilization - 0.85)`` floored at 0.
        """
        total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in resources)
        avg_utilization = load / total_cap if total_cap > 0 else 0

        latency = 30 + 70 * (avg_utilization ** 2)
        error_rate = max(0, (avg_utilization - 0.85) * 2)

        return latency, error_rate, avg_utilization

    def _calculate_iterative_reward(
        self,
        latency: float,
        error_rate: float,
        new_cost: float,
        prev_cost: float,
        prev_latency: float,
        utilization: float,
    ) -> RewModel:
        """Score the current step as a value in [0, 1].

        The score is the sum of:
          * a cost term and a latency term (0.3 each, shrinking once the SLA
            budget / latency limit is exceeded),
          * a bonus of 0.06 per change made, capped at 0.3,
          * up to 0.1 each for improving cost and latency over the previous step.
        It is then scaled by ``1 - error_rate`` when the error rate exceeds 0.2,
        and a step-count bonus (0.03 per step, capped at 0.15) is added when
        the last command succeeded. ``utilization`` is accepted but not used.
        """
        task = self._ep.task_config
        budget = task.sla["max_budget"]
        max_latency = task.sla["max_latency_ms"]

        # The 1e-6 guards against division by zero.
        cost_improvement = (prev_cost - new_cost) / (prev_cost + 1e-6)
        latency_improvement = (prev_latency - latency) / (prev_latency + 1e-6)

        change_bonus = min(self._ep.changes_made * 0.06, 0.3)

        # Full 0.3 while within the SLA; decays as 1 / (1 + overshoot) beyond it.
        cost_ratio = new_cost / budget
        cost_reward = 0.3 * (1.0 / (1.0 + max(0, cost_ratio - 1)))

        lat_ratio = latency / max_latency
        perf_reward = 0.3 * (1.0 / (1.0 + max(0, lat_ratio - 1)))

        improvement_bonus = 0.0
        if cost_improvement > 0:
            improvement_bonus += min(cost_improvement * 0.15, 0.1)
        if latency_improvement > 0:
            improvement_bonus += min(latency_improvement * 0.15, 0.1)

        base_reward = cost_reward + perf_reward
        total_reward = min(1.0, base_reward + change_bonus + improvement_bonus)

        # error_rate can exceed 1, which makes the product negative; the final
        # clamp below brings the value back into [0, 1].
        if error_rate > 0.2:
            total_reward *= (1.0 - error_rate)

        exploration_bonus = min(self._ep.steps * 0.03, 0.15)
        if self._ep.last_action_success:
            total_reward = min(1.0, total_reward + exploration_bonus)

        # Percentage deltas against the episode's starting point, for reporting.
        initial_latency = self._ep.initial_latency
        initial_cost = self._ep.initial_cost
        cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
        lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0

        return RewModel(
            value=min(1.0, max(0.0, total_reward)),
            reason=f"Changes: {self._ep.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
            cost_change_pct=cost_change,
            latency_change_pct=lat_change,
        )

    def _build_observation(self, message: str) -> ObsModel:
        """Build the observation for the current episode.

        Side effect: rewrites ``cpu_usage`` and ``mem_usage`` on every resource
        from the current load before the observation is assembled.
        """
        if self._ep is None:
            return self._error_obs()

        latency, error_rate, utilization = self._calculate_metrics(
            self._ep.current_load,
            self._ep.resources,
        )

        total_cap = sum(INSTANCE_DATA[r.type]["capacity"] for r in self._ep.resources)
        for r in self._ep.resources:
            cap = INSTANCE_DATA[r.type]["capacity"]
            # Each server takes load in proportion to its capacity share.
            share = cap / total_cap if total_cap > 0 else 0
            r.cpu_usage = min(100.0, self._ep.current_load * share / cap * 100)
            r.mem_usage = min(100.0, r.cpu_usage * 0.85)

        metrics = Metrics(
            avg_latency_ms=latency,
            error_rate=error_rate,
            throughput_rps=100.0,  # constant; not derived from load
        )

        sla = SLA(**self._ep.task_config.sla)

        return ObsModel(
            inventory=self._ep.resources,
            metrics=metrics,
            sla=sla,
            echoed_message=message,
            task_id=self._ep.task_config.task_id,
            task_name=self._ep.task_config.name,
            difficulty=self._ep.task_config.difficulty,
            step=self._ep.steps,
        )

    def _error_obs(self, message: str = "Error: Environment not initialized") -> ObsModel:
        """Return an empty, all-zero observation carrying ``message``."""
        return ObsModel(
            inventory=[],
            metrics=Metrics(avg_latency_ms=0, error_rate=0, throughput_rps=0),
            sla=SLA(max_latency_ms=0, max_budget=0, min_uptime_pct=0),
            echoed_message=message,
        )

    def state(self) -> Dict[str, Any]:
        """Return a plain-dict snapshot of the episode, or ``{}`` before reset."""
        if self._ep is None:
            return {}

        latency, error_rate, utilization = self._calculate_metrics(
            self._ep.current_load, self._ep.resources
        )
        total_cost = sum(r.monthly_cost for r in self._ep.resources)

        return {
            "episode_id": self._ep.episode_id,
            "task_id": self._ep.task_config.task_id,
            "task_name": self._ep.task_config.name,
            "difficulty": self._ep.task_config.difficulty,
            "steps": self._ep.steps,
            "changes_made": self._ep.changes_made,
            "crashed": self._ep.crashed,
            "resources": [
                {
                    "id": r.id,
                    "type": r.type,
                    "monthly_cost": r.monthly_cost,
                    "cpu_usage": r.cpu_usage,
                    "mem_usage": r.mem_usage,
                }
                for r in self._ep.resources
            ],
            "metrics": {
                "total_cost": total_cost,
                "latency_ms": latency,
                "error_rate": error_rate,
                "utilization": utilization,
            },
            # Copy: the SLA dict belongs to the shared module-level task
            # config, so a caller mutating the snapshot must not alter it.
            "sla": dict(self._ep.task_config.sla),
            "exploration_history": self._ep.exploration_history,
        }


# Module-level alias: ``Environment`` names the same class object.
Environment = CloudOpsEnvironment