Spaces (status: Sleeping) — commit view

Fix: Add missing _calculate_iterative_reward method
Browse files — env/core.py (+53, −1)
env/core.py
CHANGED
|
@@ -177,7 +177,7 @@ class CloudOpsEnvironment:
|
|
| 177 |
"latency": latency,
|
| 178 |
})
|
| 179 |
|
| 180 |
-
reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency)
|
| 181 |
|
| 182 |
done = (
|
| 183 |
self._ep.steps >= self._max_steps or
|
|
@@ -221,6 +221,58 @@ class CloudOpsEnvironment:
|
|
| 221 |
|
| 222 |
return latency, error_rate, avg_utilization
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
def _build_observation(self, message: str) -> ObsModel:
|
| 225 |
if self._ep is None:
|
| 226 |
return self._error_obs()
|
|
|
|
| 177 |
"latency": latency,
|
| 178 |
})
|
| 179 |
|
| 180 |
+
reward = self._calculate_iterative_reward(latency, error_rate, new_cost, prev_cost, prev_latency, utilization)
|
| 181 |
|
| 182 |
done = (
|
| 183 |
self._ep.steps >= self._max_steps or
|
|
|
|
| 221 |
|
| 222 |
return latency, error_rate, avg_utilization
|
| 223 |
|
| 224 |
+
def _calculate_iterative_reward(
    self,
    latency: float,
    error_rate: float,
    new_cost: float,
    prev_cost: float,
    prev_latency: float,
    utilization: float
) -> RewModel:
    """Score a single environment step on a clamped 0..1 scale.

    The reward blends two SLA-proximity terms (budget and latency) with
    bonuses for making changes, improving on the previous step, and a
    successful action, then penalizes high error rates. Percentage drift
    from the episode's initial cost/latency is reported alongside.

    NOTE(review): `utilization` is accepted but never read in this body —
    confirm whether it was meant to factor into the reward.
    """
    episode = self._ep
    sla = episode.task_config.sla
    budget = sla["max_budget"]
    max_latency = sla["max_latency_ms"]

    # Relative step-over-step deltas; the epsilon guards divide-by-zero.
    delta_cost = (prev_cost - new_cost) / (prev_cost + 1e-6)
    delta_latency = (prev_latency - latency) / (prev_latency + 1e-6)

    # Reward taking actions at all, capped so it cannot dominate.
    change_bonus = min(episode.changes_made * 0.06, 0.3)

    # Each proximity term is worth 0.3 while within the SLA limit and
    # decays hyperbolically once the limit is exceeded.
    over_budget = max(0, new_cost / budget - 1)
    cost_reward = 0.3 * (1.0 / (1.0 + over_budget))
    over_latency = max(0, latency / max_latency - 1)
    perf_reward = 0.3 * (1.0 / (1.0 + over_latency))

    # Small bonuses for improving on the previous step, each capped at 0.1.
    improvement_bonus = 0.0
    if delta_cost > 0:
        improvement_bonus += min(delta_cost * 0.15, 0.1)
    if delta_latency > 0:
        improvement_bonus += min(delta_latency * 0.15, 0.1)

    total_reward = min(1.0, cost_reward + perf_reward + change_bonus + improvement_bonus)

    # Heavily error-prone configurations are scaled down proportionally.
    if error_rate > 0.2:
        total_reward *= (1.0 - error_rate)

    # Exploration credit grows with step count (capped) but is only
    # granted when the last action actually succeeded.
    exploration_bonus = min(episode.steps * 0.03, 0.15)
    if episode.last_action_success:
        total_reward = min(1.0, total_reward + exploration_bonus)

    # Percentage drift from the episode's starting point, for reporting.
    initial_cost = episode.initial_cost
    initial_latency = episode.initial_latency
    cost_change = ((new_cost - initial_cost) / initial_cost) * 100 if initial_cost > 0 else 0
    lat_change = ((latency - initial_latency) / initial_latency) * 100 if initial_latency > 0 else 0

    return RewModel(
        value=min(1.0, max(0.0, total_reward)),
        reason=f"Changes: {episode.changes_made}, Cost: ${new_cost:.1f}, Latency: {latency:.1f}ms",
        cost_change_pct=cost_change,
        latency_change_pct=lat_change,
    )
|
| 275 |
+
|
| 276 |
def _build_observation(self, message: str) -> ObsModel:
|
| 277 |
if self._ep is None:
|
| 278 |
return self._error_obs()
|