Upload train_all_phases.py
Browse files
- train_all_phases.py +5 -5
train_all_phases.py
CHANGED
|
@@ -399,7 +399,7 @@ class CurriculumEnv(gym.Env):
|
|
| 399 |
f"Stage {self.CURRICULUM_STAGES[self.stage_idx]} complete: "
|
| 400 |
f"win_rate={win_rate:.2%}, avg_reward={avg_reward:.1f}. "
|
| 401 |
f"Advancing to {self.CURRICULUM_STAGES[self.stage_idx + 1]}",
|
| 402 |
-
|
| 403 |
)
|
| 404 |
self.stage_idx += 1
|
| 405 |
self.stage_episodes = 0
|
|
@@ -508,8 +508,8 @@ class TrackioLoggingCallback(BaseCallback):
|
|
| 508 |
self._last_mean_reward = 0.0
|
| 509 |
|
| 510 |
def _on_training_start(self):
|
| 511 |
-
trackio.
|
| 512 |
-
trackio.alert("Training Started", f"{self.run_name} training began.",
|
| 513 |
|
| 514 |
def _on_step(self):
|
| 515 |
if self.n_calls % self.log_interval == 0:
|
|
@@ -535,13 +535,13 @@ class TrackioLoggingCallback(BaseCallback):
|
|
| 535 |
|
| 536 |
if mean_r < -5.0 and self.num_timesteps > 50_000:
|
| 537 |
trackio.alert("Low Reward Warning",
|
| 538 |
-
f"mean_reward={mean_r:.2f} at step {self.num_timesteps} -- may be camping.",
|
| 539 |
return True
|
| 540 |
|
| 541 |
def _on_training_end(self):
|
| 542 |
trackio.alert("Training Complete",
|
| 543 |
f"Finished at {self.num_timesteps}. Final mean reward: {self._last_mean_reward:.2f}",
|
| 544 |
-
|
| 545 |
trackio.finish()
|
| 546 |
|
| 547 |
|
|
|
|
| 399 |
f"Stage {self.CURRICULUM_STAGES[self.stage_idx]} complete: "
|
| 400 |
f"win_rate={win_rate:.2%}, avg_reward={avg_reward:.1f}. "
|
| 401 |
f"Advancing to {self.CURRICULUM_STAGES[self.stage_idx + 1]}",
|
| 402 |
+
trackio.AlertLevel.INFO,
|
| 403 |
)
|
| 404 |
self.stage_idx += 1
|
| 405 |
self.stage_episodes = 0
|
|
|
|
| 508 |
self._last_mean_reward = 0.0
|
| 509 |
|
| 510 |
def _on_training_start(self):
|
| 511 |
+
trackio.init(project=self.project, name=self.run_name)
|
| 512 |
+
trackio.alert("Training Started", f"{self.run_name} training began.", trackio.AlertLevel.INFO)
|
| 513 |
|
| 514 |
def _on_step(self):
|
| 515 |
if self.n_calls % self.log_interval == 0:
|
|
|
|
| 535 |
|
| 536 |
if mean_r < -5.0 and self.num_timesteps > 50_000:
|
| 537 |
trackio.alert("Low Reward Warning",
|
| 538 |
+
f"mean_reward={mean_r:.2f} at step {self.num_timesteps} -- may be camping.", trackio.AlertLevel.WARN)
|
| 539 |
return True
|
| 540 |
|
| 541 |
def _on_training_end(self):
|
| 542 |
trackio.alert("Training Complete",
|
| 543 |
f"Finished at {self.num_timesteps}. Final mean reward: {self._last_mean_reward:.2f}",
|
| 544 |
+
trackio.AlertLevel.INFO)
|
| 545 |
trackio.finish()
|
| 546 |
|
| 547 |
|