Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Train supervisor GRPO-like policy.""" | |
| from __future__ import annotations | |
| import json | |
| import os | |
| from pathlib import Path | |
| import sys | |
# ROOT is the directory one level above the folder that contains this script.
# It is pushed to the front of sys.path (once) before the `app.*` import that
# follows -- presumably so the project package resolves when the script is run
# directly rather than as an installed module.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path[:0] = [str(ROOT)]
| from app.training.supervisor_grpo import train_supervisor_grpo | |
def main() -> None:
    """Run supervisor GRPO training and write its result as a JSON report.

    The number of episodes is read from the ``POLYGUARD_SUPERVISOR_EPISODES``
    environment variable. When the variable is unset, empty, or only
    whitespace, the default of ``6`` is used.

    ``train_supervisor_grpo`` is called with that episode count and a
    ``checkpoints`` directory located one level above this script's folder.
    Its return value is serialized with ``json.dumps`` and written to
    ``outputs/reports/supervisor_grpo.json`` under the same root (the
    directory is created if missing). ``supervisor_grpo_done`` is printed
    once the report has been written.

    Raises:
        ValueError: If ``POLYGUARD_SUPERVISOR_EPISODES`` is set to a
            non-blank value that cannot be parsed as an integer.
    """
    root = Path(__file__).resolve().parents[1]

    # An exported-but-empty variable (e.g. `POLYGUARD_SUPERVISOR_EPISODES=`
    # in a CI config) is treated the same as an unset one instead of
    # crashing inside int().
    raw_episodes = os.getenv("POLYGUARD_SUPERVISOR_EPISODES", "").strip()
    try:
        episodes = int(raw_episodes) if raw_episodes else 6
    except ValueError as err:
        # Same exception type as before, but the message now says which
        # setting is wrong and what value was seen.
        raise ValueError(
            "POLYGUARD_SUPERVISOR_EPISODES must be an integer, "
            f"got {raw_episodes!r}"
        ) from err

    result = train_supervisor_grpo(
        episodes=episodes,
        checkpoint_dir=root / "checkpoints",
    )

    out = root / "outputs" / "reports"
    out.mkdir(parents=True, exist_ok=True)
    report_text = json.dumps(result, ensure_ascii=True, indent=2)
    (out / "supervisor_grpo.json").write_text(report_text, encoding="utf-8")
    print("supervisor_grpo_done")
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()