#!/usr/bin/env python3 """Train dosing GRPO-like policy.""" from __future__ import annotations import json import os from pathlib import Path import sys ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from app.training.dosing_grpo import train_dosing_grpo def main() -> None: root = Path(__file__).resolve().parents[1] episodes = int(os.getenv("POLYGUARD_DOSING_EPISODES", "6")) result = train_dosing_grpo(episodes=episodes, checkpoint_dir=root / "checkpoints") out = root / "outputs" / "reports" out.mkdir(parents=True, exist_ok=True) (out / "dosing_grpo.json").write_text(json.dumps(result, ensure_ascii=True, indent=2), encoding="utf-8") print("dosing_grpo_done") if __name__ == "__main__": main()