| """ |
| Replot GRPO curves from a Hugging Face Job log (logging every 5 steps). |
| Data source: sh4shv4t/parlay-sft-1-5b → GRPO, 80 steps, G=2, L4, 2026-04-26. |
| Run: python scripts/plot_grpo_hf_job_curves.py |
| Outputs: results/grpo_reward_curve.png, results/grpo_loss_curve.png, results/grpo_train_metrics.json |
| """ |
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import matplotlib |
|
|
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
|
|
| |
# Global steps at which the job log reported metrics: 5, 10, ..., 80.
STEPS = [*range(5, 81, 5)]

# Reward value logged at each entry of STEPS (plotted as "Mean batch reward").
REWARDS = [
    -4.659, -4.604, -4.764, -9.603,
    -4.233, 0.5344, 0.8288, -4.509,
    0.675, -4.456, -9.468, -9.338,
    0.3431, 0.4913, -4.505, -9.64,
]

# Loss value logged at each entry of STEPS (plotted as "Policy loss").
LOSSES = [
    8.714e-05, 0.0001001, 4.062e-05, 7.433e-05,
    0.0001185, 0.0002067, 0.0002253, 5.42e-05,
    4.912e-05, 0.0001332, 0.0001032, 4.481e-05,
    5.264e-05, 0.0001981, 0.0001187, 7.517e-05,
]
|
|
|
|
def _save_line_plot(values, *, color, ylabel, title, dest):
    """Plot *values* against STEPS as a single line and save the figure to *dest*."""
    figure, axes = plt.subplots(figsize=(10, 5))
    axes.plot(STEPS, values, color=color, marker="o", ms=4, lw=1.5)
    axes.set_xlabel("Global step")
    axes.set_ylabel(ylabel)
    axes.set_title(title)
    axes.grid(True, alpha=0.3)
    axes.set_xlim(0, 85)
    figure.tight_layout()
    figure.savefig(dest, dpi=120, bbox_inches="tight")
    # Close explicitly so the figure is released once it has been written.
    plt.close(figure)


def main() -> None:
    """Write the metrics JSON and the reward/loss curve PNGs.

    Outputs go to ``results/`` under the directory one level above this
    script's folder; the directory is created if it does not exist.
    """
    results_dir = Path(__file__).resolve().parents[1] / "results"
    results_dir.mkdir(parents=True, exist_ok=True)

    metrics_path = results_dir / "grpo_train_metrics.json"
    reward_png = results_dir / "grpo_reward_curve.png"
    loss_png = results_dir / "grpo_loss_curve.png"

    # strict=True makes a length mismatch between the three series an error
    # instead of silently truncating to the shortest one.
    points = []
    for step, reward, loss in zip(STEPS, REWARDS, LOSSES, strict=True):
        points.append({"step": step, "reward": reward, "loss": loss})

    payload = {
        "meta": {
            "source": "Hugging Face Job",
            "sft_model": "sh4shv4t/parlay-sft-1-5b",
            "grpo_steps": 80,
            "grpo_g": 2,
            "flavor": "l4x1",
            "train_loss_final": 0.0001051,
            "repo": "sh4shv4t/parlay-grpo-1-5b",
        },
        "points": points,
    }
    metrics_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    _save_line_plot(
        REWARDS,
        color="#2e6f95",
        ylabel="Mean batch reward",
        title="GRPO — mean reward (HF Job, 80 steps, G=2, L4)",
        dest=reward_png,
    )
    _save_line_plot(
        LOSSES,
        color="#8b4513",
        ylabel="Policy loss",
        title="GRPO — training loss (HF Job, 80 steps, G=2, L4)",
        dest=loss_png,
    )

    # Report the outputs in the same order as before: reward, loss, JSON.
    for written in (reward_png, loss_png, metrics_path):
        print(f"Wrote {written}")
|
|
|
|
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|
|