| """Matplotlib plotting helpers — produces the 3 PNGs that go into the README. |
| |
| Plots: |
| 1. baseline_vs_trained.png — bar chart of mean reward with per-episode points overlaid |
| 2. training_reward_curve.png — moving-average reward over episodes |
| 3. success_by_category.png — per-primitive-type success rate |
| |
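| Figures are created at 100 dpi — 6x4 in (600x400 px) for plots 1–2 and 7x4 in (700x400 px) for plot 3 — and saved with a tight bounding box, so the written PNGs may differ slightly in size. Each plot labels its value axis and uses a colour-blind-safe palette. |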
| """ |
| from __future__ import annotations |
|
|
| from pathlib import Path |
| from typing import Iterable |
|
|
| import matplotlib |
|
|
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
|
|
# Hex colours shared by every figure in this module, keyed by series role.
PALETTE = dict(
    baseline="#888888",  # baseline bar in the baseline-vs-trained chart
    trained="#1F77B4",  # trained bar and the per-category success bars
    ema="#D62728",  # smoothed (moving-average) reward line
    raw="#1F77B4",  # per-episode reward line
)
|
|
|
|
| def _moving_average(values: list[float], window: int = 10) -> list[float]: |
| if not values: |
| return [] |
| out: list[float] = [] |
| cumsum = 0.0 |
| for i, v in enumerate(values): |
| cumsum += v |
| if i >= window: |
| cumsum -= values[i - window] |
| out.append(cumsum / min(i + 1, window)) |
| return out |
|
|
|
|
def plot_baseline_vs_trained(
    baseline_rewards: list[float],
    trained_rewards: list[float],
    out_path: str | Path,
    title: str = "ForgeEnv: Baseline vs Trained (50 eval episodes)",
) -> str:
    """Save a bar chart of mean reward per policy with per-episode points.

    Two bars show the mean of ``baseline_rewards`` and ``trained_rewards``;
    each episode's reward is overlaid as a small dot next to the centre of
    its bar. An empty reward list yields a mean of 0 and no dots.

    Args:
        baseline_rewards: Per-episode rewards for the baseline policy.
        trained_rewards: Per-episode rewards for the trained policy.
        out_path: Destination image path; missing parent directories are
            created.
        title: Figure title.

    Returns:
        ``out_path`` as a string.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
    # pyplot keeps every figure registered until it is closed, so make sure
    # the figure is released even when plotting or saving raises.
    try:
        series = [baseline_rewards, trained_rewards]
        # max(1, len) keeps an empty series from dividing by zero.
        means = [sum(rewards) / max(1, len(rewards)) for rewards in series]
        labels = ["Baseline (no-op)", "Trained (GRPO)"]
        colors = [PALETTE["baseline"], PALETTE["trained"]]
        bars = ax.bar(labels, means, color=colors, width=0.5, alpha=0.85)
        ax.bar_label(bars, fmt="%.2f", padding=3)

        # Strip plot: the categorical bars sit at x = 0 and x = 1; offset the
        # dots so they do not hide the bar's value label.
        for position, rewards in enumerate(series):
            if rewards:
                xs = [position + 0.18] * len(rewards)
                ax.scatter(xs, rewards, s=8, color="black", alpha=0.4, zorder=3)

        ax.set_ylabel("Visible verifier reward")
        ax.set_title(title)
        ax.grid(axis="y", linestyle=":", alpha=0.5)
        # Extend the axis below zero only when some reward or mean is negative.
        lowest = min([0, *means, *baseline_rewards, *trained_rewards])
        ax.set_ylim(bottom=lowest)
        fig.tight_layout()
        fig.savefig(out_path, dpi=100, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out_path)
|
|
|
|
def plot_reward_curve(
    rewards: list[float],
    out_path: str | Path,
    window: int = 10,
    title: str = "ForgeEnv: Repair Agent reward over training",
) -> str:
    """Save a line plot of per-episode reward with a moving-average overlay.

    Episodes are numbered from 1 on the x-axis. The raw rewards are drawn as
    a faint line and, when ``rewards`` is non-empty, the output of
    ``_moving_average(rewards, window=window)`` is drawn on top.

    Args:
        rewards: Reward per training episode, in order.
        out_path: Destination image path; missing parent directories are
            created.
        window: Window size passed to ``_moving_average``.
        title: Figure title.

    Returns:
        ``out_path`` as a string.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
    # pyplot keeps every figure registered until it is closed, so make sure
    # the figure is released even when smoothing, plotting or saving raises.
    try:
        episodes = list(range(1, len(rewards) + 1))
        ax.plot(
            episodes,
            rewards,
            color=PALETTE["raw"],
            alpha=0.35,
            linewidth=1.0,
            label="Per-episode",
        )
        if rewards:
            ax.plot(
                episodes,
                _moving_average(rewards, window=window),
                color=PALETTE["ema"],
                linewidth=2.0,
                label=f"Moving avg (w={window})",
            )
        ax.set_xlabel("Episode")
        ax.set_ylabel("Visible verifier reward")
        ax.set_title(title)
        ax.legend(loc="lower right")
        ax.grid(linestyle=":", alpha=0.4)
        fig.tight_layout()
        fig.savefig(out_path, dpi=100, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out_path)
|
|
|
|
def plot_success_rate_by_category(
    by_category: dict[str, list[bool]],
    out_path: str | Path,
    title: str = "ForgeEnv: Repair success by primitive type",
) -> str:
    """Save a horizontal bar chart of success rate per category.

    Each category's rate is the fraction of truthy entries in its outcome
    list; a category with no outcomes is shown with a rate of 0.

    Args:
        by_category: Maps a category name to its per-attempt outcomes.
        out_path: Destination image path; missing parent directories are
            created.
        title: Figure title.

    Returns:
        ``out_path`` as a string.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(7, 4), dpi=100)
    # pyplot keeps every figure registered until it is closed, so make sure
    # the figure is released even when plotting or saving raises.
    try:
        cats = list(by_category)
        # max(1, len) keeps a category with no outcomes from dividing by zero.
        rates = [
            sum(outcomes) / max(1, len(outcomes))
            for outcomes in by_category.values()
        ]
        bars = ax.barh(cats, rates, color=PALETTE["trained"], alpha=0.85)
        ax.bar_label(bars, fmt="%.2f", padding=3)
        # Slight headroom past 1.0 so a full-length bar does not touch the edge.
        ax.set_xlim(0, 1.05)
        ax.set_xlabel("Success rate (held-out: executed_cleanly)")
        ax.set_title(title)
        ax.grid(axis="x", linestyle=":", alpha=0.4)
        fig.tight_layout()
        fig.savefig(out_path, dpi=100, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out_path)
|
|