File size: 4,123 Bytes

a15535e

"""Matplotlib plotting helpers — produces the 3 PNGs that go into the README.

Plots:
  1. baseline_vs_trained.png — mean-reward bars with per-episode points overlaid
  2. training_reward_curve.png — moving-average reward over episodes
  3. success_by_category.png — per-primitive-type success rate

Plots are 600x400 px (700x400 for the per-category chart) @ 100 dpi before tight-bbox cropping, label their value axes, and use a colour-blind-safe palette.
"""
from __future__ import annotations

from pathlib import Path
from typing import Iterable

import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402

# Shared colours for every plot in this module, keyed by series role.
PALETTE = dict(
    baseline="#888888",  # bar for the untrained baseline
    trained="#1F77B4",  # bars for the trained agent
    ema="#D62728",  # smoothed (moving-average) line on the reward curve
    raw="#1F77B4",  # faint per-episode line on the reward curve
)


def _moving_average(values: list[float], window: int = 10) -> list[float]:
    if not values:
        return []
    out: list[float] = []
    cumsum = 0.0
    for i, v in enumerate(values):
        cumsum += v
        if i >= window:
            cumsum -= values[i - window]
        out.append(cumsum / min(i + 1, window))
    return out


def plot_baseline_vs_trained(
    baseline_rewards: list[float],
    trained_rewards: list[float],
    out_path: str | Path,
    title: str = "ForgeEnv: Baseline vs Trained (50 eval episodes)",
) -> str:
    """Save a bar chart of mean reward with per-episode points overlaid.

    Two bars show the mean reward of the baseline and trained runs; each
    individual episode reward is scattered next to its bar.

    Args:
        baseline_rewards: Per-episode rewards of the baseline run. An empty
            list is drawn as a zero-height bar with no points.
        trained_rewards: Per-episode rewards of the trained run (same
            handling of an empty list).
        out_path: Destination image path; missing parent directories are
            created.
        title: Title drawn above the axes.

    Returns:
        ``out_path`` as a string.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
    # pyplot keeps every open figure in a global registry, so close it even
    # if drawing or saving fails; otherwise repeated failures leak figures.
    try:
        # max(1, len) keeps the mean of an empty series at 0 instead of
        # raising ZeroDivisionError.
        means = [
            sum(baseline_rewards) / max(1, len(baseline_rewards)),
            sum(trained_rewards) / max(1, len(trained_rewards)),
        ]
        labels = ["Baseline (no-op)", "Trained (GRPO)"]
        colors = [PALETTE["baseline"], PALETTE["trained"]]
        bars = ax.bar(labels, means, color=colors, width=0.5, alpha=0.85)
        ax.bar_label(bars, fmt="%.2f", padding=3)

        # Categorical bars sit at x = 0 and x = 1; nudge the points right so
        # they do not hide the bar's value label.
        for x, rewards in zip([0, 1], [baseline_rewards, trained_rewards]):
            if rewards:
                xs = [x + 0.18] * len(rewards)
                ax.scatter(xs, rewards, s=8, color="black", alpha=0.4, zorder=3)

        ax.set_ylabel("Visible verifier reward")
        ax.set_title(title)
        ax.grid(axis="y", linestyle=":", alpha=0.5)
        # Anchor the axis at 0 unless some mean or episode reward is negative.
        ax.set_ylim(bottom=min(0, *means, *baseline_rewards, *trained_rewards))
        fig.tight_layout()
        fig.savefig(out_path, dpi=100, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out_path)


def plot_reward_curve(
    rewards: list[float],
    out_path: str | Path,
    window: int = 10,
    title: str = "ForgeEnv: Repair Agent reward over training",
) -> str:
    """Save a line plot of per-episode reward with a moving-average overlay.

    Episodes are numbered from 1 on the x-axis. The raw series is drawn
    faintly and, when ``rewards`` is non-empty, its moving average is drawn on
    top of it.

    Args:
        rewards: Reward of each training episode, in order.
        out_path: Destination image path; missing parent directories are
            created.
        window: Window size passed to ``_moving_average`` and shown in the
            legend.
        title: Title drawn above the axes.

    Returns:
        ``out_path`` as a string.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
    # pyplot keeps every open figure in a global registry, so close it even
    # if smoothing, drawing or saving fails; otherwise the figure leaks.
    try:
        xs = list(range(1, len(rewards) + 1))
        ax.plot(
            xs,
            rewards,
            color=PALETTE["raw"],
            alpha=0.35,
            linewidth=1.0,
            label="Per-episode",
        )
        if rewards:
            ax.plot(
                xs,
                _moving_average(rewards, window=window),
                color=PALETTE["ema"],
                linewidth=2.0,
                label=f"Moving avg (w={window})",
            )
        ax.set_xlabel("Episode")
        ax.set_ylabel("Visible verifier reward")
        ax.set_title(title)
        ax.legend(loc="lower right")
        ax.grid(linestyle=":", alpha=0.4)
        fig.tight_layout()
        fig.savefig(out_path, dpi=100, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out_path)


def plot_success_rate_by_category(
    by_category: dict[str, list[bool]],
    out_path: str | Path,
    title: str = "ForgeEnv: Repair success by primitive type",
) -> str:
    """Save a horizontal bar chart of the success rate of each category.

    Args:
        by_category: Maps a category name to the success flag of each attempt
            in that category. A category with no attempts is drawn with a
            rate of 0.
        out_path: Destination image path; missing parent directories are
            created.
        title: Title drawn above the axes.

    Returns:
        ``out_path`` as a string.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(7, 4), dpi=100)
    # pyplot keeps every open figure in a global registry, so close it even
    # if drawing or saving fails; otherwise repeated failures leak figures.
    try:
        cats = list(by_category)
        # True counts as 1, so sum/len is the fraction of successes;
        # max(1, len) avoids dividing by zero for an empty category.
        rates = [
            sum(outcomes) / max(1, len(outcomes))
            for outcomes in by_category.values()
        ]
        bars = ax.barh(cats, rates, color=PALETTE["trained"], alpha=0.85)
        ax.bar_label(bars, fmt="%.2f", padding=3)
        # Slight headroom past 1.0 so the label of a 100% bar still fits.
        ax.set_xlim(0, 1.05)
        ax.set_xlabel("Success rate (held-out: executed_cleanly)")
        ax.set_title(title)
        ax.grid(axis="x", linestyle=":", alpha=0.4)
        fig.tight_layout()
        fig.savefig(out_path, dpi=100, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out_path)