Spaces:

ronitraj
/

QuantumScribe

Sleeping

App Files Files Community

ronitraj commited on 12 days ago

Commit

7dc2fe6

verified ·

1 Parent(s): be6d6a7

Upload scripts/make_comparison_plot.py with huggingface_hub

Browse files

Files changed (1) hide show

scripts/make_comparison_plot.py +355 -0

scripts/make_comparison_plot.py ADDED Viewed

	@@ -0,0 +1,355 @@

+"""scripts/make_comparison_plot.py - the "money plot" for Qubit-Medic.
+Renders a side-by-side bar chart comparing four conditions on the two
+headline metrics emitted by ``scripts.eval`` (and dumped to JSON in
+``data/eval/`` or ``data/`` by the training pipeline):
+* Random baseline       (uniform-random qubit picks)
+* Base Qwen2.5-3B       (un-fine-tuned model; usually format failures)
+* SFT-only              (Qwen2.5-3B after supervised fine-tuning)
+* SFT + GRPO            (the full Qubit-Medic checkpoint)
+Two panels:
+* Left: ``logical_correction_rate``    (y-axis 0-1, fraction of shots
+        where the predicted Pauli frame yields no logical-Z flip)
+* Right: ``pymatching_beat_rate``      (y-axis 0-1, fraction of shots
+        where the model corrects but PyMatching does not)
+JSON schema expected per condition file (mirrors
+``scripts/eval.py::_summary``)::
+    {
+      "name": str,
+      "episodes": int,
+      "logical_correction_rate": float,
+      "pymatching_beat_rate": float,
+      ... (other keys are ignored here)
+    }
+The script never runs ``scripts.eval`` itself - it just reads JSON.
+Usage::
+    python scripts/make_comparison_plot.py            # uses defaults
+    python scripts/make_comparison_plot.py --eval-dir data/eval
+    python scripts/make_comparison_plot.py \
+        --random data/eval/random.json \
+        --base   data/eval/base_qwen.json \
+        --sft    data/eval/sft_only.json \
+        --grpo   data/eval/sft_grpo.json \
+        --out    figures/before_after_comparison.png
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable, Optional
+# --------------------------------------------------------------------------- #
+# Plot configuration                                                          #
+# --------------------------------------------------------------------------- #
+CONDITION_LABELS: tuple[str, ...] = (
+    "Random baseline",
+    "Base Qwen2.5-3B",
+    "SFT-only",
+    "SFT + GRPO",
+)
+# Colour-blind safe-ish palette: greys for the baselines, accent for ours.
+CONDITION_COLOURS: tuple[str, ...] = (
+    "#9aa0a6",  # random  - light grey
+    "#5f6368",  # base    - dark grey
+    "#7e57c2",  # sft     - purple
+    "#1e88e5",  # sft+grpo - blue (the "after" colour)
+)
+# Default filenames the script will look for inside --eval-dir if explicit
+# per-condition paths are not supplied. Order matches CONDITION_LABELS.
+DEFAULT_FILENAMES: tuple[str, ...] = (
+    "random.json",
+    "base_qwen.json",
+    "sft_only.json",
+    "sft_grpo.json",
+)
+# --------------------------------------------------------------------------- #
+# Data structures                                                             #
+# --------------------------------------------------------------------------- #
+@dataclass(frozen=True)
+class Condition:
+    """One bar per panel - a single eval JSON read off disk."""
+    label: str
+    colour: str
+    path: Path
+    data: Optional[dict]  # None if file missing
+    @property
+    def lcr(self) -> float:
+        if self.data is None:
+            return 0.0
+        return float(self.data.get("logical_correction_rate", 0.0))
+    @property
+    def beat(self) -> float:
+        if self.data is None:
+            return 0.0
+        return float(self.data.get("pymatching_beat_rate", 0.0))
+    @property
+    def episodes(self) -> int:
+        if self.data is None:
+            return 0
+        return int(self.data.get("episodes", 0))
+# --------------------------------------------------------------------------- #
+# I/O                                                                         #
+# --------------------------------------------------------------------------- #
+def _load_json(path: Path) -> Optional[dict]:
+    """Read a JSON file, returning ``None`` if it does not exist."""
+    if not path.exists():
+        return None
+    with path.open("r") as f:
+        return json.load(f)
+def _resolve_paths(
+    eval_dir: Path,
+    explicit: dict[str, Optional[str]],
+) -> list[Path]:
+    """Resolve a path per condition, preferring explicit overrides."""
+    paths: list[Path] = []
+    for label, default_name in zip(CONDITION_LABELS, DEFAULT_FILENAMES):
+        override = explicit.get(label)
+        if override:
+            paths.append(Path(override))
+        else:
+            paths.append(eval_dir / default_name)
+    return paths
+def load_conditions(
+    eval_dir: Path,
+    explicit: dict[str, Optional[str]],
+) -> list[Condition]:
+    """Materialise four ``Condition`` rows in the canonical plot order."""
+    paths = _resolve_paths(eval_dir, explicit)
+    out: list[Condition] = []
+    for label, colour, path in zip(CONDITION_LABELS, CONDITION_COLOURS, paths):
+        out.append(
+            Condition(
+                label=label,
+                colour=colour,
+                path=path,
+                data=_load_json(path),
+            )
+        )
+    return out
+# --------------------------------------------------------------------------- #
+# Plot                                                                        #
+# --------------------------------------------------------------------------- #
+def render_plot(
+    conditions: list[Condition],
+    out_path: Path,
+    title: str,
+    dpi: int = 150,
+) -> None:
+    """Render the two-panel money plot to ``out_path`` at ``dpi``."""
+    try:
+        import matplotlib.pyplot as plt  # local import: graceful failure path
+    except ImportError as exc:  # pragma: no cover - import-time only
+        raise SystemExit(
+            "matplotlib is required for make_comparison_plot.py. "
+            "Install with: pip install matplotlib"
+        ) from exc
+    labels = [c.label for c in conditions]
+    colours = [c.colour for c in conditions]
+    lcr_values = [c.lcr for c in conditions]
+    beat_values = [c.beat for c in conditions]
+    fig, (ax_left, ax_right) = plt.subplots(
+        nrows=1, ncols=2, figsize=(12, 5.2), sharey=False
+    )
+    x = list(range(len(labels)))
+    bars_left = ax_left.bar(x, lcr_values, color=colours, edgecolor="black",
+                            linewidth=0.6)
+    ax_left.set_xticks(x)
+    ax_left.set_xticklabels(labels, rotation=20, ha="right")
+    ax_left.set_ylim(0.0, 1.0)
+    ax_left.set_ylabel("Logical correction rate (fraction of shots, 0-1)")
+    ax_left.set_xlabel("Decoder condition")
+    ax_left.set_title("Logical correction rate (per shot)")
+    ax_left.grid(axis="y", linestyle=":", alpha=0.5)
+    for bar, val in zip(bars_left, lcr_values):
+        ax_left.text(
+            bar.get_x() + bar.get_width() / 2,
+            min(val + 0.02, 0.98),
+            f"{val:.3f}",
+            ha="center", va="bottom", fontsize=9,
+        )
+    bars_right = ax_right.bar(x, beat_values, color=colours, edgecolor="black",
+                              linewidth=0.6)
+    ax_right.set_xticks(x)
+    ax_right.set_xticklabels(labels, rotation=20, ha="right")
+    ax_right.set_ylim(0.0, 1.0)
+    ax_right.set_ylabel("PyMatching beat rate (fraction of shots, 0-1)")
+    ax_right.set_xlabel("Decoder condition")
+    ax_right.set_title("PyMatching beat rate (model corrects, PM does not)")
+    ax_right.grid(axis="y", linestyle=":", alpha=0.5)
+    for bar, val in zip(bars_right, beat_values):
+        ax_right.text(
+            bar.get_x() + bar.get_width() / 2,
+            min(val + 0.02, 0.98),
+            f"{val:.3f}",
+            ha="center", va="bottom", fontsize=9,
+        )
+    # One shared legend across both panels.
+    handles = [
+        plt.Rectangle((0, 0), 1, 1, color=c, ec="black", lw=0.6)
+        for c in colours
+    ]
+    fig.legend(
+        handles, labels,
+        loc="lower center", ncol=len(labels),
+        bbox_to_anchor=(0.5, -0.02), frameon=False,
+    )
+    fig.suptitle(title, fontsize=13, y=1.02)
+    fig.tight_layout()
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(out_path, dpi=dpi, bbox_inches="tight")
+    plt.close(fig)
+# --------------------------------------------------------------------------- #
+# CLI                                                                         #
+# --------------------------------------------------------------------------- #
+def _missing_files_message(conditions: list[Condition]) -> str:
+    """Build a helpful error when one or more eval JSONs are absent."""
+    missing = [(c.label, c.path) for c in conditions if c.data is None]
+    if not missing:
+        return ""
+    lines = [
+        "ERROR: cannot build comparison plot - one or more eval JSON files "
+        "were not found.",
+        "",
+        "Expected files (one per condition):",
+    ]
+    for label, path in missing:
+        lines.append(f"  - {label}: {path}")
+    lines.extend([
+        "",
+        "Generate them with scripts/eval.py, for example:",
+        "  python -m scripts.eval --policy random --episodes 1000 \\",
+        "      --out data/eval/random.json",
+        "  python -m scripts.eval --base-model Qwen/Qwen2.5-3B-Instruct \\",
+        "      --adapter '' --episodes 1000 --out data/eval/base_qwen.json",
+        "  python -m scripts.eval --adapter checkpoints/sft/best \\",
+        "      --episodes 1000 --out data/eval/sft_only.json",
+        "  python -m scripts.eval --adapter checkpoints/grpo/best \\",
+        "      --episodes 1000 --out data/eval/sft_grpo.json",
+        "",
+        "Override individual paths with --random / --base / --sft / --grpo.",
+    ])
+    return "\n".join(lines)
+def parse_args(argv: Iterable[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--eval-dir", type=str, default="data/eval",
+        help="Directory holding one JSON per condition "
+             "(random.json, base_qwen.json, sft_only.json, sft_grpo.json).",
+    )
+    parser.add_argument(
+        "--random", type=str, default=None,
+        help="Override path to the random-baseline eval JSON.",
+    )
+    parser.add_argument(
+        "--base", type=str, default=None,
+        help="Override path to the base-Qwen eval JSON.",
+    )
+    parser.add_argument(
+        "--sft", type=str, default=None,
+        help="Override path to the SFT-only eval JSON.",
+    )
+    parser.add_argument(
+        "--grpo", type=str, default=None,
+        help="Override path to the SFT+GRPO eval JSON.",
+    )
+    parser.add_argument(
+        "--out", type=str, default="figures/before_after_comparison.png",
+        help="Where to write the PNG (created at 150 dpi by default).",
+    )
+    parser.add_argument(
+        "--dpi", type=int, default=150,
+        help="DPI for the saved PNG.",
+    )
+    parser.add_argument(
+        "--title", type=str,
+        default=(
+            "Qubit-Medic decoder accuracy: before vs after RLHF training "
+            "(distance-3 surface code, p=0.001)"
+        ),
+        help="Figure suptitle.",
+    )
+    return parser.parse_args(list(argv))
+def main(argv: Iterable[str] = ()) -> int:
+    args = parse_args(argv)
+    explicit = {
+        "Random baseline": args.random,
+        "Base Qwen2.5-3B": args.base,
+        "SFT-only": args.sft,
+        "SFT + GRPO": args.grpo,
+    }
+    conditions = load_conditions(Path(args.eval_dir), explicit)
+    msg = _missing_files_message(conditions)
+    if msg:
+        print(msg, file=sys.stderr)
+        return 1
+    render_plot(
+        conditions=conditions,
+        out_path=Path(args.out),
+        title=args.title,
+        dpi=args.dpi,
+    )
+    print(f"Wrote comparison plot to {args.out}")
+    for c in conditions:
+        print(
+            f"  {c.label:>18s}: LCR={c.lcr:.3f}  "
+            f"PMbeat={c.beat:.3f}  (n={c.episodes}, src={c.path})"
+        )
+    return 0
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))