Spaces:

anugrahhu
/

cernenv-trainer

Paused

App Files Files Community

anugrahhu committed 13 days ago

Commit

3080a66

verified ·

1 Parent(s): eb2a494

dashboard: synthesize PNGs on demand + cache-bust + pass --evidence_dir to vanilla

Browse files

Files changed (1) hide show

space/training/app.py +263 -6

space/training/app.py CHANGED Viewed

@@ -14,19 +14,22 @@ work runs in a background thread so the HTTP server stays responsive.
 from __future__ import annotations
 import json
 import logging
 import os
 import subprocess
 import sys
 import threading
 import time
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, Optional
 from fastapi import FastAPI, HTTPException
-from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, PlainTextResponse
 from fastapi.staticfiles import StaticFiles
@@ -179,6 +182,9 @@ def _build_training_cmd(config: Dict[str, Any]) -> list[str]:
     backend = str(config.get("training_backend", "vanilla")).lower()
     if backend == "vanilla":
         python_bin = "/usr/local/bin/python" if Path("/usr/local/bin/python").exists() else sys.executable
         return [
             python_bin, "-m", "training.training_script",
             "--model_name", config["model_name"],
@@ -186,7 +192,10 @@ def _build_training_cmd(config: Dict[str, Any]) -> list[str]:
             "--total_episodes", str(config["total_episodes"]),
             "--max_steps", str(config["max_steps"]),
             "--num_generations", str(config["num_generations"]),
             "--output_dir", config["output_dir"],
         ]
     if backend != "unsloth":
@@ -501,6 +510,232 @@ def _start_training(config: Dict[str, Any]) -> None:
         STATE.thread.start()
 # ── FastAPI app ──────────────────────────────────────────────────────────
@@ -513,6 +748,7 @@ _HTML = """\
 <head>
   <meta charset=utf-8>
   <title>CERNenv Trainer</title>
   <style>
     body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 2rem auto;
            max-width: 1000px; color:#111; padding: 0 1rem; line-height:1.5 }
@@ -710,13 +946,34 @@ def evidence_index() -> JSONResponse:
 @app.get("/evidence/{name}")
 def evidence_file(name: str):
-    """Serve a single evidence artifact (PNG/CSV/JSON/MD) by filename."""
     if "/" in name or ".." in name:
         raise HTTPException(status_code=400, detail="invalid name")
     target = EVIDENCE_DIR / name
-    if not target.exists() or not target.is_file():
-        raise HTTPException(status_code=404, detail=f"{name} not found")
-    return FileResponse(target)
 @app.get("/logs", response_class=PlainTextResponse)

 from __future__ import annotations
+import ast
+import io
 import json
 import logging
 import os
+import re
 import subprocess
 import sys
 import threading
 import time
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import Any, Dict, List, Optional
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, PlainTextResponse, Response
 from fastapi.staticfiles import StaticFiles
     backend = str(config.get("training_backend", "vanilla")).lower()
     if backend == "vanilla":
         python_bin = "/usr/local/bin/python" if Path("/usr/local/bin/python").exists() else sys.executable
+        # vanilla now accepts --evidence_dir / --checkpoint_eval_* so the
+        # backported EvidenceCallback writes evidence/*.csv + plots into
+        # the same directory the dashboard serves from.
         return [
             python_bin, "-m", "training.training_script",
             "--model_name", config["model_name"],
             "--total_episodes", str(config["total_episodes"]),
             "--max_steps", str(config["max_steps"]),
             "--num_generations", str(config["num_generations"]),
+            "--checkpoint_eval_steps", str(config["checkpoint_eval_steps"]),
+            "--checkpoint_eval_episodes", str(config["checkpoint_eval_episodes"]),
             "--output_dir", config["output_dir"],
+            "--evidence_dir", config["evidence_dir"],
         ]
     if backend != "unsloth":
         STATE.thread.start()
+# ── On-demand evidence-PNG synthesis ─────────────────────────────────────
+#
+# The vanilla GRPO backend (training/training_script.py) does not register
+# an EvidenceCallback, so it never writes training_log.csv /
+# reward_components.csv mid-run. The unsloth backend does, but a Space that
+# happens to be running the vanilla path leaves those evidence cards empty
+# until post-eval — and even then they stay empty because the underlying
+# CSVs were never produced.
+#
+# To keep the dashboard live without restarting the in-flight run, we
+# synthesise both PNGs on demand by parsing the TRL log dicts that the
+# trainer prints to stdout (captured in training/runs/training.log by
+# _stream_subprocess). The unsloth path still gets its richer
+# component-level CSVs as before; this only kicks in when the
+# requested PNG is missing from the evidence directory.
+# Matches a tqdm progress line like " 53%|█████▎    | 190/360 [12:31<10:06,
+#  3.57s/it]" emitted just before each TRL log dict, so we can attribute a
+# dict to the correct global_step instead of guessing from logging_steps.
+_TQDM_PROGRESS_RE = re.compile(r"\b(\d+)\s*/\s*(\d+)\s*\[")
+def _parse_training_log_dicts(text: str) -> List[Dict[str, Any]]:
+    """Extract per-log-step rows from a captured TRL stdout log.
+    TRL prints a Python dict-repr on each ``logging_steps`` boundary.
+    We pair each dict with the most recent tqdm progress line so the
+    plotted x-axis reflects ``global_step`` rather than dict-arrival
+    order. Lines that do not parse cleanly are silently skipped.
+    """
+    rows: List[Dict[str, Any]] = []
+    last_step: Optional[int] = None
+    for raw in text.splitlines():
+        m = _TQDM_PROGRESS_RE.search(raw)
+        if m:
+            try:
+                last_step = int(m.group(1))
+            except ValueError:
+                pass
+            continue
+        s = raw.strip()
+        if not (s.startswith("{") and s.endswith("}")):
+            continue
+        if "'loss'" not in s and "'reward'" not in s and "'kl'" not in s:
+            continue
+        try:
+            d = ast.literal_eval(s)
+        except (ValueError, SyntaxError):
+            continue
+        if not isinstance(d, dict):
+            continue
+        reward = (
+            d.get("reward")
+            or d.get("rewards/mean")
+            or d.get("rewards/reward_fn/mean")
+        )
+        reward_std = (
+            d.get("reward_std")
+            or d.get("rewards/std")
+            or d.get("rewards/reward_fn/std")
+        )
+        rows.append({
+            "step": last_step if last_step is not None else len(rows),
+            "loss": d.get("loss"),
+            "reward": reward,
+            "reward_std": reward_std,
+            "kl": d.get("kl"),
+            "grad_norm": d.get("grad_norm"),
+            "learning_rate": d.get("learning_rate"),
+            "epoch": d.get("epoch"),
+            "frac_reward_zero_std": d.get("frac_reward_zero_std"),
+            "completions_mean_length": d.get("completions/mean_length"),
+            "completions_clipped_ratio": d.get("completions/clipped_ratio"),
+        })
+    return rows
def _try_matplotlib():
    """Return ``matplotlib.pyplot`` with the Agg backend selected, or ``None``.

    Any failure while importing or configuring matplotlib is logged as a
    warning and reported to the caller as ``None``; plotting is best-effort.
    """
    try:
        import matplotlib  # type: ignore
        matplotlib.use("Agg")
        from matplotlib import pyplot  # type: ignore
    except Exception as exc:  # pragma: no cover - plotting is best-effort
        logger.warning("matplotlib unavailable: %s", exc)
        return None
    return pyplot
def _png_bytes(fig) -> bytes:
    """Render *fig* as a PNG at 140 dpi and return the encoded bytes."""
    with io.BytesIO() as sink:
        fig.savefig(sink, format="png", dpi=140)
        return sink.getvalue()
def _read_log_text() -> Optional[str]:
    """Return the contents of ``LOG_FILE``, or ``None`` if it cannot be read.

    Undecodable bytes are replaced rather than raising, so a partially
    written or oddly encoded log still yields text.
    """
    try:
        # A missing file raises FileNotFoundError, which is an OSError, so a
        # single handler covers both "not there yet" and "unreadable".
        return LOG_FILE.read_text(errors="replace")
    except OSError:
        return None
def _synth_training_curve_png() -> Optional[bytes]:
    """Render a 2-panel reward/loss curve from the captured TRL stdout log.

    The top panel plots the parsed reward against the step, the bottom
    panel the parsed loss. Rows lacking a metric are left out of that
    metric's series.

    Returns:
        PNG bytes, or ``None`` when the log is missing/empty, contains no
        parseable log dicts, or matplotlib is unavailable.
    """
    text = _read_log_text()
    if not text:
        return None
    rows = _parse_training_log_dicts(text)
    if not rows:
        return None
    plt = _try_matplotlib()
    if plt is None:
        return None

    rewards = [(r["step"], r["reward"]) for r in rows if r["reward"] is not None]
    losses = [(r["step"], r["loss"]) for r in rows if r["loss"] is not None]

    fig, axes = plt.subplots(2, 1, figsize=(8, 6), sharex=True)
    # Everything after figure creation sits inside try/finally: pyplot keeps
    # every open figure in a global registry, so an exception while plotting
    # would otherwise leak one figure per failed request.
    try:
        reward_ax, loss_ax = axes
        if rewards:
            reward_ax.plot([x for x, _ in rewards], [y for _, y in rewards],
                           lw=1.6, color="#1d4ed8")
            reward_ax.set_ylabel("mean reward")
            reward_ax.set_title(
                "CERNenv GRPO training — reward over steps "
                f"(synthesised from {len(rewards)} log events)"
            )
            reward_ax.grid(alpha=0.25)
        if losses:
            loss_ax.plot([x for x, _ in losses], [y for _, y in losses],
                         lw=1.6, color="#c026d3")
            loss_ax.set_ylabel("GRPO loss")
        # Label the shared x-axis even when no loss values were logged, so a
        # reward-only plot is still readable.
        loss_ax.set_xlabel("training step")
        loss_ax.grid(alpha=0.25)
        fig.tight_layout()
        return _png_bytes(fig)
    finally:
        plt.close(fig)
def _synth_reward_components_png() -> Optional[bytes]:
    """Best-effort reward-components view derived from TRL stdout.

    The unsloth callback writes a true terminal-vs-shaping split into
    reward_components.csv. The vanilla backend only emits aggregate
    reward in the TRL log dict, so here we fall back to plotting reward
    mean ± std (group dispersion) on the top panel and KL on the bottom
    panel, with the zero-std fraction and the normalised mean completion
    length on a secondary right-hand axis.

    Returns:
        PNG bytes, or ``None`` when the log is missing/empty, contains no
        parseable log dicts, or matplotlib is unavailable.
    """
    text = _read_log_text()
    if not text:
        return None
    rows = _parse_training_log_dicts(text)
    if not rows:
        return None
    plt = _try_matplotlib()
    if plt is None:
        return None

    steps = [r["step"] for r in rows]
    rmean = [r.get("reward") for r in rows]
    rstd = [r.get("reward_std") for r in rows]
    kls = [r.get("kl") for r in rows]
    fzero = [r.get("frac_reward_zero_std") for r in rows]
    clen = [r.get("completions_mean_length") for r in rows]

    fig, axes = plt.subplots(2, 1, figsize=(8, 6.5), sharex=True)
    # Everything after figure creation sits inside try/finally: pyplot keeps
    # every open figure in a global registry, so an exception while plotting
    # would otherwise leak one figure per failed request.
    try:
        top, bottom = axes

        band = [(s, m, sd) for s, m, sd in zip(steps, rmean, rstd) if m is not None]
        if band:
            sx = [b[0] for b in band]
            rm = [b[1] for b in band]
            # A missing std collapses the band onto the mean at that step.
            rs = [b[2] if b[2] is not None else 0.0 for b in band]
            top.plot(sx, rm, lw=2.0, color="#0f172a", label="reward (group mean)")
            top.fill_between(
                sx,
                [m - s for m, s in zip(rm, rs)],
                [m + s for m, s in zip(rm, rs)],
                alpha=0.18, color="#1d4ed8", label="±1 std (group dispersion)",
            )
            top.set_ylabel("reward at logging step")
            top.set_title(
                "CERNenv reward — group mean ± dispersion "
                "(stdout-derived; install EvidenceCallback for terminal vs shaping split)"
            )
            top.grid(alpha=0.25)
            top.legend(loc="lower right", fontsize=9)

        kl_pts = [(s, k) for s, k in zip(steps, kls) if k is not None]
        if kl_pts:
            bottom.plot([p[0] for p in kl_pts], [p[1] for p in kl_pts],
                        lw=1.5, color="#9333ea", label="KL divergence")
            bottom.set_ylabel("KL", color="#9333ea")
            # The KL line carries a label, so actually draw its legend; it is
            # placed opposite the auxiliary-axis legend to avoid overlap.
            bottom.legend(loc="upper left", fontsize=8)

        fz_pts = [(s, f) for s, f in zip(steps, fzero) if f is not None]
        cl_pts = [(s, c) for s, c in zip(steps, clen) if c is not None]
        if fz_pts or cl_pts:
            ax2 = bottom.twinx()
            if fz_pts:
                ax2.plot([p[0] for p in fz_pts], [p[1] for p in fz_pts],
                         "o-", lw=1.0, ms=3, color="#ea580c",
                         label="frac rollouts with zero-std (saturation)")
                ax2.set_ylim(-0.02, 1.05)
            if cl_pts:
                # Scale lengths by their maximum so they share the right axis
                # with the zero-std fraction; fall back to 1.0 when the
                # maximum is 0 to avoid dividing by zero.
                cmax = max(p[1] for p in cl_pts) or 1.0
                ax2.plot([p[0] for p in cl_pts], [p[1] / cmax for p in cl_pts],
                         "x:", lw=1.0, ms=4, color="#16a34a",
                         label=f"completion mean length / {cmax:.0f}")
            ax2.set_ylabel("auxiliary (right axis, normalised)", color="#475569")
            ax2.legend(loc="upper right", fontsize=8)

        bottom.set_xlabel("training step")
        bottom.grid(alpha=0.25)
        fig.tight_layout()
        return _png_bytes(fig)
    finally:
        plt.close(fig)
# Evidence filename -> zero-argument renderer returning PNG bytes (or None
# when nothing can be rendered). Consulted by the /evidence/{name} route
# when the requested file is not present on disk.
_SYNTH_HANDLERS: Dict[str, Any] = {
    "training_curve.png": _synth_training_curve_png,
    "reward_components.png": _synth_reward_components_png,
}
 # ── FastAPI app ──────────────────────────────────────────────────────────
 <head>
   <meta charset=utf-8>
   <title>CERNenv Trainer</title>
+  <meta http-equiv="refresh" content="60">
   <style>
     body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 2rem auto;
            max-width: 1000px; color:#111; padding: 0 1rem; line-height:1.5 }
 @app.get("/evidence/{name}")
 def evidence_file(name: str):
+    """Serve a single evidence artifact (PNG/CSV/JSON/MD) by filename.
+    For ``training_curve.png`` and ``reward_components.png`` we fall back
+    to on-demand synthesis from the captured TRL stdout log when the
+    underlying file does not yet exist on disk — which is the normal
+    state of affairs when the vanilla backend is running and no
+    EvidenceCallback has had a chance to write the source CSV.
+    """
     if "/" in name or ".." in name:
         raise HTTPException(status_code=400, detail="invalid name")
     target = EVIDENCE_DIR / name
+    if target.exists() and target.is_file():
+        return FileResponse(target)
+    handler = _SYNTH_HANDLERS.get(name)
+    if handler is not None:
+        try:
+            png = handler()
+        except Exception as exc:  # pragma: no cover - synthesis is best-effort
+            logger.warning("on-demand synthesis of %s failed: %s", name, exc)
+            png = None
+        if png:
+            return Response(
+                content=png,
+                media_type="image/png",
+                headers={"Cache-Control": "no-store, max-age=0"},
+            )
+    raise HTTPException(status_code=404, detail=f"{name} not found")
 @app.get("/logs", response_class=PlainTextResponse)