Spaces:

karlexmarin
/

taf-agent

Running

karlexmarin Claude Opus 4.7 (1M context) committed 7 days ago

Commit

3dbfebb

1 Parent(s): d61ea0e

chore: triage untracked — gitignore scratch outputs, move helper script

- .gitignore: diagnose_results/ (regenerable run outputs), docs/*.pdf (paper PDFs live in NeurIPS/), hf-post-v*-update.md (archived old release posts)
- experiments/overnight_diagnose_batch.py → scripts/overnight_diagnose_batch.py (alongside check_lean_manifest.py — author-side maintenance scripts)
- experiments/ dir removed (was 1-file)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

.gitignore +0 -0
scripts/overnight_diagnose_batch.py +117 -0

.gitignore CHANGED Viewed

Binary files a/.gitignore and b/.gitignore differ

scripts/overnight_diagnose_batch.py ADDED Viewed

	@@ -0,0 +1,117 @@

+"""Overnight batch diagnose — runs diagnose_model.py on a series of models.
+Validates v0.5.3 fixes empirically: each model's γ is measured, then run
+through the corrected D_f / partition_Z / free_energy_F. JSON output per
+model in ./diagnose_results/.
+Sequential to avoid GPU OOM on the 14GB RTX 5060 Ti.
+"""
+import json
+import subprocess
+import sys
+import time
+from pathlib import Path
+# Force UTF-8 on stdout: this script prints non-ASCII characters (γ, R², →).
+sys.stdout.reconfigure(encoding="utf-8")
+# Hugging Face model ids to diagnose; main() processes them in this order,
+# one at a time.
+MODELS = [
+    # Light → heavier
+    "EleutherAI/pythia-70m",
+    "EleutherAI/pythia-160m",
+    "EleutherAI/pythia-410m",
+    "EleutherAI/pythia-1b",
+    "EleutherAI/pythia-1.4b",
+]
+# Append-only text log. The path is relative, so it resolves against the
+# current working directory, not against this file's location.
+LOG = Path("./diagnose_results/overnight_log.txt")
+# Import-time side effect: make sure the log/summary directory exists.
+LOG.parent.mkdir(parents=True, exist_ok=True)
+# Parent of the directory containing this script (presumably the repository
+# root — confirm); used to locate cli/diagnose_model.py and its JSON output.
+ROOT = Path(__file__).resolve().parent.parent
+def run_model(model_id: str) -> dict:
+    """Run diagnose on one model, return summary dict."""
+    print(f"\n{'='*70}")
+    print(f"  {model_id}")
+    print(f"{'='*70}")
+    t0 = time.time()
+    cmd = [
+        sys.executable,
+        str(ROOT / "cli" / "diagnose_model.py"),
+        "--model", model_id,
+        "--fast",
+        "--N", "2000",
+        "--cpu",  # CUDA fp16 default produces NaN attentions; CPU fp32 reliable
+    ]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+        elapsed = time.time() - t0
+        out_summary = {
+            "model": model_id,
+            "elapsed_seconds": elapsed,
+            "return_code": proc.returncode,
+            "stdout_tail": proc.stdout[-2000:],
+            "stderr_tail": proc.stderr[-1500:],
+        }
+        # Try to read the JSON it just saved
+        json_path = ROOT / "cli" / "diagnose_results" / f"{model_id.replace('/', '--')}.json"
+        if json_path.exists():
+            try:
+                out_summary["result"] = json.loads(json_path.read_text(encoding="utf-8"))
+            except Exception:
+                pass
+        return out_summary
+    except subprocess.TimeoutExpired:
+        return {"model": model_id, "elapsed_seconds": 3600, "error": "timeout"}
+    except Exception as e:
+        return {"model": model_id, "error": str(e)}
+def main():
+    results = []
+    for m in MODELS:
+        try:
+            r = run_model(m)
+            results.append(r)
+            with LOG.open("a", encoding="utf-8") as f:
+                f.write(f"\n{r.get('model', '?')}: rc={r.get('return_code', '?')} "
+                        f"({r.get('elapsed_seconds', 0):.0f}s)\n")
+                if "result" in r:
+                    res = r["result"]
+                    f.write(f"  γ={res.get('gamma'):.4f}, R²={res.get('fit_power_law',{}).get('R2','?')}\n")
+                    f.write(f"  D_90={res.get('D90')}, dH_90={res.get('delta_H_90'):.3f}\n")
+                    f.write(f"  γ_pred(Padé)={res.get('gamma_pred'):.4f}, "
+                            f"Δγ={res.get('delta_gamma'):.4f}\n")
+        except KeyboardInterrupt:
+            print("\n[interrupted]")
+            break
+    # Summary table
+    print("\n" + "="*70)
+    print("SYNTHESIS")
+    print("="*70)
+    print(f"{'model':<32s} {'γ':>8s} {'R²':>8s} {'D_90':>6s} {'dH_90':>8s}")
+    for r in results:
+        if "result" in r:
+            res = r["result"]
+            g = res.get("gamma", float("nan"))
+            r2 = res.get("fit_power_law", {}).get("R2", float("nan"))
+            d90 = res.get("D90", -1)
+            dH = res.get("delta_H_90", float("nan"))
+            print(f"{r['model']:<32s} {g:>8.4f} {r2:>8.4f} {d90:>6d} {dH:>8.4f}")
+        else:
+            print(f"{r.get('model', '?'):<32s}  ERROR: {r.get('error', '')}")
+    # Write final summary JSON
+    out = {
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+        "n_models": len(MODELS),
+        "n_completed": sum(1 for r in results if "result" in r),
+        "results": results,
+    }
+    final = LOG.parent / "overnight_summary.json"
+    final.write_text(json.dumps(out, indent=2, default=str), encoding="utf-8")
+    print(f"\nSaved → {final}")
+if __name__ == "__main__":
+    main()