Spaces:
Running
Running
Commit ·
3dbfebb
1
Parent(s): d61ea0e
chore: triage untracked — gitignore scratch outputs, move helper script
Browse files
- .gitignore: diagnose_results/ (regenerable run outputs), docs/*.pdf (paper PDFs live in NeurIPS/), hf-post-v*-update.md (archived old release posts)
- experiments/overnight_diagnose_batch.py → scripts/overnight_diagnose_batch.py (alongside check_lean_manifest.py — author-side maintenance scripts)
- experiments/ dir removed (was 1-file)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- .gitignore +0 -0
- scripts/overnight_diagnose_batch.py +117 -0
.gitignore
CHANGED
|
Binary files a/.gitignore and b/.gitignore differ
|
|
|
scripts/overnight_diagnose_batch.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Overnight batch diagnose — runs diagnose_model.py on a series of models.
|
| 2 |
+
|
| 3 |
+
Validates v0.5.3 fixes empirically: each model's γ is measured, then run
|
| 4 |
+
through the corrected D_f / partition_Z / free_energy_F. JSON output per
|
| 5 |
+
model in ./diagnose_results/.
|
| 6 |
+
|
| 7 |
+
Sequential to avoid GPU OOM on the 14GB RTX 5060 Ti.
|
| 8 |
+
"""
|
| 9 |
+
import json
|
| 10 |
+
import subprocess
|
| 11 |
+
import sys
|
| 12 |
+
import time
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Switch stdout to UTF-8 (presumably so the non-ASCII characters this script
# prints, such as γ, R² and →, survive on consoles with another default
# encoding).  Guarded because sys.stdout can be None or a replacement stream
# that has no ``reconfigure`` method, in which case the call would raise.
_reconfigure = getattr(sys.stdout, "reconfigure", None)
if callable(_reconfigure):
    _reconfigure(encoding="utf-8")

# Model ids handed to cli/diagnose_model.py one at a time, in list order.
MODELS = [
    # Light → heavier
    "EleutherAI/pythia-70m",
    "EleutherAI/pythia-160m",
    "EleutherAI/pythia-410m",
    "EleutherAI/pythia-1b",
    "EleutherAI/pythia-1.4b",
]

# Append-only text log of the batch.  The path is relative to the current
# working directory; its parent directory is created at import time.
LOG = Path("./diagnose_results/overnight_log.txt")
LOG.parent.mkdir(parents=True, exist_ok=True)
# Two levels above this file, i.e. the directory that contains scripts/.
ROOT = Path(__file__).resolve().parent.parent
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def run_model(model_id: str, timeout: float = 3600) -> dict:
    """Run ``cli/diagnose_model.py`` on one model and return a summary dict.

    The diagnose script is launched as a child process with the current
    interpreter and the flags ``--fast --N 2000 --cpu``.

    Args:
        model_id: Model identifier passed to ``--model``.
        timeout: Maximum number of seconds to wait for the child process.

    Returns:
        On a completed run, a dict with ``model``, ``elapsed_seconds``,
        ``return_code``, ``stdout_tail`` and ``stderr_tail``; plus ``result``
        (the parsed per-model JSON) when that file exists and parses, or
        ``result_error`` when it exists but cannot be read.  On failure to
        run, a dict with ``model``, ``elapsed_seconds`` and ``error``
        (``"timeout"`` when the time limit was hit).
    """
    print(f"\n{'='*70}")
    print(f" {model_id}")
    print(f"{'='*70}")
    started = time.time()
    cmd = [
        sys.executable,
        str(ROOT / "cli" / "diagnose_model.py"),
        "--model", model_id,
        "--fast",
        "--N", "2000",
        "--cpu",  # CUDA fp16 default produces NaN attentions; CPU fp32 reliable
    ]
    try:
        # Decode child output as UTF-8 with replacement instead of the locale
        # codec, so undecodable bytes cannot abort the capture.
        # NOTE(review): assumes the child emits UTF-8 — confirm.
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return {"model": model_id, "elapsed_seconds": timeout, "error": "timeout"}
    except Exception as e:
        # Best-effort batch: a model that cannot even be launched is reported
        # to the caller rather than aborting the remaining models.
        return {
            "model": model_id,
            "elapsed_seconds": time.time() - started,
            "error": str(e),
        }

    out_summary = {
        "model": model_id,
        "elapsed_seconds": time.time() - started,
        "return_code": proc.returncode,
        # Keep only the tails so the summary stays small.
        "stdout_tail": proc.stdout[-2000:],
        "stderr_tail": proc.stderr[-1500:],
    }

    # Try to read the JSON the diagnose script is expected to have saved.
    # NOTE(review): a file left over from an earlier run would be picked up
    # here even if this run failed — confirm whether that is acceptable.
    json_path = ROOT / "cli" / "diagnose_results" / f"{model_id.replace('/', '--')}.json"
    if json_path.exists():
        try:
            out_summary["result"] = json.loads(json_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as err:
            # Record the failure instead of dropping it silently; "result"
            # stays absent, so callers still treat the model as not completed.
            out_summary["result_error"] = str(err)
    return out_summary
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _fmt(value, spec: str, missing: str = "?") -> str:
    """Format *value* with *spec*, never raising on missing or mistyped data."""
    if value is None:
        return missing
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return str(value)


def _fit_r2(res: dict, default):
    """Return ``res["fit_power_law"]["R2"]``, or *default* when unavailable."""
    fit = res.get("fit_power_law")
    return fit.get("R2", default) if isinstance(fit, dict) else default


def _log_entry(r: dict) -> str:
    """Build the text appended to the run log for one model summary."""
    parts = [
        f"\n{r.get('model', '?')}: rc={r.get('return_code', '?')} "
        f"({_fmt(r.get('elapsed_seconds', 0), '.0f')}s)\n"
    ]
    if "error" in r:
        parts.append(f" error={r['error']}\n")
    res = r.get("result")
    if isinstance(res, dict):
        parts.append(f" γ={_fmt(res.get('gamma'), '.4f')}, R²={_fit_r2(res, '?')}\n")
        parts.append(f" D_90={res.get('D90')}, dH_90={_fmt(res.get('delta_H_90'), '.3f')}\n")
        parts.append(f" γ_pred(Padé)={_fmt(res.get('gamma_pred'), '.4f')}, "
                     f"Δγ={_fmt(res.get('delta_gamma'), '.4f')}\n")
    return "".join(parts)


def _table_row(r: dict) -> str:
    """Build one line of the final synthesis table for one model summary."""
    if "result" not in r:
        reason = r.get("error") or f"no result JSON (rc={r.get('return_code', '?')})"
        return f"{r.get('model', '?'):<32s} ERROR: {reason}"
    res = r["result"] if isinstance(r["result"], dict) else {}
    nan = float("nan")
    g = _fmt(res.get("gamma", nan), ".4f")
    r2 = _fmt(_fit_r2(res, nan), ".4f")
    d90 = _fmt(res.get("D90", -1), "d")
    dH = _fmt(res.get("delta_H_90", nan), ".4f")
    return f"{r.get('model', '?'):<32s} {g:>8s} {r2:>8s} {d90:>6s} {dH:>8s}"


def main():
    """Run every model in MODELS in order, then log and summarise the results.

    After each model an entry is appended to LOG.  Ctrl-C stops the loop but
    still prints the synthesis table and writes ``overnight_summary.json``
    next to LOG for the models finished so far.  Missing or mistyped fields
    in a result are rendered as placeholders instead of raising, so a single
    malformed result cannot abort the batch.
    """
    results = []
    for m in MODELS:
        try:
            r = run_model(m)
            results.append(r)
            with LOG.open("a", encoding="utf-8") as f:
                f.write(_log_entry(r))
        except KeyboardInterrupt:
            print("\n[interrupted]")
            break

    # Summary table
    print("\n" + "="*70)
    print("SYNTHESIS")
    print("="*70)
    print(f"{'model':<32s} {'γ':>8s} {'R²':>8s} {'D_90':>6s} {'dH_90':>8s}")
    for r in results:
        print(_table_row(r))

    # Write final summary JSON
    out = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "n_models": len(MODELS),
        "n_completed": sum(1 for r in results if "result" in r),
        "results": results,
    }
    final = LOG.parent / "overnight_summary.json"
    final.write_text(json.dumps(out, indent=2, default=str), encoding="utf-8")
    print(f"\nSaved → {final}")
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|