Spaces:
Running
Running
| """Overnight batch diagnose — runs diagnose_model.py on a series of models. | |
| Validates v0.5.3 fixes empirically: each model's γ is measured, then run | |
| through the corrected D_f / partition_Z / free_energy_F. JSON output per | |
| model in ./diagnose_results/. | |
| Sequential to avoid GPU OOM on the 14GB RTX 5060 Ti. | |
| """ | |
| import json | |
| import subprocess | |
| import sys | |
| import time | |
| from pathlib import Path | |
# Switch stdout to UTF-8; presumably needed because the banner and table
# printed by this script contain non-ASCII characters (γ, R², →).
sys.stdout.reconfigure(encoding="utf-8")

# Hugging Face model ids to diagnose, in run order.
MODELS = [
    f"EleutherAI/pythia-{size}"
    # Light → heavier
    for size in ("70m", "160m", "410m", "1b", "1.4b")
]

# Append-only batch log, relative to the current working directory; its
# directory is created at import time so later writes cannot fail on it.
LOG = Path("diagnose_results") / "overnight_log.txt"
LOG.parent.mkdir(parents=True, exist_ok=True)

# Directory two levels above this file; run_model() looks for
# ``cli/diagnose_model.py`` underneath it.
ROOT = Path(__file__).resolve().parents[1]
def run_model(model_id: str, timeout: float = 3600) -> dict:
    """Run ``cli/diagnose_model.py`` on one model and return a summary dict.

    The diagnose script is launched as a child process with
    ``--fast --N 2000 --cpu`` and its output is captured.

    Args:
        model_id: Model identifier passed to ``--model``
            (e.g. ``"EleutherAI/pythia-70m"``).
        timeout: Seconds to wait for the child process before giving up.

    Returns:
        A dict that always contains ``"model"`` and ``"elapsed_seconds"``.

        * When the process ran to completion: ``"return_code"``,
          ``"stdout_tail"``, ``"stderr_tail"``, plus ``"result"`` (the parsed
          per-model JSON) if one could be loaded, or ``"result_error"``
          explaining why a JSON file that exists was not used.
        * When the process timed out or could not be started: ``"error"``.

    This function does not raise for child-process failures; they are
    reported through the returned dict so a batch can continue.
    """
    print(f"\n{'='*70}")
    print(f" {model_id}")
    print(f"{'='*70}")
    t0 = time.time()
    cmd = [
        sys.executable,
        str(ROOT / "cli" / "diagnose_model.py"),
        "--model", model_id,
        "--fast",
        "--N", "2000",
        "--cpu",  # CUDA fp16 default produces NaN attentions; CPU fp32 reliable
    ]
    try:
        # errors="replace": an undecodable byte in the child's output must not
        # discard the capture of an otherwise finished run.
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return {"model": model_id, "elapsed_seconds": timeout, "error": "timeout"}
    except Exception as exc:  # batch boundary: report, never abort the batch
        return {
            "model": model_id,
            "elapsed_seconds": time.time() - t0,
            "error": f"{type(exc).__name__}: {exc}",
        }

    out_summary = {
        "model": model_id,
        "elapsed_seconds": time.time() - t0,
        "return_code": proc.returncode,
        "stdout_tail": proc.stdout[-2000:],
        "stderr_tail": proc.stderr[-1500:],
    }

    # Try to read the JSON the child just saved.
    json_path = ROOT / "cli" / "diagnose_results" / f"{model_id.replace('/', '--')}.json"
    mtime_slack = 2.0  # some filesystems (e.g. FAT) only keep 2-second mtimes
    try:
        is_stale = json_path.stat().st_mtime < t0 - mtime_slack
        if is_stale and proc.returncode != 0:
            # The run failed and did not rewrite the file: what is on disk
            # belongs to an earlier run and must not be reported as this one.
            out_summary["result_error"] = "ignored stale JSON left by an earlier run"
        else:
            out_summary["result"] = json.loads(json_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        pass  # nothing was saved; the missing "result" key already says so
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        out_summary["result_error"] = f"{type(exc).__name__}: {exc}"
    return out_summary
def _fmt_metric(value, spec: str, width: int = 0) -> str:
    """Format *value* with *spec*, right-aligned to *width*, without raising.

    ``None`` becomes ``"?"``; a value the format spec cannot handle (a string,
    or a float under ``"d"``) falls back to ``str(value)``.
    """
    if value is None:
        text = "?"
    else:
        try:
            text = format(value, spec)
        except (TypeError, ValueError):
            text = str(value)
    return text.rjust(width)


def _fit_r2(res: dict, default):
    """Return ``res["fit_power_law"]["R2"]``, or *default* if it is absent."""
    fit = res.get("fit_power_law")
    return fit.get("R2", default) if isinstance(fit, dict) else default


def _failure_reason(summary: dict) -> str:
    """Describe why a run summary carries no usable result."""
    if summary.get("error"):
        return str(summary["error"])
    stderr_lines = (summary.get("stderr_tail") or "").strip().splitlines()
    detail = stderr_lines[-1].strip() if stderr_lines else "no result JSON found"
    return f"rc={summary.get('return_code', '?')}: {detail}"


def main() -> None:
    """Diagnose every model in ``MODELS`` and write the batch reports.

    For each model, ``run_model`` is called and a short entry is appended to
    ``LOG``. Afterwards a summary table is printed and all collected summaries
    are written to ``overnight_summary.json`` next to the log. Ctrl-C stops
    the loop early; the table and summary JSON are still produced for the
    models finished so far.

    Metrics that are missing, ``null`` or of an unexpected type in a result
    file are shown as ``?`` or as plain text rather than raising, so one odd
    result cannot abort the remaining runs.
    """
    results = []
    for m in MODELS:
        try:
            r = run_model(m)
            results.append(r)
            with LOG.open("a", encoding="utf-8") as f:
                f.write(f"\n{r.get('model', '?')}: rc={r.get('return_code', '?')} "
                        f"({r.get('elapsed_seconds', 0):.0f}s)\n")
                res = r.get("result")
                if isinstance(res, dict):
                    f.write(f" γ={_fmt_metric(res.get('gamma'), '.4f')}, "
                            f"R²={_fit_r2(res, '?')}\n")
                    f.write(f" D_90={res.get('D90')}, "
                            f"dH_90={_fmt_metric(res.get('delta_H_90'), '.3f')}\n")
                    f.write(f" γ_pred(Padé)={_fmt_metric(res.get('gamma_pred'), '.4f')}, "
                            f"Δγ={_fmt_metric(res.get('delta_gamma'), '.4f')}\n")
                elif "result" not in r:
                    f.write(f" ERROR: {_failure_reason(r)}\n")
        except KeyboardInterrupt:
            print("\n[interrupted]")
            break

    # Summary table
    print("\n" + "="*70)
    print("SYNTHESIS")
    print("="*70)
    print(f"{'model':<32s} {'γ':>8s} {'R²':>8s} {'D_90':>6s} {'dH_90':>8s}")
    for r in results:
        if "result" in r:
            res = r["result"] if isinstance(r["result"], dict) else {}
            g = res.get("gamma", float("nan"))
            r2 = _fit_r2(res, float("nan"))
            d90 = res.get("D90", -1)
            dH = res.get("delta_H_90", float("nan"))
            print(f"{r['model']:<32s} {_fmt_metric(g, '.4f', 8)} "
                  f"{_fmt_metric(r2, '.4f', 8)} {_fmt_metric(d90, 'd', 6)} "
                  f"{_fmt_metric(dH, '.4f', 8)}")
        else:
            print(f"{r.get('model', '?'):<32s} ERROR: {_failure_reason(r)}")

    # Write final summary JSON
    out = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "n_models": len(MODELS),
        "n_completed": sum(1 for r in results if "result" in r),
        "results": results,
    }
    final = LOG.parent / "overnight_summary.json"
    final.write_text(json.dumps(out, indent=2, default=str), encoding="utf-8")
    print(f"\nSaved → {final}")


if __name__ == "__main__":
    main()