{ "timestamp": "2026-05-08T00:12:00.713088", "checkpoint": "checkpoints/phase9_production/step_200000.pt", "model_params": 172376579, "total_time_s": 1562.0058193206787, "n_seeds": 5, "guidance_scale": 2.0, "results": [ { "test_id": "A1", "tier": "A", "name": "Consistency", "n_cases": 100, "n_pass": 100, "pass_rate": 1.0, "passed": true, "details": {}, "duration_s": 571.2158267498016 }, { "test_id": "A2", "tier": "A", "name": "Causal Asymmetry", "n_cases": 19, "n_pass": 19, "pass_rate": 1.0, "passed": true, "details": { "mean_corr": 0.5317086431628028 }, "duration_s": 85.96794986724854 }, { "test_id": "A3", "tier": "A", "name": "Compositionality", "n_cases": 30, "n_pass": 28, "pass_rate": 0.9333333333333333, "passed": true, "details": {}, "duration_s": 214.8946647644043 }, { "test_id": "A4", "tier": "A", "name": "Counterfactual Coherence", "n_cases": 50, "n_pass": 46, "pass_rate": 0.92, "passed": true, "details": { "median_corr": 0.999642601137857, "min_corr": 0.7887732292567268 }, "duration_s": 214.578773021698 }, { "test_id": "A5", "tier": "A", "name": "Robustness", "n_cases": 56, "n_pass": 56, "pass_rate": 1.0, "passed": true, "details": {}, "duration_s": 88.48681592941284 }, { "test_id": "B1", "tier": "B", "name": "Placebo (non-edges)", "n_cases": 39, "n_pass": 23, "pass_rate": 0.5897435897435898, "passed": false, "details": { "mean_effect": 0.12529101967811584, "max_effect": 0.444835901260376 }, "duration_s": 90.49227929115295 }, { "test_id": "B2", "tier": "B", "name": "Real effects (edges)", "n_cases": 19, "n_pass": 19, "pass_rate": 1.0, "passed": true, "details": { "mean_effect": 0.3813858926296234 }, "duration_s": 45.237102031707764 }, { "test_id": "B4", "tier": "B", "name": "Sensitivity monotonicity", "n_cases": 10, "n_pass": 10, "pass_rate": 1.0, "passed": true, "details": {}, "duration_s": 140.24205374717712 }, { "test_id": "C1", "tier": "C", "name": "RBI Rate Decisions", "n_cases": 42, "n_pass": 42, "pass_rate": 1.0, "passed": true, "details": { "rate_changes_tested": 42 }, "duration_s": 99.34343814849854 } ], "tier_summary": { "A": { "total": 5, "passed": 5 }, "B": { "total": 3, "passed": 2 }, "C": { "total": 1, "passed": 1 } }, "overall_pass": false }