| { |
| "global": { |
| "a": 1.2898076120721749, |
| "b": -2.221193823574642e-08, |
| "n_calibration_symmetrised": 8654 |
| }, |
| "method": "Per-benchmark Platt calibrator P = sigmoid(a * (mu_A - mu_B) + b), fitted by scipy L-BFGS-B on a deterministic 10 % held-out slice of each pair-image evaluation set. The model's raw mu (unbounded) is used; the per-axis sigma is intentionally not used as a divisor on this checkpoint because sigma is uninformative.", |
| "per_benchmark": { |
| "hpdv3": { |
| "a": 1.8748196287393641, |
| "b": -4.20113735783441e-08, |
| "calibration_bce_after": 0.14394968445629952, |
| "calibration_bce_before": 0.9358390862337813, |
| "calibration_ece_after": 0.08741625491887384, |
| "calibration_ece_before": 0.06325516988648325, |
| "calibration_pair_accuracy_after": 0.9342723004694836, |
| "calibration_pair_accuracy_before": 0.9342723004694836, |
| "display_name": "MizzenAI/HPDv3::all.json", |
| "n_calibration": 1491, |
| "n_eval": 12909 |
| }, |
| "imgrew": { |
| "a": 0.56759383560026, |
| "b": 6.533833949162787e-09, |
| "calibration_bce_after": 0.6117150671038337, |
| "calibration_bce_before": 7.053439925544642, |
| "calibration_ece_after": 0.42527198776410396, |
| "calibration_ece_before": 0.3538308552198105, |
| "calibration_pair_accuracy_after": 0.6547085201793722, |
| "calibration_pair_accuracy_before": 0.6397608370702541, |
| "display_name": "zai-org/ImageRewardDB::test", |
| "n_calibration": 669, |
| "n_eval": 5730 |
| }, |
| "pickscore": { |
| "a": 0.2391948108056091, |
| "b": 2.5978168729447588e-09, |
| "calibration_bce_after": 0.6772960799901248, |
| "calibration_bce_before": 9.637875943097214, |
| "calibration_ece_after": 0.1631131458863776, |
| "calibration_ece_before": 0.438998341287556, |
| "calibration_pair_accuracy_after": 0.5714285714285714, |
| "calibration_pair_accuracy_before": 0.5476190476190477, |
| "display_name": "pickapic-anonymous/pickapic_v1::test_unique", |
| "n_calibration": 42, |
| "n_eval": 390 |
| }, |
| "vrr": { |
| "a": 1.570309174503684, |
| "b": -6.131379274744431e-08, |
| "calibration_bce_after": 0.4579958677877046, |
| "calibration_bce_before": 4.948760962639526, |
| "calibration_ece_after": 0.303661488194056, |
| "calibration_ece_before": 0.22729628363024593, |
| "calibration_pair_accuracy_after": 0.7656470588235295, |
| "calibration_pair_accuracy_before": 0.7656470588235295, |
| "display_name": "zai-org/VisionRewardDB-Image-regression::regression", |
| "n_calibration": 2125, |
| "n_eval": 18622 |
| } |
| }, |
| "split_rule": "sha256('phaseC|{benchmark}|{global_index}').digest()[0] < 26 -> calibration; remainder -> eval. The literal string 'phaseC' is part of the deterministic hash domain and must not be changed." |
| } |
|
|