{ "global": { "a": 1.2898076120721749, "b": -2.221193823574642e-08, "n_calibration_symmetrised": 8654 }, "method": "Per-benchmark Platt calibrator P = sigmoid(a * (mu_A - mu_B) + b), fitted by scipy L-BFGS-B on a deterministic 10 % held-out slice of each pair-image evaluation set. The model's raw mu (unbounded) is used; the per-axis sigma is intentionally not used as a divisor on this checkpoint because sigma is uninformative.", "per_benchmark": { "hpdv3": { "a": 1.8748196287393641, "b": -4.20113735783441e-08, "calibration_bce_after": 0.14394968445629952, "calibration_bce_before": 0.9358390862337813, "calibration_ece_after": 0.08741625491887384, "calibration_ece_before": 0.06325516988648325, "calibration_pair_accuracy_after": 0.9342723004694836, "calibration_pair_accuracy_before": 0.9342723004694836, "display_name": "MizzenAI/HPDv3::all.json", "n_calibration": 1491, "n_eval": 12909 }, "imgrew": { "a": 0.56759383560026, "b": 6.533833949162787e-09, "calibration_bce_after": 0.6117150671038337, "calibration_bce_before": 7.053439925544642, "calibration_ece_after": 0.42527198776410396, "calibration_ece_before": 0.3538308552198105, "calibration_pair_accuracy_after": 0.6547085201793722, "calibration_pair_accuracy_before": 0.6397608370702541, "display_name": "zai-org/ImageRewardDB::test", "n_calibration": 669, "n_eval": 5730 }, "pickscore": { "a": 0.2391948108056091, "b": 2.5978168729447588e-09, "calibration_bce_after": 0.6772960799901248, "calibration_bce_before": 9.637875943097214, "calibration_ece_after": 0.1631131458863776, "calibration_ece_before": 0.438998341287556, "calibration_pair_accuracy_after": 0.5714285714285714, "calibration_pair_accuracy_before": 0.5476190476190477, "display_name": "pickapic-anonymous/pickapic_v1::test_unique", "n_calibration": 42, "n_eval": 390 }, "vrr": { "a": 1.570309174503684, "b": -6.131379274744431e-08, "calibration_bce_after": 0.4579958677877046, "calibration_bce_before": 4.948760962639526, "calibration_ece_after": 0.303661488194056, "calibration_ece_before": 0.22729628363024593, "calibration_pair_accuracy_after": 0.7656470588235295, "calibration_pair_accuracy_before": 0.7656470588235295, "display_name": "zai-org/VisionRewardDB-Image-regression::regression", "n_calibration": 2125, "n_eval": 18622 } }, "split_rule": "sha256('phaseC|{benchmark}|{global_index}').digest()[0] < 26 -> calibration; remainder -> eval. The literal string 'phaseC' is part of the deterministic hash domain and must not be changed." }