LibreHPS-4B-v1.1 / calibration.json
Jeff Moe
Calibration
e3afdb9
{
"global": {
"a": 1.2898076120721749,
"b": -2.221193823574642e-08,
"n_calibration_symmetrised": 8654
},
"method": "Per-benchmark Platt calibrator P = sigmoid(a * (mu_A - mu_B) + b), fitted by scipy L-BFGS-B on a deterministic held-out slice of roughly 10% (first hash byte < 26, i.e. 26/256; see split_rule) of each pair-image evaluation set. The model's raw mu (unbounded) is used; the per-axis sigma is intentionally not used as a divisor on this checkpoint because sigma is uninformative.",
"per_benchmark": {
"hpdv3": {
"a": 1.8748196287393641,
"b": -4.20113735783441e-08,
"calibration_bce_after": 0.14394968445629952,
"calibration_bce_before": 0.9358390862337813,
"calibration_ece_after": 0.08741625491887384,
"calibration_ece_before": 0.06325516988648325,
"calibration_pair_accuracy_after": 0.9342723004694836,
"calibration_pair_accuracy_before": 0.9342723004694836,
"display_name": "MizzenAI/HPDv3::all.json",
"n_calibration": 1491,
"n_eval": 12909
},
"imgrew": {
"a": 0.56759383560026,
"b": 6.533833949162787e-09,
"calibration_bce_after": 0.6117150671038337,
"calibration_bce_before": 7.053439925544642,
"calibration_ece_after": 0.42527198776410396,
"calibration_ece_before": 0.3538308552198105,
"calibration_pair_accuracy_after": 0.6547085201793722,
"calibration_pair_accuracy_before": 0.6397608370702541,
"display_name": "zai-org/ImageRewardDB::test",
"n_calibration": 669,
"n_eval": 5730
},
"pickscore": {
"a": 0.2391948108056091,
"b": 2.5978168729447588e-09,
"calibration_bce_after": 0.6772960799901248,
"calibration_bce_before": 9.637875943097214,
"calibration_ece_after": 0.1631131458863776,
"calibration_ece_before": 0.438998341287556,
"calibration_pair_accuracy_after": 0.5714285714285714,
"calibration_pair_accuracy_before": 0.5476190476190477,
"display_name": "pickapic-anonymous/pickapic_v1::test_unique",
"n_calibration": 42,
"n_eval": 390
},
"vrr": {
"a": 1.570309174503684,
"b": -6.131379274744431e-08,
"calibration_bce_after": 0.4579958677877046,
"calibration_bce_before": 4.948760962639526,
"calibration_ece_after": 0.303661488194056,
"calibration_ece_before": 0.22729628363024593,
"calibration_pair_accuracy_after": 0.7656470588235295,
"calibration_pair_accuracy_before": 0.7656470588235295,
"display_name": "zai-org/VisionRewardDB-Image-regression::regression",
"n_calibration": 2125,
"n_eval": 18622
}
},
"split_rule": "sha256('phaseC|{benchmark}|{global_index}').digest()[0] < 26 -> calibration; remainder -> eval. The literal string 'phaseC' is part of the deterministic hash domain and must not be changed."
}