File size: 2,799 Bytes
e3afdb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
  "global": {
    "a": 1.2898076120721749,
    "b": -2.221193823574642e-08,
    "n_calibration_symmetrised": 8654
  },
  "method": "Per-benchmark Platt calibrator P = sigmoid(a * (mu_A - mu_B) + b), fitted by scipy L-BFGS-B on a deterministic 10 % held-out slice of each pair-image evaluation set. The model's raw mu (unbounded) is used; the per-axis sigma is intentionally not used as a divisor on this checkpoint because sigma is uninformative.",
  "per_benchmark": {
    "hpdv3": {
      "a": 1.8748196287393641,
      "b": -4.20113735783441e-08,
      "calibration_bce_after": 0.14394968445629952,
      "calibration_bce_before": 0.9358390862337813,
      "calibration_ece_after": 0.08741625491887384,
      "calibration_ece_before": 0.06325516988648325,
      "calibration_pair_accuracy_after": 0.9342723004694836,
      "calibration_pair_accuracy_before": 0.9342723004694836,
      "display_name": "MizzenAI/HPDv3::all.json",
      "n_calibration": 1491,
      "n_eval": 12909
    },
    "imgrew": {
      "a": 0.56759383560026,
      "b": 6.533833949162787e-09,
      "calibration_bce_after": 0.6117150671038337,
      "calibration_bce_before": 7.053439925544642,
      "calibration_ece_after": 0.42527198776410396,
      "calibration_ece_before": 0.3538308552198105,
      "calibration_pair_accuracy_after": 0.6547085201793722,
      "calibration_pair_accuracy_before": 0.6397608370702541,
      "display_name": "zai-org/ImageRewardDB::test",
      "n_calibration": 669,
      "n_eval": 5730
    },
    "pickscore": {
      "a": 0.2391948108056091,
      "b": 2.5978168729447588e-09,
      "calibration_bce_after": 0.6772960799901248,
      "calibration_bce_before": 9.637875943097214,
      "calibration_ece_after": 0.1631131458863776,
      "calibration_ece_before": 0.438998341287556,
      "calibration_pair_accuracy_after": 0.5714285714285714,
      "calibration_pair_accuracy_before": 0.5476190476190477,
      "display_name": "pickapic-anonymous/pickapic_v1::test_unique",
      "n_calibration": 42,
      "n_eval": 390
    },
    "vrr": {
      "a": 1.570309174503684,
      "b": -6.131379274744431e-08,
      "calibration_bce_after": 0.4579958677877046,
      "calibration_bce_before": 4.948760962639526,
      "calibration_ece_after": 0.303661488194056,
      "calibration_ece_before": 0.22729628363024593,
      "calibration_pair_accuracy_after": 0.7656470588235295,
      "calibration_pair_accuracy_before": 0.7656470588235295,
      "display_name": "zai-org/VisionRewardDB-Image-regression::regression",
      "n_calibration": 2125,
      "n_eval": 18622
    }
  },
  "split_rule": "sha256('phaseC|{benchmark}|{global_index}').digest()[0] < 26 -> calibration; remainder -> eval. The literal string 'phaseC' is part of the deterministic hash domain and must not be changed."
}