File size: 1,978 Bytes
03815d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
{
  "n_scenarios": 135,
  "threshold": 0.5,
  "default_weights": {
    "detection": 1.0,
    "missed_scam": -0.5,
    "false_positive": -0.3,
    "calibration": 0.2,
    "explanation": 0.4
  },
  "rubric_class": "AnalyzerRubric",
  "full_summary": {
    "n": 135,
    "tp": 83,
    "fp": 6,
    "fn": 32,
    "tn": 14,
    "detection": 0.7217,
    "fpr": 0.3,
    "precision": 0.9326,
    "f1": 0.8137
  },
  "full_avg_reward": 0.7336,
  "ablations": [
    {
      "rubric_zeroed": "detection",
      "default_weight": 1.0,
      "avg_reward_full": 0.7336,
      "avg_reward_zeroed": 0.1188,
      "delta_reward": -0.6148,
      "interpretation": "rubric matters (reward dropped without it)"
    },
    {
      "rubric_zeroed": "missed_scam",
      "default_weight": -0.5,
      "avg_reward_full": 0.7336,
      "avg_reward_zeroed": 0.7336,
      "delta_reward": 0.0,
      "interpretation": "no effect"
    },
    {
      "rubric_zeroed": "false_positive",
      "default_weight": -0.3,
      "avg_reward_full": 0.7336,
      "avg_reward_zeroed": 0.7469,
      "delta_reward": 0.0133,
      "interpretation": "rubric helps (reward rose without it)"
    },
    {
      "rubric_zeroed": "calibration",
      "default_weight": 0.2,
      "avg_reward_full": 0.7336,
      "avg_reward_zeroed": 0.6015,
      "delta_reward": -0.1321,
      "interpretation": "rubric matters (reward dropped without it)"
    },
    {
      "rubric_zeroed": "explanation",
      "default_weight": 0.4,
      "avg_reward_full": 0.7336,
      "avg_reward_zeroed": 0.7336,
      "delta_reward": 0.0,
      "interpretation": "no effect"
    }
  ],
  "notes": "Post-hoc, eval-time ablation on scripted-baseline scenarios. Each rubric is zeroed in turn; we report \u0394 in average composite reward across the bench. This is a sensitivity probe, not a retrain-ablation \u2014 true 'rubric contribution to learning' requires retraining v2 with each rubric removed (GPU; v3 work).",
  "source_mode": "bench"
}