adithya9903 committed on
Commit
9f2d200
·
verified ·
1 Parent(s): 90eae1e

Upload PolyGuard artifact folder: qwen-qwen2-5-3b-instruct reports after policy_ablation

Browse files
outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "ablations": {
4
+ "bandit_only": {
5
+ "avg_reward": 0.779625,
6
+ "legality_rate": 1.0,
7
+ "severe_violation_rate": 0.0,
8
+ "abstention_rate": 0.0,
9
+ "avg_episode_length": 2.8125,
10
+ "success_rate": 0.0,
11
+ "avg_burden_delta": 0.0,
12
+ "avg_safety_delta": 0.483125,
13
+ "avg_dosing_quality": 0.75,
14
+ "avg_process_fidelity": 0.9056250000000008,
15
+ "exploit_detection_count": 2.0,
16
+ "timeout_rate": 0.0,
17
+ "failure_visible_rate": 0.0625,
18
+ "avg_invalid_actions": 0.0625,
19
+ "reward_columns": {
20
+ "format_compliance_score": 0.9989999999999996,
21
+ "candidate_alignment_score": 0.9989999999999996,
22
+ "legality_score": 0.9989999999999996,
23
+ "safety_delta_score": 0.483125,
24
+ "burden_improvement_score": 0.5,
25
+ "disease_stability_score": 0.8999999999999995,
26
+ "dosing_quality_score": 0.75,
27
+ "abstention_quality_score": 0.5600000000000002,
28
+ "efficiency_score": 0.5855625,
29
+ "process_fidelity_score": 0.9056250000000008,
30
+ "explanation_grounding_score": 0.8000000000000004,
31
+ "anti_cheat_score": 0.9366249999999997,
32
+ "uncertainty_calibration_score": 0.8531250000000004
33
+ },
34
+ "primary_reward_channels": {
35
+ "safety_legality": 0.9469062499999998,
36
+ "clinical_improvement": 0.6273749999999997,
37
+ "dosing_quality": 0.6550000000000001,
38
+ "process_integrity": 0.8225937500000001
39
+ },
40
+ "policy_stack": "bandit-only",
41
+ "failure_mining": {
42
+ "total_rows": 32,
43
+ "failure_rows": 2,
44
+ "top_failure_reasons": [
45
+ {
46
+ "reason": "repeated_action_loop",
47
+ "count": 2
48
+ }
49
+ ]
50
+ }
51
+ },
52
+ "llm_only": {
53
+ "avg_reward": 0.7723913043478261,
54
+ "legality_rate": 1.0,
55
+ "severe_violation_rate": 0.0,
56
+ "abstention_rate": 0.0,
57
+ "avg_episode_length": 1.9565217391304348,
58
+ "success_rate": 0.0,
59
+ "avg_burden_delta": 0.0,
60
+ "avg_safety_delta": 0.4882608695652174,
61
+ "avg_dosing_quality": 0.75,
62
+ "avg_process_fidelity": 0.9000000000000005,
63
+ "exploit_detection_count": 7.0,
64
+ "timeout_rate": 0.0,
65
+ "failure_visible_rate": 0.30434782608695654,
66
+ "avg_invalid_actions": 0.30434782608695654,
67
+ "reward_columns": {
68
+ "format_compliance_score": 0.9989999999999999,
69
+ "candidate_alignment_score": 0.9989999999999999,
70
+ "legality_score": 0.9989999999999999,
71
+ "safety_delta_score": 0.4882608695652174,
72
+ "burden_improvement_score": 0.5,
73
+ "disease_stability_score": 0.8999999999999998,
74
+ "dosing_quality_score": 0.75,
75
+ "abstention_quality_score": 0.5600000000000004,
76
+ "efficiency_score": 0.7027826086956522,
77
+ "process_fidelity_score": 0.9000000000000005,
78
+ "explanation_grounding_score": 0.8000000000000003,
79
+ "anti_cheat_score": 0.6952608695652175,
80
+ "uncertainty_calibration_score": 0.8482608695652176
81
+ },
82
+ "primary_reward_channels": {
83
+ "safety_legality": 0.8853478260869562,
84
+ "clinical_improvement": 0.6290869565217388,
85
+ "dosing_quality": 0.6549999999999998,
86
+ "process_integrity": 0.8504782608695656
87
+ },
88
+ "policy_stack": "llm-only",
89
+ "failure_mining": {
90
+ "total_rows": 23,
91
+ "failure_rows": 7,
92
+ "top_failure_reasons": [
93
+ {
94
+ "reason": "repeated_action_loop",
95
+ "count": 7
96
+ }
97
+ ]
98
+ }
99
+ },
100
+ "llm_bandit": {
101
+ "avg_reward": 0.7647391304347826,
102
+ "legality_rate": 1.0,
103
+ "severe_violation_rate": 0.0,
104
+ "abstention_rate": 0.0,
105
+ "avg_episode_length": 1.9565217391304348,
106
+ "success_rate": 0.0,
107
+ "avg_burden_delta": 0.0,
108
+ "avg_safety_delta": 0.48982608695652174,
109
+ "avg_dosing_quality": 0.717391304347826,
110
+ "avg_process_fidelity": 0.9000000000000005,
111
+ "exploit_detection_count": 7.0,
112
+ "timeout_rate": 0.0,
113
+ "failure_visible_rate": 0.30434782608695654,
114
+ "avg_invalid_actions": 0.30434782608695654,
115
+ "reward_columns": {
116
+ "format_compliance_score": 0.9989999999999999,
117
+ "candidate_alignment_score": 0.9989999999999999,
118
+ "legality_score": 0.9989999999999999,
119
+ "safety_delta_score": 0.48982608695652174,
120
+ "burden_improvement_score": 0.5043478260869565,
121
+ "disease_stability_score": 0.8582608695652173,
122
+ "dosing_quality_score": 0.717391304347826,
123
+ "abstention_quality_score": 0.5600000000000004,
124
+ "efficiency_score": 0.7027826086956522,
125
+ "process_fidelity_score": 0.9000000000000005,
126
+ "explanation_grounding_score": 0.8000000000000003,
127
+ "anti_cheat_score": 0.6952608695652175,
128
+ "uncertainty_calibration_score": 0.8126086956521739
129
+ },
130
+ "primary_reward_channels": {
131
+ "safety_legality": 0.8765217391304347,
132
+ "clinical_improvement": 0.6171739130434781,
133
+ "dosing_quality": 0.6386956521739129,
134
+ "process_integrity": 0.8504782608695656
135
+ },
136
+ "policy_stack": "llm+bandit",
137
+ "failure_mining": {
138
+ "total_rows": 23,
139
+ "failure_rows": 7,
140
+ "top_failure_reasons": [
141
+ {
142
+ "reason": "repeated_action_loop",
143
+ "count": 7
144
+ }
145
+ ]
146
+ }
147
+ }
148
+ }
149
+ }