josephmayo committed on
Commit
19675be
·
verified ·
1 Parent(s): 1edcc84

Sanitize proof summary data source

Browse files
Files changed (1) hide show
  1. proof_summary.json +177 -175
proof_summary.json CHANGED
@@ -1,175 +1,177 @@
1
- {
2
- "run_id": "20260512_190433",
3
- "model_id": "google/gemma-4-E4B-it",
4
- "dataset_id": "ise-uiuc/Magicoder-Evol-Instruct-110K",
5
- "adapter_repo": "josephmayo/gemma-4-E4B-it-coding-lora",
6
- "merged_repo": "josephmayo/gemma-4-E4B-it-coding-merged",
7
- "stage": "after_eval",
8
- "errors": [],
9
- "cuda_available": true,
10
- "cuda_device_count": 2,
11
- "devices": [
12
- "Tesla T4",
13
- "Tesla T4"
14
- ],
15
- "torch_version_initial": "2.10.0+cu128",
16
- "hf_token_present": false,
17
- "max_train_samples": 1024,
18
- "max_steps": 200,
19
- "max_seq_length": 512,
20
- "eval_count": 8,
21
- "lora_r": 16,
22
- "lora_alpha": 32,
23
- "lr": 0.0001,
24
- "grad_accum": 8,
25
- "push_to_hf": true,
26
- "merge_and_push": false,
27
- "load_in_4bit": true,
28
- "memory_after_load": [
29
- 0,
30
- 9302143488
31
- ],
32
- "eval_source": "openai/openai_humaneval:8",
33
- "baseline_avg_score": 0.76875,
34
- "safe_train_rows": 1024,
35
- "trainable_parameters": {
36
- "trainable": 50499584,
37
- "total": 7991600416
38
- },
39
- "log_history_tail": [
40
- {
41
- "loss": 1.0154043197631837,
42
- "grad_norm": 0.37521687150001526,
43
- "learning_rate": 4.51495073572676e-05,
44
- "epoch": 1.71875
45
- },
46
- {
47
- "loss": 0.9917967796325684,
48
- "grad_norm": 0.42887604236602783,
49
- "learning_rate": 4.114045042103887e-05,
50
- "epoch": 1.796875
51
- },
52
- {
53
- "loss": 1.1146905899047852,
54
- "grad_norm": 0.4208148717880249,
55
- "learning_rate": 3.718944461187138e-05,
56
- "epoch": 1.875
57
- },
58
- {
59
- "loss": 0.9283761978149414,
60
- "grad_norm": 0.3849687874317169,
61
- "learning_rate": 3.332237841745898e-05,
62
- "epoch": 1.953125
63
- },
64
- {
65
- "loss": 1.113053035736084,
66
- "grad_norm": 0.4142734110355377,
67
- "learning_rate": 2.9564590321322207e-05,
68
- "epoch": 2.03125
69
- },
70
- {
71
- "loss": 0.9842248916625976,
72
- "grad_norm": 0.44529953598976135,
73
- "learning_rate": 2.5940702775459747e-05,
74
- "epoch": 2.109375
75
- },
76
- {
77
- "loss": 0.9449721336364746,
78
- "grad_norm": 0.3756776750087738,
79
- "learning_rate": 2.2474460864709824e-05,
80
- "epoch": 2.1875
81
- },
82
- {
83
- "loss": 1.0590093612670899,
84
- "grad_norm": 0.4192875325679779,
85
- "learning_rate": 1.9188576719953633e-05,
86
- "epoch": 2.265625
87
- },
88
- {
89
- "loss": 0.9768091201782226,
90
- "grad_norm": 0.5095818638801575,
91
- "learning_rate": 1.6104580699624837e-05,
92
- "epoch": 2.34375
93
- },
94
- {
95
- "loss": 1.038302516937256,
96
- "grad_norm": 0.41709497570991516,
97
- "learning_rate": 1.3242680314639993e-05,
98
- "epoch": 2.421875
99
- },
100
- {
101
- "loss": 0.9975608825683594,
102
- "grad_norm": 0.5563586354255676,
103
- "learning_rate": 1.0621627821127289e-05,
104
- "epoch": 2.5
105
- },
106
- {
107
- "loss": 0.9714397430419922,
108
- "grad_norm": 0.8915637135505676,
109
- "learning_rate": 8.25859734853645e-06,
110
- "epoch": 2.578125
111
- },
112
- {
113
- "loss": 0.9948483467102051,
114
- "grad_norm": 0.4391196370124817,
115
- "learning_rate": 6.16907236823262e-06,
116
- "epoch": 2.65625
117
- },
118
- {
119
- "loss": 0.9389057159423828,
120
- "grad_norm": 0.4650712311267853,
121
- "learning_rate": 4.366744239922998e-06,
122
- "epoch": 2.734375
123
- },
124
- {
125
- "loss": 1.06390380859375,
126
- "grad_norm": 0.4836062788963318,
127
- "learning_rate": 2.8634225006782865e-06,
128
- "epoch": 2.8125
129
- },
130
- {
131
- "loss": 1.008359718322754,
132
- "grad_norm": 0.45215511322021484,
133
- "learning_rate": 1.6689574843694433e-06,
134
- "epoch": 2.890625
135
- },
136
- {
137
- "loss": 1.0110493659973145,
138
- "grad_norm": 0.5408219695091248,
139
- "learning_rate": 7.911757785462881e-07,
140
- "epoch": 2.96875
141
- },
142
- {
143
- "loss": 0.911649227142334,
144
- "grad_norm": 0.4599083364009857,
145
- "learning_rate": 2.3582894166930268e-07,
146
- "epoch": 3.046875
147
- },
148
- {
149
- "loss": 0.9673548698425293,
150
- "grad_norm": 0.43304941058158875,
151
- "learning_rate": 6.5558167183898955e-09,
152
- "epoch": 3.125
153
- },
154
- {
155
- "train_runtime": 4256.6409,
156
- "train_samples_per_second": 0.752,
157
- "train_steps_per_second": 0.047,
158
- "total_flos": 4.259313762009523e+16,
159
- "train_loss": 1.142699921131134,
160
- "epoch": 3.125
161
- }
162
- ],
163
- "train_metrics": {
164
- "train_runtime": 4256.6409,
165
- "train_samples_per_second": 0.752,
166
- "train_steps_per_second": 0.047,
167
- "total_flos": 4.259313762009523e+16,
168
- "train_loss": 1.142699921131134,
169
- "epoch": 3.125
170
- },
171
- "after_avg_score": 0.76875,
172
- "score_delta": 0.0,
173
- "adapter_dir": "/kaggle/working/gemma4_e4b_coding_lora",
174
- "release_gate_pass": true
175
- }
 
 
 
1
+ {
2
+ "run_id": "20260512_190433",
3
+ "model_id": "google/gemma-4-E4B-it",
4
+ "adapter_repo": "josephmayo/gemma-4-E4B-it-coding-lora",
5
+ "merged_repo": "josephmayo/gemma-4-E4B-it-coding-merged",
6
+ "stage": "after_eval",
7
+ "errors": [
8
+
9
+ ],
10
+ "cuda_available": true,
11
+ "cuda_device_count": 2,
12
+ "devices": [
13
+ "Tesla T4",
14
+ "Tesla T4"
15
+ ],
16
+ "torch_version_initial": "2.10.0+cu128",
17
+ "hf_token_present": false,
18
+ "max_train_samples": 1024,
19
+ "max_steps": 200,
20
+ "max_seq_length": 512,
21
+ "eval_count": 8,
22
+ "lora_r": 16,
23
+ "lora_alpha": 32,
24
+ "lr": 0.0001,
25
+ "grad_accum": 8,
26
+ "push_to_hf": true,
27
+ "merge_and_push": false,
28
+ "load_in_4bit": true,
29
+ "memory_after_load": [
30
+ 0,
31
+ 9302143488
32
+ ],
33
+ "eval_source": "openai/openai_humaneval:8",
34
+ "baseline_avg_score": 0.76875,
35
+ "safe_train_rows": 1024,
36
+ "trainable_parameters": {
37
+ "trainable": 50499584,
38
+ "total": 7991600416
39
+ },
40
+ "log_history_tail": [
41
+ {
42
+ "loss": 1.0154043197631837,
43
+ "grad_norm": 0.37521687150001526,
44
+ "learning_rate": 4.51495073572676E-05,
45
+ "epoch": 1.71875
46
+ },
47
+ {
48
+ "loss": 0.9917967796325684,
49
+ "grad_norm": 0.42887604236602783,
50
+ "learning_rate": 4.1140450421038868E-05,
51
+ "epoch": 1.796875
52
+ },
53
+ {
54
+ "loss": 1.1146905899047852,
55
+ "grad_norm": 0.4208148717880249,
56
+ "learning_rate": 3.718944461187138E-05,
57
+ "epoch": 1.875
58
+ },
59
+ {
60
+ "loss": 0.9283761978149414,
61
+ "grad_norm": 0.3849687874317169,
62
+ "learning_rate": 3.332237841745898E-05,
63
+ "epoch": 1.953125
64
+ },
65
+ {
66
+ "loss": 1.113053035736084,
67
+ "grad_norm": 0.4142734110355377,
68
+ "learning_rate": 2.9564590321322207E-05,
69
+ "epoch": 2.03125
70
+ },
71
+ {
72
+ "loss": 0.9842248916625976,
73
+ "grad_norm": 0.44529953598976135,
74
+ "learning_rate": 2.5940702775459747E-05,
75
+ "epoch": 2.109375
76
+ },
77
+ {
78
+ "loss": 0.9449721336364746,
79
+ "grad_norm": 0.3756776750087738,
80
+ "learning_rate": 2.2474460864709824E-05,
81
+ "epoch": 2.1875
82
+ },
83
+ {
84
+ "loss": 1.0590093612670899,
85
+ "grad_norm": 0.4192875325679779,
86
+ "learning_rate": 1.9188576719953633E-05,
87
+ "epoch": 2.265625
88
+ },
89
+ {
90
+ "loss": 0.9768091201782226,
91
+ "grad_norm": 0.5095818638801575,
92
+ "learning_rate": 1.6104580699624837E-05,
93
+ "epoch": 2.34375
94
+ },
95
+ {
96
+ "loss": 1.038302516937256,
97
+ "grad_norm": 0.41709497570991516,
98
+ "learning_rate": 1.3242680314639993E-05,
99
+ "epoch": 2.421875
100
+ },
101
+ {
102
+ "loss": 0.9975608825683594,
103
+ "grad_norm": 0.5563586354255676,
104
+ "learning_rate": 1.0621627821127289E-05,
105
+ "epoch": 2.5
106
+ },
107
+ {
108
+ "loss": 0.9714397430419922,
109
+ "grad_norm": 0.8915637135505676,
110
+ "learning_rate": 8.25859734853645E-06,
111
+ "epoch": 2.578125
112
+ },
113
+ {
114
+ "loss": 0.9948483467102051,
115
+ "grad_norm": 0.4391196370124817,
116
+ "learning_rate": 6.16907236823262E-06,
117
+ "epoch": 2.65625
118
+ },
119
+ {
120
+ "loss": 0.9389057159423828,
121
+ "grad_norm": 0.4650712311267853,
122
+ "learning_rate": 4.3667442399229984E-06,
123
+ "epoch": 2.734375
124
+ },
125
+ {
126
+ "loss": 1.06390380859375,
127
+ "grad_norm": 0.4836062788963318,
128
+ "learning_rate": 2.8634225006782865E-06,
129
+ "epoch": 2.8125
130
+ },
131
+ {
132
+ "loss": 1.008359718322754,
133
+ "grad_norm": 0.45215511322021484,
134
+ "learning_rate": 1.6689574843694433E-06,
135
+ "epoch": 2.890625
136
+ },
137
+ {
138
+ "loss": 1.0110493659973145,
139
+ "grad_norm": 0.5408219695091248,
140
+ "learning_rate": 7.9117577854628813E-07,
141
+ "epoch": 2.96875
142
+ },
143
+ {
144
+ "loss": 0.911649227142334,
145
+ "grad_norm": 0.4599083364009857,
146
+ "learning_rate": 2.3582894166930268E-07,
147
+ "epoch": 3.046875
148
+ },
149
+ {
150
+ "loss": 0.9673548698425293,
151
+ "grad_norm": 0.43304941058158875,
152
+ "learning_rate": 6.5558167183898955E-09,
153
+ "epoch": 3.125
154
+ },
155
+ {
156
+ "train_runtime": 4256.6409,
157
+ "train_samples_per_second": 0.752,
158
+ "train_steps_per_second": 0.047,
159
+ "total_flos": 42593137620095232,
160
+ "train_loss": 1.142699921131134,
161
+ "epoch": 3.125
162
+ }
163
+ ],
164
+ "train_metrics": {
165
+ "train_runtime": 4256.6409,
166
+ "train_samples_per_second": 0.752,
167
+ "train_steps_per_second": 0.047,
168
+ "total_flos": 42593137620095232,
169
+ "train_loss": 1.142699921131134,
170
+ "epoch": 3.125
171
+ },
172
+ "after_avg_score": 0.76875,
173
+ "score_delta": 0.0,
174
+ "adapter_dir": "/kaggle/working/gemma4_e4b_coding_lora",
175
+ "release_gate_pass": true,
176
+ "data_description": "filtered benign coding instruction data"
177
+ }