josephmayo commited on
Commit
cab4529
·
verified ·
1 Parent(s): 23b2c11

Upload proof summary.json

Browse files
Files changed (1) hide show
  1. summary.json +175 -0
summary.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "20260512_190433",
3
+ "model_id": "google/gemma-4-E4B-it",
4
+ "dataset_id": "ise-uiuc/Magicoder-Evol-Instruct-110K",
5
+ "adapter_repo": "josephmayo/gemma-4-E4B-it-coding-lora",
6
+ "merged_repo": "josephmayo/gemma-4-E4B-it-coding-merged",
7
+ "stage": "after_eval",
8
+ "errors": [],
9
+ "cuda_available": true,
10
+ "cuda_device_count": 2,
11
+ "devices": [
12
+ "Tesla T4",
13
+ "Tesla T4"
14
+ ],
15
+ "torch_version_initial": "2.10.0+cu128",
16
+ "hf_token_present": false,
17
+ "max_train_samples": 1024,
18
+ "max_steps": 200,
19
+ "max_seq_length": 512,
20
+ "eval_count": 8,
21
+ "lora_r": 16,
22
+ "lora_alpha": 32,
23
+ "lr": 0.0001,
24
+ "grad_accum": 8,
25
+ "push_to_hf": true,
26
+ "merge_and_push": false,
27
+ "load_in_4bit": true,
28
+ "memory_after_load": [
29
+ 0,
30
+ 9302143488
31
+ ],
32
+ "eval_source": "openai/openai_humaneval:8",
33
+ "baseline_avg_score": 0.76875,
34
+ "safe_train_rows": 1024,
35
+ "trainable_parameters": {
36
+ "trainable": 50499584,
37
+ "total": 7991600416
38
+ },
39
+ "log_history_tail": [
40
+ {
41
+ "loss": 1.0154043197631837,
42
+ "grad_norm": 0.37521687150001526,
43
+ "learning_rate": 4.51495073572676e-05,
44
+ "epoch": 1.71875
45
+ },
46
+ {
47
+ "loss": 0.9917967796325684,
48
+ "grad_norm": 0.42887604236602783,
49
+ "learning_rate": 4.114045042103887e-05,
50
+ "epoch": 1.796875
51
+ },
52
+ {
53
+ "loss": 1.1146905899047852,
54
+ "grad_norm": 0.4208148717880249,
55
+ "learning_rate": 3.718944461187138e-05,
56
+ "epoch": 1.875
57
+ },
58
+ {
59
+ "loss": 0.9283761978149414,
60
+ "grad_norm": 0.3849687874317169,
61
+ "learning_rate": 3.332237841745898e-05,
62
+ "epoch": 1.953125
63
+ },
64
+ {
65
+ "loss": 1.113053035736084,
66
+ "grad_norm": 0.4142734110355377,
67
+ "learning_rate": 2.9564590321322207e-05,
68
+ "epoch": 2.03125
69
+ },
70
+ {
71
+ "loss": 0.9842248916625976,
72
+ "grad_norm": 0.44529953598976135,
73
+ "learning_rate": 2.5940702775459747e-05,
74
+ "epoch": 2.109375
75
+ },
76
+ {
77
+ "loss": 0.9449721336364746,
78
+ "grad_norm": 0.3756776750087738,
79
+ "learning_rate": 2.2474460864709824e-05,
80
+ "epoch": 2.1875
81
+ },
82
+ {
83
+ "loss": 1.0590093612670899,
84
+ "grad_norm": 0.4192875325679779,
85
+ "learning_rate": 1.9188576719953633e-05,
86
+ "epoch": 2.265625
87
+ },
88
+ {
89
+ "loss": 0.9768091201782226,
90
+ "grad_norm": 0.5095818638801575,
91
+ "learning_rate": 1.6104580699624837e-05,
92
+ "epoch": 2.34375
93
+ },
94
+ {
95
+ "loss": 1.038302516937256,
96
+ "grad_norm": 0.41709497570991516,
97
+ "learning_rate": 1.3242680314639993e-05,
98
+ "epoch": 2.421875
99
+ },
100
+ {
101
+ "loss": 0.9975608825683594,
102
+ "grad_norm": 0.5563586354255676,
103
+ "learning_rate": 1.0621627821127289e-05,
104
+ "epoch": 2.5
105
+ },
106
+ {
107
+ "loss": 0.9714397430419922,
108
+ "grad_norm": 0.8915637135505676,
109
+ "learning_rate": 8.25859734853645e-06,
110
+ "epoch": 2.578125
111
+ },
112
+ {
113
+ "loss": 0.9948483467102051,
114
+ "grad_norm": 0.4391196370124817,
115
+ "learning_rate": 6.16907236823262e-06,
116
+ "epoch": 2.65625
117
+ },
118
+ {
119
+ "loss": 0.9389057159423828,
120
+ "grad_norm": 0.4650712311267853,
121
+ "learning_rate": 4.366744239922998e-06,
122
+ "epoch": 2.734375
123
+ },
124
+ {
125
+ "loss": 1.06390380859375,
126
+ "grad_norm": 0.4836062788963318,
127
+ "learning_rate": 2.8634225006782865e-06,
128
+ "epoch": 2.8125
129
+ },
130
+ {
131
+ "loss": 1.008359718322754,
132
+ "grad_norm": 0.45215511322021484,
133
+ "learning_rate": 1.6689574843694433e-06,
134
+ "epoch": 2.890625
135
+ },
136
+ {
137
+ "loss": 1.0110493659973145,
138
+ "grad_norm": 0.5408219695091248,
139
+ "learning_rate": 7.911757785462881e-07,
140
+ "epoch": 2.96875
141
+ },
142
+ {
143
+ "loss": 0.911649227142334,
144
+ "grad_norm": 0.4599083364009857,
145
+ "learning_rate": 2.3582894166930268e-07,
146
+ "epoch": 3.046875
147
+ },
148
+ {
149
+ "loss": 0.9673548698425293,
150
+ "grad_norm": 0.43304941058158875,
151
+ "learning_rate": 6.5558167183898955e-09,
152
+ "epoch": 3.125
153
+ },
154
+ {
155
+ "train_runtime": 4256.6409,
156
+ "train_samples_per_second": 0.752,
157
+ "train_steps_per_second": 0.047,
158
+ "total_flos": 4.259313762009523e+16,
159
+ "train_loss": 1.142699921131134,
160
+ "epoch": 3.125
161
+ }
162
+ ],
163
+ "train_metrics": {
164
+ "train_runtime": 4256.6409,
165
+ "train_samples_per_second": 0.752,
166
+ "train_steps_per_second": 0.047,
167
+ "total_flos": 4.259313762009523e+16,
168
+ "train_loss": 1.142699921131134,
169
+ "epoch": 3.125
170
+ },
171
+ "after_avg_score": 0.76875,
172
+ "score_delta": 0.0,
173
+ "adapter_dir": "/kaggle/working/gemma4_e4b_coding_lora",
174
+ "release_gate_pass": true
175
+ }