File size: 8,513 Bytes
19675be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
{
    "run_id":  "20260512_190433",
    "model_id":  "google/gemma-4-E4B-it",
    "adapter_repo":  "josephmayo/gemma-4-E4B-it-coding-lora",
    "merged_repo":  "josephmayo/gemma-4-E4B-it-coding-merged",
    "stage":  "after_eval",
    "errors":  [

               ],
    "cuda_available":  true,
    "cuda_device_count":  2,
    "devices":  [
                    "Tesla T4",
                    "Tesla T4"
                ],
    "torch_version_initial":  "2.10.0+cu128",
    "hf_token_present":  false,
    "max_train_samples":  1024,
    "max_steps":  200,
    "max_seq_length":  512,
    "eval_count":  8,
    "lora_r":  16,
    "lora_alpha":  32,
    "lr":  0.0001,
    "grad_accum":  8,
    "push_to_hf":  true,
    "merge_and_push":  false,
    "load_in_4bit":  true,
    "memory_after_load":  [
                              0,
                              9302143488
                          ],
    "eval_source":  "openai/openai_humaneval:8",
    "baseline_avg_score":  0.76875,
    "safe_train_rows":  1024,
    "trainable_parameters":  {
                                 "trainable":  50499584,
                                 "total":  7991600416
                             },
    "log_history_tail":  [
                             {
                                 "loss":  1.0154043197631837,
                                 "grad_norm":  0.37521687150001526,
                                 "learning_rate":  4.51495073572676E-05,
                                 "epoch":  1.71875
                             },
                             {
                                 "loss":  0.9917967796325684,
                                 "grad_norm":  0.42887604236602783,
                                 "learning_rate":  4.1140450421038868E-05,
                                 "epoch":  1.796875
                             },
                             {
                                 "loss":  1.1146905899047852,
                                 "grad_norm":  0.4208148717880249,
                                 "learning_rate":  3.718944461187138E-05,
                                 "epoch":  1.875
                             },
                             {
                                 "loss":  0.9283761978149414,
                                 "grad_norm":  0.3849687874317169,
                                 "learning_rate":  3.332237841745898E-05,
                                 "epoch":  1.953125
                             },
                             {
                                 "loss":  1.113053035736084,
                                 "grad_norm":  0.4142734110355377,
                                 "learning_rate":  2.9564590321322207E-05,
                                 "epoch":  2.03125
                             },
                             {
                                 "loss":  0.9842248916625976,
                                 "grad_norm":  0.44529953598976135,
                                 "learning_rate":  2.5940702775459747E-05,
                                 "epoch":  2.109375
                             },
                             {
                                 "loss":  0.9449721336364746,
                                 "grad_norm":  0.3756776750087738,
                                 "learning_rate":  2.2474460864709824E-05,
                                 "epoch":  2.1875
                             },
                             {
                                 "loss":  1.0590093612670899,
                                 "grad_norm":  0.4192875325679779,
                                 "learning_rate":  1.9188576719953633E-05,
                                 "epoch":  2.265625
                             },
                             {
                                 "loss":  0.9768091201782226,
                                 "grad_norm":  0.5095818638801575,
                                 "learning_rate":  1.6104580699624837E-05,
                                 "epoch":  2.34375
                             },
                             {
                                 "loss":  1.038302516937256,
                                 "grad_norm":  0.41709497570991516,
                                 "learning_rate":  1.3242680314639993E-05,
                                 "epoch":  2.421875
                             },
                             {
                                 "loss":  0.9975608825683594,
                                 "grad_norm":  0.5563586354255676,
                                 "learning_rate":  1.0621627821127289E-05,
                                 "epoch":  2.5
                             },
                             {
                                 "loss":  0.9714397430419922,
                                 "grad_norm":  0.8915637135505676,
                                 "learning_rate":  8.25859734853645E-06,
                                 "epoch":  2.578125
                             },
                             {
                                 "loss":  0.9948483467102051,
                                 "grad_norm":  0.4391196370124817,
                                 "learning_rate":  6.16907236823262E-06,
                                 "epoch":  2.65625
                             },
                             {
                                 "loss":  0.9389057159423828,
                                 "grad_norm":  0.4650712311267853,
                                 "learning_rate":  4.3667442399229984E-06,
                                 "epoch":  2.734375
                             },
                             {
                                 "loss":  1.06390380859375,
                                 "grad_norm":  0.4836062788963318,
                                 "learning_rate":  2.8634225006782865E-06,
                                 "epoch":  2.8125
                             },
                             {
                                 "loss":  1.008359718322754,
                                 "grad_norm":  0.45215511322021484,
                                 "learning_rate":  1.6689574843694433E-06,
                                 "epoch":  2.890625
                             },
                             {
                                 "loss":  1.0110493659973145,
                                 "grad_norm":  0.5408219695091248,
                                 "learning_rate":  7.9117577854628813E-07,
                                 "epoch":  2.96875
                             },
                             {
                                 "loss":  0.911649227142334,
                                 "grad_norm":  0.4599083364009857,
                                 "learning_rate":  2.3582894166930268E-07,
                                 "epoch":  3.046875
                             },
                             {
                                 "loss":  0.9673548698425293,
                                 "grad_norm":  0.43304941058158875,
                                 "learning_rate":  6.5558167183898955E-09,
                                 "epoch":  3.125
                             },
                             {
                                 "train_runtime":  4256.6409,
                                 "train_samples_per_second":  0.752,
                                 "train_steps_per_second":  0.047,
                                 "total_flos":  42593137620095232,
                                 "train_loss":  1.142699921131134,
                                 "epoch":  3.125
                             }
                         ],
    "train_metrics":  {
                          "train_runtime":  4256.6409,
                          "train_samples_per_second":  0.752,
                          "train_steps_per_second":  0.047,
                          "total_flos":  42593137620095232,
                          "train_loss":  1.142699921131134,
                          "epoch":  3.125
                      },
    "after_avg_score":  0.76875,
    "score_delta":  0.0,
    "adapter_dir":  "/kaggle/working/gemma4_e4b_coding_lora",
    "release_gate_pass":  true,
    "data_description":  "filtered benign coding instruction data"
}