idobn committed on
Commit
75449cb
·
verified ·
1 Parent(s): 634615e

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - text-classification
7
+ base_model: microsoft/deberta-v3-large
8
+ widget:
9
+ - text: "I love AutoTrain"
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 1.9677170515060425
18
+
19
+ f1_macro: 0.43647747790260216
20
+
21
+ f1_micro: 0.40711847879083374
22
+
23
+ f1_weighted: 0.3874051890698862
24
+
25
+ precision_macro: 0.49034231056721467
26
+
27
+ precision_micro: 0.40711847879083374
28
+
29
+ precision_weighted: 0.4284233711977137
30
+
31
+ recall_macro: 0.4407702395816866
32
+
33
+ recall_micro: 0.40711847879083374
34
+
35
+ recall_weighted: 0.40711847879083374
36
+
37
+ accuracy: 0.40711847879083374
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-1542/config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "_num_labels": 16,
4
+ "architectures": [
5
+ "DebertaV2ForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "enfj",
13
+ "1": "enfp",
14
+ "2": "entj",
15
+ "3": "entp",
16
+ "4": "esfj",
17
+ "5": "esfp",
18
+ "6": "estj",
19
+ "7": "estp",
20
+ "8": "infj",
21
+ "9": "infp",
22
+ "10": "intj",
23
+ "11": "intp",
24
+ "12": "isfj",
25
+ "13": "isfp",
26
+ "14": "istj",
27
+ "15": "istp"
28
+ },
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 4096,
31
+ "label2id": {
32
+ "enfj": 0,
33
+ "enfp": 1,
34
+ "entj": 2,
35
+ "entp": 3,
36
+ "esfj": 4,
37
+ "esfp": 5,
38
+ "estj": 6,
39
+ "estp": 7,
40
+ "infj": 8,
41
+ "infp": 9,
42
+ "intj": 10,
43
+ "intp": 11,
44
+ "isfj": 12,
45
+ "isfp": 13,
46
+ "istj": 14,
47
+ "istp": 15
48
+ },
49
+ "layer_norm_eps": 1e-07,
50
+ "legacy": true,
51
+ "max_position_embeddings": 512,
52
+ "max_relative_positions": -1,
53
+ "model_type": "deberta-v2",
54
+ "norm_rel_ebd": "layer_norm",
55
+ "num_attention_heads": 16,
56
+ "num_hidden_layers": 24,
57
+ "pad_token_id": 0,
58
+ "pooler_dropout": 0,
59
+ "pooler_hidden_act": "gelu",
60
+ "pooler_hidden_size": 1024,
61
+ "pos_att_type": [
62
+ "p2c",
63
+ "c2p"
64
+ ],
65
+ "position_biased_input": false,
66
+ "position_buckets": 256,
67
+ "relative_attention": true,
68
+ "share_att_key": true,
69
+ "torch_dtype": "float32",
70
+ "transformers_version": "4.48.0",
71
+ "type_vocab_size": 0,
72
+ "vocab_size": 128100
73
+ }
checkpoint-1542/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c9b5b9de47af2e45c14a96ab10be692e0caf5e1ae04b1a717235a34c072f012
3
+ size 1740361848
checkpoint-1542/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8890e67ff5e7849ac67c82a5fe3088c782f25b6b8224943a94a612014a413a5f
3
+ size 3480955056
checkpoint-1542/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d440435e7a9ceae19abce66d810fb753ca6f090ea4d3ab8404d4282570a3effa
3
+ size 14244
checkpoint-1542/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9950f111117344b3bf990d7231eaf6b6ce2fcff45a97c769c5f23872f16f7cb5
3
+ size 1064
checkpoint-1542/trainer_state.json ADDED
@@ -0,0 +1,1228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.9677170515060425,
3
+ "best_model_checkpoint": "twitter-mbti-v2/checkpoint-1542",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1542,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03900536323744515,
13
+ "grad_norm": 3.7978336811065674,
14
+ "learning_rate": 3.90625e-07,
15
+ "loss": 2.833,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.0780107264748903,
20
+ "grad_norm": 2.6848301887512207,
21
+ "learning_rate": 7.8125e-07,
22
+ "loss": 2.8331,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.11701608971233544,
27
+ "grad_norm": 2.5124902725219727,
28
+ "learning_rate": 1.1718750000000001e-06,
29
+ "loss": 2.8206,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.1560214529497806,
34
+ "grad_norm": 3.32080340385437,
35
+ "learning_rate": 1.5625e-06,
36
+ "loss": 2.8144,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.19502681618722575,
41
+ "grad_norm": 3.0330629348754883,
42
+ "learning_rate": 1.953125e-06,
43
+ "loss": 2.821,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.2340321794246709,
48
+ "grad_norm": 2.991515874862671,
49
+ "learning_rate": 2.3437500000000002e-06,
50
+ "loss": 2.8096,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.27303754266211605,
55
+ "grad_norm": 3.302293062210083,
56
+ "learning_rate": 2.7343750000000004e-06,
57
+ "loss": 2.794,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.3120429058995612,
62
+ "grad_norm": 3.306318521499634,
63
+ "learning_rate": 3.125e-06,
64
+ "loss": 2.7629,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3510482691370063,
69
+ "grad_norm": 2.56471848487854,
70
+ "learning_rate": 3.5156250000000003e-06,
71
+ "loss": 2.7602,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.3900536323744515,
76
+ "grad_norm": 3.474318027496338,
77
+ "learning_rate": 3.90625e-06,
78
+ "loss": 2.6947,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.42905899561189664,
83
+ "grad_norm": 2.990626096725464,
84
+ "learning_rate": 4.296875e-06,
85
+ "loss": 2.7265,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.4680643588493418,
90
+ "grad_norm": 4.794486999511719,
91
+ "learning_rate": 4.6875000000000004e-06,
92
+ "loss": 2.7143,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.5070697220867869,
97
+ "grad_norm": 3.5165743827819824,
98
+ "learning_rate": 5.078125000000001e-06,
99
+ "loss": 2.7444,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.5460750853242321,
104
+ "grad_norm": 2.8984289169311523,
105
+ "learning_rate": 5.468750000000001e-06,
106
+ "loss": 2.7384,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5850804485616772,
111
+ "grad_norm": 3.9029452800750732,
112
+ "learning_rate": 5.859375e-06,
113
+ "loss": 2.7444,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.6240858117991224,
118
+ "grad_norm": 4.58915376663208,
119
+ "learning_rate": 6.25e-06,
120
+ "loss": 2.6959,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.6630911750365676,
125
+ "grad_norm": 3.549612045288086,
126
+ "learning_rate": 6.6406250000000005e-06,
127
+ "loss": 2.7289,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.7020965382740126,
132
+ "grad_norm": 2.6437020301818848,
133
+ "learning_rate": 7.031250000000001e-06,
134
+ "loss": 2.732,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.7411019015114578,
139
+ "grad_norm": 3.5032520294189453,
140
+ "learning_rate": 7.421875000000001e-06,
141
+ "loss": 2.7296,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.780107264748903,
146
+ "grad_norm": 3.1741631031036377,
147
+ "learning_rate": 7.8125e-06,
148
+ "loss": 2.7316,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.8191126279863481,
153
+ "grad_norm": 2.8320600986480713,
154
+ "learning_rate": 8.203125000000001e-06,
155
+ "loss": 2.7319,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.8581179912237933,
160
+ "grad_norm": 2.8625762462615967,
161
+ "learning_rate": 8.59375e-06,
162
+ "loss": 2.742,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.8971233544612384,
167
+ "grad_norm": 2.858238697052002,
168
+ "learning_rate": 8.984375000000002e-06,
169
+ "loss": 2.7252,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.9361287176986836,
174
+ "grad_norm": 3.340858221054077,
175
+ "learning_rate": 9.375000000000001e-06,
176
+ "loss": 2.7159,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.9751340809361287,
181
+ "grad_norm": 2.8419265747070312,
182
+ "learning_rate": 9.765625e-06,
183
+ "loss": 2.7509,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 1.0,
188
+ "eval_accuracy": 0.12823013164310093,
189
+ "eval_f1_macro": 0.022844391014094804,
190
+ "eval_f1_micro": 0.12823013164310093,
191
+ "eval_f1_weighted": 0.04192036517091411,
192
+ "eval_loss": 2.7105822563171387,
193
+ "eval_precision_macro": 0.03190063069208076,
194
+ "eval_precision_micro": 0.12823013164310093,
195
+ "eval_precision_weighted": 0.0493667840880759,
196
+ "eval_recall_macro": 0.06533440208373821,
197
+ "eval_recall_micro": 0.12823013164310093,
198
+ "eval_recall_weighted": 0.12823013164310093,
199
+ "eval_runtime": 57.6155,
200
+ "eval_samples_per_second": 35.598,
201
+ "eval_steps_per_second": 4.461,
202
+ "step": 257
203
+ },
204
+ {
205
+ "epoch": 1.0117016089712336,
206
+ "grad_norm": 3.288947343826294,
207
+ "learning_rate": 9.98263888888889e-06,
208
+ "loss": 2.5368,
209
+ "step": 260
210
+ },
211
+ {
212
+ "epoch": 1.0507069722086786,
213
+ "grad_norm": 3.894672155380249,
214
+ "learning_rate": 9.939236111111112e-06,
215
+ "loss": 2.7183,
216
+ "step": 270
217
+ },
218
+ {
219
+ "epoch": 1.0897123354461238,
220
+ "grad_norm": 3.038849353790283,
221
+ "learning_rate": 9.895833333333334e-06,
222
+ "loss": 2.7326,
223
+ "step": 280
224
+ },
225
+ {
226
+ "epoch": 1.128717698683569,
227
+ "grad_norm": 4.149471282958984,
228
+ "learning_rate": 9.852430555555557e-06,
229
+ "loss": 2.7304,
230
+ "step": 290
231
+ },
232
+ {
233
+ "epoch": 1.1677230619210142,
234
+ "grad_norm": 3.3252062797546387,
235
+ "learning_rate": 9.80902777777778e-06,
236
+ "loss": 2.7161,
237
+ "step": 300
238
+ },
239
+ {
240
+ "epoch": 1.2067284251584594,
241
+ "grad_norm": 3.7927067279815674,
242
+ "learning_rate": 9.765625e-06,
243
+ "loss": 2.6916,
244
+ "step": 310
245
+ },
246
+ {
247
+ "epoch": 1.2457337883959045,
248
+ "grad_norm": 3.313178777694702,
249
+ "learning_rate": 9.722222222222223e-06,
250
+ "loss": 2.7163,
251
+ "step": 320
252
+ },
253
+ {
254
+ "epoch": 1.2847391516333495,
255
+ "grad_norm": 3.720956802368164,
256
+ "learning_rate": 9.678819444444445e-06,
257
+ "loss": 2.6494,
258
+ "step": 330
259
+ },
260
+ {
261
+ "epoch": 1.3237445148707947,
262
+ "grad_norm": 4.145543575286865,
263
+ "learning_rate": 9.635416666666668e-06,
264
+ "loss": 2.6825,
265
+ "step": 340
266
+ },
267
+ {
268
+ "epoch": 1.3627498781082399,
269
+ "grad_norm": 3.6850345134735107,
270
+ "learning_rate": 9.592013888888888e-06,
271
+ "loss": 2.6725,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 1.401755241345685,
276
+ "grad_norm": 4.245800495147705,
277
+ "learning_rate": 9.548611111111113e-06,
278
+ "loss": 2.6788,
279
+ "step": 360
280
+ },
281
+ {
282
+ "epoch": 1.4407606045831303,
283
+ "grad_norm": 3.8264000415802,
284
+ "learning_rate": 9.505208333333335e-06,
285
+ "loss": 2.6755,
286
+ "step": 370
287
+ },
288
+ {
289
+ "epoch": 1.4797659678205752,
290
+ "grad_norm": 4.036210060119629,
291
+ "learning_rate": 9.461805555555556e-06,
292
+ "loss": 2.6943,
293
+ "step": 380
294
+ },
295
+ {
296
+ "epoch": 1.5187713310580204,
297
+ "grad_norm": 4.102363586425781,
298
+ "learning_rate": 9.418402777777778e-06,
299
+ "loss": 2.6931,
300
+ "step": 390
301
+ },
302
+ {
303
+ "epoch": 1.5577766942954656,
304
+ "grad_norm": 3.794200897216797,
305
+ "learning_rate": 9.375000000000001e-06,
306
+ "loss": 2.6692,
307
+ "step": 400
308
+ },
309
+ {
310
+ "epoch": 1.5967820575329108,
311
+ "grad_norm": 4.008747577667236,
312
+ "learning_rate": 9.331597222222223e-06,
313
+ "loss": 2.6698,
314
+ "step": 410
315
+ },
316
+ {
317
+ "epoch": 1.635787420770356,
318
+ "grad_norm": 4.523059844970703,
319
+ "learning_rate": 9.288194444444444e-06,
320
+ "loss": 2.6308,
321
+ "step": 420
322
+ },
323
+ {
324
+ "epoch": 1.674792784007801,
325
+ "grad_norm": 3.91786789894104,
326
+ "learning_rate": 9.244791666666667e-06,
327
+ "loss": 2.5987,
328
+ "step": 430
329
+ },
330
+ {
331
+ "epoch": 1.7137981472452464,
332
+ "grad_norm": 4.020909309387207,
333
+ "learning_rate": 9.201388888888889e-06,
334
+ "loss": 2.6354,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 1.7528035104826913,
339
+ "grad_norm": 3.620465040206909,
340
+ "learning_rate": 9.157986111111112e-06,
341
+ "loss": 2.659,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 1.7918088737201365,
346
+ "grad_norm": 3.509871482849121,
347
+ "learning_rate": 9.114583333333334e-06,
348
+ "loss": 2.6531,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 1.8308142369575817,
353
+ "grad_norm": 4.906174659729004,
354
+ "learning_rate": 9.071180555555557e-06,
355
+ "loss": 2.6146,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 1.8698196001950267,
360
+ "grad_norm": 4.495574951171875,
361
+ "learning_rate": 9.027777777777779e-06,
362
+ "loss": 2.6399,
363
+ "step": 480
364
+ },
365
+ {
366
+ "epoch": 1.908824963432472,
367
+ "grad_norm": 4.081535339355469,
368
+ "learning_rate": 8.984375000000002e-06,
369
+ "loss": 2.5733,
370
+ "step": 490
371
+ },
372
+ {
373
+ "epoch": 1.947830326669917,
374
+ "grad_norm": 4.859089374542236,
375
+ "learning_rate": 8.940972222222222e-06,
376
+ "loss": 2.6333,
377
+ "step": 500
378
+ },
379
+ {
380
+ "epoch": 1.9868356899073623,
381
+ "grad_norm": 4.1170830726623535,
382
+ "learning_rate": 8.897569444444445e-06,
383
+ "loss": 2.6284,
384
+ "step": 510
385
+ },
386
+ {
387
+ "epoch": 2.0,
388
+ "eval_accuracy": 0.16626036079960996,
389
+ "eval_f1_macro": 0.07002851524088888,
390
+ "eval_f1_micro": 0.16626036079960996,
391
+ "eval_f1_weighted": 0.0850951694800105,
392
+ "eval_loss": 2.5804367065429688,
393
+ "eval_precision_macro": 0.1575303519561262,
394
+ "eval_precision_micro": 0.16626036079960996,
395
+ "eval_precision_weighted": 0.14667789639701573,
396
+ "eval_recall_macro": 0.11376388483029318,
397
+ "eval_recall_micro": 0.16626036079960996,
398
+ "eval_recall_weighted": 0.16626036079960996,
399
+ "eval_runtime": 57.5878,
400
+ "eval_samples_per_second": 35.615,
401
+ "eval_steps_per_second": 4.463,
402
+ "step": 514
403
+ },
404
+ {
405
+ "epoch": 2.0234032179424672,
406
+ "grad_norm": 4.6980390548706055,
407
+ "learning_rate": 8.854166666666667e-06,
408
+ "loss": 2.4096,
409
+ "step": 520
410
+ },
411
+ {
412
+ "epoch": 2.062408581179912,
413
+ "grad_norm": 5.933006286621094,
414
+ "learning_rate": 8.81076388888889e-06,
415
+ "loss": 2.5589,
416
+ "step": 530
417
+ },
418
+ {
419
+ "epoch": 2.101413944417357,
420
+ "grad_norm": NaN,
421
+ "learning_rate": 8.77170138888889e-06,
422
+ "loss": 2.563,
423
+ "step": 540
424
+ },
425
+ {
426
+ "epoch": 2.1404193076548026,
427
+ "grad_norm": 5.945810794830322,
428
+ "learning_rate": 8.728298611111112e-06,
429
+ "loss": 2.5338,
430
+ "step": 550
431
+ },
432
+ {
433
+ "epoch": 2.1794246708922476,
434
+ "grad_norm": 5.052691459655762,
435
+ "learning_rate": 8.684895833333335e-06,
436
+ "loss": 2.501,
437
+ "step": 560
438
+ },
439
+ {
440
+ "epoch": 2.218430034129693,
441
+ "grad_norm": 4.916421413421631,
442
+ "learning_rate": 8.641493055555557e-06,
443
+ "loss": 2.4721,
444
+ "step": 570
445
+ },
446
+ {
447
+ "epoch": 2.257435397367138,
448
+ "grad_norm": 5.270256519317627,
449
+ "learning_rate": 8.598090277777778e-06,
450
+ "loss": 2.4879,
451
+ "step": 580
452
+ },
453
+ {
454
+ "epoch": 2.2964407606045834,
455
+ "grad_norm": 6.149162292480469,
456
+ "learning_rate": 8.5546875e-06,
457
+ "loss": 2.407,
458
+ "step": 590
459
+ },
460
+ {
461
+ "epoch": 2.3354461238420283,
462
+ "grad_norm": 5.327962398529053,
463
+ "learning_rate": 8.511284722222223e-06,
464
+ "loss": 2.4255,
465
+ "step": 600
466
+ },
467
+ {
468
+ "epoch": 2.3744514870794733,
469
+ "grad_norm": 5.612864017486572,
470
+ "learning_rate": 8.467881944444445e-06,
471
+ "loss": 2.4923,
472
+ "step": 610
473
+ },
474
+ {
475
+ "epoch": 2.4134568503169187,
476
+ "grad_norm": 5.927056789398193,
477
+ "learning_rate": 8.424479166666666e-06,
478
+ "loss": 2.4304,
479
+ "step": 620
480
+ },
481
+ {
482
+ "epoch": 2.4524622135543637,
483
+ "grad_norm": 6.483325958251953,
484
+ "learning_rate": 8.381076388888889e-06,
485
+ "loss": 2.4392,
486
+ "step": 630
487
+ },
488
+ {
489
+ "epoch": 2.491467576791809,
490
+ "grad_norm": 6.655641555786133,
491
+ "learning_rate": 8.337673611111113e-06,
492
+ "loss": 2.4165,
493
+ "step": 640
494
+ },
495
+ {
496
+ "epoch": 2.530472940029254,
497
+ "grad_norm": 6.099938869476318,
498
+ "learning_rate": 8.294270833333334e-06,
499
+ "loss": 2.4274,
500
+ "step": 650
501
+ },
502
+ {
503
+ "epoch": 2.569478303266699,
504
+ "grad_norm": 7.101120471954346,
505
+ "learning_rate": 8.250868055555556e-06,
506
+ "loss": 2.3871,
507
+ "step": 660
508
+ },
509
+ {
510
+ "epoch": 2.6084836665041444,
511
+ "grad_norm": 6.579291820526123,
512
+ "learning_rate": 8.207465277777779e-06,
513
+ "loss": 2.3718,
514
+ "step": 670
515
+ },
516
+ {
517
+ "epoch": 2.6474890297415894,
518
+ "grad_norm": 6.7963457107543945,
519
+ "learning_rate": 8.164062500000001e-06,
520
+ "loss": 2.3544,
521
+ "step": 680
522
+ },
523
+ {
524
+ "epoch": 2.686494392979035,
525
+ "grad_norm": 10.213848114013672,
526
+ "learning_rate": 8.120659722222224e-06,
527
+ "loss": 2.3501,
528
+ "step": 690
529
+ },
530
+ {
531
+ "epoch": 2.7254997562164798,
532
+ "grad_norm": 6.80430269241333,
533
+ "learning_rate": 8.077256944444444e-06,
534
+ "loss": 2.2806,
535
+ "step": 700
536
+ },
537
+ {
538
+ "epoch": 2.7645051194539247,
539
+ "grad_norm": 6.852050304412842,
540
+ "learning_rate": 8.033854166666667e-06,
541
+ "loss": 2.3389,
542
+ "step": 710
543
+ },
544
+ {
545
+ "epoch": 2.80351048269137,
546
+ "grad_norm": 8.63313102722168,
547
+ "learning_rate": 7.99045138888889e-06,
548
+ "loss": 2.3668,
549
+ "step": 720
550
+ },
551
+ {
552
+ "epoch": 2.842515845928815,
553
+ "grad_norm": 8.344212532043457,
554
+ "learning_rate": 7.947048611111112e-06,
555
+ "loss": 2.2608,
556
+ "step": 730
557
+ },
558
+ {
559
+ "epoch": 2.8815212091662605,
560
+ "grad_norm": 7.747754096984863,
561
+ "learning_rate": 7.903645833333334e-06,
562
+ "loss": 2.3099,
563
+ "step": 740
564
+ },
565
+ {
566
+ "epoch": 2.9205265724037055,
567
+ "grad_norm": 8.065759658813477,
568
+ "learning_rate": 7.860243055555557e-06,
569
+ "loss": 2.1869,
570
+ "step": 750
571
+ },
572
+ {
573
+ "epoch": 2.9595319356411505,
574
+ "grad_norm": 9.365443229675293,
575
+ "learning_rate": 7.81684027777778e-06,
576
+ "loss": 2.266,
577
+ "step": 760
578
+ },
579
+ {
580
+ "epoch": 2.998537298878596,
581
+ "grad_norm": 7.615650653839111,
582
+ "learning_rate": 7.7734375e-06,
583
+ "loss": 2.1923,
584
+ "step": 770
585
+ },
586
+ {
587
+ "epoch": 3.0,
588
+ "eval_accuracy": 0.27693807898586054,
589
+ "eval_f1_macro": 0.23444058368942994,
590
+ "eval_f1_micro": 0.27693807898586054,
591
+ "eval_f1_weighted": 0.2232954691057457,
592
+ "eval_loss": 2.2444159984588623,
593
+ "eval_precision_macro": 0.42520232898903443,
594
+ "eval_precision_micro": 0.27693807898586054,
595
+ "eval_precision_weighted": 0.36698946639719304,
596
+ "eval_recall_macro": 0.2559615112919232,
597
+ "eval_recall_micro": 0.27693807898586054,
598
+ "eval_recall_weighted": 0.27693807898586054,
599
+ "eval_runtime": 57.5648,
600
+ "eval_samples_per_second": 35.629,
601
+ "eval_steps_per_second": 4.465,
602
+ "step": 771
603
+ },
604
+ {
605
+ "epoch": 3.0351048269137006,
606
+ "grad_norm": 6.9213056564331055,
607
+ "learning_rate": 7.730034722222223e-06,
608
+ "loss": 1.9593,
609
+ "step": 780
610
+ },
611
+ {
612
+ "epoch": 3.0741101901511456,
613
+ "grad_norm": 7.0944366455078125,
614
+ "learning_rate": 7.686631944444445e-06,
615
+ "loss": 2.0703,
616
+ "step": 790
617
+ },
618
+ {
619
+ "epoch": 3.113115553388591,
620
+ "grad_norm": 7.207167148590088,
621
+ "learning_rate": 7.643229166666668e-06,
622
+ "loss": 2.1229,
623
+ "step": 800
624
+ },
625
+ {
626
+ "epoch": 3.152120916626036,
627
+ "grad_norm": 8.827300071716309,
628
+ "learning_rate": 7.599826388888889e-06,
629
+ "loss": 2.0435,
630
+ "step": 810
631
+ },
632
+ {
633
+ "epoch": 3.1911262798634814,
634
+ "grad_norm": 8.51149845123291,
635
+ "learning_rate": 7.5564236111111125e-06,
636
+ "loss": 2.1189,
637
+ "step": 820
638
+ },
639
+ {
640
+ "epoch": 3.2301316431009264,
641
+ "grad_norm": 9.00069808959961,
642
+ "learning_rate": 7.513020833333334e-06,
643
+ "loss": 1.9885,
644
+ "step": 830
645
+ },
646
+ {
647
+ "epoch": 3.2691370063383713,
648
+ "grad_norm": 7.135461330413818,
649
+ "learning_rate": 7.469618055555557e-06,
650
+ "loss": 2.043,
651
+ "step": 840
652
+ },
653
+ {
654
+ "epoch": 3.3081423695758168,
655
+ "grad_norm": 7.121860027313232,
656
+ "learning_rate": 7.426215277777778e-06,
657
+ "loss": 2.1203,
658
+ "step": 850
659
+ },
660
+ {
661
+ "epoch": 3.3471477328132617,
662
+ "grad_norm": 7.476314067840576,
663
+ "learning_rate": 7.382812500000001e-06,
664
+ "loss": 2.008,
665
+ "step": 860
666
+ },
667
+ {
668
+ "epoch": 3.386153096050707,
669
+ "grad_norm": 11.115015029907227,
670
+ "learning_rate": 7.339409722222222e-06,
671
+ "loss": 2.0098,
672
+ "step": 870
673
+ },
674
+ {
675
+ "epoch": 3.425158459288152,
676
+ "grad_norm": 6.821423053741455,
677
+ "learning_rate": 7.296006944444445e-06,
678
+ "loss": 2.0252,
679
+ "step": 880
680
+ },
681
+ {
682
+ "epoch": 3.464163822525597,
683
+ "grad_norm": 12.535731315612793,
684
+ "learning_rate": 7.2526041666666665e-06,
685
+ "loss": 1.9571,
686
+ "step": 890
687
+ },
688
+ {
689
+ "epoch": 3.5031691857630425,
690
+ "grad_norm": 8.49023723602295,
691
+ "learning_rate": 7.209201388888889e-06,
692
+ "loss": 1.9901,
693
+ "step": 900
694
+ },
695
+ {
696
+ "epoch": 3.5421745490004874,
697
+ "grad_norm": 9.62365436553955,
698
+ "learning_rate": 7.165798611111112e-06,
699
+ "loss": 1.9254,
700
+ "step": 910
701
+ },
702
+ {
703
+ "epoch": 3.581179912237933,
704
+ "grad_norm": 8.030123710632324,
705
+ "learning_rate": 7.122395833333334e-06,
706
+ "loss": 1.9497,
707
+ "step": 920
708
+ },
709
+ {
710
+ "epoch": 3.620185275475378,
711
+ "grad_norm": 9.932555198669434,
712
+ "learning_rate": 7.0789930555555564e-06,
713
+ "loss": 1.9421,
714
+ "step": 930
715
+ },
716
+ {
717
+ "epoch": 3.659190638712823,
718
+ "grad_norm": 10.256240844726562,
719
+ "learning_rate": 7.035590277777778e-06,
720
+ "loss": 1.8403,
721
+ "step": 940
722
+ },
723
+ {
724
+ "epoch": 3.698196001950268,
725
+ "grad_norm": 8.8562593460083,
726
+ "learning_rate": 6.9921875000000006e-06,
727
+ "loss": 1.835,
728
+ "step": 950
729
+ },
730
+ {
731
+ "epoch": 3.737201365187713,
732
+ "grad_norm": 9.009734153747559,
733
+ "learning_rate": 6.948784722222223e-06,
734
+ "loss": 1.9243,
735
+ "step": 960
736
+ },
737
+ {
738
+ "epoch": 3.7762067284251586,
739
+ "grad_norm": 7.639193058013916,
740
+ "learning_rate": 6.905381944444445e-06,
741
+ "loss": 1.9483,
742
+ "step": 970
743
+ },
744
+ {
745
+ "epoch": 3.8152120916626036,
746
+ "grad_norm": 11.056807518005371,
747
+ "learning_rate": 6.861979166666667e-06,
748
+ "loss": 1.9227,
749
+ "step": 980
750
+ },
751
+ {
752
+ "epoch": 3.8542174549000485,
753
+ "grad_norm": 11.807608604431152,
754
+ "learning_rate": 6.818576388888889e-06,
755
+ "loss": 1.9521,
756
+ "step": 990
757
+ },
758
+ {
759
+ "epoch": 3.893222818137494,
760
+ "grad_norm": 8.170421600341797,
761
+ "learning_rate": 6.775173611111112e-06,
762
+ "loss": 1.8677,
763
+ "step": 1000
764
+ },
765
+ {
766
+ "epoch": 3.932228181374939,
767
+ "grad_norm": 9.836169242858887,
768
+ "learning_rate": 6.731770833333335e-06,
769
+ "loss": 1.8232,
770
+ "step": 1010
771
+ },
772
+ {
773
+ "epoch": 3.9712335446123843,
774
+ "grad_norm": 10.409213066101074,
775
+ "learning_rate": 6.688368055555556e-06,
776
+ "loss": 1.878,
777
+ "step": 1020
778
+ },
779
+ {
780
+ "epoch": 4.0,
781
+ "eval_accuracy": 0.3554363725012189,
782
+ "eval_f1_macro": 0.3424708705030215,
783
+ "eval_f1_micro": 0.3554363725012189,
784
+ "eval_f1_weighted": 0.30902224255328264,
785
+ "eval_loss": 1.9913461208343506,
786
+ "eval_precision_macro": 0.46004367455197387,
787
+ "eval_precision_micro": 0.3554363725012189,
788
+ "eval_precision_weighted": 0.41659948847945955,
789
+ "eval_recall_macro": 0.36864660751423883,
790
+ "eval_recall_micro": 0.3554363725012189,
791
+ "eval_recall_weighted": 0.3554363725012189,
792
+ "eval_runtime": 57.605,
793
+ "eval_samples_per_second": 35.605,
794
+ "eval_steps_per_second": 4.461,
795
+ "step": 1028
796
+ },
797
+ {
798
+ "epoch": 4.007801072647489,
799
+ "grad_norm": 10.35783576965332,
800
+ "learning_rate": 6.644965277777779e-06,
801
+ "loss": 1.8021,
802
+ "step": 1030
803
+ },
804
+ {
805
+ "epoch": 4.0468064358849345,
806
+ "grad_norm": 6.1023101806640625,
807
+ "learning_rate": 6.6015625e-06,
808
+ "loss": 1.6937,
809
+ "step": 1040
810
+ },
811
+ {
812
+ "epoch": 4.0858117991223795,
813
+ "grad_norm": 9.223121643066406,
814
+ "learning_rate": 6.558159722222223e-06,
815
+ "loss": 1.671,
816
+ "step": 1050
817
+ },
818
+ {
819
+ "epoch": 4.124817162359824,
820
+ "grad_norm": 11.505966186523438,
821
+ "learning_rate": 6.5147569444444445e-06,
822
+ "loss": 1.7108,
823
+ "step": 1060
824
+ },
825
+ {
826
+ "epoch": 4.163822525597269,
827
+ "grad_norm": 8.100488662719727,
828
+ "learning_rate": 6.471354166666667e-06,
829
+ "loss": 1.7243,
830
+ "step": 1070
831
+ },
832
+ {
833
+ "epoch": 4.202827888834714,
834
+ "grad_norm": 10.167851448059082,
835
+ "learning_rate": 6.427951388888889e-06,
836
+ "loss": 1.6785,
837
+ "step": 1080
838
+ },
839
+ {
840
+ "epoch": 4.24183325207216,
841
+ "grad_norm": 9.894684791564941,
842
+ "learning_rate": 6.384548611111112e-06,
843
+ "loss": 1.6085,
844
+ "step": 1090
845
+ },
846
+ {
847
+ "epoch": 4.280838615309605,
848
+ "grad_norm": 14.413894653320312,
849
+ "learning_rate": 6.3411458333333344e-06,
850
+ "loss": 1.6503,
851
+ "step": 1100
852
+ },
853
+ {
854
+ "epoch": 4.31984397854705,
855
+ "grad_norm": 9.841206550598145,
856
+ "learning_rate": 6.297743055555556e-06,
857
+ "loss": 1.6533,
858
+ "step": 1110
859
+ },
860
+ {
861
+ "epoch": 4.358849341784495,
862
+ "grad_norm": 10.768450736999512,
863
+ "learning_rate": 6.2543402777777786e-06,
864
+ "loss": 1.6423,
865
+ "step": 1120
866
+ },
867
+ {
868
+ "epoch": 4.39785470502194,
869
+ "grad_norm": 10.424446105957031,
870
+ "learning_rate": 6.2109375e-06,
871
+ "loss": 1.704,
872
+ "step": 1130
873
+ },
874
+ {
875
+ "epoch": 4.436860068259386,
876
+ "grad_norm": 12.57535171508789,
877
+ "learning_rate": 6.167534722222223e-06,
878
+ "loss": 1.6415,
879
+ "step": 1140
880
+ },
881
+ {
882
+ "epoch": 4.475865431496831,
883
+ "grad_norm": 12.024733543395996,
884
+ "learning_rate": 6.124131944444444e-06,
885
+ "loss": 1.6701,
886
+ "step": 1150
887
+ },
888
+ {
889
+ "epoch": 4.514870794734276,
890
+ "grad_norm": 14.477334976196289,
891
+ "learning_rate": 6.080729166666667e-06,
892
+ "loss": 1.6661,
893
+ "step": 1160
894
+ },
895
+ {
896
+ "epoch": 4.553876157971721,
897
+ "grad_norm": 12.958348274230957,
898
+ "learning_rate": 6.037326388888889e-06,
899
+ "loss": 1.6084,
900
+ "step": 1170
901
+ },
902
+ {
903
+ "epoch": 4.592881521209167,
904
+ "grad_norm": 10.68494987487793,
905
+ "learning_rate": 5.993923611111112e-06,
906
+ "loss": 1.6175,
907
+ "step": 1180
908
+ },
909
+ {
910
+ "epoch": 4.631886884446612,
911
+ "grad_norm": 13.702882766723633,
912
+ "learning_rate": 5.950520833333334e-06,
913
+ "loss": 1.6614,
914
+ "step": 1190
915
+ },
916
+ {
917
+ "epoch": 4.670892247684057,
918
+ "grad_norm": 11.554649353027344,
919
+ "learning_rate": 5.907118055555556e-06,
920
+ "loss": 1.5768,
921
+ "step": 1200
922
+ },
923
+ {
924
+ "epoch": 4.709897610921502,
925
+ "grad_norm": 12.292317390441895,
926
+ "learning_rate": 5.863715277777778e-06,
927
+ "loss": 1.5714,
928
+ "step": 1210
929
+ },
930
+ {
931
+ "epoch": 4.748902974158947,
932
+ "grad_norm": 12.34539794921875,
933
+ "learning_rate": 5.820312500000001e-06,
934
+ "loss": 1.6122,
935
+ "step": 1220
936
+ },
937
+ {
938
+ "epoch": 4.7879083373963915,
939
+ "grad_norm": 9.150538444519043,
940
+ "learning_rate": 5.7769097222222225e-06,
941
+ "loss": 1.5502,
942
+ "step": 1230
943
+ },
944
+ {
945
+ "epoch": 4.826913700633837,
946
+ "grad_norm": 12.019886016845703,
947
+ "learning_rate": 5.733506944444445e-06,
948
+ "loss": 1.5283,
949
+ "step": 1240
950
+ },
951
+ {
952
+ "epoch": 4.865919063871282,
953
+ "grad_norm": 13.477303504943848,
954
+ "learning_rate": 5.690104166666667e-06,
955
+ "loss": 1.5673,
956
+ "step": 1250
957
+ },
958
+ {
959
+ "epoch": 4.904924427108727,
960
+ "grad_norm": 14.153918266296387,
961
+ "learning_rate": 5.646701388888889e-06,
962
+ "loss": 1.5456,
963
+ "step": 1260
964
+ },
965
+ {
966
+ "epoch": 4.943929790346172,
967
+ "grad_norm": 12.06778335571289,
968
+ "learning_rate": 5.6032986111111124e-06,
969
+ "loss": 1.5221,
970
+ "step": 1270
971
+ },
972
+ {
973
+ "epoch": 4.982935153583618,
974
+ "grad_norm": 14.565933227539062,
975
+ "learning_rate": 5.559895833333334e-06,
976
+ "loss": 1.4679,
977
+ "step": 1280
978
+ },
979
+ {
980
+ "epoch": 5.0,
981
+ "eval_accuracy": 0.3793271574841541,
982
+ "eval_f1_macro": 0.3788654736153261,
983
+ "eval_f1_micro": 0.3793271574841541,
984
+ "eval_f1_weighted": 0.3406548058322202,
985
+ "eval_loss": 1.980505347251892,
986
+ "eval_precision_macro": 0.40541928460663945,
987
+ "eval_precision_micro": 0.3793271574841541,
988
+ "eval_precision_weighted": 0.3634818029255939,
989
+ "eval_recall_macro": 0.40614544202326364,
990
+ "eval_recall_micro": 0.3793271574841541,
991
+ "eval_recall_weighted": 0.3793271574841541,
992
+ "eval_runtime": 57.5587,
993
+ "eval_samples_per_second": 35.633,
994
+ "eval_steps_per_second": 4.465,
995
+ "step": 1285
996
+ },
997
+ {
998
+ "epoch": 5.0195026816187225,
999
+ "grad_norm": 10.44895076751709,
1000
+ "learning_rate": 5.5164930555555566e-06,
1001
+ "loss": 1.3376,
1002
+ "step": 1290
1003
+ },
1004
+ {
1005
+ "epoch": 5.0585080448561675,
1006
+ "grad_norm": 15.51266860961914,
1007
+ "learning_rate": 5.4774305555555565e-06,
1008
+ "loss": 1.479,
1009
+ "step": 1300
1010
+ },
1011
+ {
1012
+ "epoch": 5.097513408093613,
1013
+ "grad_norm": 11.123811721801758,
1014
+ "learning_rate": 5.434027777777778e-06,
1015
+ "loss": 1.3313,
1016
+ "step": 1310
1017
+ },
1018
+ {
1019
+ "epoch": 5.136518771331058,
1020
+ "grad_norm": 10.911710739135742,
1021
+ "learning_rate": 5.390625000000001e-06,
1022
+ "loss": 1.406,
1023
+ "step": 1320
1024
+ },
1025
+ {
1026
+ "epoch": 5.175524134568503,
1027
+ "grad_norm": 12.338854789733887,
1028
+ "learning_rate": 5.347222222222222e-06,
1029
+ "loss": 1.3619,
1030
+ "step": 1330
1031
+ },
1032
+ {
1033
+ "epoch": 5.214529497805948,
1034
+ "grad_norm": 10.681340217590332,
1035
+ "learning_rate": 5.303819444444445e-06,
1036
+ "loss": 1.3803,
1037
+ "step": 1340
1038
+ },
1039
+ {
1040
+ "epoch": 5.253534861043393,
1041
+ "grad_norm": 12.806472778320312,
1042
+ "learning_rate": 5.260416666666666e-06,
1043
+ "loss": 1.4016,
1044
+ "step": 1350
1045
+ },
1046
+ {
1047
+ "epoch": 5.292540224280838,
1048
+ "grad_norm": 9.82974624633789,
1049
+ "learning_rate": 5.217013888888889e-06,
1050
+ "loss": 1.4298,
1051
+ "step": 1360
1052
+ },
1053
+ {
1054
+ "epoch": 5.331545587518284,
1055
+ "grad_norm": 9.38745403289795,
1056
+ "learning_rate": 5.173611111111112e-06,
1057
+ "loss": 1.2707,
1058
+ "step": 1370
1059
+ },
1060
+ {
1061
+ "epoch": 5.370550950755729,
1062
+ "grad_norm": 11.31470012664795,
1063
+ "learning_rate": 5.130208333333334e-06,
1064
+ "loss": 1.2818,
1065
+ "step": 1380
1066
+ },
1067
+ {
1068
+ "epoch": 5.409556313993174,
1069
+ "grad_norm": 12.014089584350586,
1070
+ "learning_rate": 5.086805555555556e-06,
1071
+ "loss": 1.3868,
1072
+ "step": 1390
1073
+ },
1074
+ {
1075
+ "epoch": 5.448561677230619,
1076
+ "grad_norm": 12.203828811645508,
1077
+ "learning_rate": 5.043402777777778e-06,
1078
+ "loss": 1.2886,
1079
+ "step": 1400
1080
+ },
1081
+ {
1082
+ "epoch": 5.487567040468065,
1083
+ "grad_norm": 15.790043830871582,
1084
+ "learning_rate": 5e-06,
1085
+ "loss": 1.2743,
1086
+ "step": 1410
1087
+ },
1088
+ {
1089
+ "epoch": 5.52657240370551,
1090
+ "grad_norm": 13.66932201385498,
1091
+ "learning_rate": 4.956597222222223e-06,
1092
+ "loss": 1.2872,
1093
+ "step": 1420
1094
+ },
1095
+ {
1096
+ "epoch": 5.565577766942955,
1097
+ "grad_norm": 11.339312553405762,
1098
+ "learning_rate": 4.9131944444444445e-06,
1099
+ "loss": 1.3496,
1100
+ "step": 1430
1101
+ },
1102
+ {
1103
+ "epoch": 5.6045831301804,
1104
+ "grad_norm": 10.514497756958008,
1105
+ "learning_rate": 4.8741319444444444e-06,
1106
+ "loss": 1.2668,
1107
+ "step": 1440
1108
+ },
1109
+ {
1110
+ "epoch": 5.643588493417845,
1111
+ "grad_norm": 14.161802291870117,
1112
+ "learning_rate": 4.830729166666667e-06,
1113
+ "loss": 1.3988,
1114
+ "step": 1450
1115
+ },
1116
+ {
1117
+ "epoch": 5.6825938566552905,
1118
+ "grad_norm": 13.395609855651855,
1119
+ "learning_rate": 4.787326388888889e-06,
1120
+ "loss": 1.2777,
1121
+ "step": 1460
1122
+ },
1123
+ {
1124
+ "epoch": 5.7215992198927355,
1125
+ "grad_norm": 16.105741500854492,
1126
+ "learning_rate": 4.743923611111111e-06,
1127
+ "loss": 1.3207,
1128
+ "step": 1470
1129
+ },
1130
+ {
1131
+ "epoch": 5.76060458313018,
1132
+ "grad_norm": 12.356030464172363,
1133
+ "learning_rate": 4.7005208333333335e-06,
1134
+ "loss": 1.3008,
1135
+ "step": 1480
1136
+ },
1137
+ {
1138
+ "epoch": 5.799609946367625,
1139
+ "grad_norm": 16.024171829223633,
1140
+ "learning_rate": 4.657118055555556e-06,
1141
+ "loss": 1.3169,
1142
+ "step": 1490
1143
+ },
1144
+ {
1145
+ "epoch": 5.83861530960507,
1146
+ "grad_norm": 13.603280067443848,
1147
+ "learning_rate": 4.6137152777777785e-06,
1148
+ "loss": 1.3377,
1149
+ "step": 1500
1150
+ },
1151
+ {
1152
+ "epoch": 5.877620672842516,
1153
+ "grad_norm": 12.428413391113281,
1154
+ "learning_rate": 4.5703125e-06,
1155
+ "loss": 1.3143,
1156
+ "step": 1510
1157
+ },
1158
+ {
1159
+ "epoch": 5.916626036079961,
1160
+ "grad_norm": 12.86538028717041,
1161
+ "learning_rate": 4.526909722222223e-06,
1162
+ "loss": 1.3215,
1163
+ "step": 1520
1164
+ },
1165
+ {
1166
+ "epoch": 5.955631399317406,
1167
+ "grad_norm": 17.084808349609375,
1168
+ "learning_rate": 4.483506944444444e-06,
1169
+ "loss": 1.3156,
1170
+ "step": 1530
1171
+ },
1172
+ {
1173
+ "epoch": 5.994636762554851,
1174
+ "grad_norm": 11.272918701171875,
1175
+ "learning_rate": 4.440104166666668e-06,
1176
+ "loss": 1.2399,
1177
+ "step": 1540
1178
+ },
1179
+ {
1180
+ "epoch": 6.0,
1181
+ "eval_accuracy": 0.40711847879083374,
1182
+ "eval_f1_macro": 0.43647747790260216,
1183
+ "eval_f1_micro": 0.40711847879083374,
1184
+ "eval_f1_weighted": 0.3874051890698862,
1185
+ "eval_loss": 1.9677170515060425,
1186
+ "eval_precision_macro": 0.49034231056721467,
1187
+ "eval_precision_micro": 0.40711847879083374,
1188
+ "eval_precision_weighted": 0.4284233711977137,
1189
+ "eval_recall_macro": 0.4407702395816866,
1190
+ "eval_recall_micro": 0.40711847879083374,
1191
+ "eval_recall_weighted": 0.40711847879083374,
1192
+ "eval_runtime": 57.6083,
1193
+ "eval_samples_per_second": 35.603,
1194
+ "eval_steps_per_second": 4.461,
1195
+ "step": 1542
1196
+ }
1197
+ ],
1198
+ "logging_steps": 10,
1199
+ "max_steps": 2560,
1200
+ "num_input_tokens_seen": 0,
1201
+ "num_train_epochs": 10,
1202
+ "save_steps": 500,
1203
+ "stateful_callbacks": {
1204
+ "EarlyStoppingCallback": {
1205
+ "args": {
1206
+ "early_stopping_patience": 5,
1207
+ "early_stopping_threshold": 0.01
1208
+ },
1209
+ "attributes": {
1210
+ "early_stopping_patience_counter": 0
1211
+ }
1212
+ },
1213
+ "TrainerControl": {
1214
+ "args": {
1215
+ "should_epoch_stop": false,
1216
+ "should_evaluate": false,
1217
+ "should_log": false,
1218
+ "should_save": true,
1219
+ "should_training_stop": false
1220
+ },
1221
+ "attributes": {}
1222
+ }
1223
+ },
1224
+ "total_flos": 4.587586907052442e+16,
1225
+ "train_batch_size": 4,
1226
+ "trial_name": null,
1227
+ "trial_params": null
1228
+ }
checkpoint-1542/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a02b0d078d86ef0a1711524deea83d7a31c2e9047f6f3914251d4766e478633
3
+ size 5368
config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-large",
3
+ "_num_labels": 16,
4
+ "architectures": [
5
+ "DebertaV2ForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "enfj",
13
+ "1": "enfp",
14
+ "2": "entj",
15
+ "3": "entp",
16
+ "4": "esfj",
17
+ "5": "esfp",
18
+ "6": "estj",
19
+ "7": "estp",
20
+ "8": "infj",
21
+ "9": "infp",
22
+ "10": "intj",
23
+ "11": "intp",
24
+ "12": "isfj",
25
+ "13": "isfp",
26
+ "14": "istj",
27
+ "15": "istp"
28
+ },
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 4096,
31
+ "label2id": {
32
+ "enfj": 0,
33
+ "enfp": 1,
34
+ "entj": 2,
35
+ "entp": 3,
36
+ "esfj": 4,
37
+ "esfp": 5,
38
+ "estj": 6,
39
+ "estp": 7,
40
+ "infj": 8,
41
+ "infp": 9,
42
+ "intj": 10,
43
+ "intp": 11,
44
+ "isfj": 12,
45
+ "isfp": 13,
46
+ "istj": 14,
47
+ "istp": 15
48
+ },
49
+ "layer_norm_eps": 1e-07,
50
+ "legacy": true,
51
+ "max_position_embeddings": 512,
52
+ "max_relative_positions": -1,
53
+ "model_type": "deberta-v2",
54
+ "norm_rel_ebd": "layer_norm",
55
+ "num_attention_heads": 16,
56
+ "num_hidden_layers": 24,
57
+ "pad_token_id": 0,
58
+ "pooler_dropout": 0,
59
+ "pooler_hidden_act": "gelu",
60
+ "pooler_hidden_size": 1024,
61
+ "pos_att_type": [
62
+ "p2c",
63
+ "c2p"
64
+ ],
65
+ "position_biased_input": false,
66
+ "position_buckets": 256,
67
+ "relative_attention": true,
68
+ "share_att_key": true,
69
+ "torch_dtype": "float32",
70
+ "transformers_version": "4.48.0",
71
+ "type_vocab_size": 0,
72
+ "vocab_size": 128100
73
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c9b5b9de47af2e45c14a96ab10be692e0caf5e1ae04b1a717235a34c072f012
3
+ size 1740361848
runs/Jan21_15-38-16_r-idobn-twitter-mbti-2rmya185-a2492-5fds5/events.out.tfevents.1769009898.r-idobn-twitter-mbti-2rmya185-a2492-5fds5.120.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ae04a51f32d0ff40558520b8ac6c5420853b18c481a4a3c208dac0220da9cd3
3
- size 63268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dd2028fa475d3218fa9bfbe2d5f1ccfd1460eeae1e4a3166dbab412489f3a2f
3
+ size 68464
runs/Jan21_15-38-16_r-idobn-twitter-mbti-2rmya185-a2492-5fds5/events.out.tfevents.1769017304.r-idobn-twitter-mbti-2rmya185-a2492-5fds5.120.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72f600ffc5d8ad8eeac7b8964d4b71ddaab5ae014180d5a422f8cf63fd89c37
3
+ size 921
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a02b0d078d86ef0a1711524deea83d7a31c2e9047f6f3914251d4766e478633
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "twitter-mbti-v2/autotrain-data",
3
+ "model": "microsoft/deberta-v3-large",
4
+ "lr": 1e-05,
5
+ "epochs": 10,
6
+ "max_seq_length": 512,
7
+ "batch_size": 4,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 8,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.01,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": 10,
20
+ "project_name": "twitter-mbti-v2",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "idobn",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }