IoannisKat1 commited on
Commit
807a21f
·
verified ·
1 Parent(s): a80efd7

Add finetuned model

Browse files
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-196/README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-196/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:484814348bf4fcb085f20060a0b9f52191a31badfba256e9f8fbe9f428f90bb0
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:089ae7a9c62feaa78358d73a791e89d04f65868ca6a0ad13dea1ae58ad804883
3
  size 2239607176
checkpoint-196/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c08151ebf618205048df61572e8db5af87c9c5b7778d11bffd1ac1d864c2535
3
  size 4471067142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91fc8c12c900c84de777963e9a2478995acb79e9b292d730dfb117e380c7c200
3
  size 4471067142
checkpoint-196/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:073fc248f153339698e571b34766dd922834d7173e0dde5ac471b168f6ab29cc
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa08bd9dd367cde376d15e8b982d14cd6729eae58ce75d651531d783eb6f5977
3
  size 14645
checkpoint-196/tokenizer_config.json CHANGED
@@ -47,9 +47,16 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
+ "max_length": 512,
51
  "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
  "sep_token": "</s>",
57
+ "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
checkpoint-196/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 98,
3
- "best_metric": 0.3299991425713933,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,1569 +11,1569 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
- "grad_norm": 196.63563537597656,
15
  "learning_rate": 0.0,
16
- "loss": 4.0658,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
- "grad_norm": 184.93710327148438,
22
  "learning_rate": 1.0204081632653061e-07,
23
- "loss": 5.2785,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
- "grad_norm": 179.60655212402344,
29
  "learning_rate": 2.0408163265306121e-07,
30
- "loss": 4.349,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
- "grad_norm": 163.9447479248047,
36
  "learning_rate": 3.0612244897959183e-07,
37
- "loss": 3.805,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
- "grad_norm": 164.29776000976562,
43
  "learning_rate": 4.0816326530612243e-07,
44
- "loss": 3.1683,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
- "grad_norm": 209.0354766845703,
50
  "learning_rate": 5.102040816326531e-07,
51
- "loss": 5.3989,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
- "grad_norm": 262.14971923828125,
57
  "learning_rate": 6.122448979591837e-07,
58
- "loss": 8.211,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
- "grad_norm": 188.68443298339844,
64
  "learning_rate": 7.142857142857143e-07,
65
- "loss": 5.3598,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
- "grad_norm": 216.530517578125,
71
  "learning_rate": 8.163265306122449e-07,
72
- "loss": 5.0522,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
- "grad_norm": 231.66384887695312,
78
  "learning_rate": 9.183673469387756e-07,
79
- "loss": 4.4736,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
- "grad_norm": 329.2440490722656,
85
  "learning_rate": 1.0204081632653063e-06,
86
- "loss": 8.3251,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
- "grad_norm": 102.95913696289062,
92
  "learning_rate": 1.122448979591837e-06,
93
- "loss": 2.5822,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
- "grad_norm": 116.47322845458984,
99
  "learning_rate": 1.2244897959183673e-06,
100
- "loss": 2.8464,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
- "grad_norm": 226.98976135253906,
106
  "learning_rate": 1.3265306122448982e-06,
107
- "loss": 5.8915,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
- "grad_norm": 192.4533233642578,
113
  "learning_rate": 1.4285714285714286e-06,
114
- "loss": 7.2637,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
- "grad_norm": 269.7630920410156,
120
  "learning_rate": 1.5306122448979593e-06,
121
- "loss": 7.4234,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
- "grad_norm": 111.28227233886719,
127
  "learning_rate": 1.6326530612244897e-06,
128
- "loss": 2.3787,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
- "grad_norm": 165.55792236328125,
134
  "learning_rate": 1.7346938775510206e-06,
135
- "loss": 3.1947,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
- "grad_norm": 152.33682250976562,
141
  "learning_rate": 1.8367346938775512e-06,
142
- "loss": 3.4787,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
- "grad_norm": 159.368408203125,
148
  "learning_rate": 1.938775510204082e-06,
149
- "loss": 5.1418,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
- "grad_norm": 112.97805786132812,
155
  "learning_rate": 2.0408163265306125e-06,
156
- "loss": 2.3042,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
- "grad_norm": 199.43443298339844,
162
  "learning_rate": 2.1428571428571427e-06,
163
- "loss": 6.6786,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
- "grad_norm": 241.95591735839844,
169
  "learning_rate": 2.244897959183674e-06,
170
- "loss": 6.6721,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
- "grad_norm": 249.65122985839844,
176
  "learning_rate": 2.3469387755102044e-06,
177
- "loss": 4.4896,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
- "grad_norm": 183.51483154296875,
183
  "learning_rate": 2.4489795918367347e-06,
184
- "loss": 3.4416,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
- "grad_norm": 286.1512756347656,
190
  "learning_rate": 2.5510204081632657e-06,
191
- "loss": 7.5134,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
- "grad_norm": 98.32283782958984,
197
  "learning_rate": 2.6530612244897964e-06,
198
- "loss": 1.9577,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
- "grad_norm": 274.64178466796875,
204
  "learning_rate": 2.7551020408163266e-06,
205
- "loss": 4.9552,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
- "grad_norm": 142.77537536621094,
211
  "learning_rate": 2.8571428571428573e-06,
212
- "loss": 2.5202,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
- "grad_norm": 127.23304748535156,
218
  "learning_rate": 2.959183673469388e-06,
219
- "loss": 2.4486,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
- "grad_norm": 99.88568878173828,
225
  "learning_rate": 3.0612244897959185e-06,
226
- "loss": 1.9923,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
- "grad_norm": 151.1445770263672,
232
  "learning_rate": 3.1632653061224496e-06,
233
- "loss": 2.8301,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
- "grad_norm": 203.54248046875,
239
  "learning_rate": 3.2653061224489794e-06,
240
- "loss": 3.7414,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
- "grad_norm": 443.4117126464844,
246
  "learning_rate": 3.3673469387755105e-06,
247
- "loss": 5.2738,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
- "grad_norm": 178.9974822998047,
253
  "learning_rate": 3.469387755102041e-06,
254
- "loss": 3.791,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
- "grad_norm": 122.32801818847656,
260
  "learning_rate": 3.5714285714285718e-06,
261
- "loss": 1.9081,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
- "grad_norm": 189.6477813720703,
267
  "learning_rate": 3.6734693877551024e-06,
268
- "loss": 2.4172,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
- "grad_norm": 222.67959594726562,
274
  "learning_rate": 3.7755102040816327e-06,
275
- "loss": 4.0417,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
- "grad_norm": 160.97071838378906,
281
  "learning_rate": 3.877551020408164e-06,
282
- "loss": 3.5591,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
- "grad_norm": 178.01609802246094,
288
  "learning_rate": 3.979591836734694e-06,
289
- "loss": 3.0139,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
- "grad_norm": 142.32168579101562,
295
  "learning_rate": 4.081632653061225e-06,
296
- "loss": 2.4836,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
- "grad_norm": 148.1731719970703,
302
  "learning_rate": 4.183673469387755e-06,
303
- "loss": 2.807,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
- "grad_norm": 152.1929931640625,
309
  "learning_rate": 4.2857142857142855e-06,
310
- "loss": 1.9753,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
- "grad_norm": 219.5394287109375,
316
  "learning_rate": 4.3877551020408165e-06,
317
- "loss": 0.9764,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
- "grad_norm": 95.7768783569336,
323
  "learning_rate": 4.489795918367348e-06,
324
- "loss": 1.1398,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
- "grad_norm": 320.86529541015625,
330
  "learning_rate": 4.591836734693878e-06,
331
- "loss": 6.7812,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
- "grad_norm": 18.277860641479492,
337
  "learning_rate": 4.693877551020409e-06,
338
- "loss": 0.2479,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
- "grad_norm": 405.69024658203125,
344
  "learning_rate": 4.795918367346939e-06,
345
- "loss": 6.041,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
- "grad_norm": 104.91180419921875,
351
  "learning_rate": 4.897959183673469e-06,
352
- "loss": 1.1839,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
- "grad_norm": 110.48990631103516,
358
  "learning_rate": 5e-06,
359
- "loss": 2.0933,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
- "grad_norm": 135.2900390625,
365
  "learning_rate": 5.1020408163265315e-06,
366
- "loss": 1.8613,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
- "grad_norm": 146.744140625,
372
  "learning_rate": 5.204081632653062e-06,
373
- "loss": 2.9359,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
- "grad_norm": 103.08831787109375,
379
  "learning_rate": 5.306122448979593e-06,
380
- "loss": 1.0129,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
- "grad_norm": 527.3735961914062,
386
  "learning_rate": 5.408163265306123e-06,
387
- "loss": 11.8843,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
- "grad_norm": 374.4621887207031,
393
  "learning_rate": 5.510204081632653e-06,
394
- "loss": 5.8523,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
- "grad_norm": 71.62016296386719,
400
  "learning_rate": 5.6122448979591834e-06,
401
- "loss": 0.832,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
- "grad_norm": 287.0186462402344,
407
  "learning_rate": 5.7142857142857145e-06,
408
- "loss": 3.1778,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
- "grad_norm": 31.022693634033203,
414
  "learning_rate": 5.816326530612246e-06,
415
- "loss": 0.3919,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
- "grad_norm": 412.99176025390625,
421
  "learning_rate": 5.918367346938776e-06,
422
- "loss": 7.2526,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
- "grad_norm": 40.534244537353516,
428
  "learning_rate": 6.020408163265307e-06,
429
- "loss": 0.578,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
- "grad_norm": 226.0243682861328,
435
  "learning_rate": 6.122448979591837e-06,
436
- "loss": 2.5233,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
- "grad_norm": 113.55558013916016,
442
  "learning_rate": 6.224489795918368e-06,
443
- "loss": 1.5694,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
- "grad_norm": 190.6112518310547,
449
  "learning_rate": 6.326530612244899e-06,
450
- "loss": 2.2332,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
- "grad_norm": 391.7434387207031,
456
  "learning_rate": 6.4285714285714295e-06,
457
- "loss": 4.5545,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
- "grad_norm": 214.495361328125,
463
  "learning_rate": 6.530612244897959e-06,
464
- "loss": 1.4804,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
- "grad_norm": 62.52560806274414,
470
  "learning_rate": 6.63265306122449e-06,
471
- "loss": 0.4391,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
- "grad_norm": 298.97808837890625,
477
  "learning_rate": 6.734693877551021e-06,
478
- "loss": 3.4715,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
- "grad_norm": 483.84796142578125,
484
  "learning_rate": 6.836734693877551e-06,
485
- "loss": 8.5808,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
- "grad_norm": 482.55743408203125,
491
  "learning_rate": 6.938775510204082e-06,
492
- "loss": 5.6959,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
- "grad_norm": 198.6812744140625,
498
  "learning_rate": 7.0408163265306125e-06,
499
- "loss": 3.8277,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
- "grad_norm": 293.4190673828125,
505
  "learning_rate": 7.1428571428571436e-06,
506
- "loss": 2.1832,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
- "grad_norm": 13.164139747619629,
512
  "learning_rate": 7.244897959183675e-06,
513
- "loss": 0.1244,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
- "grad_norm": 163.4252166748047,
519
  "learning_rate": 7.346938775510205e-06,
520
- "loss": 0.7707,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
- "grad_norm": 193.64401245117188,
526
  "learning_rate": 7.448979591836736e-06,
527
- "loss": 3.4828,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
- "grad_norm": 178.31982421875,
533
  "learning_rate": 7.551020408163265e-06,
534
- "loss": 2.9645,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
- "grad_norm": 28.57689666748047,
540
  "learning_rate": 7.653061224489796e-06,
541
- "loss": 0.2948,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
- "grad_norm": 608.8088989257812,
547
  "learning_rate": 7.755102040816327e-06,
548
- "loss": 12.6456,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
- "grad_norm": 123.08556365966797,
554
  "learning_rate": 7.857142857142858e-06,
555
- "loss": 1.2493,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
- "grad_norm": 225.3292694091797,
561
  "learning_rate": 7.959183673469388e-06,
562
- "loss": 2.6675,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
- "grad_norm": 57.49665069580078,
568
  "learning_rate": 8.06122448979592e-06,
569
- "loss": 0.5642,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
- "grad_norm": 247.52210998535156,
575
  "learning_rate": 8.16326530612245e-06,
576
- "loss": 1.6008,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
- "grad_norm": 309.60382080078125,
582
  "learning_rate": 8.26530612244898e-06,
583
- "loss": 3.257,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
- "grad_norm": 183.82882690429688,
589
  "learning_rate": 8.36734693877551e-06,
590
- "loss": 2.8086,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
- "grad_norm": 88.08740234375,
596
  "learning_rate": 8.469387755102042e-06,
597
- "loss": 0.4056,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
- "grad_norm": 515.5130615234375,
603
  "learning_rate": 8.571428571428571e-06,
604
- "loss": 3.711,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
- "grad_norm": 2.946629285812378,
610
  "learning_rate": 8.673469387755103e-06,
611
- "loss": 0.0253,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
- "grad_norm": 31.143714904785156,
617
  "learning_rate": 8.775510204081633e-06,
618
- "loss": 0.168,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
- "grad_norm": 535.6795043945312,
624
  "learning_rate": 8.877551020408163e-06,
625
- "loss": 5.0992,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
- "grad_norm": 577.0897216796875,
631
  "learning_rate": 8.979591836734695e-06,
632
- "loss": 5.3724,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
- "grad_norm": 102.31855773925781,
638
  "learning_rate": 9.081632653061225e-06,
639
- "loss": 0.9172,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
- "grad_norm": 306.1739196777344,
645
  "learning_rate": 9.183673469387756e-06,
646
- "loss": 3.1239,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
- "grad_norm": 191.27415466308594,
652
  "learning_rate": 9.285714285714288e-06,
653
- "loss": 1.4121,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
- "grad_norm": 67.19822692871094,
659
  "learning_rate": 9.387755102040818e-06,
660
- "loss": 0.2599,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
- "grad_norm": 17.93955421447754,
666
  "learning_rate": 9.489795918367348e-06,
667
- "loss": 0.1166,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
- "grad_norm": 23.839630126953125,
673
  "learning_rate": 9.591836734693878e-06,
674
- "loss": 0.1938,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
- "grad_norm": 1459.8140869140625,
680
  "learning_rate": 9.693877551020408e-06,
681
- "loss": 18.5143,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
- "grad_norm": 670.869140625,
687
  "learning_rate": 9.795918367346939e-06,
688
- "loss": 6.1932,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
- "grad_norm": 942.95849609375,
694
  "learning_rate": 9.89795918367347e-06,
695
- "loss": 20.3042,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
- "eval_dim_1024_cosine_accuracy@1": 0.3719590268886043,
701
- "eval_dim_1024_cosine_accuracy@10": 0.43982074263764404,
702
- "eval_dim_1024_cosine_accuracy@3": 0.37836107554417414,
703
- "eval_dim_1024_cosine_accuracy@5": 0.4020486555697823,
704
- "eval_dim_1024_cosine_map@100": 0.4604070214987707,
705
- "eval_dim_1024_cosine_mrr@10": 0.383397099770339,
706
- "eval_dim_1024_cosine_ndcg@10": 0.3947688545057553,
707
- "eval_dim_1024_cosine_precision@1": 0.3719590268886043,
708
- "eval_dim_1024_cosine_precision@10": 0.3265044814340589,
709
- "eval_dim_1024_cosine_precision@3": 0.37153222364489963,
710
- "eval_dim_1024_cosine_precision@5": 0.3613316261203585,
711
- "eval_dim_1024_cosine_recall@1": 0.04338670134208909,
712
- "eval_dim_1024_cosine_recall@10": 0.28262195979320087,
713
- "eval_dim_1024_cosine_recall@3": 0.1268773565773867,
714
- "eval_dim_1024_cosine_recall@5": 0.19083511167371434,
715
- "eval_dim_128_cosine_accuracy@1": 0.3053777208706786,
716
- "eval_dim_128_cosine_accuracy@10": 0.37836107554417414,
717
- "eval_dim_128_cosine_accuracy@3": 0.3111395646606914,
718
- "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
- "eval_dim_128_cosine_map@100": 0.3904168259576031,
720
- "eval_dim_128_cosine_mrr@10": 0.3174377070503828,
721
- "eval_dim_128_cosine_ndcg@10": 0.3299991425713933,
722
- "eval_dim_128_cosine_precision@1": 0.3053777208706786,
723
- "eval_dim_128_cosine_precision@10": 0.271830985915493,
724
- "eval_dim_128_cosine_precision@3": 0.30495091762697396,
725
- "eval_dim_128_cosine_precision@5": 0.29641485275288093,
726
- "eval_dim_128_cosine_recall@1": 0.03688049871840266,
727
- "eval_dim_128_cosine_recall@10": 0.24260019246216608,
728
- "eval_dim_128_cosine_recall@3": 0.10779952005618963,
729
- "eval_dim_128_cosine_recall@5": 0.16176912684922656,
730
- "eval_dim_256_cosine_accuracy@1": 0.324583866837388,
731
- "eval_dim_256_cosine_accuracy@10": 0.4058898847631242,
732
- "eval_dim_256_cosine_accuracy@3": 0.33290653008962867,
733
- "eval_dim_256_cosine_accuracy@5": 0.3649167733674776,
734
- "eval_dim_256_cosine_map@100": 0.4232469199200366,
735
- "eval_dim_256_cosine_mrr@10": 0.33872299453285326,
736
- "eval_dim_256_cosine_ndcg@10": 0.35307499975694673,
737
- "eval_dim_256_cosine_precision@1": 0.324583866837388,
738
- "eval_dim_256_cosine_precision@10": 0.2935339308578745,
739
- "eval_dim_256_cosine_precision@3": 0.324583866837388,
740
- "eval_dim_256_cosine_precision@5": 0.31792573623559534,
741
- "eval_dim_256_cosine_recall@1": 0.038313787861467184,
742
- "eval_dim_256_cosine_recall@10": 0.2549878568107636,
743
- "eval_dim_256_cosine_recall@3": 0.11223891931505588,
744
- "eval_dim_256_cosine_recall@5": 0.1697408782100328,
745
- "eval_dim_512_cosine_accuracy@1": 0.34827144686299616,
746
- "eval_dim_512_cosine_accuracy@10": 0.44302176696542894,
747
- "eval_dim_512_cosine_accuracy@3": 0.3553137003841229,
748
- "eval_dim_512_cosine_accuracy@5": 0.39436619718309857,
749
- "eval_dim_512_cosine_map@100": 0.44994622162234726,
750
- "eval_dim_512_cosine_mrr@10": 0.3644033392272826,
751
- "eval_dim_512_cosine_ndcg@10": 0.3807642678190648,
752
- "eval_dim_512_cosine_precision@1": 0.34827144686299616,
753
- "eval_dim_512_cosine_precision@10": 0.31798975672215113,
754
- "eval_dim_512_cosine_precision@3": 0.34827144686299616,
755
- "eval_dim_512_cosine_precision@5": 0.3418693982074264,
756
- "eval_dim_512_cosine_recall@1": 0.04125738861359979,
757
- "eval_dim_512_cosine_recall@10": 0.2752162089385945,
758
- "eval_dim_512_cosine_recall@3": 0.12077279112247459,
759
- "eval_dim_512_cosine_recall@5": 0.18268801127884626,
760
- "eval_dim_64_cosine_accuracy@1": 0.23175416133162613,
761
- "eval_dim_64_cosine_accuracy@10": 0.29833546734955185,
762
- "eval_dim_64_cosine_accuracy@3": 0.23879641485275288,
763
- "eval_dim_64_cosine_accuracy@5": 0.2612035851472471,
764
- "eval_dim_64_cosine_map@100": 0.3151829220617657,
765
- "eval_dim_64_cosine_mrr@10": 0.24303802613661746,
766
- "eval_dim_64_cosine_ndcg@10": 0.2548721998123125,
767
- "eval_dim_64_cosine_precision@1": 0.23175416133162613,
768
- "eval_dim_64_cosine_precision@10": 0.20864276568501922,
769
- "eval_dim_64_cosine_precision@3": 0.23218096457533077,
770
- "eval_dim_64_cosine_precision@5": 0.22740076824583869,
771
- "eval_dim_64_cosine_recall@1": 0.028384798943475897,
772
- "eval_dim_64_cosine_recall@10": 0.19540887275051927,
773
- "eval_dim_64_cosine_recall@3": 0.08325514613360847,
774
- "eval_dim_64_cosine_recall@5": 0.12720688223912358,
775
- "eval_dim_768_cosine_accuracy@1": 0.36619718309859156,
776
- "eval_dim_768_cosine_accuracy@10": 0.44302176696542894,
777
- "eval_dim_768_cosine_accuracy@3": 0.37516005121638923,
778
- "eval_dim_768_cosine_accuracy@5": 0.4014084507042254,
779
- "eval_dim_768_cosine_map@100": 0.4592162636155952,
780
- "eval_dim_768_cosine_mrr@10": 0.37943672133812956,
781
- "eval_dim_768_cosine_ndcg@10": 0.3923099208699586,
782
- "eval_dim_768_cosine_precision@1": 0.36619718309859156,
783
- "eval_dim_768_cosine_precision@10": 0.32541613316261203,
784
- "eval_dim_768_cosine_precision@3": 0.3666239863422962,
785
- "eval_dim_768_cosine_precision@5": 0.35761843790012804,
786
- "eval_dim_768_cosine_recall@1": 0.042908107176418055,
787
- "eval_dim_768_cosine_recall@10": 0.2828362934197418,
788
- "eval_dim_768_cosine_recall@3": 0.12573828441229515,
789
- "eval_dim_768_cosine_recall@5": 0.18984022934199501,
790
- "eval_runtime": 98.8905,
791
  "eval_samples_per_second": 0.0,
792
- "eval_sequential_score": 0.2548721998123125,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  },
796
  {
797
  "epoch": 1.010204081632653,
798
- "grad_norm": 1352.0535888671875,
799
  "learning_rate": 1e-05,
800
- "loss": 2.3413,
801
  "step": 99
802
  },
803
  {
804
  "epoch": 1.0204081632653061,
805
- "grad_norm": 852.8284301757812,
806
  "learning_rate": 1.0102040816326531e-05,
807
- "loss": 9.0759,
808
  "step": 100
809
  },
810
  {
811
  "epoch": 1.030612244897959,
812
- "grad_norm": 13.654555320739746,
813
  "learning_rate": 1.0204081632653063e-05,
814
- "loss": 0.0652,
815
  "step": 101
816
  },
817
  {
818
  "epoch": 1.0408163265306123,
819
- "grad_norm": 725.5890502929688,
820
  "learning_rate": 1.0306122448979591e-05,
821
- "loss": 9.3494,
822
  "step": 102
823
  },
824
  {
825
  "epoch": 1.0510204081632653,
826
- "grad_norm": 42.87101364135742,
827
  "learning_rate": 1.0408163265306123e-05,
828
- "loss": 0.1636,
829
  "step": 103
830
  },
831
  {
832
  "epoch": 1.0612244897959184,
833
- "grad_norm": 366.9559631347656,
834
  "learning_rate": 1.0510204081632654e-05,
835
- "loss": 2.4237,
836
  "step": 104
837
  },
838
  {
839
  "epoch": 1.0714285714285714,
840
- "grad_norm": 332.9723815917969,
841
  "learning_rate": 1.0612244897959186e-05,
842
- "loss": 1.9761,
843
  "step": 105
844
  },
845
  {
846
  "epoch": 1.0816326530612246,
847
- "grad_norm": 512.0596923828125,
848
  "learning_rate": 1.0714285714285714e-05,
849
- "loss": 3.5402,
850
  "step": 106
851
  },
852
  {
853
  "epoch": 1.0918367346938775,
854
- "grad_norm": 199.25193786621094,
855
  "learning_rate": 1.0816326530612246e-05,
856
- "loss": 1.3885,
857
  "step": 107
858
  },
859
  {
860
  "epoch": 1.1020408163265305,
861
- "grad_norm": 300.2743225097656,
862
  "learning_rate": 1.0918367346938776e-05,
863
- "loss": 1.5145,
864
  "step": 108
865
  },
866
  {
867
  "epoch": 1.1122448979591837,
868
- "grad_norm": 382.1838684082031,
869
  "learning_rate": 1.1020408163265306e-05,
870
- "loss": 3.3087,
871
  "step": 109
872
  },
873
  {
874
  "epoch": 1.1224489795918366,
875
- "grad_norm": 164.595703125,
876
  "learning_rate": 1.1122448979591838e-05,
877
- "loss": 0.576,
878
  "step": 110
879
  },
880
  {
881
  "epoch": 1.1326530612244898,
882
- "grad_norm": 184.7035369873047,
883
  "learning_rate": 1.1224489795918367e-05,
884
- "loss": 1.6206,
885
  "step": 111
886
  },
887
  {
888
  "epoch": 1.1428571428571428,
889
- "grad_norm": 811.6741333007812,
890
  "learning_rate": 1.1326530612244899e-05,
891
- "loss": 10.2227,
892
  "step": 112
893
  },
894
  {
895
  "epoch": 1.153061224489796,
896
- "grad_norm": 470.61212158203125,
897
  "learning_rate": 1.1428571428571429e-05,
898
- "loss": 3.1477,
899
  "step": 113
900
  },
901
  {
902
  "epoch": 1.163265306122449,
903
- "grad_norm": 656.7257690429688,
904
  "learning_rate": 1.1530612244897961e-05,
905
- "loss": 2.6782,
906
  "step": 114
907
  },
908
  {
909
  "epoch": 1.1734693877551021,
910
- "grad_norm": 198.0335235595703,
911
  "learning_rate": 1.1632653061224491e-05,
912
- "loss": 1.013,
913
  "step": 115
914
  },
915
  {
916
  "epoch": 1.183673469387755,
917
- "grad_norm": 793.2557373046875,
918
  "learning_rate": 1.1734693877551021e-05,
919
- "loss": 5.486,
920
  "step": 116
921
  },
922
  {
923
  "epoch": 1.193877551020408,
924
- "grad_norm": 40.89669418334961,
925
  "learning_rate": 1.1836734693877552e-05,
926
- "loss": 0.2245,
927
  "step": 117
928
  },
929
  {
930
  "epoch": 1.2040816326530612,
931
- "grad_norm": 215.04872131347656,
932
  "learning_rate": 1.1938775510204084e-05,
933
- "loss": 1.3157,
934
  "step": 118
935
  },
936
  {
937
  "epoch": 1.2142857142857142,
938
- "grad_norm": 10.823179244995117,
939
  "learning_rate": 1.2040816326530614e-05,
940
- "loss": 0.0276,
941
  "step": 119
942
  },
943
  {
944
  "epoch": 1.2244897959183674,
945
- "grad_norm": 150.44537353515625,
946
  "learning_rate": 1.2142857142857142e-05,
947
- "loss": 1.1907,
948
  "step": 120
949
  },
950
  {
951
  "epoch": 1.2346938775510203,
952
- "grad_norm": 21.41417694091797,
953
  "learning_rate": 1.2244897959183674e-05,
954
- "loss": 0.0694,
955
  "step": 121
956
  },
957
  {
958
  "epoch": 1.2448979591836735,
959
- "grad_norm": 185.08053588867188,
960
  "learning_rate": 1.2346938775510204e-05,
961
- "loss": 1.3295,
962
  "step": 122
963
  },
964
  {
965
  "epoch": 1.2551020408163265,
966
- "grad_norm": 123.85110473632812,
967
  "learning_rate": 1.2448979591836736e-05,
968
- "loss": 0.501,
969
  "step": 123
970
  },
971
  {
972
  "epoch": 1.2653061224489797,
973
- "grad_norm": 337.702392578125,
974
  "learning_rate": 1.2551020408163267e-05,
975
- "loss": 2.1739,
976
  "step": 124
977
  },
978
  {
979
  "epoch": 1.2755102040816326,
980
- "grad_norm": 682.425048828125,
981
  "learning_rate": 1.2653061224489798e-05,
982
- "loss": 5.5839,
983
  "step": 125
984
  },
985
  {
986
  "epoch": 1.2857142857142856,
987
- "grad_norm": 182.4869384765625,
988
  "learning_rate": 1.2755102040816327e-05,
989
- "loss": 0.9169,
990
  "step": 126
991
  },
992
  {
993
  "epoch": 1.2959183673469388,
994
- "grad_norm": 87.18023681640625,
995
  "learning_rate": 1.2857142857142859e-05,
996
- "loss": 0.7417,
997
  "step": 127
998
  },
999
  {
1000
  "epoch": 1.306122448979592,
1001
- "grad_norm": 290.62518310546875,
1002
  "learning_rate": 1.2959183673469389e-05,
1003
- "loss": 4.4272,
1004
  "step": 128
1005
  },
1006
  {
1007
  "epoch": 1.316326530612245,
1008
- "grad_norm": 123.35594940185547,
1009
  "learning_rate": 1.3061224489795918e-05,
1010
- "loss": 1.1165,
1011
  "step": 129
1012
  },
1013
  {
1014
  "epoch": 1.3265306122448979,
1015
- "grad_norm": 530.7383422851562,
1016
  "learning_rate": 1.316326530612245e-05,
1017
- "loss": 4.3749,
1018
  "step": 130
1019
  },
1020
  {
1021
  "epoch": 1.336734693877551,
1022
- "grad_norm": 488.7727966308594,
1023
  "learning_rate": 1.326530612244898e-05,
1024
- "loss": 4.8529,
1025
  "step": 131
1026
  },
1027
  {
1028
  "epoch": 1.346938775510204,
1029
- "grad_norm": 912.1464233398438,
1030
  "learning_rate": 1.3367346938775512e-05,
1031
- "loss": 5.3515,
1032
  "step": 132
1033
  },
1034
  {
1035
  "epoch": 1.3571428571428572,
1036
- "grad_norm": 3.6914279460906982,
1037
  "learning_rate": 1.3469387755102042e-05,
1038
- "loss": 0.0201,
1039
  "step": 133
1040
  },
1041
  {
1042
  "epoch": 1.3673469387755102,
1043
- "grad_norm": 1.16976797580719,
1044
  "learning_rate": 1.3571428571428574e-05,
1045
- "loss": 0.0088,
1046
  "step": 134
1047
  },
1048
  {
1049
  "epoch": 1.3775510204081631,
1050
- "grad_norm": 839.4262084960938,
1051
  "learning_rate": 1.3673469387755102e-05,
1052
- "loss": 2.198,
1053
  "step": 135
1054
  },
1055
  {
1056
  "epoch": 1.3877551020408163,
1057
- "grad_norm": 2.5965521335601807,
1058
  "learning_rate": 1.3775510204081634e-05,
1059
- "loss": 0.0158,
1060
  "step": 136
1061
  },
1062
  {
1063
  "epoch": 1.3979591836734695,
1064
- "grad_norm": 5.312564373016357,
1065
  "learning_rate": 1.3877551020408165e-05,
1066
- "loss": 0.0304,
1067
  "step": 137
1068
  },
1069
  {
1070
  "epoch": 1.4081632653061225,
1071
- "grad_norm": 3.9951765537261963,
1072
  "learning_rate": 1.3979591836734696e-05,
1073
- "loss": 0.0176,
1074
  "step": 138
1075
  },
1076
  {
1077
  "epoch": 1.4183673469387754,
1078
- "grad_norm": 499.74761962890625,
1079
  "learning_rate": 1.4081632653061225e-05,
1080
- "loss": 1.9166,
1081
  "step": 139
1082
  },
1083
  {
1084
  "epoch": 1.4285714285714286,
1085
- "grad_norm": 550.5098266601562,
1086
  "learning_rate": 1.4183673469387755e-05,
1087
- "loss": 5.8026,
1088
  "step": 140
1089
  },
1090
  {
1091
  "epoch": 1.4387755102040816,
1092
- "grad_norm": 465.0825500488281,
1093
  "learning_rate": 1.4285714285714287e-05,
1094
- "loss": 6.2584,
1095
  "step": 141
1096
  },
1097
  {
1098
  "epoch": 1.4489795918367347,
1099
- "grad_norm": 152.85641479492188,
1100
  "learning_rate": 1.4387755102040817e-05,
1101
- "loss": 0.6994,
1102
  "step": 142
1103
  },
1104
  {
1105
  "epoch": 1.4591836734693877,
1106
- "grad_norm": 108.09862518310547,
1107
  "learning_rate": 1.448979591836735e-05,
1108
- "loss": 0.6583,
1109
  "step": 143
1110
  },
1111
  {
1112
  "epoch": 1.469387755102041,
1113
- "grad_norm": 9.0764799118042,
1114
  "learning_rate": 1.4591836734693878e-05,
1115
- "loss": 0.0464,
1116
  "step": 144
1117
  },
1118
  {
1119
  "epoch": 1.4795918367346939,
1120
- "grad_norm": 162.13211059570312,
1121
  "learning_rate": 1.469387755102041e-05,
1122
- "loss": 0.8106,
1123
  "step": 145
1124
  },
1125
  {
1126
  "epoch": 1.489795918367347,
1127
- "grad_norm": 1029.7261962890625,
1128
  "learning_rate": 1.479591836734694e-05,
1129
- "loss": 14.6794,
1130
  "step": 146
1131
  },
1132
  {
1133
  "epoch": 1.5,
1134
- "grad_norm": 321.685791015625,
1135
  "learning_rate": 1.4897959183673472e-05,
1136
- "loss": 1.5998,
1137
  "step": 147
1138
  },
1139
  {
1140
  "epoch": 1.510204081632653,
1141
- "grad_norm": 22.866291046142578,
1142
  "learning_rate": 1.5000000000000002e-05,
1143
- "loss": 0.0695,
1144
  "step": 148
1145
  },
1146
  {
1147
  "epoch": 1.5204081632653061,
1148
- "grad_norm": 188.6576690673828,
1149
  "learning_rate": 1.510204081632653e-05,
1150
- "loss": 0.9395,
1151
  "step": 149
1152
  },
1153
  {
1154
  "epoch": 1.5306122448979593,
1155
- "grad_norm": 1417.1654052734375,
1156
  "learning_rate": 1.5204081632653063e-05,
1157
- "loss": 29.9231,
1158
  "step": 150
1159
  },
1160
  {
1161
  "epoch": 1.5408163265306123,
1162
- "grad_norm": 1.7937166690826416,
1163
  "learning_rate": 1.530612244897959e-05,
1164
- "loss": 0.0107,
1165
  "step": 151
1166
  },
1167
  {
1168
  "epoch": 1.5510204081632653,
1169
- "grad_norm": 6.340953350067139,
1170
  "learning_rate": 1.5408163265306123e-05,
1171
- "loss": 0.0184,
1172
  "step": 152
1173
  },
1174
  {
1175
  "epoch": 1.5612244897959182,
1176
- "grad_norm": 11.218599319458008,
1177
  "learning_rate": 1.5510204081632655e-05,
1178
- "loss": 0.0373,
1179
  "step": 153
1180
  },
1181
  {
1182
  "epoch": 1.5714285714285714,
1183
- "grad_norm": 164.22007751464844,
1184
  "learning_rate": 1.5612244897959187e-05,
1185
- "loss": 0.7196,
1186
  "step": 154
1187
  },
1188
  {
1189
  "epoch": 1.5816326530612246,
1190
- "grad_norm": 173.1319580078125,
1191
  "learning_rate": 1.5714285714285715e-05,
1192
- "loss": 0.9456,
1193
  "step": 155
1194
  },
1195
  {
1196
  "epoch": 1.5918367346938775,
1197
- "grad_norm": 337.6502380371094,
1198
  "learning_rate": 1.5816326530612247e-05,
1199
- "loss": 0.8104,
1200
  "step": 156
1201
  },
1202
  {
1203
  "epoch": 1.6020408163265305,
1204
- "grad_norm": 429.2533264160156,
1205
  "learning_rate": 1.5918367346938776e-05,
1206
- "loss": 1.9366,
1207
  "step": 157
1208
  },
1209
  {
1210
  "epoch": 1.6122448979591837,
1211
- "grad_norm": 877.1956787109375,
1212
  "learning_rate": 1.6020408163265308e-05,
1213
- "loss": 7.7652,
1214
  "step": 158
1215
  },
1216
  {
1217
  "epoch": 1.6224489795918369,
1218
- "grad_norm": 113.66088104248047,
1219
  "learning_rate": 1.612244897959184e-05,
1220
- "loss": 0.521,
1221
  "step": 159
1222
  },
1223
  {
1224
  "epoch": 1.6326530612244898,
1225
- "grad_norm": 11.550941467285156,
1226
  "learning_rate": 1.6224489795918368e-05,
1227
- "loss": 0.0238,
1228
  "step": 160
1229
  },
1230
  {
1231
  "epoch": 1.6428571428571428,
1232
- "grad_norm": 52.35281753540039,
1233
  "learning_rate": 1.63265306122449e-05,
1234
- "loss": 0.1139,
1235
  "step": 161
1236
  },
1237
  {
1238
  "epoch": 1.6530612244897958,
1239
- "grad_norm": 137.2505645751953,
1240
  "learning_rate": 1.642857142857143e-05,
1241
- "loss": 0.5996,
1242
  "step": 162
1243
  },
1244
  {
1245
  "epoch": 1.663265306122449,
1246
- "grad_norm": 0.7507250905036926,
1247
  "learning_rate": 1.653061224489796e-05,
1248
- "loss": 0.0036,
1249
  "step": 163
1250
  },
1251
  {
1252
  "epoch": 1.6734693877551021,
1253
- "grad_norm": 27.53925323486328,
1254
  "learning_rate": 1.6632653061224492e-05,
1255
- "loss": 0.1499,
1256
  "step": 164
1257
  },
1258
  {
1259
  "epoch": 1.683673469387755,
1260
- "grad_norm": 169.983154296875,
1261
  "learning_rate": 1.673469387755102e-05,
1262
- "loss": 0.4617,
1263
  "step": 165
1264
  },
1265
  {
1266
  "epoch": 1.693877551020408,
1267
- "grad_norm": 30.274919509887695,
1268
  "learning_rate": 1.6836734693877553e-05,
1269
- "loss": 0.0959,
1270
  "step": 166
1271
  },
1272
  {
1273
  "epoch": 1.7040816326530612,
1274
- "grad_norm": 38.02618408203125,
1275
  "learning_rate": 1.6938775510204085e-05,
1276
- "loss": 0.1291,
1277
  "step": 167
1278
  },
1279
  {
1280
  "epoch": 1.7142857142857144,
1281
- "grad_norm": 0.5495009422302246,
1282
  "learning_rate": 1.7040816326530613e-05,
1283
- "loss": 0.0018,
1284
  "step": 168
1285
  },
1286
  {
1287
  "epoch": 1.7244897959183674,
1288
- "grad_norm": 1.4790436029434204,
1289
  "learning_rate": 1.7142857142857142e-05,
1290
- "loss": 0.0054,
1291
  "step": 169
1292
  },
1293
  {
1294
  "epoch": 1.7346938775510203,
1295
- "grad_norm": 1496.3338623046875,
1296
  "learning_rate": 1.7244897959183674e-05,
1297
- "loss": 31.5747,
1298
  "step": 170
1299
  },
1300
  {
1301
  "epoch": 1.7448979591836735,
1302
- "grad_norm": 88.00984191894531,
1303
  "learning_rate": 1.7346938775510206e-05,
1304
- "loss": 0.3231,
1305
  "step": 171
1306
  },
1307
  {
1308
  "epoch": 1.7551020408163265,
1309
- "grad_norm": 436.733642578125,
1310
  "learning_rate": 1.7448979591836738e-05,
1311
- "loss": 2.5639,
1312
  "step": 172
1313
  },
1314
  {
1315
  "epoch": 1.7653061224489797,
1316
- "grad_norm": 1169.6356201171875,
1317
  "learning_rate": 1.7551020408163266e-05,
1318
- "loss": 8.2977,
1319
  "step": 173
1320
  },
1321
  {
1322
  "epoch": 1.7755102040816326,
1323
- "grad_norm": 97.72107696533203,
1324
  "learning_rate": 1.7653061224489798e-05,
1325
- "loss": 0.4316,
1326
  "step": 174
1327
  },
1328
  {
1329
  "epoch": 1.7857142857142856,
1330
- "grad_norm": 12.1145601272583,
1331
  "learning_rate": 1.7755102040816327e-05,
1332
- "loss": 0.0233,
1333
  "step": 175
1334
  },
1335
  {
1336
  "epoch": 1.7959183673469388,
1337
- "grad_norm": 203.28208923339844,
1338
  "learning_rate": 1.785714285714286e-05,
1339
- "loss": 1.0278,
1340
  "step": 176
1341
  },
1342
  {
1343
  "epoch": 1.806122448979592,
1344
- "grad_norm": 0.141877681016922,
1345
  "learning_rate": 1.795918367346939e-05,
1346
- "loss": 0.0005,
1347
  "step": 177
1348
  },
1349
  {
1350
  "epoch": 1.816326530612245,
1351
- "grad_norm": 21.75454330444336,
1352
  "learning_rate": 1.806122448979592e-05,
1353
- "loss": 0.1203,
1354
  "step": 178
1355
  },
1356
  {
1357
  "epoch": 1.8265306122448979,
1358
- "grad_norm": 98.08448028564453,
1359
  "learning_rate": 1.816326530612245e-05,
1360
- "loss": 0.2268,
1361
  "step": 179
1362
  },
1363
  {
1364
  "epoch": 1.836734693877551,
1365
- "grad_norm": 279.0924377441406,
1366
  "learning_rate": 1.826530612244898e-05,
1367
- "loss": 3.2014,
1368
  "step": 180
1369
  },
1370
  {
1371
  "epoch": 1.8469387755102042,
1372
- "grad_norm": 348.4731750488281,
1373
  "learning_rate": 1.836734693877551e-05,
1374
- "loss": 3.6799,
1375
  "step": 181
1376
  },
1377
  {
1378
  "epoch": 1.8571428571428572,
1379
- "grad_norm": 77.99913024902344,
1380
  "learning_rate": 1.8469387755102043e-05,
1381
- "loss": 0.2033,
1382
  "step": 182
1383
  },
1384
  {
1385
  "epoch": 1.8673469387755102,
1386
- "grad_norm": 144.7059783935547,
1387
  "learning_rate": 1.8571428571428575e-05,
1388
- "loss": 0.6791,
1389
  "step": 183
1390
  },
1391
  {
1392
  "epoch": 1.8775510204081631,
1393
- "grad_norm": 914.4169921875,
1394
  "learning_rate": 1.8673469387755104e-05,
1395
- "loss": 3.14,
1396
  "step": 184
1397
  },
1398
  {
1399
  "epoch": 1.8877551020408163,
1400
- "grad_norm": 397.94769287109375,
1401
  "learning_rate": 1.8775510204081636e-05,
1402
- "loss": 0.9216,
1403
  "step": 185
1404
  },
1405
  {
1406
  "epoch": 1.8979591836734695,
1407
- "grad_norm": 5.423056125640869,
1408
  "learning_rate": 1.8877551020408164e-05,
1409
- "loss": 0.0194,
1410
  "step": 186
1411
  },
1412
  {
1413
  "epoch": 1.9081632653061225,
1414
- "grad_norm": 124.5344467163086,
1415
  "learning_rate": 1.8979591836734696e-05,
1416
- "loss": 0.3126,
1417
  "step": 187
1418
  },
1419
  {
1420
  "epoch": 1.9183673469387754,
1421
- "grad_norm": 136.97732543945312,
1422
  "learning_rate": 1.9081632653061225e-05,
1423
- "loss": 0.3414,
1424
  "step": 188
1425
  },
1426
  {
1427
  "epoch": 1.9285714285714286,
1428
- "grad_norm": 29.086313247680664,
1429
  "learning_rate": 1.9183673469387756e-05,
1430
- "loss": 0.0684,
1431
  "step": 189
1432
  },
1433
  {
1434
  "epoch": 1.9387755102040818,
1435
- "grad_norm": 4.842129707336426,
1436
  "learning_rate": 1.928571428571429e-05,
1437
- "loss": 0.017,
1438
  "step": 190
1439
  },
1440
  {
1441
  "epoch": 1.9489795918367347,
1442
- "grad_norm": 0.2148096263408661,
1443
  "learning_rate": 1.9387755102040817e-05,
1444
- "loss": 0.0007,
1445
  "step": 191
1446
  },
1447
  {
1448
  "epoch": 1.9591836734693877,
1449
- "grad_norm": 63.7159538269043,
1450
  "learning_rate": 1.948979591836735e-05,
1451
- "loss": 0.2351,
1452
  "step": 192
1453
  },
1454
  {
1455
  "epoch": 1.9693877551020407,
1456
- "grad_norm": 313.18817138671875,
1457
  "learning_rate": 1.9591836734693877e-05,
1458
- "loss": 1.9321,
1459
  "step": 193
1460
  },
1461
  {
1462
  "epoch": 1.9795918367346939,
1463
- "grad_norm": 1025.732177734375,
1464
  "learning_rate": 1.969387755102041e-05,
1465
- "loss": 5.5642,
1466
  "step": 194
1467
  },
1468
  {
1469
  "epoch": 1.989795918367347,
1470
- "grad_norm": 603.5271606445312,
1471
  "learning_rate": 1.979591836734694e-05,
1472
- "loss": 1.3926,
1473
  "step": 195
1474
  },
1475
  {
1476
  "epoch": 2.0,
1477
- "grad_norm": 6.151113033294678,
1478
  "learning_rate": 1.9897959183673473e-05,
1479
- "loss": 0.015,
1480
  "step": 196
1481
  },
1482
  {
1483
  "epoch": 2.0,
1484
- "eval_dim_1024_cosine_accuracy@1": 0.3591549295774648,
1485
- "eval_dim_1024_cosine_accuracy@10": 0.42445582586427655,
1486
- "eval_dim_1024_cosine_accuracy@3": 0.3649167733674776,
1487
- "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
1488
- "eval_dim_1024_cosine_map@100": 0.4442656203645836,
1489
- "eval_dim_1024_cosine_mrr@10": 0.37061688311688284,
1490
- "eval_dim_1024_cosine_ndcg@10": 0.38277564232489586,
1491
- "eval_dim_1024_cosine_precision@1": 0.3591549295774648,
1492
- "eval_dim_1024_cosine_precision@10": 0.31914212548015364,
1493
- "eval_dim_1024_cosine_precision@3": 0.35830132309005547,
1494
- "eval_dim_1024_cosine_precision@5": 0.3495518565941101,
1495
- "eval_dim_1024_cosine_recall@1": 0.040925664980934,
1496
- "eval_dim_1024_cosine_recall@10": 0.2663878590041381,
1497
- "eval_dim_1024_cosine_recall@3": 0.11912044022990924,
1498
- "eval_dim_1024_cosine_recall@5": 0.17907504115268522,
1499
- "eval_dim_128_cosine_accuracy@1": 0.3060179257362356,
1500
- "eval_dim_128_cosine_accuracy@10": 0.3687580025608195,
1501
- "eval_dim_128_cosine_accuracy@3": 0.31049935979513443,
1502
- "eval_dim_128_cosine_accuracy@5": 0.3322663252240717,
1503
- "eval_dim_128_cosine_map@100": 0.38523839229741214,
1504
- "eval_dim_128_cosine_mrr@10": 0.3164926427250368,
1505
- "eval_dim_128_cosine_ndcg@10": 0.3273120368593549,
1506
- "eval_dim_128_cosine_precision@1": 0.3060179257362356,
1507
- "eval_dim_128_cosine_precision@10": 0.27099871959026883,
1508
- "eval_dim_128_cosine_precision@3": 0.3053777208706786,
1509
- "eval_dim_128_cosine_precision@5": 0.29679897567221514,
1510
- "eval_dim_128_cosine_recall@1": 0.035453426807775947,
1511
- "eval_dim_128_cosine_recall@10": 0.23154232181544257,
1512
- "eval_dim_128_cosine_recall@3": 0.10361683841932247,
1513
- "eval_dim_128_cosine_recall@5": 0.15549008066651587,
1514
- "eval_dim_256_cosine_accuracy@1": 0.3290653008962868,
1515
- "eval_dim_256_cosine_accuracy@10": 0.3969270166453265,
1516
- "eval_dim_256_cosine_accuracy@3": 0.33354673495518566,
1517
- "eval_dim_256_cosine_accuracy@5": 0.36235595390524966,
1518
- "eval_dim_256_cosine_map@100": 0.4130430492367209,
1519
- "eval_dim_256_cosine_mrr@10": 0.3407241428368185,
1520
- "eval_dim_256_cosine_ndcg@10": 0.3532462927680357,
1521
- "eval_dim_256_cosine_precision@1": 0.3290653008962868,
1522
- "eval_dim_256_cosine_precision@10": 0.29423815620998717,
1523
- "eval_dim_256_cosine_precision@3": 0.3282116944088775,
1524
- "eval_dim_256_cosine_precision@5": 0.3209987195902689,
1525
- "eval_dim_256_cosine_recall@1": 0.03735817465789262,
1526
- "eval_dim_256_cosine_recall@10": 0.24868649598286982,
1527
- "eval_dim_256_cosine_recall@3": 0.10921822534273136,
1528
- "eval_dim_256_cosine_recall@5": 0.165931214027115,
1529
- "eval_dim_512_cosine_accuracy@1": 0.3578745198463508,
1530
- "eval_dim_512_cosine_accuracy@10": 0.42765685019206146,
1531
- "eval_dim_512_cosine_accuracy@3": 0.36299615877080665,
1532
- "eval_dim_512_cosine_accuracy@5": 0.3892445582586428,
1533
- "eval_dim_512_cosine_map@100": 0.44173422420832625,
1534
- "eval_dim_512_cosine_mrr@10": 0.36958849053919446,
1535
- "eval_dim_512_cosine_ndcg@10": 0.3812404238586874,
1536
- "eval_dim_512_cosine_precision@1": 0.3578745198463508,
1537
- "eval_dim_512_cosine_precision@10": 0.3177336747759283,
1538
- "eval_dim_512_cosine_precision@3": 0.35680751173708913,
1539
- "eval_dim_512_cosine_precision@5": 0.34724711907810496,
1540
- "eval_dim_512_cosine_recall@1": 0.04087028201741599,
1541
- "eval_dim_512_cosine_recall@10": 0.2641908975177659,
1542
- "eval_dim_512_cosine_recall@3": 0.11887426573116062,
1543
- "eval_dim_512_cosine_recall@5": 0.17823551058609727,
1544
- "eval_dim_64_cosine_accuracy@1": 0.26504481434058896,
1545
- "eval_dim_64_cosine_accuracy@10": 0.3322663252240717,
1546
  "eval_dim_64_cosine_accuracy@3": 0.26952624839948786,
1547
- "eval_dim_64_cosine_accuracy@5": 0.29257362355953903,
1548
- "eval_dim_64_cosine_map@100": 0.3471195034779171,
1549
- "eval_dim_64_cosine_mrr@10": 0.27606167306871515,
1550
- "eval_dim_64_cosine_ndcg@10": 0.28738876488409937,
1551
- "eval_dim_64_cosine_precision@1": 0.26504481434058896,
1552
- "eval_dim_64_cosine_precision@10": 0.2385403329065301,
1553
- "eval_dim_64_cosine_precision@3": 0.2646180110968843,
1554
- "eval_dim_64_cosine_precision@5": 0.25813060179257363,
1555
- "eval_dim_64_cosine_recall@1": 0.030859353047043094,
1556
- "eval_dim_64_cosine_recall@10": 0.20564514911963047,
1557
- "eval_dim_64_cosine_recall@3": 0.09025134722734952,
1558
- "eval_dim_64_cosine_recall@5": 0.13616521781869687,
1559
- "eval_dim_768_cosine_accuracy@1": 0.3546734955185659,
1560
- "eval_dim_768_cosine_accuracy@10": 0.4186939820742638,
1561
- "eval_dim_768_cosine_accuracy@3": 0.36107554417413573,
1562
- "eval_dim_768_cosine_accuracy@5": 0.3860435339308579,
1563
- "eval_dim_768_cosine_map@100": 0.4380682423691407,
1564
- "eval_dim_768_cosine_mrr@10": 0.36586844298111887,
1565
- "eval_dim_768_cosine_ndcg@10": 0.3776871349900537,
1566
- "eval_dim_768_cosine_precision@1": 0.3546734955185659,
1567
- "eval_dim_768_cosine_precision@10": 0.3146606914212548,
1568
- "eval_dim_768_cosine_precision@3": 0.354033290653009,
1569
- "eval_dim_768_cosine_precision@5": 0.34494238156209983,
1570
- "eval_dim_768_cosine_recall@1": 0.04034211902742892,
1571
- "eval_dim_768_cosine_recall@10": 0.2626701106236,
1572
- "eval_dim_768_cosine_recall@3": 0.11739647757212558,
1573
- "eval_dim_768_cosine_recall@5": 0.17629643014791566,
1574
- "eval_runtime": 98.6366,
1575
  "eval_samples_per_second": 0.0,
1576
- "eval_sequential_score": 0.28738876488409937,
1577
  "eval_steps_per_second": 0.0,
1578
  "step": 196
1579
  }
 
1
  {
2
  "best_global_step": 98,
3
+ "best_metric": 0.323940756796795,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
+ "grad_norm": 1059.8211669921875,
15
  "learning_rate": 0.0,
16
+ "loss": 9.6954,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
+ "grad_norm": 890.8715209960938,
22
  "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 11.5048,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
+ "grad_norm": 264.5753173828125,
29
  "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 2.1575,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
+ "grad_norm": 501.6875,
36
  "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 2.6843,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
+ "grad_norm": 6.634378910064697,
43
  "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 0.0364,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
+ "grad_norm": 206.64352416992188,
50
  "learning_rate": 5.102040816326531e-07,
51
+ "loss": 0.705,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
+ "grad_norm": 383.0555114746094,
57
  "learning_rate": 6.122448979591837e-07,
58
+ "loss": 1.9957,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
+ "grad_norm": 212.91329956054688,
64
  "learning_rate": 7.142857142857143e-07,
65
+ "loss": 0.9938,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
+ "grad_norm": 49.36940383911133,
71
  "learning_rate": 8.163265306122449e-07,
72
+ "loss": 0.3187,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
+ "grad_norm": 51.765472412109375,
78
  "learning_rate": 9.183673469387756e-07,
79
+ "loss": 0.1435,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
+ "grad_norm": 13.23577880859375,
85
  "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 0.0818,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
+ "grad_norm": 240.9364776611328,
92
  "learning_rate": 1.122448979591837e-06,
93
+ "loss": 0.6535,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
+ "grad_norm": 117.47791290283203,
99
  "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 0.3915,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
+ "grad_norm": 202.8033447265625,
106
  "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 0.5493,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
+ "grad_norm": 111.81350708007812,
113
  "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 0.7231,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
+ "grad_norm": 10.884031295776367,
120
  "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 0.0715,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
+ "grad_norm": 862.8134155273438,
127
  "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 5.8663,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
+ "grad_norm": 54.59718322753906,
134
  "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 0.2586,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
+ "grad_norm": 140.5866241455078,
141
  "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 0.9353,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
+ "grad_norm": 479.35052490234375,
148
  "learning_rate": 1.938775510204082e-06,
149
+ "loss": 2.5843,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
+ "grad_norm": 388.5758361816406,
155
  "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 2.0583,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
+ "grad_norm": 619.6573486328125,
162
  "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 6.9121,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
+ "grad_norm": 120.95680236816406,
169
  "learning_rate": 2.244897959183674e-06,
170
+ "loss": 1.0921,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
+ "grad_norm": 861.7152709960938,
176
  "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 5.4863,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
+ "grad_norm": 7.463388919830322,
183
  "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 0.0549,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
+ "grad_norm": 357.8828125,
190
  "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 2.345,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
+ "grad_norm": 390.43023681640625,
197
  "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 4.264,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
+ "grad_norm": 382.8008728027344,
204
  "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 2.4847,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
+ "grad_norm": 159.14295959472656,
211
  "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 0.7634,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
+ "grad_norm": 361.00201416015625,
218
  "learning_rate": 2.959183673469388e-06,
219
+ "loss": 2.047,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
+ "grad_norm": 81.90242004394531,
225
  "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 0.694,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
+ "grad_norm": 294.4106750488281,
232
  "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 0.7417,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
+ "grad_norm": 322.22308349609375,
239
  "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 1.9942,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
+ "grad_norm": 449.238525390625,
246
  "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 2.8978,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
+ "grad_norm": 1.8455325365066528,
253
  "learning_rate": 3.469387755102041e-06,
254
+ "loss": 0.0126,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
+ "grad_norm": 310.52740478515625,
260
  "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.9776,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
+ "grad_norm": 237.73545837402344,
267
  "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 1.5667,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
+ "grad_norm": 817.3215942382812,
274
  "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 5.5693,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
+ "grad_norm": 364.91326904296875,
281
  "learning_rate": 3.877551020408164e-06,
282
+ "loss": 1.6802,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
+ "grad_norm": 34.629112243652344,
288
  "learning_rate": 3.979591836734694e-06,
289
+ "loss": 0.2144,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
+ "grad_norm": 43.43345260620117,
295
  "learning_rate": 4.081632653061225e-06,
296
+ "loss": 0.1797,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
+ "grad_norm": 544.13134765625,
302
  "learning_rate": 4.183673469387755e-06,
303
+ "loss": 5.7559,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
+ "grad_norm": 357.5466003417969,
309
  "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 2.6372,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
+ "grad_norm": 458.7740783691406,
316
  "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 1.8447,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
+ "grad_norm": 668.6949462890625,
323
  "learning_rate": 4.489795918367348e-06,
324
+ "loss": 2.8156,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
+ "grad_norm": 382.7512512207031,
330
  "learning_rate": 4.591836734693878e-06,
331
+ "loss": 3.1588,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
+ "grad_norm": 9.869824409484863,
337
  "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.0552,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
+ "grad_norm": 448.4170837402344,
344
  "learning_rate": 4.795918367346939e-06,
345
+ "loss": 3.3053,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
+ "grad_norm": 471.82855224609375,
351
  "learning_rate": 4.897959183673469e-06,
352
+ "loss": 2.8332,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
+ "grad_norm": 210.9025115966797,
358
  "learning_rate": 5e-06,
359
+ "loss": 1.1961,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
+ "grad_norm": 164.64920043945312,
365
  "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 1.0106,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
+ "grad_norm": 386.0244140625,
372
  "learning_rate": 5.204081632653062e-06,
373
+ "loss": 2.4593,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
+ "grad_norm": 419.1893310546875,
379
  "learning_rate": 5.306122448979593e-06,
380
+ "loss": 3.4849,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
+ "grad_norm": 10.212640762329102,
386
  "learning_rate": 5.408163265306123e-06,
387
+ "loss": 0.0338,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
+ "grad_norm": 419.79815673828125,
393
  "learning_rate": 5.510204081632653e-06,
394
+ "loss": 1.5319,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
+ "grad_norm": 6.56746768951416,
400
  "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.0419,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
+ "grad_norm": 15.671188354492188,
407
  "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 0.1098,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
+ "grad_norm": 8.410639762878418,
414
  "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.0457,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
+ "grad_norm": 6.06464147567749,
421
  "learning_rate": 5.918367346938776e-06,
422
+ "loss": 0.0273,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
+ "grad_norm": 293.1927185058594,
428
  "learning_rate": 6.020408163265307e-06,
429
+ "loss": 1.2946,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
+ "grad_norm": 189.53306579589844,
435
  "learning_rate": 6.122448979591837e-06,
436
+ "loss": 3.4121,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
+ "grad_norm": 455.4539489746094,
442
  "learning_rate": 6.224489795918368e-06,
443
+ "loss": 2.6015,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
+ "grad_norm": 351.75830078125,
449
  "learning_rate": 6.326530612244899e-06,
450
+ "loss": 2.0358,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
+ "grad_norm": 1400.6083984375,
456
  "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 7.3114,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
+ "grad_norm": 1247.4736328125,
463
  "learning_rate": 6.530612244897959e-06,
464
+ "loss": 6.8888,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
+ "grad_norm": 296.157470703125,
470
  "learning_rate": 6.63265306122449e-06,
471
+ "loss": 1.6606,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
+ "grad_norm": 461.7646484375,
477
  "learning_rate": 6.734693877551021e-06,
478
+ "loss": 5.2343,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
+ "grad_norm": 443.15264892578125,
484
  "learning_rate": 6.836734693877551e-06,
485
+ "loss": 2.1977,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
+ "grad_norm": 31.333446502685547,
491
  "learning_rate": 6.938775510204082e-06,
492
+ "loss": 0.1702,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
+ "grad_norm": 603.3770751953125,
498
  "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 3.5715,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
+ "grad_norm": 190.58395385742188,
505
  "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 1.4736,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
+ "grad_norm": 211.7954559326172,
512
  "learning_rate": 7.244897959183675e-06,
513
+ "loss": 1.0967,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
+ "grad_norm": 288.28448486328125,
519
  "learning_rate": 7.346938775510205e-06,
520
+ "loss": 1.2098,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
+ "grad_norm": 535.2803344726562,
526
  "learning_rate": 7.448979591836736e-06,
527
+ "loss": 1.9541,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
+ "grad_norm": 1270.0836181640625,
533
  "learning_rate": 7.551020408163265e-06,
534
+ "loss": 4.0992,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
+ "grad_norm": 2.131913661956787,
540
  "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.0145,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
+ "grad_norm": 2.6484782695770264,
547
  "learning_rate": 7.755102040816327e-06,
548
+ "loss": 0.0079,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
+ "grad_norm": 18.671253204345703,
554
  "learning_rate": 7.857142857142858e-06,
555
+ "loss": 0.1081,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
+ "grad_norm": 282.3451843261719,
561
  "learning_rate": 7.959183673469388e-06,
562
+ "loss": 1.7446,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
+ "grad_norm": 303.16900634765625,
568
  "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.6343,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
+ "grad_norm": 899.4592895507812,
575
  "learning_rate": 8.16326530612245e-06,
576
+ "loss": 4.7374,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
+ "grad_norm": 600.3280639648438,
582
  "learning_rate": 8.26530612244898e-06,
583
+ "loss": 3.1082,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
+ "grad_norm": 3.1936967372894287,
589
  "learning_rate": 8.36734693877551e-06,
590
+ "loss": 0.0144,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
+ "grad_norm": 1.3846139907836914,
596
  "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.0057,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
+ "grad_norm": 197.3724822998047,
603
  "learning_rate": 8.571428571428571e-06,
604
+ "loss": 0.7656,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
+ "grad_norm": 545.4349365234375,
610
  "learning_rate": 8.673469387755103e-06,
611
+ "loss": 1.5191,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
+ "grad_norm": 121.33210754394531,
617
  "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.1942,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
+ "grad_norm": 48.77962112426758,
624
  "learning_rate": 8.877551020408163e-06,
625
+ "loss": 0.2429,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
+ "grad_norm": 664.6809692382812,
631
  "learning_rate": 8.979591836734695e-06,
632
+ "loss": 7.0608,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
+ "grad_norm": 67.93673706054688,
638
  "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.1635,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
+ "grad_norm": 14.213589668273926,
645
  "learning_rate": 9.183673469387756e-06,
646
+ "loss": 0.057,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
+ "grad_norm": 550.2100219726562,
652
  "learning_rate": 9.285714285714288e-06,
653
+ "loss": 3.1796,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
+ "grad_norm": 516.64794921875,
659
  "learning_rate": 9.387755102040818e-06,
660
+ "loss": 2.4068,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
+ "grad_norm": 227.85704040527344,
666
  "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.9694,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
+ "grad_norm": 103.72978973388672,
673
  "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.4878,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
+ "grad_norm": 113.07623291015625,
680
  "learning_rate": 9.693877551020408e-06,
681
+ "loss": 0.4105,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
+ "grad_norm": 1049.9190673828125,
687
  "learning_rate": 9.795918367346939e-06,
688
+ "loss": 4.5006,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
+ "grad_norm": 419.75555419921875,
694
  "learning_rate": 9.89795918367347e-06,
695
+ "loss": 2.2675,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.34571062740076824,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.4231754161331626,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.352112676056338,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.3854033290653009,
704
+ "eval_dim_1024_cosine_map@100": 0.4413040417287197,
705
+ "eval_dim_1024_cosine_mrr@10": 0.3591188545413894,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.37218117046458954,
707
+ "eval_dim_1024_cosine_precision@1": 0.34571062740076824,
708
+ "eval_dim_1024_cosine_precision@10": 0.3060179257362356,
709
+ "eval_dim_1024_cosine_precision@3": 0.34571062740076824,
710
+ "eval_dim_1024_cosine_precision@5": 0.33751600512163893,
711
+ "eval_dim_1024_cosine_recall@1": 0.04202665510348477,
712
+ "eval_dim_1024_cosine_recall@10": 0.2720738817855689,
713
+ "eval_dim_1024_cosine_recall@3": 0.12344281372964075,
714
+ "eval_dim_1024_cosine_recall@5": 0.1862277356935127,
715
+ "eval_dim_128_cosine_accuracy@1": 0.30089628681177977,
716
+ "eval_dim_128_cosine_accuracy@10": 0.3687580025608195,
717
+ "eval_dim_128_cosine_accuracy@3": 0.3060179257362356,
718
+ "eval_dim_128_cosine_accuracy@5": 0.33354673495518566,
719
+ "eval_dim_128_cosine_map@100": 0.383805456031232,
720
+ "eval_dim_128_cosine_mrr@10": 0.31238618580167843,
721
+ "eval_dim_128_cosine_ndcg@10": 0.323940756796795,
722
+ "eval_dim_128_cosine_precision@1": 0.30089628681177977,
723
+ "eval_dim_128_cosine_precision@10": 0.2669014084507042,
724
+ "eval_dim_128_cosine_precision@3": 0.3006828851899274,
725
+ "eval_dim_128_cosine_precision@5": 0.29334186939820744,
726
+ "eval_dim_128_cosine_recall@1": 0.03621488699964182,
727
+ "eval_dim_128_cosine_recall@10": 0.23934767939840923,
728
+ "eval_dim_128_cosine_recall@3": 0.10619628777590438,
729
+ "eval_dim_128_cosine_recall@5": 0.16065683687574547,
730
+ "eval_dim_256_cosine_accuracy@1": 0.3181818181818182,
731
+ "eval_dim_256_cosine_accuracy@10": 0.39244558258642764,
732
+ "eval_dim_256_cosine_accuracy@3": 0.323303457106274,
733
+ "eval_dim_256_cosine_accuracy@5": 0.35723431498079383,
734
+ "eval_dim_256_cosine_map@100": 0.4066610643364293,
735
+ "eval_dim_256_cosine_mrr@10": 0.3309775318578131,
736
+ "eval_dim_256_cosine_ndcg@10": 0.34364332074782783,
737
+ "eval_dim_256_cosine_precision@1": 0.3181818181818182,
738
+ "eval_dim_256_cosine_precision@10": 0.28348271446862994,
739
+ "eval_dim_256_cosine_precision@3": 0.3175416133162612,
740
+ "eval_dim_256_cosine_precision@5": 0.31024327784891165,
741
+ "eval_dim_256_cosine_recall@1": 0.03856083314138909,
742
+ "eval_dim_256_cosine_recall@10": 0.2532060107296034,
743
+ "eval_dim_256_cosine_recall@3": 0.11327276127499253,
744
+ "eval_dim_256_cosine_recall@5": 0.17121955970972744,
745
+ "eval_dim_512_cosine_accuracy@1": 0.33674775928297057,
746
+ "eval_dim_512_cosine_accuracy@10": 0.41613316261203587,
747
+ "eval_dim_512_cosine_accuracy@3": 0.34314980793854033,
748
+ "eval_dim_512_cosine_accuracy@5": 0.37708066581306016,
749
+ "eval_dim_512_cosine_map@100": 0.4299582620106213,
750
+ "eval_dim_512_cosine_mrr@10": 0.3503564315184028,
751
+ "eval_dim_512_cosine_ndcg@10": 0.362727691265461,
752
+ "eval_dim_512_cosine_precision@1": 0.33674775928297057,
753
+ "eval_dim_512_cosine_precision@10": 0.2976312419974392,
754
+ "eval_dim_512_cosine_precision@3": 0.3363209560392659,
755
+ "eval_dim_512_cosine_precision@5": 0.32816901408450705,
756
+ "eval_dim_512_cosine_recall@1": 0.04109877030791802,
757
+ "eval_dim_512_cosine_recall@10": 0.26524116778193035,
758
+ "eval_dim_512_cosine_recall@3": 0.12056066628670095,
759
+ "eval_dim_512_cosine_recall@5": 0.18158399214837667,
760
+ "eval_dim_64_cosine_accuracy@1": 0.24647887323943662,
761
+ "eval_dim_64_cosine_accuracy@10": 0.31562099871959026,
762
+ "eval_dim_64_cosine_accuracy@3": 0.25096030729833546,
763
+ "eval_dim_64_cosine_accuracy@5": 0.27784891165172854,
764
+ "eval_dim_64_cosine_map@100": 0.32717183255723853,
765
+ "eval_dim_64_cosine_mrr@10": 0.25797791801312897,
766
+ "eval_dim_64_cosine_ndcg@10": 0.2694272980700995,
767
+ "eval_dim_64_cosine_precision@1": 0.24647887323943662,
768
+ "eval_dim_64_cosine_precision@10": 0.22029449423815622,
769
+ "eval_dim_64_cosine_precision@3": 0.24647887323943662,
770
+ "eval_dim_64_cosine_precision@5": 0.24033290653008965,
771
+ "eval_dim_64_cosine_recall@1": 0.030698008105366027,
772
+ "eval_dim_64_cosine_recall@10": 0.20675794932386124,
773
+ "eval_dim_64_cosine_recall@3": 0.0905145081182266,
774
+ "eval_dim_64_cosine_recall@5": 0.13744818119581018,
775
+ "eval_dim_768_cosine_accuracy@1": 0.3412291933418694,
776
+ "eval_dim_768_cosine_accuracy@10": 0.4174135723431498,
777
+ "eval_dim_768_cosine_accuracy@3": 0.34763124199743917,
778
+ "eval_dim_768_cosine_accuracy@5": 0.37964148527528807,
779
+ "eval_dim_768_cosine_map@100": 0.4350972821264766,
780
+ "eval_dim_768_cosine_mrr@10": 0.35454393024815517,
781
+ "eval_dim_768_cosine_ndcg@10": 0.36777711697459586,
782
+ "eval_dim_768_cosine_precision@1": 0.3412291933418694,
783
+ "eval_dim_768_cosine_precision@10": 0.3030089628681178,
784
+ "eval_dim_768_cosine_precision@3": 0.3414425949637217,
785
+ "eval_dim_768_cosine_precision@5": 0.33341869398207424,
786
+ "eval_dim_768_cosine_recall@1": 0.041218661006119914,
787
+ "eval_dim_768_cosine_recall@10": 0.2695735755366756,
788
+ "eval_dim_768_cosine_recall@3": 0.12127491338376899,
789
+ "eval_dim_768_cosine_recall@5": 0.18314999106768198,
790
+ "eval_runtime": 98.9256,
791
  "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.2694272980700995,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  },
796
  {
797
  "epoch": 1.010204081632653,
798
+ "grad_norm": 164.98011779785156,
799
  "learning_rate": 1e-05,
800
+ "loss": 0.9602,
801
  "step": 99
802
  },
803
  {
804
  "epoch": 1.0204081632653061,
805
+ "grad_norm": 928.5584106445312,
806
  "learning_rate": 1.0102040816326531e-05,
807
+ "loss": 5.0193,
808
  "step": 100
809
  },
810
  {
811
  "epoch": 1.030612244897959,
812
+ "grad_norm": 144.56446838378906,
813
  "learning_rate": 1.0204081632653063e-05,
814
+ "loss": 1.1252,
815
  "step": 101
816
  },
817
  {
818
  "epoch": 1.0408163265306123,
819
+ "grad_norm": 185.31761169433594,
820
  "learning_rate": 1.0306122448979591e-05,
821
+ "loss": 0.7896,
822
  "step": 102
823
  },
824
  {
825
  "epoch": 1.0510204081632653,
826
+ "grad_norm": 331.85076904296875,
827
  "learning_rate": 1.0408163265306123e-05,
828
+ "loss": 1.2793,
829
  "step": 103
830
  },
831
  {
832
  "epoch": 1.0612244897959184,
833
+ "grad_norm": 70.96832275390625,
834
  "learning_rate": 1.0510204081632654e-05,
835
+ "loss": 0.3422,
836
  "step": 104
837
  },
838
  {
839
  "epoch": 1.0714285714285714,
840
+ "grad_norm": 5.096808910369873,
841
  "learning_rate": 1.0612244897959186e-05,
842
+ "loss": 0.0204,
843
  "step": 105
844
  },
845
  {
846
  "epoch": 1.0816326530612246,
847
+ "grad_norm": 5.112201690673828,
848
  "learning_rate": 1.0714285714285714e-05,
849
+ "loss": 0.018,
850
  "step": 106
851
  },
852
  {
853
  "epoch": 1.0918367346938775,
854
+ "grad_norm": 1.532448410987854,
855
  "learning_rate": 1.0816326530612246e-05,
856
+ "loss": 0.0082,
857
  "step": 107
858
  },
859
  {
860
  "epoch": 1.1020408163265305,
861
+ "grad_norm": 784.9140625,
862
  "learning_rate": 1.0918367346938776e-05,
863
+ "loss": 6.0895,
864
  "step": 108
865
  },
866
  {
867
  "epoch": 1.1122448979591837,
868
+ "grad_norm": 3.4372496604919434,
869
  "learning_rate": 1.1020408163265306e-05,
870
+ "loss": 0.0115,
871
  "step": 109
872
  },
873
  {
874
  "epoch": 1.1224489795918366,
875
+ "grad_norm": 45.12334442138672,
876
  "learning_rate": 1.1122448979591838e-05,
877
+ "loss": 0.2657,
878
  "step": 110
879
  },
880
  {
881
  "epoch": 1.1326530612244898,
882
+ "grad_norm": 3.1384634971618652,
883
  "learning_rate": 1.1224489795918367e-05,
884
+ "loss": 0.0232,
885
  "step": 111
886
  },
887
  {
888
  "epoch": 1.1428571428571428,
889
+ "grad_norm": 225.58758544921875,
890
  "learning_rate": 1.1326530612244899e-05,
891
+ "loss": 1.4261,
892
  "step": 112
893
  },
894
  {
895
  "epoch": 1.153061224489796,
896
+ "grad_norm": 858.461181640625,
897
  "learning_rate": 1.1428571428571429e-05,
898
+ "loss": 5.6396,
899
  "step": 113
900
  },
901
  {
902
  "epoch": 1.163265306122449,
903
+ "grad_norm": 57.779808044433594,
904
  "learning_rate": 1.1530612244897961e-05,
905
+ "loss": 0.2395,
906
  "step": 114
907
  },
908
  {
909
  "epoch": 1.1734693877551021,
910
+ "grad_norm": 0.24931341409683228,
911
  "learning_rate": 1.1632653061224491e-05,
912
+ "loss": 0.001,
913
  "step": 115
914
  },
915
  {
916
  "epoch": 1.183673469387755,
917
+ "grad_norm": 307.6712646484375,
918
  "learning_rate": 1.1734693877551021e-05,
919
+ "loss": 1.053,
920
  "step": 116
921
  },
922
  {
923
  "epoch": 1.193877551020408,
924
+ "grad_norm": 7.2576212882995605,
925
  "learning_rate": 1.1836734693877552e-05,
926
+ "loss": 0.0335,
927
  "step": 117
928
  },
929
  {
930
  "epoch": 1.2040816326530612,
931
+ "grad_norm": 367.11077880859375,
932
  "learning_rate": 1.1938775510204084e-05,
933
+ "loss": 1.9711,
934
  "step": 118
935
  },
936
  {
937
  "epoch": 1.2142857142857142,
938
+ "grad_norm": 433.3495178222656,
939
  "learning_rate": 1.2040816326530614e-05,
940
+ "loss": 1.7967,
941
  "step": 119
942
  },
943
  {
944
  "epoch": 1.2244897959183674,
945
+ "grad_norm": 1.5050230026245117,
946
  "learning_rate": 1.2142857142857142e-05,
947
+ "loss": 0.0046,
948
  "step": 120
949
  },
950
  {
951
  "epoch": 1.2346938775510203,
952
+ "grad_norm": 0.04484110698103905,
953
  "learning_rate": 1.2244897959183674e-05,
954
+ "loss": 0.0002,
955
  "step": 121
956
  },
957
  {
958
  "epoch": 1.2448979591836735,
959
+ "grad_norm": 43.718109130859375,
960
  "learning_rate": 1.2346938775510204e-05,
961
+ "loss": 0.0585,
962
  "step": 122
963
  },
964
  {
965
  "epoch": 1.2551020408163265,
966
+ "grad_norm": 109.87353515625,
967
  "learning_rate": 1.2448979591836736e-05,
968
+ "loss": 0.3547,
969
  "step": 123
970
  },
971
  {
972
  "epoch": 1.2653061224489797,
973
+ "grad_norm": 952.017578125,
974
  "learning_rate": 1.2551020408163267e-05,
975
+ "loss": 6.193,
976
  "step": 124
977
  },
978
  {
979
  "epoch": 1.2755102040816326,
980
+ "grad_norm": 2.541386365890503,
981
  "learning_rate": 1.2653061224489798e-05,
982
+ "loss": 0.0073,
983
  "step": 125
984
  },
985
  {
986
  "epoch": 1.2857142857142856,
987
+ "grad_norm": 116.8653793334961,
988
  "learning_rate": 1.2755102040816327e-05,
989
+ "loss": 0.3095,
990
  "step": 126
991
  },
992
  {
993
  "epoch": 1.2959183673469388,
994
+ "grad_norm": 1.1585338115692139,
995
  "learning_rate": 1.2857142857142859e-05,
996
+ "loss": 0.0026,
997
  "step": 127
998
  },
999
  {
1000
  "epoch": 1.306122448979592,
1001
+ "grad_norm": 2.4128425121307373,
1002
  "learning_rate": 1.2959183673469389e-05,
1003
+ "loss": 0.0065,
1004
  "step": 128
1005
  },
1006
  {
1007
  "epoch": 1.316326530612245,
1008
+ "grad_norm": 11.24067497253418,
1009
  "learning_rate": 1.3061224489795918e-05,
1010
+ "loss": 0.0326,
1011
  "step": 129
1012
  },
1013
  {
1014
  "epoch": 1.3265306122448979,
1015
+ "grad_norm": 3.200199842453003,
1016
  "learning_rate": 1.316326530612245e-05,
1017
+ "loss": 0.0121,
1018
  "step": 130
1019
  },
1020
  {
1021
  "epoch": 1.336734693877551,
1022
+ "grad_norm": 314.07379150390625,
1023
  "learning_rate": 1.326530612244898e-05,
1024
+ "loss": 2.081,
1025
  "step": 131
1026
  },
1027
  {
1028
  "epoch": 1.346938775510204,
1029
+ "grad_norm": 11.617761611938477,
1030
  "learning_rate": 1.3367346938775512e-05,
1031
+ "loss": 0.0329,
1032
  "step": 132
1033
  },
1034
  {
1035
  "epoch": 1.3571428571428572,
1036
+ "grad_norm": 579.8010864257812,
1037
  "learning_rate": 1.3469387755102042e-05,
1038
+ "loss": 4.8144,
1039
  "step": 133
1040
  },
1041
  {
1042
  "epoch": 1.3673469387755102,
1043
+ "grad_norm": 502.57574462890625,
1044
  "learning_rate": 1.3571428571428574e-05,
1045
+ "loss": 1.8287,
1046
  "step": 134
1047
  },
1048
  {
1049
  "epoch": 1.3775510204081631,
1050
+ "grad_norm": 0.3074304163455963,
1051
  "learning_rate": 1.3673469387755102e-05,
1052
+ "loss": 0.0016,
1053
  "step": 135
1054
  },
1055
  {
1056
  "epoch": 1.3877551020408163,
1057
+ "grad_norm": 674.5262451171875,
1058
  "learning_rate": 1.3775510204081634e-05,
1059
+ "loss": 2.7057,
1060
  "step": 136
1061
  },
1062
  {
1063
  "epoch": 1.3979591836734695,
1064
+ "grad_norm": 2.085172176361084,
1065
  "learning_rate": 1.3877551020408165e-05,
1066
+ "loss": 0.0087,
1067
  "step": 137
1068
  },
1069
  {
1070
  "epoch": 1.4081632653061225,
1071
+ "grad_norm": 146.2124786376953,
1072
  "learning_rate": 1.3979591836734696e-05,
1073
+ "loss": 0.7368,
1074
  "step": 138
1075
  },
1076
  {
1077
  "epoch": 1.4183673469387754,
1078
+ "grad_norm": 39.309967041015625,
1079
  "learning_rate": 1.4081632653061225e-05,
1080
+ "loss": 0.1354,
1081
  "step": 139
1082
  },
1083
  {
1084
  "epoch": 1.4285714285714286,
1085
+ "grad_norm": 18.70266342163086,
1086
  "learning_rate": 1.4183673469387755e-05,
1087
+ "loss": 0.0446,
1088
  "step": 140
1089
  },
1090
  {
1091
  "epoch": 1.4387755102040816,
1092
+ "grad_norm": 60.91520690917969,
1093
  "learning_rate": 1.4285714285714287e-05,
1094
+ "loss": 0.2849,
1095
  "step": 141
1096
  },
1097
  {
1098
  "epoch": 1.4489795918367347,
1099
+ "grad_norm": 560.072509765625,
1100
  "learning_rate": 1.4387755102040817e-05,
1101
+ "loss": 6.2924,
1102
  "step": 142
1103
  },
1104
  {
1105
  "epoch": 1.4591836734693877,
1106
+ "grad_norm": 161.52471923828125,
1107
  "learning_rate": 1.448979591836735e-05,
1108
+ "loss": 0.4827,
1109
  "step": 143
1110
  },
1111
  {
1112
  "epoch": 1.469387755102041,
1113
+ "grad_norm": 870.7469482421875,
1114
  "learning_rate": 1.4591836734693878e-05,
1115
+ "loss": 7.8315,
1116
  "step": 144
1117
  },
1118
  {
1119
  "epoch": 1.4795918367346939,
1120
+ "grad_norm": 716.3013305664062,
1121
  "learning_rate": 1.469387755102041e-05,
1122
+ "loss": 6.0618,
1123
  "step": 145
1124
  },
1125
  {
1126
  "epoch": 1.489795918367347,
1127
+ "grad_norm": 242.5034637451172,
1128
  "learning_rate": 1.479591836734694e-05,
1129
+ "loss": 1.0472,
1130
  "step": 146
1131
  },
1132
  {
1133
  "epoch": 1.5,
1134
+ "grad_norm": 0.19287815690040588,
1135
  "learning_rate": 1.4897959183673472e-05,
1136
+ "loss": 0.0007,
1137
  "step": 147
1138
  },
1139
  {
1140
  "epoch": 1.510204081632653,
1141
+ "grad_norm": 14.570304870605469,
1142
  "learning_rate": 1.5000000000000002e-05,
1143
+ "loss": 0.0433,
1144
  "step": 148
1145
  },
1146
  {
1147
  "epoch": 1.5204081632653061,
1148
+ "grad_norm": 183.6922607421875,
1149
  "learning_rate": 1.510204081632653e-05,
1150
+ "loss": 1.116,
1151
  "step": 149
1152
  },
1153
  {
1154
  "epoch": 1.5306122448979593,
1155
+ "grad_norm": 390.4358825683594,
1156
  "learning_rate": 1.5204081632653063e-05,
1157
+ "loss": 1.5491,
1158
  "step": 150
1159
  },
1160
  {
1161
  "epoch": 1.5408163265306123,
1162
+ "grad_norm": 69.31669616699219,
1163
  "learning_rate": 1.530612244897959e-05,
1164
+ "loss": 0.2423,
1165
  "step": 151
1166
  },
1167
  {
1168
  "epoch": 1.5510204081632653,
1169
+ "grad_norm": 125.4091796875,
1170
  "learning_rate": 1.5408163265306123e-05,
1171
+ "loss": 0.4355,
1172
  "step": 152
1173
  },
1174
  {
1175
  "epoch": 1.5612244897959182,
1176
+ "grad_norm": 1.3179243803024292,
1177
  "learning_rate": 1.5510204081632655e-05,
1178
+ "loss": 0.0043,
1179
  "step": 153
1180
  },
1181
  {
1182
  "epoch": 1.5714285714285714,
1183
+ "grad_norm": 18.853076934814453,
1184
  "learning_rate": 1.5612244897959187e-05,
1185
+ "loss": 0.059,
1186
  "step": 154
1187
  },
1188
  {
1189
  "epoch": 1.5816326530612246,
1190
+ "grad_norm": 4.13621711730957,
1191
  "learning_rate": 1.5714285714285715e-05,
1192
+ "loss": 0.0175,
1193
  "step": 155
1194
  },
1195
  {
1196
  "epoch": 1.5918367346938775,
1197
+ "grad_norm": 265.3294982910156,
1198
  "learning_rate": 1.5816326530612247e-05,
1199
+ "loss": 2.8813,
1200
  "step": 156
1201
  },
1202
  {
1203
  "epoch": 1.6020408163265305,
1204
+ "grad_norm": 123.54573822021484,
1205
  "learning_rate": 1.5918367346938776e-05,
1206
+ "loss": 0.4372,
1207
  "step": 157
1208
  },
1209
  {
1210
  "epoch": 1.6122448979591837,
1211
+ "grad_norm": 16.84331512451172,
1212
  "learning_rate": 1.6020408163265308e-05,
1213
+ "loss": 0.0611,
1214
  "step": 158
1215
  },
1216
  {
1217
  "epoch": 1.6224489795918369,
1218
+ "grad_norm": 306.5478820800781,
1219
  "learning_rate": 1.612244897959184e-05,
1220
+ "loss": 4.6339,
1221
  "step": 159
1222
  },
1223
  {
1224
  "epoch": 1.6326530612244898,
1225
+ "grad_norm": 482.53204345703125,
1226
  "learning_rate": 1.6224489795918368e-05,
1227
+ "loss": 2.1581,
1228
  "step": 160
1229
  },
1230
  {
1231
  "epoch": 1.6428571428571428,
1232
+ "grad_norm": 410.847900390625,
1233
  "learning_rate": 1.63265306122449e-05,
1234
+ "loss": 1.9109,
1235
  "step": 161
1236
  },
1237
  {
1238
  "epoch": 1.6530612244897958,
1239
+ "grad_norm": 1973.649658203125,
1240
  "learning_rate": 1.642857142857143e-05,
1241
+ "loss": 10.7888,
1242
  "step": 162
1243
  },
1244
  {
1245
  "epoch": 1.663265306122449,
1246
+ "grad_norm": 184.83628845214844,
1247
  "learning_rate": 1.653061224489796e-05,
1248
+ "loss": 4.4287,
1249
  "step": 163
1250
  },
1251
  {
1252
  "epoch": 1.6734693877551021,
1253
+ "grad_norm": 697.2540893554688,
1254
  "learning_rate": 1.6632653061224492e-05,
1255
+ "loss": 4.1106,
1256
  "step": 164
1257
  },
1258
  {
1259
  "epoch": 1.683673469387755,
1260
+ "grad_norm": 513.15966796875,
1261
  "learning_rate": 1.673469387755102e-05,
1262
+ "loss": 3.8159,
1263
  "step": 165
1264
  },
1265
  {
1266
  "epoch": 1.693877551020408,
1267
+ "grad_norm": 23.47126007080078,
1268
  "learning_rate": 1.6836734693877553e-05,
1269
+ "loss": 0.0468,
1270
  "step": 166
1271
  },
1272
  {
1273
  "epoch": 1.7040816326530612,
1274
+ "grad_norm": 0.6484940648078918,
1275
  "learning_rate": 1.6938775510204085e-05,
1276
+ "loss": 0.0023,
1277
  "step": 167
1278
  },
1279
  {
1280
  "epoch": 1.7142857142857144,
1281
+ "grad_norm": 0.809281051158905,
1282
  "learning_rate": 1.7040816326530613e-05,
1283
+ "loss": 0.0031,
1284
  "step": 168
1285
  },
1286
  {
1287
  "epoch": 1.7244897959183674,
1288
+ "grad_norm": 648.1061401367188,
1289
  "learning_rate": 1.7142857142857142e-05,
1290
+ "loss": 3.0379,
1291
  "step": 169
1292
  },
1293
  {
1294
  "epoch": 1.7346938775510203,
1295
+ "grad_norm": 1.748665690422058,
1296
  "learning_rate": 1.7244897959183674e-05,
1297
+ "loss": 0.0058,
1298
  "step": 170
1299
  },
1300
  {
1301
  "epoch": 1.7448979591836735,
1302
+ "grad_norm": 2.6888437271118164,
1303
  "learning_rate": 1.7346938775510206e-05,
1304
+ "loss": 0.0097,
1305
  "step": 171
1306
  },
1307
  {
1308
  "epoch": 1.7551020408163265,
1309
+ "grad_norm": 44.931114196777344,
1310
  "learning_rate": 1.7448979591836738e-05,
1311
+ "loss": 0.114,
1312
  "step": 172
1313
  },
1314
  {
1315
  "epoch": 1.7653061224489797,
1316
+ "grad_norm": 17.355039596557617,
1317
  "learning_rate": 1.7551020408163266e-05,
1318
+ "loss": 0.0376,
1319
  "step": 173
1320
  },
1321
  {
1322
  "epoch": 1.7755102040816326,
1323
+ "grad_norm": 0.23637208342552185,
1324
  "learning_rate": 1.7653061224489798e-05,
1325
+ "loss": 0.0006,
1326
  "step": 174
1327
  },
1328
  {
1329
  "epoch": 1.7857142857142856,
1330
+ "grad_norm": 879.6422119140625,
1331
  "learning_rate": 1.7755102040816327e-05,
1332
+ "loss": 1.7519,
1333
  "step": 175
1334
  },
1335
  {
1336
  "epoch": 1.7959183673469388,
1337
+ "grad_norm": 783.4127807617188,
1338
  "learning_rate": 1.785714285714286e-05,
1339
+ "loss": 3.5166,
1340
  "step": 176
1341
  },
1342
  {
1343
  "epoch": 1.806122448979592,
1344
+ "grad_norm": 431.9672546386719,
1345
  "learning_rate": 1.795918367346939e-05,
1346
+ "loss": 2.073,
1347
  "step": 177
1348
  },
1349
  {
1350
  "epoch": 1.816326530612245,
1351
+ "grad_norm": 25.601001739501953,
1352
  "learning_rate": 1.806122448979592e-05,
1353
+ "loss": 0.1532,
1354
  "step": 178
1355
  },
1356
  {
1357
  "epoch": 1.8265306122448979,
1358
+ "grad_norm": 365.6300048828125,
1359
  "learning_rate": 1.816326530612245e-05,
1360
+ "loss": 2.0969,
1361
  "step": 179
1362
  },
1363
  {
1364
  "epoch": 1.836734693877551,
1365
+ "grad_norm": 620.0676879882812,
1366
  "learning_rate": 1.826530612244898e-05,
1367
+ "loss": 1.867,
1368
  "step": 180
1369
  },
1370
  {
1371
  "epoch": 1.8469387755102042,
1372
+ "grad_norm": 1705.588134765625,
1373
  "learning_rate": 1.836734693877551e-05,
1374
+ "loss": 18.7505,
1375
  "step": 181
1376
  },
1377
  {
1378
  "epoch": 1.8571428571428572,
1379
+ "grad_norm": 683.8563842773438,
1380
  "learning_rate": 1.8469387755102043e-05,
1381
+ "loss": 2.5291,
1382
  "step": 182
1383
  },
1384
  {
1385
  "epoch": 1.8673469387755102,
1386
+ "grad_norm": 312.474609375,
1387
  "learning_rate": 1.8571428571428575e-05,
1388
+ "loss": 2.8375,
1389
  "step": 183
1390
  },
1391
  {
1392
  "epoch": 1.8775510204081631,
1393
+ "grad_norm": 37.57782745361328,
1394
  "learning_rate": 1.8673469387755104e-05,
1395
+ "loss": 0.0902,
1396
  "step": 184
1397
  },
1398
  {
1399
  "epoch": 1.8877551020408163,
1400
+ "grad_norm": 8.877613067626953,
1401
  "learning_rate": 1.8775510204081636e-05,
1402
+ "loss": 0.0139,
1403
  "step": 185
1404
  },
1405
  {
1406
  "epoch": 1.8979591836734695,
1407
+ "grad_norm": 8.716789245605469,
1408
  "learning_rate": 1.8877551020408164e-05,
1409
+ "loss": 0.0356,
1410
  "step": 186
1411
  },
1412
  {
1413
  "epoch": 1.9081632653061225,
1414
+ "grad_norm": 21.247528076171875,
1415
  "learning_rate": 1.8979591836734696e-05,
1416
+ "loss": 0.0838,
1417
  "step": 187
1418
  },
1419
  {
1420
  "epoch": 1.9183673469387754,
1421
+ "grad_norm": 9.923478126525879,
1422
  "learning_rate": 1.9081632653061225e-05,
1423
+ "loss": 0.0391,
1424
  "step": 188
1425
  },
1426
  {
1427
  "epoch": 1.9285714285714286,
1428
+ "grad_norm": 314.7723388671875,
1429
  "learning_rate": 1.9183673469387756e-05,
1430
+ "loss": 1.2579,
1431
  "step": 189
1432
  },
1433
  {
1434
  "epoch": 1.9387755102040818,
1435
+ "grad_norm": 1043.2823486328125,
1436
  "learning_rate": 1.928571428571429e-05,
1437
+ "loss": 9.3381,
1438
  "step": 190
1439
  },
1440
  {
1441
  "epoch": 1.9489795918367347,
1442
+ "grad_norm": 26.932323455810547,
1443
  "learning_rate": 1.9387755102040817e-05,
1444
+ "loss": 0.094,
1445
  "step": 191
1446
  },
1447
  {
1448
  "epoch": 1.9591836734693877,
1449
+ "grad_norm": 19.69426727294922,
1450
  "learning_rate": 1.948979591836735e-05,
1451
+ "loss": 0.0638,
1452
  "step": 192
1453
  },
1454
  {
1455
  "epoch": 1.9693877551020407,
1456
+ "grad_norm": 537.5110473632812,
1457
  "learning_rate": 1.9591836734693877e-05,
1458
+ "loss": 4.3027,
1459
  "step": 193
1460
  },
1461
  {
1462
  "epoch": 1.9795918367346939,
1463
+ "grad_norm": 0.6821572780609131,
1464
  "learning_rate": 1.969387755102041e-05,
1465
+ "loss": 0.002,
1466
  "step": 194
1467
  },
1468
  {
1469
  "epoch": 1.989795918367347,
1470
+ "grad_norm": 476.9329528808594,
1471
  "learning_rate": 1.979591836734694e-05,
1472
+ "loss": 0.9772,
1473
  "step": 195
1474
  },
1475
  {
1476
  "epoch": 2.0,
1477
+ "grad_norm": 2.22475266456604,
1478
  "learning_rate": 1.9897959183673473e-05,
1479
+ "loss": 0.0053,
1480
  "step": 196
1481
  },
1482
  {
1483
  "epoch": 2.0,
1484
+ "eval_dim_1024_cosine_accuracy@1": 0.3290653008962868,
1485
+ "eval_dim_1024_cosine_accuracy@10": 0.3886043533930858,
1486
+ "eval_dim_1024_cosine_accuracy@3": 0.3348271446862996,
1487
+ "eval_dim_1024_cosine_accuracy@5": 0.3559539052496799,
1488
+ "eval_dim_1024_cosine_map@100": 0.4165482880126111,
1489
+ "eval_dim_1024_cosine_mrr@10": 0.3392725037091231,
1490
+ "eval_dim_1024_cosine_ndcg@10": 0.34967137880514326,
1491
+ "eval_dim_1024_cosine_precision@1": 0.3290653008962868,
1492
+ "eval_dim_1024_cosine_precision@10": 0.28380281690140846,
1493
+ "eval_dim_1024_cosine_precision@3": 0.32885189927443453,
1494
+ "eval_dim_1024_cosine_precision@5": 0.31869398207426375,
1495
+ "eval_dim_1024_cosine_recall@1": 0.04062540337753272,
1496
+ "eval_dim_1024_cosine_recall@10": 0.2609802153031206,
1497
+ "eval_dim_1024_cosine_recall@3": 0.11937529555421877,
1498
+ "eval_dim_1024_cosine_recall@5": 0.17929032559391017,
1499
+ "eval_dim_128_cosine_accuracy@1": 0.3028169014084507,
1500
+ "eval_dim_128_cosine_accuracy@10": 0.354033290653009,
1501
+ "eval_dim_128_cosine_accuracy@3": 0.3066581306017926,
1502
+ "eval_dim_128_cosine_accuracy@5": 0.3258642765685019,
1503
+ "eval_dim_128_cosine_map@100": 0.3737489129899149,
1504
+ "eval_dim_128_cosine_mrr@10": 0.31138141983212375,
1505
+ "eval_dim_128_cosine_ndcg@10": 0.3200987320599894,
1506
+ "eval_dim_128_cosine_precision@1": 0.3028169014084507,
1507
+ "eval_dim_128_cosine_precision@10": 0.26325224071702946,
1508
+ "eval_dim_128_cosine_precision@3": 0.30217669654289375,
1509
+ "eval_dim_128_cosine_precision@5": 0.29334186939820744,
1510
+ "eval_dim_128_cosine_recall@1": 0.03581534845465155,
1511
+ "eval_dim_128_cosine_recall@10": 0.23457162530017844,
1512
+ "eval_dim_128_cosine_recall@3": 0.10498018962345104,
1513
+ "eval_dim_128_cosine_recall@5": 0.15825094621698793,
1514
+ "eval_dim_256_cosine_accuracy@1": 0.3111395646606914,
1515
+ "eval_dim_256_cosine_accuracy@10": 0.36619718309859156,
1516
+ "eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
1517
+ "eval_dim_256_cosine_accuracy@5": 0.3418693982074264,
1518
+ "eval_dim_256_cosine_map@100": 0.3876570902519949,
1519
+ "eval_dim_256_cosine_mrr@10": 0.32126318517163555,
1520
+ "eval_dim_256_cosine_ndcg@10": 0.3316834258973034,
1521
+ "eval_dim_256_cosine_precision@1": 0.3111395646606914,
1522
+ "eval_dim_256_cosine_precision@10": 0.2723431498079385,
1523
+ "eval_dim_256_cosine_precision@3": 0.31156636790439607,
1524
+ "eval_dim_256_cosine_precision@5": 0.30371318822023047,
1525
+ "eval_dim_256_cosine_recall@1": 0.03702717845490271,
1526
+ "eval_dim_256_cosine_recall@10": 0.24388584743594785,
1527
+ "eval_dim_256_cosine_recall@3": 0.10903486138141442,
1528
+ "eval_dim_256_cosine_recall@5": 0.16522998831931382,
1529
+ "eval_dim_512_cosine_accuracy@1": 0.324583866837388,
1530
+ "eval_dim_512_cosine_accuracy@10": 0.3886043533930858,
1531
+ "eval_dim_512_cosine_accuracy@3": 0.33034571062740076,
1532
+ "eval_dim_512_cosine_accuracy@5": 0.3553137003841229,
1533
+ "eval_dim_512_cosine_map@100": 0.4105799203347045,
1534
+ "eval_dim_512_cosine_mrr@10": 0.3356639839034201,
1535
+ "eval_dim_512_cosine_ndcg@10": 0.34755602204164354,
1536
+ "eval_dim_512_cosine_precision@1": 0.324583866837388,
1537
+ "eval_dim_512_cosine_precision@10": 0.28425096030729835,
1538
+ "eval_dim_512_cosine_precision@3": 0.3243704652155356,
1539
+ "eval_dim_512_cosine_precision@5": 0.31549295774647884,
1540
+ "eval_dim_512_cosine_recall@1": 0.039408176645563966,
1541
+ "eval_dim_512_cosine_recall@10": 0.2588290716980974,
1542
+ "eval_dim_512_cosine_recall@3": 0.11569400881462148,
1543
+ "eval_dim_512_cosine_recall@5": 0.17452688474231048,
1544
+ "eval_dim_64_cosine_accuracy@1": 0.264404609475032,
1545
+ "eval_dim_64_cosine_accuracy@10": 0.3220230473751601,
1546
  "eval_dim_64_cosine_accuracy@3": 0.26952624839948786,
1547
+ "eval_dim_64_cosine_accuracy@5": 0.2912932138284251,
1548
+ "eval_dim_64_cosine_map@100": 0.3356052539796121,
1549
+ "eval_dim_64_cosine_mrr@10": 0.27433820092270755,
1550
+ "eval_dim_64_cosine_ndcg@10": 0.28363892738216534,
1551
+ "eval_dim_64_cosine_precision@1": 0.264404609475032,
1552
+ "eval_dim_64_cosine_precision@10": 0.23399487836107555,
1553
+ "eval_dim_64_cosine_precision@3": 0.2639778062313273,
1554
+ "eval_dim_64_cosine_precision@5": 0.2573623559539053,
1555
+ "eval_dim_64_cosine_recall@1": 0.03137978486480133,
1556
+ "eval_dim_64_cosine_recall@10": 0.2079536154587263,
1557
+ "eval_dim_64_cosine_recall@3": 0.09184879304327909,
1558
+ "eval_dim_64_cosine_recall@5": 0.13906413978147564,
1559
+ "eval_dim_768_cosine_accuracy@1": 0.3290653008962868,
1560
+ "eval_dim_768_cosine_accuracy@10": 0.3911651728553137,
1561
+ "eval_dim_768_cosine_accuracy@3": 0.3348271446862996,
1562
+ "eval_dim_768_cosine_accuracy@5": 0.3565941101152369,
1563
+ "eval_dim_768_cosine_map@100": 0.41513115137941903,
1564
+ "eval_dim_768_cosine_mrr@10": 0.3396022600247949,
1565
+ "eval_dim_768_cosine_ndcg@10": 0.35038934007937644,
1566
+ "eval_dim_768_cosine_precision@1": 0.3290653008962868,
1567
+ "eval_dim_768_cosine_precision@10": 0.2860435339308579,
1568
+ "eval_dim_768_cosine_precision@3": 0.32885189927443453,
1569
+ "eval_dim_768_cosine_precision@5": 0.31907810499359796,
1570
+ "eval_dim_768_cosine_recall@1": 0.040070803135958795,
1571
+ "eval_dim_768_cosine_recall@10": 0.2600215922621299,
1572
+ "eval_dim_768_cosine_recall@3": 0.11769625185650755,
1573
+ "eval_dim_768_cosine_recall@5": 0.17699013287798807,
1574
+ "eval_runtime": 98.8385,
1575
  "eval_samples_per_second": 0.0,
1576
+ "eval_sequential_score": 0.28363892738216534,
1577
  "eval_steps_per_second": 0.0,
1578
  "step": 196
1579
  }
checkpoint-196/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cceeb921bff1b3eaa3a7113b28b14620d5699e95bb7b35174f20a7a1c3b15f4
3
  size 6097
checkpoint-294/README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-294/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78c216aabdfb5d2e5bf950f1cd5a80335096dea547e29ed3b49b85d0858df394
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09849343350ff46a6254e168518133fa950e7d965a39bb17836de90c99dc3eb5
3
  size 2239607176
checkpoint-294/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82cb9db2c86db21e09c6e695d166b2b80f61223909d1f30787b93eb60455ace8
3
  size 4471067142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01c827acbc0b550725a7c193ccbab2f50c0d7bfabfe169d2bc57b8f905543e59
3
  size 4471067142
checkpoint-294/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:318bad8325e8be8bdf0a5b272a40b3c1e9ce3b69291a451ca0c924f7dacab8bc
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bdc8f1e2d846953d00ba606f4cf92976f5653cd22fea2aacf347840fdb304ea
3
  size 14645
checkpoint-294/tokenizer_config.json CHANGED
@@ -47,9 +47,16 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
+ "max_length": 512,
51
  "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
  "sep_token": "</s>",
57
+ "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
checkpoint-294/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-294/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cceeb921bff1b3eaa3a7113b28b14620d5699e95bb7b35174f20a7a1c3b15f4
3
  size 6097
checkpoint-98/README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-98/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad96896c31a7a57d2d2591979a3de4018f6e0a81c8eb47198774b1b88cd52c55
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c726e0ea295d8e17a907aec896c93461dc49634c2088e18d04e5586a8586c39
3
  size 2239607176
checkpoint-98/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20282c36c7a9cbb242a2f093dea3da6be7e335cb5a7077742e22a6e220d97616
3
  size 4471067142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72e969dabaac83e51c992252e9d3abc15303ac1cdb489fbffcfcae2b29a18150
3
  size 4471067142
checkpoint-98/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42beaf0575196ccbd002c1b61b2ae1f21df3e7e108abc6b5b6e4c2fc22e71b1d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f05f697e2a026dbb8be0397c5f3215957e05bbf5897dea20c686e5f8917f13
3
  size 14645
checkpoint-98/tokenizer_config.json CHANGED
@@ -47,9 +47,16 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
+ "max_length": 512,
51
  "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
  "sep_token": "</s>",
57
+ "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
checkpoint-98/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 98,
3
- "best_metric": 0.3299991425713933,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,785 +11,785 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
- "grad_norm": 196.63563537597656,
15
  "learning_rate": 0.0,
16
- "loss": 4.0658,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
- "grad_norm": 184.93710327148438,
22
  "learning_rate": 1.0204081632653061e-07,
23
- "loss": 5.2785,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
- "grad_norm": 179.60655212402344,
29
  "learning_rate": 2.0408163265306121e-07,
30
- "loss": 4.349,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
- "grad_norm": 163.9447479248047,
36
  "learning_rate": 3.0612244897959183e-07,
37
- "loss": 3.805,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
- "grad_norm": 164.29776000976562,
43
  "learning_rate": 4.0816326530612243e-07,
44
- "loss": 3.1683,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
- "grad_norm": 209.0354766845703,
50
  "learning_rate": 5.102040816326531e-07,
51
- "loss": 5.3989,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
- "grad_norm": 262.14971923828125,
57
  "learning_rate": 6.122448979591837e-07,
58
- "loss": 8.211,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
- "grad_norm": 188.68443298339844,
64
  "learning_rate": 7.142857142857143e-07,
65
- "loss": 5.3598,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
- "grad_norm": 216.530517578125,
71
  "learning_rate": 8.163265306122449e-07,
72
- "loss": 5.0522,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
- "grad_norm": 231.66384887695312,
78
  "learning_rate": 9.183673469387756e-07,
79
- "loss": 4.4736,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
- "grad_norm": 329.2440490722656,
85
  "learning_rate": 1.0204081632653063e-06,
86
- "loss": 8.3251,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
- "grad_norm": 102.95913696289062,
92
  "learning_rate": 1.122448979591837e-06,
93
- "loss": 2.5822,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
- "grad_norm": 116.47322845458984,
99
  "learning_rate": 1.2244897959183673e-06,
100
- "loss": 2.8464,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
- "grad_norm": 226.98976135253906,
106
  "learning_rate": 1.3265306122448982e-06,
107
- "loss": 5.8915,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
- "grad_norm": 192.4533233642578,
113
  "learning_rate": 1.4285714285714286e-06,
114
- "loss": 7.2637,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
- "grad_norm": 269.7630920410156,
120
  "learning_rate": 1.5306122448979593e-06,
121
- "loss": 7.4234,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
- "grad_norm": 111.28227233886719,
127
  "learning_rate": 1.6326530612244897e-06,
128
- "loss": 2.3787,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
- "grad_norm": 165.55792236328125,
134
  "learning_rate": 1.7346938775510206e-06,
135
- "loss": 3.1947,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
- "grad_norm": 152.33682250976562,
141
  "learning_rate": 1.8367346938775512e-06,
142
- "loss": 3.4787,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
- "grad_norm": 159.368408203125,
148
  "learning_rate": 1.938775510204082e-06,
149
- "loss": 5.1418,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
- "grad_norm": 112.97805786132812,
155
  "learning_rate": 2.0408163265306125e-06,
156
- "loss": 2.3042,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
- "grad_norm": 199.43443298339844,
162
  "learning_rate": 2.1428571428571427e-06,
163
- "loss": 6.6786,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
- "grad_norm": 241.95591735839844,
169
  "learning_rate": 2.244897959183674e-06,
170
- "loss": 6.6721,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
- "grad_norm": 249.65122985839844,
176
  "learning_rate": 2.3469387755102044e-06,
177
- "loss": 4.4896,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
- "grad_norm": 183.51483154296875,
183
  "learning_rate": 2.4489795918367347e-06,
184
- "loss": 3.4416,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
- "grad_norm": 286.1512756347656,
190
  "learning_rate": 2.5510204081632657e-06,
191
- "loss": 7.5134,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
- "grad_norm": 98.32283782958984,
197
  "learning_rate": 2.6530612244897964e-06,
198
- "loss": 1.9577,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
- "grad_norm": 274.64178466796875,
204
  "learning_rate": 2.7551020408163266e-06,
205
- "loss": 4.9552,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
- "grad_norm": 142.77537536621094,
211
  "learning_rate": 2.8571428571428573e-06,
212
- "loss": 2.5202,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
- "grad_norm": 127.23304748535156,
218
  "learning_rate": 2.959183673469388e-06,
219
- "loss": 2.4486,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
- "grad_norm": 99.88568878173828,
225
  "learning_rate": 3.0612244897959185e-06,
226
- "loss": 1.9923,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
- "grad_norm": 151.1445770263672,
232
  "learning_rate": 3.1632653061224496e-06,
233
- "loss": 2.8301,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
- "grad_norm": 203.54248046875,
239
  "learning_rate": 3.2653061224489794e-06,
240
- "loss": 3.7414,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
- "grad_norm": 443.4117126464844,
246
  "learning_rate": 3.3673469387755105e-06,
247
- "loss": 5.2738,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
- "grad_norm": 178.9974822998047,
253
  "learning_rate": 3.469387755102041e-06,
254
- "loss": 3.791,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
- "grad_norm": 122.32801818847656,
260
  "learning_rate": 3.5714285714285718e-06,
261
- "loss": 1.9081,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
- "grad_norm": 189.6477813720703,
267
  "learning_rate": 3.6734693877551024e-06,
268
- "loss": 2.4172,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
- "grad_norm": 222.67959594726562,
274
  "learning_rate": 3.7755102040816327e-06,
275
- "loss": 4.0417,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
- "grad_norm": 160.97071838378906,
281
  "learning_rate": 3.877551020408164e-06,
282
- "loss": 3.5591,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
- "grad_norm": 178.01609802246094,
288
  "learning_rate": 3.979591836734694e-06,
289
- "loss": 3.0139,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
- "grad_norm": 142.32168579101562,
295
  "learning_rate": 4.081632653061225e-06,
296
- "loss": 2.4836,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
- "grad_norm": 148.1731719970703,
302
  "learning_rate": 4.183673469387755e-06,
303
- "loss": 2.807,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
- "grad_norm": 152.1929931640625,
309
  "learning_rate": 4.2857142857142855e-06,
310
- "loss": 1.9753,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
- "grad_norm": 219.5394287109375,
316
  "learning_rate": 4.3877551020408165e-06,
317
- "loss": 0.9764,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
- "grad_norm": 95.7768783569336,
323
  "learning_rate": 4.489795918367348e-06,
324
- "loss": 1.1398,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
- "grad_norm": 320.86529541015625,
330
  "learning_rate": 4.591836734693878e-06,
331
- "loss": 6.7812,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
- "grad_norm": 18.277860641479492,
337
  "learning_rate": 4.693877551020409e-06,
338
- "loss": 0.2479,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
- "grad_norm": 405.69024658203125,
344
  "learning_rate": 4.795918367346939e-06,
345
- "loss": 6.041,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
- "grad_norm": 104.91180419921875,
351
  "learning_rate": 4.897959183673469e-06,
352
- "loss": 1.1839,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
- "grad_norm": 110.48990631103516,
358
  "learning_rate": 5e-06,
359
- "loss": 2.0933,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
- "grad_norm": 135.2900390625,
365
  "learning_rate": 5.1020408163265315e-06,
366
- "loss": 1.8613,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
- "grad_norm": 146.744140625,
372
  "learning_rate": 5.204081632653062e-06,
373
- "loss": 2.9359,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
- "grad_norm": 103.08831787109375,
379
  "learning_rate": 5.306122448979593e-06,
380
- "loss": 1.0129,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
- "grad_norm": 527.3735961914062,
386
  "learning_rate": 5.408163265306123e-06,
387
- "loss": 11.8843,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
- "grad_norm": 374.4621887207031,
393
  "learning_rate": 5.510204081632653e-06,
394
- "loss": 5.8523,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
- "grad_norm": 71.62016296386719,
400
  "learning_rate": 5.6122448979591834e-06,
401
- "loss": 0.832,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
- "grad_norm": 287.0186462402344,
407
  "learning_rate": 5.7142857142857145e-06,
408
- "loss": 3.1778,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
- "grad_norm": 31.022693634033203,
414
  "learning_rate": 5.816326530612246e-06,
415
- "loss": 0.3919,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
- "grad_norm": 412.99176025390625,
421
  "learning_rate": 5.918367346938776e-06,
422
- "loss": 7.2526,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
- "grad_norm": 40.534244537353516,
428
  "learning_rate": 6.020408163265307e-06,
429
- "loss": 0.578,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
- "grad_norm": 226.0243682861328,
435
  "learning_rate": 6.122448979591837e-06,
436
- "loss": 2.5233,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
- "grad_norm": 113.55558013916016,
442
  "learning_rate": 6.224489795918368e-06,
443
- "loss": 1.5694,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
- "grad_norm": 190.6112518310547,
449
  "learning_rate": 6.326530612244899e-06,
450
- "loss": 2.2332,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
- "grad_norm": 391.7434387207031,
456
  "learning_rate": 6.4285714285714295e-06,
457
- "loss": 4.5545,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
- "grad_norm": 214.495361328125,
463
  "learning_rate": 6.530612244897959e-06,
464
- "loss": 1.4804,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
- "grad_norm": 62.52560806274414,
470
  "learning_rate": 6.63265306122449e-06,
471
- "loss": 0.4391,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
- "grad_norm": 298.97808837890625,
477
  "learning_rate": 6.734693877551021e-06,
478
- "loss": 3.4715,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
- "grad_norm": 483.84796142578125,
484
  "learning_rate": 6.836734693877551e-06,
485
- "loss": 8.5808,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
- "grad_norm": 482.55743408203125,
491
  "learning_rate": 6.938775510204082e-06,
492
- "loss": 5.6959,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
- "grad_norm": 198.6812744140625,
498
  "learning_rate": 7.0408163265306125e-06,
499
- "loss": 3.8277,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
- "grad_norm": 293.4190673828125,
505
  "learning_rate": 7.1428571428571436e-06,
506
- "loss": 2.1832,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
- "grad_norm": 13.164139747619629,
512
  "learning_rate": 7.244897959183675e-06,
513
- "loss": 0.1244,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
- "grad_norm": 163.4252166748047,
519
  "learning_rate": 7.346938775510205e-06,
520
- "loss": 0.7707,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
- "grad_norm": 193.64401245117188,
526
  "learning_rate": 7.448979591836736e-06,
527
- "loss": 3.4828,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
- "grad_norm": 178.31982421875,
533
  "learning_rate": 7.551020408163265e-06,
534
- "loss": 2.9645,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
- "grad_norm": 28.57689666748047,
540
  "learning_rate": 7.653061224489796e-06,
541
- "loss": 0.2948,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
- "grad_norm": 608.8088989257812,
547
  "learning_rate": 7.755102040816327e-06,
548
- "loss": 12.6456,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
- "grad_norm": 123.08556365966797,
554
  "learning_rate": 7.857142857142858e-06,
555
- "loss": 1.2493,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
- "grad_norm": 225.3292694091797,
561
  "learning_rate": 7.959183673469388e-06,
562
- "loss": 2.6675,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
- "grad_norm": 57.49665069580078,
568
  "learning_rate": 8.06122448979592e-06,
569
- "loss": 0.5642,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
- "grad_norm": 247.52210998535156,
575
  "learning_rate": 8.16326530612245e-06,
576
- "loss": 1.6008,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
- "grad_norm": 309.60382080078125,
582
  "learning_rate": 8.26530612244898e-06,
583
- "loss": 3.257,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
- "grad_norm": 183.82882690429688,
589
  "learning_rate": 8.36734693877551e-06,
590
- "loss": 2.8086,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
- "grad_norm": 88.08740234375,
596
  "learning_rate": 8.469387755102042e-06,
597
- "loss": 0.4056,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
- "grad_norm": 515.5130615234375,
603
  "learning_rate": 8.571428571428571e-06,
604
- "loss": 3.711,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
- "grad_norm": 2.946629285812378,
610
  "learning_rate": 8.673469387755103e-06,
611
- "loss": 0.0253,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
- "grad_norm": 31.143714904785156,
617
  "learning_rate": 8.775510204081633e-06,
618
- "loss": 0.168,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
- "grad_norm": 535.6795043945312,
624
  "learning_rate": 8.877551020408163e-06,
625
- "loss": 5.0992,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
- "grad_norm": 577.0897216796875,
631
  "learning_rate": 8.979591836734695e-06,
632
- "loss": 5.3724,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
- "grad_norm": 102.31855773925781,
638
  "learning_rate": 9.081632653061225e-06,
639
- "loss": 0.9172,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
- "grad_norm": 306.1739196777344,
645
  "learning_rate": 9.183673469387756e-06,
646
- "loss": 3.1239,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
- "grad_norm": 191.27415466308594,
652
  "learning_rate": 9.285714285714288e-06,
653
- "loss": 1.4121,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
- "grad_norm": 67.19822692871094,
659
  "learning_rate": 9.387755102040818e-06,
660
- "loss": 0.2599,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
- "grad_norm": 17.93955421447754,
666
  "learning_rate": 9.489795918367348e-06,
667
- "loss": 0.1166,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
- "grad_norm": 23.839630126953125,
673
  "learning_rate": 9.591836734693878e-06,
674
- "loss": 0.1938,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
- "grad_norm": 1459.8140869140625,
680
  "learning_rate": 9.693877551020408e-06,
681
- "loss": 18.5143,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
- "grad_norm": 670.869140625,
687
  "learning_rate": 9.795918367346939e-06,
688
- "loss": 6.1932,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
- "grad_norm": 942.95849609375,
694
  "learning_rate": 9.89795918367347e-06,
695
- "loss": 20.3042,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
- "eval_dim_1024_cosine_accuracy@1": 0.3719590268886043,
701
- "eval_dim_1024_cosine_accuracy@10": 0.43982074263764404,
702
- "eval_dim_1024_cosine_accuracy@3": 0.37836107554417414,
703
- "eval_dim_1024_cosine_accuracy@5": 0.4020486555697823,
704
- "eval_dim_1024_cosine_map@100": 0.4604070214987707,
705
- "eval_dim_1024_cosine_mrr@10": 0.383397099770339,
706
- "eval_dim_1024_cosine_ndcg@10": 0.3947688545057553,
707
- "eval_dim_1024_cosine_precision@1": 0.3719590268886043,
708
- "eval_dim_1024_cosine_precision@10": 0.3265044814340589,
709
- "eval_dim_1024_cosine_precision@3": 0.37153222364489963,
710
- "eval_dim_1024_cosine_precision@5": 0.3613316261203585,
711
- "eval_dim_1024_cosine_recall@1": 0.04338670134208909,
712
- "eval_dim_1024_cosine_recall@10": 0.28262195979320087,
713
- "eval_dim_1024_cosine_recall@3": 0.1268773565773867,
714
- "eval_dim_1024_cosine_recall@5": 0.19083511167371434,
715
- "eval_dim_128_cosine_accuracy@1": 0.3053777208706786,
716
- "eval_dim_128_cosine_accuracy@10": 0.37836107554417414,
717
- "eval_dim_128_cosine_accuracy@3": 0.3111395646606914,
718
- "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
- "eval_dim_128_cosine_map@100": 0.3904168259576031,
720
- "eval_dim_128_cosine_mrr@10": 0.3174377070503828,
721
- "eval_dim_128_cosine_ndcg@10": 0.3299991425713933,
722
- "eval_dim_128_cosine_precision@1": 0.3053777208706786,
723
- "eval_dim_128_cosine_precision@10": 0.271830985915493,
724
- "eval_dim_128_cosine_precision@3": 0.30495091762697396,
725
- "eval_dim_128_cosine_precision@5": 0.29641485275288093,
726
- "eval_dim_128_cosine_recall@1": 0.03688049871840266,
727
- "eval_dim_128_cosine_recall@10": 0.24260019246216608,
728
- "eval_dim_128_cosine_recall@3": 0.10779952005618963,
729
- "eval_dim_128_cosine_recall@5": 0.16176912684922656,
730
- "eval_dim_256_cosine_accuracy@1": 0.324583866837388,
731
- "eval_dim_256_cosine_accuracy@10": 0.4058898847631242,
732
- "eval_dim_256_cosine_accuracy@3": 0.33290653008962867,
733
- "eval_dim_256_cosine_accuracy@5": 0.3649167733674776,
734
- "eval_dim_256_cosine_map@100": 0.4232469199200366,
735
- "eval_dim_256_cosine_mrr@10": 0.33872299453285326,
736
- "eval_dim_256_cosine_ndcg@10": 0.35307499975694673,
737
- "eval_dim_256_cosine_precision@1": 0.324583866837388,
738
- "eval_dim_256_cosine_precision@10": 0.2935339308578745,
739
- "eval_dim_256_cosine_precision@3": 0.324583866837388,
740
- "eval_dim_256_cosine_precision@5": 0.31792573623559534,
741
- "eval_dim_256_cosine_recall@1": 0.038313787861467184,
742
- "eval_dim_256_cosine_recall@10": 0.2549878568107636,
743
- "eval_dim_256_cosine_recall@3": 0.11223891931505588,
744
- "eval_dim_256_cosine_recall@5": 0.1697408782100328,
745
- "eval_dim_512_cosine_accuracy@1": 0.34827144686299616,
746
- "eval_dim_512_cosine_accuracy@10": 0.44302176696542894,
747
- "eval_dim_512_cosine_accuracy@3": 0.3553137003841229,
748
- "eval_dim_512_cosine_accuracy@5": 0.39436619718309857,
749
- "eval_dim_512_cosine_map@100": 0.44994622162234726,
750
- "eval_dim_512_cosine_mrr@10": 0.3644033392272826,
751
- "eval_dim_512_cosine_ndcg@10": 0.3807642678190648,
752
- "eval_dim_512_cosine_precision@1": 0.34827144686299616,
753
- "eval_dim_512_cosine_precision@10": 0.31798975672215113,
754
- "eval_dim_512_cosine_precision@3": 0.34827144686299616,
755
- "eval_dim_512_cosine_precision@5": 0.3418693982074264,
756
- "eval_dim_512_cosine_recall@1": 0.04125738861359979,
757
- "eval_dim_512_cosine_recall@10": 0.2752162089385945,
758
- "eval_dim_512_cosine_recall@3": 0.12077279112247459,
759
- "eval_dim_512_cosine_recall@5": 0.18268801127884626,
760
- "eval_dim_64_cosine_accuracy@1": 0.23175416133162613,
761
- "eval_dim_64_cosine_accuracy@10": 0.29833546734955185,
762
- "eval_dim_64_cosine_accuracy@3": 0.23879641485275288,
763
- "eval_dim_64_cosine_accuracy@5": 0.2612035851472471,
764
- "eval_dim_64_cosine_map@100": 0.3151829220617657,
765
- "eval_dim_64_cosine_mrr@10": 0.24303802613661746,
766
- "eval_dim_64_cosine_ndcg@10": 0.2548721998123125,
767
- "eval_dim_64_cosine_precision@1": 0.23175416133162613,
768
- "eval_dim_64_cosine_precision@10": 0.20864276568501922,
769
- "eval_dim_64_cosine_precision@3": 0.23218096457533077,
770
- "eval_dim_64_cosine_precision@5": 0.22740076824583869,
771
- "eval_dim_64_cosine_recall@1": 0.028384798943475897,
772
- "eval_dim_64_cosine_recall@10": 0.19540887275051927,
773
- "eval_dim_64_cosine_recall@3": 0.08325514613360847,
774
- "eval_dim_64_cosine_recall@5": 0.12720688223912358,
775
- "eval_dim_768_cosine_accuracy@1": 0.36619718309859156,
776
- "eval_dim_768_cosine_accuracy@10": 0.44302176696542894,
777
- "eval_dim_768_cosine_accuracy@3": 0.37516005121638923,
778
- "eval_dim_768_cosine_accuracy@5": 0.4014084507042254,
779
- "eval_dim_768_cosine_map@100": 0.4592162636155952,
780
- "eval_dim_768_cosine_mrr@10": 0.37943672133812956,
781
- "eval_dim_768_cosine_ndcg@10": 0.3923099208699586,
782
- "eval_dim_768_cosine_precision@1": 0.36619718309859156,
783
- "eval_dim_768_cosine_precision@10": 0.32541613316261203,
784
- "eval_dim_768_cosine_precision@3": 0.3666239863422962,
785
- "eval_dim_768_cosine_precision@5": 0.35761843790012804,
786
- "eval_dim_768_cosine_recall@1": 0.042908107176418055,
787
- "eval_dim_768_cosine_recall@10": 0.2828362934197418,
788
- "eval_dim_768_cosine_recall@3": 0.12573828441229515,
789
- "eval_dim_768_cosine_recall@5": 0.18984022934199501,
790
- "eval_runtime": 98.8905,
791
  "eval_samples_per_second": 0.0,
792
- "eval_sequential_score": 0.2548721998123125,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  }
 
1
  {
2
  "best_global_step": 98,
3
+ "best_metric": 0.323940756796795,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
+ "grad_norm": 1059.8211669921875,
15
  "learning_rate": 0.0,
16
+ "loss": 9.6954,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
+ "grad_norm": 890.8715209960938,
22
  "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 11.5048,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
+ "grad_norm": 264.5753173828125,
29
  "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 2.1575,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
+ "grad_norm": 501.6875,
36
  "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 2.6843,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
+ "grad_norm": 6.634378910064697,
43
  "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 0.0364,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
+ "grad_norm": 206.64352416992188,
50
  "learning_rate": 5.102040816326531e-07,
51
+ "loss": 0.705,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
+ "grad_norm": 383.0555114746094,
57
  "learning_rate": 6.122448979591837e-07,
58
+ "loss": 1.9957,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
+ "grad_norm": 212.91329956054688,
64
  "learning_rate": 7.142857142857143e-07,
65
+ "loss": 0.9938,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
+ "grad_norm": 49.36940383911133,
71
  "learning_rate": 8.163265306122449e-07,
72
+ "loss": 0.3187,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
+ "grad_norm": 51.765472412109375,
78
  "learning_rate": 9.183673469387756e-07,
79
+ "loss": 0.1435,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
+ "grad_norm": 13.23577880859375,
85
  "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 0.0818,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
+ "grad_norm": 240.9364776611328,
92
  "learning_rate": 1.122448979591837e-06,
93
+ "loss": 0.6535,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
+ "grad_norm": 117.47791290283203,
99
  "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 0.3915,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
+ "grad_norm": 202.8033447265625,
106
  "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 0.5493,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
+ "grad_norm": 111.81350708007812,
113
  "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 0.7231,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
+ "grad_norm": 10.884031295776367,
120
  "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 0.0715,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
+ "grad_norm": 862.8134155273438,
127
  "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 5.8663,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
+ "grad_norm": 54.59718322753906,
134
  "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 0.2586,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
+ "grad_norm": 140.5866241455078,
141
  "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 0.9353,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
+ "grad_norm": 479.35052490234375,
148
  "learning_rate": 1.938775510204082e-06,
149
+ "loss": 2.5843,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
+ "grad_norm": 388.5758361816406,
155
  "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 2.0583,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
+ "grad_norm": 619.6573486328125,
162
  "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 6.9121,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
+ "grad_norm": 120.95680236816406,
169
  "learning_rate": 2.244897959183674e-06,
170
+ "loss": 1.0921,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
+ "grad_norm": 861.7152709960938,
176
  "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 5.4863,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
+ "grad_norm": 7.463388919830322,
183
  "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 0.0549,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
+ "grad_norm": 357.8828125,
190
  "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 2.345,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
+ "grad_norm": 390.43023681640625,
197
  "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 4.264,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
+ "grad_norm": 382.8008728027344,
204
  "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 2.4847,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
+ "grad_norm": 159.14295959472656,
211
  "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 0.7634,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
+ "grad_norm": 361.00201416015625,
218
  "learning_rate": 2.959183673469388e-06,
219
+ "loss": 2.047,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
+ "grad_norm": 81.90242004394531,
225
  "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 0.694,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
+ "grad_norm": 294.4106750488281,
232
  "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 0.7417,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
+ "grad_norm": 322.22308349609375,
239
  "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 1.9942,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
+ "grad_norm": 449.238525390625,
246
  "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 2.8978,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
+ "grad_norm": 1.8455325365066528,
253
  "learning_rate": 3.469387755102041e-06,
254
+ "loss": 0.0126,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
+ "grad_norm": 310.52740478515625,
260
  "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.9776,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
+ "grad_norm": 237.73545837402344,
267
  "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 1.5667,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
+ "grad_norm": 817.3215942382812,
274
  "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 5.5693,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
+ "grad_norm": 364.91326904296875,
281
  "learning_rate": 3.877551020408164e-06,
282
+ "loss": 1.6802,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
+ "grad_norm": 34.629112243652344,
288
  "learning_rate": 3.979591836734694e-06,
289
+ "loss": 0.2144,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
+ "grad_norm": 43.43345260620117,
295
  "learning_rate": 4.081632653061225e-06,
296
+ "loss": 0.1797,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
+ "grad_norm": 544.13134765625,
302
  "learning_rate": 4.183673469387755e-06,
303
+ "loss": 5.7559,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
+ "grad_norm": 357.5466003417969,
309
  "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 2.6372,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
+ "grad_norm": 458.7740783691406,
316
  "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 1.8447,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
+ "grad_norm": 668.6949462890625,
323
  "learning_rate": 4.489795918367348e-06,
324
+ "loss": 2.8156,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
+ "grad_norm": 382.7512512207031,
330
  "learning_rate": 4.591836734693878e-06,
331
+ "loss": 3.1588,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
+ "grad_norm": 9.869824409484863,
337
  "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.0552,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
+ "grad_norm": 448.4170837402344,
344
  "learning_rate": 4.795918367346939e-06,
345
+ "loss": 3.3053,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
+ "grad_norm": 471.82855224609375,
351
  "learning_rate": 4.897959183673469e-06,
352
+ "loss": 2.8332,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
+ "grad_norm": 210.9025115966797,
358
  "learning_rate": 5e-06,
359
+ "loss": 1.1961,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
+ "grad_norm": 164.64920043945312,
365
  "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 1.0106,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
+ "grad_norm": 386.0244140625,
372
  "learning_rate": 5.204081632653062e-06,
373
+ "loss": 2.4593,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
+ "grad_norm": 419.1893310546875,
379
  "learning_rate": 5.306122448979593e-06,
380
+ "loss": 3.4849,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
+ "grad_norm": 10.212640762329102,
386
  "learning_rate": 5.408163265306123e-06,
387
+ "loss": 0.0338,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
+ "grad_norm": 419.79815673828125,
393
  "learning_rate": 5.510204081632653e-06,
394
+ "loss": 1.5319,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
+ "grad_norm": 6.56746768951416,
400
  "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.0419,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
+ "grad_norm": 15.671188354492188,
407
  "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 0.1098,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
+ "grad_norm": 8.410639762878418,
414
  "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.0457,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
+ "grad_norm": 6.06464147567749,
421
  "learning_rate": 5.918367346938776e-06,
422
+ "loss": 0.0273,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
+ "grad_norm": 293.1927185058594,
428
  "learning_rate": 6.020408163265307e-06,
429
+ "loss": 1.2946,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
+ "grad_norm": 189.53306579589844,
435
  "learning_rate": 6.122448979591837e-06,
436
+ "loss": 3.4121,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
+ "grad_norm": 455.4539489746094,
442
  "learning_rate": 6.224489795918368e-06,
443
+ "loss": 2.6015,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
+ "grad_norm": 351.75830078125,
449
  "learning_rate": 6.326530612244899e-06,
450
+ "loss": 2.0358,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
+ "grad_norm": 1400.6083984375,
456
  "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 7.3114,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
+ "grad_norm": 1247.4736328125,
463
  "learning_rate": 6.530612244897959e-06,
464
+ "loss": 6.8888,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
+ "grad_norm": 296.157470703125,
470
  "learning_rate": 6.63265306122449e-06,
471
+ "loss": 1.6606,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
+ "grad_norm": 461.7646484375,
477
  "learning_rate": 6.734693877551021e-06,
478
+ "loss": 5.2343,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
+ "grad_norm": 443.15264892578125,
484
  "learning_rate": 6.836734693877551e-06,
485
+ "loss": 2.1977,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
+ "grad_norm": 31.333446502685547,
491
  "learning_rate": 6.938775510204082e-06,
492
+ "loss": 0.1702,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
+ "grad_norm": 603.3770751953125,
498
  "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 3.5715,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
+ "grad_norm": 190.58395385742188,
505
  "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 1.4736,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
+ "grad_norm": 211.7954559326172,
512
  "learning_rate": 7.244897959183675e-06,
513
+ "loss": 1.0967,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
+ "grad_norm": 288.28448486328125,
519
  "learning_rate": 7.346938775510205e-06,
520
+ "loss": 1.2098,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
+ "grad_norm": 535.2803344726562,
526
  "learning_rate": 7.448979591836736e-06,
527
+ "loss": 1.9541,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
+ "grad_norm": 1270.0836181640625,
533
  "learning_rate": 7.551020408163265e-06,
534
+ "loss": 4.0992,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
+ "grad_norm": 2.131913661956787,
540
  "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.0145,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
+ "grad_norm": 2.6484782695770264,
547
  "learning_rate": 7.755102040816327e-06,
548
+ "loss": 0.0079,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
+ "grad_norm": 18.671253204345703,
554
  "learning_rate": 7.857142857142858e-06,
555
+ "loss": 0.1081,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
+ "grad_norm": 282.3451843261719,
561
  "learning_rate": 7.959183673469388e-06,
562
+ "loss": 1.7446,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
+ "grad_norm": 303.16900634765625,
568
  "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.6343,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
+ "grad_norm": 899.4592895507812,
575
  "learning_rate": 8.16326530612245e-06,
576
+ "loss": 4.7374,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
+ "grad_norm": 600.3280639648438,
582
  "learning_rate": 8.26530612244898e-06,
583
+ "loss": 3.1082,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
+ "grad_norm": 3.1936967372894287,
589
  "learning_rate": 8.36734693877551e-06,
590
+ "loss": 0.0144,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
+ "grad_norm": 1.3846139907836914,
596
  "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.0057,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
+ "grad_norm": 197.3724822998047,
603
  "learning_rate": 8.571428571428571e-06,
604
+ "loss": 0.7656,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
+ "grad_norm": 545.4349365234375,
610
  "learning_rate": 8.673469387755103e-06,
611
+ "loss": 1.5191,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
+ "grad_norm": 121.33210754394531,
617
  "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.1942,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
+ "grad_norm": 48.77962112426758,
624
  "learning_rate": 8.877551020408163e-06,
625
+ "loss": 0.2429,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
+ "grad_norm": 664.6809692382812,
631
  "learning_rate": 8.979591836734695e-06,
632
+ "loss": 7.0608,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
+ "grad_norm": 67.93673706054688,
638
  "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.1635,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
+ "grad_norm": 14.213589668273926,
645
  "learning_rate": 9.183673469387756e-06,
646
+ "loss": 0.057,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
+ "grad_norm": 550.2100219726562,
652
  "learning_rate": 9.285714285714288e-06,
653
+ "loss": 3.1796,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
+ "grad_norm": 516.64794921875,
659
  "learning_rate": 9.387755102040818e-06,
660
+ "loss": 2.4068,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
+ "grad_norm": 227.85704040527344,
666
  "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.9694,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
+ "grad_norm": 103.72978973388672,
673
  "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.4878,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
+ "grad_norm": 113.07623291015625,
680
  "learning_rate": 9.693877551020408e-06,
681
+ "loss": 0.4105,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
+ "grad_norm": 1049.9190673828125,
687
  "learning_rate": 9.795918367346939e-06,
688
+ "loss": 4.5006,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
+ "grad_norm": 419.75555419921875,
694
  "learning_rate": 9.89795918367347e-06,
695
+ "loss": 2.2675,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.34571062740076824,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.4231754161331626,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.352112676056338,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.3854033290653009,
704
+ "eval_dim_1024_cosine_map@100": 0.4413040417287197,
705
+ "eval_dim_1024_cosine_mrr@10": 0.3591188545413894,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.37218117046458954,
707
+ "eval_dim_1024_cosine_precision@1": 0.34571062740076824,
708
+ "eval_dim_1024_cosine_precision@10": 0.3060179257362356,
709
+ "eval_dim_1024_cosine_precision@3": 0.34571062740076824,
710
+ "eval_dim_1024_cosine_precision@5": 0.33751600512163893,
711
+ "eval_dim_1024_cosine_recall@1": 0.04202665510348477,
712
+ "eval_dim_1024_cosine_recall@10": 0.2720738817855689,
713
+ "eval_dim_1024_cosine_recall@3": 0.12344281372964075,
714
+ "eval_dim_1024_cosine_recall@5": 0.1862277356935127,
715
+ "eval_dim_128_cosine_accuracy@1": 0.30089628681177977,
716
+ "eval_dim_128_cosine_accuracy@10": 0.3687580025608195,
717
+ "eval_dim_128_cosine_accuracy@3": 0.3060179257362356,
718
+ "eval_dim_128_cosine_accuracy@5": 0.33354673495518566,
719
+ "eval_dim_128_cosine_map@100": 0.383805456031232,
720
+ "eval_dim_128_cosine_mrr@10": 0.31238618580167843,
721
+ "eval_dim_128_cosine_ndcg@10": 0.323940756796795,
722
+ "eval_dim_128_cosine_precision@1": 0.30089628681177977,
723
+ "eval_dim_128_cosine_precision@10": 0.2669014084507042,
724
+ "eval_dim_128_cosine_precision@3": 0.3006828851899274,
725
+ "eval_dim_128_cosine_precision@5": 0.29334186939820744,
726
+ "eval_dim_128_cosine_recall@1": 0.03621488699964182,
727
+ "eval_dim_128_cosine_recall@10": 0.23934767939840923,
728
+ "eval_dim_128_cosine_recall@3": 0.10619628777590438,
729
+ "eval_dim_128_cosine_recall@5": 0.16065683687574547,
730
+ "eval_dim_256_cosine_accuracy@1": 0.3181818181818182,
731
+ "eval_dim_256_cosine_accuracy@10": 0.39244558258642764,
732
+ "eval_dim_256_cosine_accuracy@3": 0.323303457106274,
733
+ "eval_dim_256_cosine_accuracy@5": 0.35723431498079383,
734
+ "eval_dim_256_cosine_map@100": 0.4066610643364293,
735
+ "eval_dim_256_cosine_mrr@10": 0.3309775318578131,
736
+ "eval_dim_256_cosine_ndcg@10": 0.34364332074782783,
737
+ "eval_dim_256_cosine_precision@1": 0.3181818181818182,
738
+ "eval_dim_256_cosine_precision@10": 0.28348271446862994,
739
+ "eval_dim_256_cosine_precision@3": 0.3175416133162612,
740
+ "eval_dim_256_cosine_precision@5": 0.31024327784891165,
741
+ "eval_dim_256_cosine_recall@1": 0.03856083314138909,
742
+ "eval_dim_256_cosine_recall@10": 0.2532060107296034,
743
+ "eval_dim_256_cosine_recall@3": 0.11327276127499253,
744
+ "eval_dim_256_cosine_recall@5": 0.17121955970972744,
745
+ "eval_dim_512_cosine_accuracy@1": 0.33674775928297057,
746
+ "eval_dim_512_cosine_accuracy@10": 0.41613316261203587,
747
+ "eval_dim_512_cosine_accuracy@3": 0.34314980793854033,
748
+ "eval_dim_512_cosine_accuracy@5": 0.37708066581306016,
749
+ "eval_dim_512_cosine_map@100": 0.4299582620106213,
750
+ "eval_dim_512_cosine_mrr@10": 0.3503564315184028,
751
+ "eval_dim_512_cosine_ndcg@10": 0.362727691265461,
752
+ "eval_dim_512_cosine_precision@1": 0.33674775928297057,
753
+ "eval_dim_512_cosine_precision@10": 0.2976312419974392,
754
+ "eval_dim_512_cosine_precision@3": 0.3363209560392659,
755
+ "eval_dim_512_cosine_precision@5": 0.32816901408450705,
756
+ "eval_dim_512_cosine_recall@1": 0.04109877030791802,
757
+ "eval_dim_512_cosine_recall@10": 0.26524116778193035,
758
+ "eval_dim_512_cosine_recall@3": 0.12056066628670095,
759
+ "eval_dim_512_cosine_recall@5": 0.18158399214837667,
760
+ "eval_dim_64_cosine_accuracy@1": 0.24647887323943662,
761
+ "eval_dim_64_cosine_accuracy@10": 0.31562099871959026,
762
+ "eval_dim_64_cosine_accuracy@3": 0.25096030729833546,
763
+ "eval_dim_64_cosine_accuracy@5": 0.27784891165172854,
764
+ "eval_dim_64_cosine_map@100": 0.32717183255723853,
765
+ "eval_dim_64_cosine_mrr@10": 0.25797791801312897,
766
+ "eval_dim_64_cosine_ndcg@10": 0.2694272980700995,
767
+ "eval_dim_64_cosine_precision@1": 0.24647887323943662,
768
+ "eval_dim_64_cosine_precision@10": 0.22029449423815622,
769
+ "eval_dim_64_cosine_precision@3": 0.24647887323943662,
770
+ "eval_dim_64_cosine_precision@5": 0.24033290653008965,
771
+ "eval_dim_64_cosine_recall@1": 0.030698008105366027,
772
+ "eval_dim_64_cosine_recall@10": 0.20675794932386124,
773
+ "eval_dim_64_cosine_recall@3": 0.0905145081182266,
774
+ "eval_dim_64_cosine_recall@5": 0.13744818119581018,
775
+ "eval_dim_768_cosine_accuracy@1": 0.3412291933418694,
776
+ "eval_dim_768_cosine_accuracy@10": 0.4174135723431498,
777
+ "eval_dim_768_cosine_accuracy@3": 0.34763124199743917,
778
+ "eval_dim_768_cosine_accuracy@5": 0.37964148527528807,
779
+ "eval_dim_768_cosine_map@100": 0.4350972821264766,
780
+ "eval_dim_768_cosine_mrr@10": 0.35454393024815517,
781
+ "eval_dim_768_cosine_ndcg@10": 0.36777711697459586,
782
+ "eval_dim_768_cosine_precision@1": 0.3412291933418694,
783
+ "eval_dim_768_cosine_precision@10": 0.3030089628681178,
784
+ "eval_dim_768_cosine_precision@3": 0.3414425949637217,
785
+ "eval_dim_768_cosine_precision@5": 0.33341869398207424,
786
+ "eval_dim_768_cosine_recall@1": 0.041218661006119914,
787
+ "eval_dim_768_cosine_recall@10": 0.2695735755366756,
788
+ "eval_dim_768_cosine_recall@3": 0.12127491338376899,
789
+ "eval_dim_768_cosine_recall@5": 0.18314999106768198,
790
+ "eval_runtime": 98.9256,
791
  "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.2694272980700995,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  }
checkpoint-98/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cceeb921bff1b3eaa3a7113b28b14620d5699e95bb7b35174f20a7a1c3b15f4
3
  size 6097
eval/Information-Retrieval_evaluation_dim_1024_results.csv CHANGED
@@ -2,3 +2,6 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accurac
2
  1.0,98,0.3719590268886043,0.37836107554417414,0.4020486555697823,0.43982074263764404,0.3719590268886043,0.04338670134208909,0.37153222364489963,0.1268773565773867,0.3613316261203585,0.19083511167371434,0.3265044814340589,0.28262195979320087,0.383397099770339,0.3947688545057553,0.4604070214987707
3
  2.0,196,0.3591549295774648,0.3649167733674776,0.39308578745198464,0.42445582586427655,0.3591549295774648,0.040925664980934,0.35830132309005547,0.11912044022990924,0.3495518565941101,0.17907504115268522,0.31914212548015364,0.2663878590041381,0.37061688311688284,0.38277564232489586,0.4442656203645836
4
  3.0,294,0.3354673495518566,0.3405889884763124,0.3725992317541613,0.41357234314980795,0.3354673495518566,0.0398374526951408,0.33418693982074266,0.11613369449549121,0.3261203585147247,0.1745427554610417,0.2996798975672215,0.26082122731297214,0.34860069507956787,0.3620561774122382,0.4292335776181432
 
 
 
 
2
  1.0,98,0.3719590268886043,0.37836107554417414,0.4020486555697823,0.43982074263764404,0.3719590268886043,0.04338670134208909,0.37153222364489963,0.1268773565773867,0.3613316261203585,0.19083511167371434,0.3265044814340589,0.28262195979320087,0.383397099770339,0.3947688545057553,0.4604070214987707
3
  2.0,196,0.3591549295774648,0.3649167733674776,0.39308578745198464,0.42445582586427655,0.3591549295774648,0.040925664980934,0.35830132309005547,0.11912044022990924,0.3495518565941101,0.17907504115268522,0.31914212548015364,0.2663878590041381,0.37061688311688284,0.38277564232489586,0.4442656203645836
4
  3.0,294,0.3354673495518566,0.3405889884763124,0.3725992317541613,0.41357234314980795,0.3354673495518566,0.0398374526951408,0.33418693982074266,0.11613369449549121,0.3261203585147247,0.1745427554610417,0.2996798975672215,0.26082122731297214,0.34860069507956787,0.3620561774122382,0.4292335776181432
5
+ 1.0,98,0.34571062740076824,0.352112676056338,0.3854033290653009,0.4231754161331626,0.34571062740076824,0.04202665510348477,0.34571062740076824,0.12344281372964075,0.33751600512163893,0.1862277356935127,0.3060179257362356,0.2720738817855689,0.3591188545413894,0.37218117046458954,0.4413040417287197
6
+ 2.0,196,0.3290653008962868,0.3348271446862996,0.3559539052496799,0.3886043533930858,0.3290653008962868,0.04062540337753272,0.32885189927443453,0.11937529555421877,0.31869398207426375,0.17929032559391017,0.28380281690140846,0.2609802153031206,0.3392725037091231,0.34967137880514326,0.4165482880126111
7
+ 3.0,294,0.3053777208706786,0.31434058898847633,0.34635083226632524,0.3854033290653009,0.3053777208706786,0.03802085156159601,0.30644472897994024,0.11197884027712995,0.2998719590268886,0.16921874866295042,0.27516005121638926,0.2530788160980187,0.31961389549417674,0.3342308256907798,0.3945564342654779
eval/Information-Retrieval_evaluation_dim_128_results.csv CHANGED
@@ -2,3 +2,6 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accurac
2
  1.0,98,0.3053777208706786,0.3111395646606914,0.3361075544174136,0.37836107554417414,0.3053777208706786,0.03688049871840266,0.30495091762697396,0.10779952005618963,0.29641485275288093,0.16176912684922656,0.271830985915493,0.24260019246216608,0.3174377070503828,0.3299991425713933,0.3904168259576031
3
  2.0,196,0.3060179257362356,0.31049935979513443,0.3322663252240717,0.3687580025608195,0.3060179257362356,0.035453426807775947,0.3053777208706786,0.10361683841932247,0.29679897567221514,0.15549008066651587,0.27099871959026883,0.23154232181544257,0.3164926427250368,0.3273120368593549,0.38523839229741214
4
  3.0,294,0.2874519846350832,0.2919334186939821,0.3207426376440461,0.36299615877080665,0.2874519846350832,0.03347207506231567,0.28638497652582157,0.09769125063098655,0.2796414852752881,0.14652568796520726,0.260179257362356,0.2200069729736681,0.29988542365302884,0.3121618176452898,0.37222007780985544
 
 
 
 
2
  1.0,98,0.3053777208706786,0.3111395646606914,0.3361075544174136,0.37836107554417414,0.3053777208706786,0.03688049871840266,0.30495091762697396,0.10779952005618963,0.29641485275288093,0.16176912684922656,0.271830985915493,0.24260019246216608,0.3174377070503828,0.3299991425713933,0.3904168259576031
3
  2.0,196,0.3060179257362356,0.31049935979513443,0.3322663252240717,0.3687580025608195,0.3060179257362356,0.035453426807775947,0.3053777208706786,0.10361683841932247,0.29679897567221514,0.15549008066651587,0.27099871959026883,0.23154232181544257,0.3164926427250368,0.3273120368593549,0.38523839229741214
4
  3.0,294,0.2874519846350832,0.2919334186939821,0.3207426376440461,0.36299615877080665,0.2874519846350832,0.03347207506231567,0.28638497652582157,0.09769125063098655,0.2796414852752881,0.14652568796520726,0.260179257362356,0.2200069729736681,0.29988542365302884,0.3121618176452898,0.37222007780985544
5
+ 1.0,98,0.30089628681177977,0.3060179257362356,0.33354673495518566,0.3687580025608195,0.30089628681177977,0.03621488699964182,0.3006828851899274,0.10619628777590438,0.29334186939820744,0.16065683687574547,0.2669014084507042,0.23934767939840923,0.31238618580167843,0.323940756796795,0.383805456031232
6
+ 2.0,196,0.3028169014084507,0.3066581306017926,0.3258642765685019,0.354033290653009,0.3028169014084507,0.03581534845465155,0.30217669654289375,0.10498018962345104,0.29334186939820744,0.15825094621698793,0.26325224071702946,0.23457162530017844,0.31138141983212375,0.3200987320599894,0.3737489129899149
7
+ 3.0,294,0.26632522407170295,0.2714468629961588,0.29769526248399486,0.3322663252240717,0.26632522407170295,0.032637352520298826,0.26632522407170295,0.09577557901596723,0.2601792573623559,0.14494055334056014,0.23681177976952622,0.2162161410106577,0.2774843505477303,0.2888228791481225,0.3452084373303918
eval/Information-Retrieval_evaluation_dim_256_results.csv CHANGED
@@ -2,3 +2,6 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accurac
2
  1.0,98,0.324583866837388,0.33290653008962867,0.3649167733674776,0.4058898847631242,0.324583866837388,0.038313787861467184,0.324583866837388,0.11223891931505588,0.31792573623559534,0.1697408782100328,0.2935339308578745,0.2549878568107636,0.33872299453285326,0.35307499975694673,0.4232469199200366
3
  2.0,196,0.3290653008962868,0.33354673495518566,0.36235595390524966,0.3969270166453265,0.3290653008962868,0.03735817465789262,0.3282116944088775,0.10921822534273136,0.3209987195902689,0.165931214027115,0.29423815620998717,0.24868649598286982,0.3407241428368185,0.3532462927680357,0.4130430492367209
4
  3.0,294,0.3213828425096031,0.32842509603072984,0.3559539052496799,0.38988476312419973,0.3213828425096031,0.03683491421575636,0.32159624413145543,0.10829890857523781,0.31459667093469906,0.16421046118001698,0.28732394366197184,0.2447394908113676,0.3334143751397268,0.34585160474489407,0.40656810490109624
 
 
 
 
2
  1.0,98,0.324583866837388,0.33290653008962867,0.3649167733674776,0.4058898847631242,0.324583866837388,0.038313787861467184,0.324583866837388,0.11223891931505588,0.31792573623559534,0.1697408782100328,0.2935339308578745,0.2549878568107636,0.33872299453285326,0.35307499975694673,0.4232469199200366
3
  2.0,196,0.3290653008962868,0.33354673495518566,0.36235595390524966,0.3969270166453265,0.3290653008962868,0.03735817465789262,0.3282116944088775,0.10921822534273136,0.3209987195902689,0.165931214027115,0.29423815620998717,0.24868649598286982,0.3407241428368185,0.3532462927680357,0.4130430492367209
4
  3.0,294,0.3213828425096031,0.32842509603072984,0.3559539052496799,0.38988476312419973,0.3213828425096031,0.03683491421575636,0.32159624413145543,0.10829890857523781,0.31459667093469906,0.16421046118001698,0.28732394366197184,0.2447394908113676,0.3334143751397268,0.34585160474489407,0.40656810490109624
5
+ 1.0,98,0.3181818181818182,0.323303457106274,0.35723431498079383,0.39244558258642764,0.3181818181818182,0.03856083314138909,0.3175416133162612,0.11327276127499253,0.31024327784891165,0.17121955970972744,0.28348271446862994,0.2532060107296034,0.3309775318578131,0.34364332074782783,0.4066610643364293
6
+ 2.0,196,0.3111395646606914,0.31882202304737517,0.3418693982074264,0.36619718309859156,0.3111395646606914,0.03702717845490271,0.31156636790439607,0.10903486138141442,0.30371318822023047,0.16522998831931382,0.2723431498079385,0.24388584743594785,0.32126318517163555,0.3316834258973034,0.3876570902519949
7
+ 3.0,294,0.2874519846350832,0.293213828425096,0.3181818181818182,0.353393085787452,0.2874519846350832,0.03468470626522805,0.2874519846350832,0.10190738055739662,0.28015364916773366,0.15339992748676054,0.2553777208706786,0.2285021587131539,0.29859866268316915,0.30974666081054697,0.36682428388570304
eval/Information-Retrieval_evaluation_dim_512_results.csv CHANGED
@@ -2,3 +2,6 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accurac
2
  1.0,98,0.34827144686299616,0.3553137003841229,0.39436619718309857,0.44302176696542894,0.34827144686299616,0.04125738861359979,0.34827144686299616,0.12077279112247459,0.3418693982074264,0.18268801127884626,0.31798975672215113,0.2752162089385945,0.3644033392272826,0.3807642678190648,0.44994622162234726
3
  2.0,196,0.3578745198463508,0.36299615877080665,0.3892445582586428,0.42765685019206146,0.3578745198463508,0.04087028201741599,0.35680751173708913,0.11887426573116062,0.34724711907810496,0.17823551058609727,0.3177336747759283,0.2641908975177659,0.36958849053919446,0.3812404238586874,0.44173422420832625
4
  3.0,294,0.33162612035851474,0.3405889884763124,0.3681177976952625,0.4039692701664533,0.33162612035851474,0.038588929588341196,0.3322663252240717,0.11297571247042615,0.3250960307298335,0.17031603260342357,0.29769526248399486,0.2554519940509474,0.3444845842733163,0.3580070743078956,0.42093121870079336
 
 
 
 
2
  1.0,98,0.34827144686299616,0.3553137003841229,0.39436619718309857,0.44302176696542894,0.34827144686299616,0.04125738861359979,0.34827144686299616,0.12077279112247459,0.3418693982074264,0.18268801127884626,0.31798975672215113,0.2752162089385945,0.3644033392272826,0.3807642678190648,0.44994622162234726
3
  2.0,196,0.3578745198463508,0.36299615877080665,0.3892445582586428,0.42765685019206146,0.3578745198463508,0.04087028201741599,0.35680751173708913,0.11887426573116062,0.34724711907810496,0.17823551058609727,0.3177336747759283,0.2641908975177659,0.36958849053919446,0.3812404238586874,0.44173422420832625
4
  3.0,294,0.33162612035851474,0.3405889884763124,0.3681177976952625,0.4039692701664533,0.33162612035851474,0.038588929588341196,0.3322663252240717,0.11297571247042615,0.3250960307298335,0.17031603260342357,0.29769526248399486,0.2554519940509474,0.3444845842733163,0.3580070743078956,0.42093121870079336
5
+ 1.0,98,0.33674775928297057,0.34314980793854033,0.37708066581306016,0.41613316261203587,0.33674775928297057,0.04109877030791802,0.3363209560392659,0.12056066628670095,0.32816901408450705,0.18158399214837667,0.2976312419974392,0.26524116778193035,0.3503564315184028,0.362727691265461,0.4299582620106213
6
+ 2.0,196,0.324583866837388,0.33034571062740076,0.3553137003841229,0.3886043533930858,0.324583866837388,0.039408176645563966,0.3243704652155356,0.11569400881462148,0.31549295774647884,0.17452688474231048,0.28425096030729835,0.2588290716980974,0.3356639839034201,0.34755602204164354,0.4105799203347045
7
+ 3.0,294,0.29449423815621,0.3060179257362356,0.3361075544174136,0.3738796414852753,0.29449423815621,0.03628785736063772,0.29641485275288093,0.10691245199699026,0.29078104993597953,0.16186564288415414,0.2677336747759283,0.24344608505680018,0.30902409405930487,0.3240238070684528,0.3843895305060905
eval/Information-Retrieval_evaluation_dim_64_results.csv CHANGED
@@ -2,3 +2,6 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accurac
2
  1.0,98,0.23175416133162613,0.23879641485275288,0.2612035851472471,0.29833546734955185,0.23175416133162613,0.028384798943475897,0.23218096457533077,0.08325514613360847,0.22740076824583869,0.12720688223912358,0.20864276568501922,0.19540887275051927,0.24303802613661746,0.2548721998123125,0.3151829220617657
3
  2.0,196,0.26504481434058896,0.26952624839948786,0.29257362355953903,0.3322663252240717,0.26504481434058896,0.030859353047043094,0.2646180110968843,0.09025134722734952,0.25813060179257363,0.13616521781869687,0.2385403329065301,0.20564514911963047,0.27606167306871515,0.28738876488409937,0.3471195034779171
4
  3.0,294,0.25096030729833546,0.2560819462227913,0.2900128040973111,0.32970550576184376,0.25096030729833546,0.0281516535128601,0.2507469056764831,0.08222758111049663,0.24788732394366192,0.12591556967755266,0.23604353393085786,0.19940598657336947,0.2645008942544151,0.27958348757665724,0.33353078928131286
 
 
 
 
2
  1.0,98,0.23175416133162613,0.23879641485275288,0.2612035851472471,0.29833546734955185,0.23175416133162613,0.028384798943475897,0.23218096457533077,0.08325514613360847,0.22740076824583869,0.12720688223912358,0.20864276568501922,0.19540887275051927,0.24303802613661746,0.2548721998123125,0.3151829220617657
3
  2.0,196,0.26504481434058896,0.26952624839948786,0.29257362355953903,0.3322663252240717,0.26504481434058896,0.030859353047043094,0.2646180110968843,0.09025134722734952,0.25813060179257363,0.13616521781869687,0.2385403329065301,0.20564514911963047,0.27606167306871515,0.28738876488409937,0.3471195034779171
4
  3.0,294,0.25096030729833546,0.2560819462227913,0.2900128040973111,0.32970550576184376,0.25096030729833546,0.0281516535128601,0.2507469056764831,0.08222758111049663,0.24788732394366192,0.12591556967755266,0.23604353393085786,0.19940598657336947,0.2645008942544151,0.27958348757665724,0.33353078928131286
5
+ 1.0,98,0.24647887323943662,0.25096030729833546,0.27784891165172854,0.31562099871959026,0.24647887323943662,0.030698008105366027,0.24647887323943662,0.0905145081182266,0.24033290653008965,0.13744818119581018,0.22029449423815622,0.20675794932386124,0.25797791801312897,0.2694272980700995,0.32717183255723853
6
+ 2.0,196,0.264404609475032,0.26952624839948786,0.2912932138284251,0.3220230473751601,0.264404609475032,0.03137978486480133,0.2639778062313273,0.09184879304327909,0.2573623559539053,0.13906413978147564,0.23399487836107555,0.2079536154587263,0.27433820092270755,0.28363892738216534,0.3356052539796121
7
+ 3.0,294,0.24647887323943662,0.25160051216389245,0.28040973111395645,0.3111395646606914,0.24647887323943662,0.029677140839872655,0.24669227486128895,0.08750621650497534,0.24186939820742642,0.13309179677669725,0.22227912932138283,0.19988984965370699,0.2576418104587115,0.26920687806072385,0.32242948081625944
eval/Information-Retrieval_evaluation_dim_768_results.csv CHANGED
@@ -2,3 +2,6 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accurac
2
  1.0,98,0.36619718309859156,0.37516005121638923,0.4014084507042254,0.44302176696542894,0.36619718309859156,0.042908107176418055,0.3666239863422962,0.12573828441229515,0.35761843790012804,0.18984022934199501,0.32541613316261203,0.2828362934197418,0.37943672133812956,0.3923099208699586,0.4592162636155952
3
  2.0,196,0.3546734955185659,0.36107554417413573,0.3860435339308579,0.4186939820742638,0.3546734955185659,0.04034211902742892,0.354033290653009,0.11739647757212558,0.34494238156209983,0.17629643014791566,0.3146606914212548,0.2626701106236,0.36586844298111887,0.3776871349900537,0.4380682423691407
4
  3.0,294,0.32842509603072984,0.3361075544174136,0.36299615877080665,0.4039692701664533,0.32842509603072984,0.03923116896195945,0.32863849765258213,0.1147561273639918,0.3199743918053777,0.17206648000872884,0.2928297055057618,0.25514859248143046,0.3415119504908234,0.35462738075585315,0.4227568005919578
 
 
 
 
2
  1.0,98,0.36619718309859156,0.37516005121638923,0.4014084507042254,0.44302176696542894,0.36619718309859156,0.042908107176418055,0.3666239863422962,0.12573828441229515,0.35761843790012804,0.18984022934199501,0.32541613316261203,0.2828362934197418,0.37943672133812956,0.3923099208699586,0.4592162636155952
3
  2.0,196,0.3546734955185659,0.36107554417413573,0.3860435339308579,0.4186939820742638,0.3546734955185659,0.04034211902742892,0.354033290653009,0.11739647757212558,0.34494238156209983,0.17629643014791566,0.3146606914212548,0.2626701106236,0.36586844298111887,0.3776871349900537,0.4380682423691407
4
  3.0,294,0.32842509603072984,0.3361075544174136,0.36299615877080665,0.4039692701664533,0.32842509603072984,0.03923116896195945,0.32863849765258213,0.1147561273639918,0.3199743918053777,0.17206648000872884,0.2928297055057618,0.25514859248143046,0.3415119504908234,0.35462738075585315,0.4227568005919578
5
+ 1.0,98,0.3412291933418694,0.34763124199743917,0.37964148527528807,0.4174135723431498,0.3412291933418694,0.041218661006119914,0.3414425949637217,0.12127491338376899,0.33341869398207424,0.18314999106768198,0.3030089628681178,0.2695735755366756,0.35454393024815517,0.36777711697459586,0.4350972821264766
6
+ 2.0,196,0.3290653008962868,0.3348271446862996,0.3565941101152369,0.3911651728553137,0.3290653008962868,0.040070803135958795,0.32885189927443453,0.11769625185650755,0.31907810499359796,0.17699013287798807,0.2860435339308579,0.2600215922621299,0.3396022600247949,0.35038934007937644,0.41513115137941903
7
+ 3.0,294,0.29961587708066584,0.30985915492957744,0.3393085787451985,0.382202304737516,0.29961587708066584,0.036933576189293056,0.3008962868117797,0.10879088395240075,0.2946222791293214,0.16490167901637093,0.2716389244558258,0.24941789989219518,0.31423414222709956,0.32886000357585454,0.39053989548220974
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad96896c31a7a57d2d2591979a3de4018f6e0a81c8eb47198774b1b88cd52c55
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c726e0ea295d8e17a907aec896c93461dc49634c2088e18d04e5586a8586c39
3
  size 2239607176
tokenizer_config.json CHANGED
@@ -47,9 +47,16 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
+ "max_length": 512,
51
  "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
  "sep_token": "</s>",
57
+ "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cceeb921bff1b3eaa3a7113b28b14620d5699e95bb7b35174f20a7a1c3b15f4
3
  size 6097