IoannisKat1 commited on
Commit
a80efd7
·
verified ·
1 Parent(s): 731a827

Add finetuned model

Browse files
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-196/README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-196/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86fee9b378922f1db9f68cf51a4941e02dfb183276ac89a16c8edbed98e30b9e
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484814348bf4fcb085f20060a0b9f52191a31badfba256e9f8fbe9f428f90bb0
3
  size 2239607176
checkpoint-196/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c03ddaba15f7dd601cacfb32507cfd2ceb73de3b6a0540a718091ec7eb2e678
3
  size 4471067142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c08151ebf618205048df61572e8db5af87c9c5b7778d11bffd1ac1d864c2535
3
  size 4471067142
checkpoint-196/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa08bd9dd367cde376d15e8b982d14cd6729eae58ce75d651531d783eb6f5977
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073fc248f153339698e571b34766dd922834d7173e0dde5ac471b168f6ab29cc
3
  size 14645
checkpoint-196/tokenizer_config.json CHANGED
@@ -47,16 +47,9 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
- "max_length": 512,
51
  "model_max_length": 512,
52
- "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
- "pad_token_type_id": 0,
55
- "padding_side": "right",
56
  "sep_token": "</s>",
57
- "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
- "truncation_side": "right",
60
- "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
checkpoint-196/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 98,
3
- "best_metric": 0.3312285498294292,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,1569 +11,1569 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
- "grad_norm": 973.273681640625,
15
  "learning_rate": 0.0,
16
- "loss": 15.8588,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
- "grad_norm": 1016.8517456054688,
22
  "learning_rate": 1.0204081632653061e-07,
23
- "loss": 10.7411,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
- "grad_norm": 166.88465881347656,
29
  "learning_rate": 2.0408163265306121e-07,
30
- "loss": 1.3873,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
- "grad_norm": 108.06741333007812,
36
  "learning_rate": 3.0612244897959183e-07,
37
- "loss": 0.9088,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
- "grad_norm": 1.1959134340286255,
43
  "learning_rate": 4.0816326530612243e-07,
44
- "loss": 0.0077,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
- "grad_norm": 130.83908081054688,
50
  "learning_rate": 5.102040816326531e-07,
51
- "loss": 0.6016,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
- "grad_norm": 318.3863525390625,
57
  "learning_rate": 6.122448979591837e-07,
58
- "loss": 1.6714,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
- "grad_norm": 74.26002502441406,
64
  "learning_rate": 7.142857142857143e-07,
65
- "loss": 0.4211,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
- "grad_norm": 32.4500846862793,
71
  "learning_rate": 8.163265306122449e-07,
72
- "loss": 0.1996,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
- "grad_norm": 41.27345275878906,
78
  "learning_rate": 9.183673469387756e-07,
79
- "loss": 0.1895,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
- "grad_norm": 27.35291862487793,
85
  "learning_rate": 1.0204081632653063e-06,
86
- "loss": 0.1358,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
- "grad_norm": 103.75244903564453,
92
  "learning_rate": 1.122448979591837e-06,
93
- "loss": 0.5552,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
- "grad_norm": 155.97923278808594,
99
  "learning_rate": 1.2244897959183673e-06,
100
- "loss": 0.5141,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
- "grad_norm": 53.757484436035156,
106
  "learning_rate": 1.3265306122448982e-06,
107
- "loss": 0.1955,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
- "grad_norm": 175.17491149902344,
113
  "learning_rate": 1.4285714285714286e-06,
114
- "loss": 1.9114,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
- "grad_norm": 49.02252197265625,
120
  "learning_rate": 1.5306122448979593e-06,
121
- "loss": 0.2645,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
- "grad_norm": 999.3756103515625,
127
  "learning_rate": 1.6326530612244897e-06,
128
- "loss": 7.5545,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
- "grad_norm": 149.2627410888672,
134
  "learning_rate": 1.7346938775510206e-06,
135
- "loss": 0.4297,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
- "grad_norm": 204.95181274414062,
141
  "learning_rate": 1.8367346938775512e-06,
142
- "loss": 0.678,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
- "grad_norm": 103.94851684570312,
148
  "learning_rate": 1.938775510204082e-06,
149
- "loss": 0.4634,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
- "grad_norm": 536.7100219726562,
155
  "learning_rate": 2.0408163265306125e-06,
156
- "loss": 4.2252,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
- "grad_norm": 444.44805908203125,
162
  "learning_rate": 2.1428571428571427e-06,
163
- "loss": 3.9985,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
- "grad_norm": 170.50369262695312,
169
  "learning_rate": 2.244897959183674e-06,
170
- "loss": 1.9242,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
- "grad_norm": 626.5487060546875,
176
  "learning_rate": 2.3469387755102044e-06,
177
- "loss": 3.2716,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
- "grad_norm": 51.353050231933594,
183
  "learning_rate": 2.4489795918367347e-06,
184
- "loss": 0.123,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
- "grad_norm": 108.25341796875,
190
  "learning_rate": 2.5510204081632657e-06,
191
- "loss": 1.0011,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
- "grad_norm": 322.83502197265625,
197
  "learning_rate": 2.6530612244897964e-06,
198
- "loss": 3.5846,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
- "grad_norm": 203.38458251953125,
204
  "learning_rate": 2.7551020408163266e-06,
205
- "loss": 1.1365,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
- "grad_norm": 127.78427124023438,
211
  "learning_rate": 2.8571428571428573e-06,
212
- "loss": 0.7149,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
- "grad_norm": 283.67645263671875,
218
  "learning_rate": 2.959183673469388e-06,
219
- "loss": 1.2629,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
- "grad_norm": 82.65542602539062,
225
  "learning_rate": 3.0612244897959185e-06,
226
- "loss": 0.6459,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
- "grad_norm": 42.66185760498047,
232
  "learning_rate": 3.1632653061224496e-06,
233
- "loss": 0.1934,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
- "grad_norm": 212.1294708251953,
239
  "learning_rate": 3.2653061224489794e-06,
240
- "loss": 1.4897,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
- "grad_norm": 188.0417022705078,
246
  "learning_rate": 3.3673469387755105e-06,
247
- "loss": 0.8561,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
- "grad_norm": 2.0467610359191895,
253
  "learning_rate": 3.469387755102041e-06,
254
- "loss": 0.0128,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
- "grad_norm": 283.3966979980469,
260
  "learning_rate": 3.5714285714285718e-06,
261
- "loss": 1.4952,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
- "grad_norm": 60.74869155883789,
267
  "learning_rate": 3.6734693877551024e-06,
268
- "loss": 0.3181,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
- "grad_norm": 824.6165771484375,
274
  "learning_rate": 3.7755102040816327e-06,
275
- "loss": 6.3681,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
- "grad_norm": 231.1636962890625,
281
  "learning_rate": 3.877551020408164e-06,
282
- "loss": 1.4487,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
- "grad_norm": 26.46611785888672,
288
  "learning_rate": 3.979591836734694e-06,
289
- "loss": 0.1702,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
- "grad_norm": 75.88525390625,
295
  "learning_rate": 4.081632653061225e-06,
296
- "loss": 0.2513,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
- "grad_norm": 465.83392333984375,
302
  "learning_rate": 4.183673469387755e-06,
303
- "loss": 4.1595,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
- "grad_norm": 306.2772521972656,
309
  "learning_rate": 4.2857142857142855e-06,
310
- "loss": 2.7347,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
- "grad_norm": 488.9759521484375,
316
  "learning_rate": 4.3877551020408165e-06,
317
- "loss": 2.3182,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
- "grad_norm": 355.1698913574219,
323
  "learning_rate": 4.489795918367348e-06,
324
- "loss": 1.3285,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
- "grad_norm": 263.558349609375,
330
  "learning_rate": 4.591836734693878e-06,
331
- "loss": 2.1155,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
- "grad_norm": 9.667963981628418,
337
  "learning_rate": 4.693877551020409e-06,
338
- "loss": 0.0645,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
- "grad_norm": 957.79345703125,
344
  "learning_rate": 4.795918367346939e-06,
345
- "loss": 7.1283,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
- "grad_norm": 160.0965118408203,
351
  "learning_rate": 4.897959183673469e-06,
352
- "loss": 0.711,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
- "grad_norm": 93.697265625,
358
  "learning_rate": 5e-06,
359
- "loss": 0.4716,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
- "grad_norm": 292.9518737792969,
365
  "learning_rate": 5.1020408163265315e-06,
366
- "loss": 2.2895,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
- "grad_norm": 335.4564514160156,
372
  "learning_rate": 5.204081632653062e-06,
373
- "loss": 1.9235,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
- "grad_norm": 138.63575744628906,
379
  "learning_rate": 5.306122448979593e-06,
380
- "loss": 0.8777,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
- "grad_norm": 1.011594533920288,
386
  "learning_rate": 5.408163265306123e-06,
387
- "loss": 0.0038,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
- "grad_norm": 506.25152587890625,
393
  "learning_rate": 5.510204081632653e-06,
394
- "loss": 1.5598,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
- "grad_norm": 2.2550530433654785,
400
  "learning_rate": 5.6122448979591834e-06,
401
- "loss": 0.0177,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
- "grad_norm": 13.93323802947998,
407
  "learning_rate": 5.7142857142857145e-06,
408
- "loss": 0.0837,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
- "grad_norm": 7.279649257659912,
414
  "learning_rate": 5.816326530612246e-06,
415
- "loss": 0.0429,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
- "grad_norm": 0.9923371076583862,
421
  "learning_rate": 5.918367346938776e-06,
422
- "loss": 0.0071,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
- "grad_norm": 743.8301391601562,
428
  "learning_rate": 6.020408163265307e-06,
429
- "loss": 2.7217,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
- "grad_norm": 227.04403686523438,
435
  "learning_rate": 6.122448979591837e-06,
436
- "loss": 3.9013,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
- "grad_norm": 193.12701416015625,
442
  "learning_rate": 6.224489795918368e-06,
443
- "loss": 1.417,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
- "grad_norm": 642.7814331054688,
449
  "learning_rate": 6.326530612244899e-06,
450
- "loss": 3.5854,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
- "grad_norm": 1007.544189453125,
456
  "learning_rate": 6.4285714285714295e-06,
457
- "loss": 12.918,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
- "grad_norm": 1310.942138671875,
463
  "learning_rate": 6.530612244897959e-06,
464
- "loss": 7.1566,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
- "grad_norm": 810.1301879882812,
470
  "learning_rate": 6.63265306122449e-06,
471
- "loss": 3.9897,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
- "grad_norm": 513.1759643554688,
477
  "learning_rate": 6.734693877551021e-06,
478
- "loss": 8.1139,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
- "grad_norm": 1414.8878173828125,
484
  "learning_rate": 6.836734693877551e-06,
485
- "loss": 5.7005,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
- "grad_norm": 31.607126235961914,
491
  "learning_rate": 6.938775510204082e-06,
492
- "loss": 0.1219,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
- "grad_norm": 799.9751586914062,
498
  "learning_rate": 7.0408163265306125e-06,
499
- "loss": 5.7849,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
- "grad_norm": 132.71778869628906,
505
  "learning_rate": 7.1428571428571436e-06,
506
- "loss": 1.0726,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
- "grad_norm": 256.61041259765625,
512
  "learning_rate": 7.244897959183675e-06,
513
- "loss": 1.2599,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
- "grad_norm": 192.0435333251953,
519
  "learning_rate": 7.346938775510205e-06,
520
- "loss": 0.6473,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
- "grad_norm": 293.7915954589844,
526
  "learning_rate": 7.448979591836736e-06,
527
- "loss": 1.0397,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
- "grad_norm": 312.2645263671875,
533
  "learning_rate": 7.551020408163265e-06,
534
- "loss": 1.5555,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
- "grad_norm": 1.417815923690796,
540
  "learning_rate": 7.653061224489796e-06,
541
- "loss": 0.0078,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
- "grad_norm": 1.4391653537750244,
547
  "learning_rate": 7.755102040816327e-06,
548
- "loss": 0.0048,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
- "grad_norm": 5.628185749053955,
554
  "learning_rate": 7.857142857142858e-06,
555
- "loss": 0.0323,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
- "grad_norm": 264.5353698730469,
561
  "learning_rate": 7.959183673469388e-06,
562
- "loss": 1.7425,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
- "grad_norm": 1.5278851985931396,
568
  "learning_rate": 8.06122448979592e-06,
569
- "loss": 0.0035,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
- "grad_norm": 932.3336181640625,
575
  "learning_rate": 8.16326530612245e-06,
576
- "loss": 6.4849,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
- "grad_norm": 635.4749145507812,
582
  "learning_rate": 8.26530612244898e-06,
583
- "loss": 4.3767,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
- "grad_norm": 8.875201225280762,
589
  "learning_rate": 8.36734693877551e-06,
590
- "loss": 0.0186,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
- "grad_norm": 0.15500876307487488,
596
  "learning_rate": 8.469387755102042e-06,
597
- "loss": 0.0008,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
- "grad_norm": 269.5357666015625,
603
  "learning_rate": 8.571428571428571e-06,
604
- "loss": 0.8354,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
- "grad_norm": 5.054287910461426,
610
  "learning_rate": 8.673469387755103e-06,
611
- "loss": 0.0162,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
- "grad_norm": 84.90735626220703,
617
  "learning_rate": 8.775510204081633e-06,
618
- "loss": 0.1282,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
- "grad_norm": 81.53719329833984,
624
  "learning_rate": 8.877551020408163e-06,
625
- "loss": 0.4514,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
- "grad_norm": 547.4005126953125,
631
  "learning_rate": 8.979591836734695e-06,
632
- "loss": 4.9103,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
- "grad_norm": 25.792213439941406,
638
  "learning_rate": 9.081632653061225e-06,
639
- "loss": 0.0762,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
- "grad_norm": 10.455421447753906,
645
  "learning_rate": 9.183673469387756e-06,
646
- "loss": 0.0444,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
- "grad_norm": 472.54376220703125,
652
  "learning_rate": 9.285714285714288e-06,
653
- "loss": 1.8609,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
- "grad_norm": 31.092357635498047,
659
  "learning_rate": 9.387755102040818e-06,
660
- "loss": 0.1489,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
- "grad_norm": 231.94151306152344,
666
  "learning_rate": 9.489795918367348e-06,
667
- "loss": 0.5926,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
- "grad_norm": 211.05117797851562,
673
  "learning_rate": 9.591836734693878e-06,
674
- "loss": 0.5344,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
- "grad_norm": 217.01339721679688,
680
  "learning_rate": 9.693877551020408e-06,
681
- "loss": 0.4693,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
- "grad_norm": 1123.96484375,
687
  "learning_rate": 9.795918367346939e-06,
688
- "loss": 9.2282,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
- "grad_norm": 741.597412109375,
694
  "learning_rate": 9.89795918367347e-06,
695
- "loss": 4.6238,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
- "eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
701
- "eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
702
- "eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
703
- "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
704
- "eval_dim_1024_cosine_map@100": 0.45394800707643057,
705
- "eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
706
- "eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
707
- "eval_dim_1024_cosine_precision@1": 0.36235595390524966,
708
- "eval_dim_1024_cosine_precision@10": 0.3176696542893726,
709
- "eval_dim_1024_cosine_precision@3": 0.36192915066154496,
710
- "eval_dim_1024_cosine_precision@5": 0.35172855313700385,
711
- "eval_dim_1024_cosine_recall@1": 0.04346309464734114,
712
- "eval_dim_1024_cosine_recall@10": 0.28096984500258326,
713
- "eval_dim_1024_cosine_recall@3": 0.12757812796185336,
714
- "eval_dim_1024_cosine_recall@5": 0.19200836801442767,
715
- "eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
716
- "eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
717
- "eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
718
  "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
- "eval_dim_128_cosine_map@100": 0.3963095303049961,
720
- "eval_dim_128_cosine_mrr@10": 0.3199812511432227,
721
- "eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
722
- "eval_dim_128_cosine_precision@1": 0.3085787451984635,
723
- "eval_dim_128_cosine_precision@10": 0.2752880921895006,
724
- "eval_dim_128_cosine_precision@3": 0.3079385403329065,
725
- "eval_dim_128_cosine_precision@5": 0.29961587708066584,
726
- "eval_dim_128_cosine_recall@1": 0.036297623853982414,
727
- "eval_dim_128_cosine_recall@10": 0.24000960695821508,
728
- "eval_dim_128_cosine_recall@3": 0.10638786483158841,
729
- "eval_dim_128_cosine_recall@5": 0.16032639984514846,
730
- "eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
731
- "eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
732
- "eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
733
- "eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
734
- "eval_dim_256_cosine_map@100": 0.4298669852983799,
735
- "eval_dim_256_cosine_mrr@10": 0.3551361197487955,
736
- "eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
737
- "eval_dim_256_cosine_precision@1": 0.3437900128040973,
738
- "eval_dim_256_cosine_precision@10": 0.3040973111395647,
739
- "eval_dim_256_cosine_precision@3": 0.342936406316688,
740
- "eval_dim_256_cosine_precision@5": 0.33457106274007686,
741
- "eval_dim_256_cosine_recall@1": 0.04013102608834382,
742
- "eval_dim_256_cosine_recall@10": 0.2648598688529433,
743
- "eval_dim_256_cosine_recall@3": 0.11771735023719074,
744
- "eval_dim_256_cosine_recall@5": 0.17837935755014916,
745
- "eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
746
- "eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
747
- "eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
748
- "eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
749
- "eval_dim_512_cosine_map@100": 0.4476805587612892,
750
- "eval_dim_512_cosine_mrr@10": 0.37212542934373866,
751
- "eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
752
- "eval_dim_512_cosine_precision@1": 0.35979513444302175,
753
- "eval_dim_512_cosine_precision@10": 0.3173495518565941,
754
- "eval_dim_512_cosine_precision@3": 0.35936833119931705,
755
- "eval_dim_512_cosine_precision@5": 0.34967989756722156,
756
- "eval_dim_512_cosine_recall@1": 0.04265405128130224,
757
- "eval_dim_512_cosine_recall@10": 0.2781876565001863,
758
- "eval_dim_512_cosine_recall@3": 0.12523102347193127,
759
- "eval_dim_512_cosine_recall@5": 0.18912519336740205,
760
- "eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
761
- "eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
762
- "eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
763
- "eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
764
- "eval_dim_64_cosine_map@100": 0.3539045084602349,
765
- "eval_dim_64_cosine_mrr@10": 0.28429414873076814,
766
- "eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
767
- "eval_dim_64_cosine_precision@1": 0.2740076824583867,
768
- "eval_dim_64_cosine_precision@10": 0.24571062740076827,
769
- "eval_dim_64_cosine_precision@3": 0.27315407597097735,
770
- "eval_dim_64_cosine_precision@5": 0.2670934699103713,
771
- "eval_dim_64_cosine_recall@1": 0.03167890172057568,
772
- "eval_dim_64_cosine_recall@10": 0.21092883720941633,
773
- "eval_dim_64_cosine_recall@3": 0.09267023360511464,
774
- "eval_dim_64_cosine_recall@5": 0.14048625468314752,
775
- "eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
776
- "eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
777
- "eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
778
- "eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
779
- "eval_dim_768_cosine_map@100": 0.4493001842217619,
780
- "eval_dim_768_cosine_mrr@10": 0.37149335406377615,
781
- "eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
782
- "eval_dim_768_cosine_precision@1": 0.3591549295774648,
783
- "eval_dim_768_cosine_precision@10": 0.31670934699103714,
784
- "eval_dim_768_cosine_precision@3": 0.3587281263337601,
785
- "eval_dim_768_cosine_precision@5": 0.34852752880921894,
786
- "eval_dim_768_cosine_recall@1": 0.04250079684114586,
787
- "eval_dim_768_cosine_recall@10": 0.27695909667507057,
788
- "eval_dim_768_cosine_recall@3": 0.12462187901616553,
789
- "eval_dim_768_cosine_recall@5": 0.1875478484365334,
790
- "eval_runtime": 99.0843,
791
  "eval_samples_per_second": 0.0,
792
- "eval_sequential_score": 0.29402896525927075,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  },
796
  {
797
  "epoch": 1.010204081632653,
798
- "grad_norm": 342.861328125,
799
  "learning_rate": 1e-05,
800
- "loss": 1.9644,
801
  "step": 99
802
  },
803
  {
804
  "epoch": 1.0204081632653061,
805
- "grad_norm": 761.8235473632812,
806
  "learning_rate": 1.0102040816326531e-05,
807
- "loss": 7.4242,
808
  "step": 100
809
  },
810
  {
811
  "epoch": 1.030612244897959,
812
- "grad_norm": 146.39175415039062,
813
  "learning_rate": 1.0204081632653063e-05,
814
- "loss": 0.9592,
815
  "step": 101
816
  },
817
  {
818
  "epoch": 1.0408163265306123,
819
- "grad_norm": 69.37447357177734,
820
  "learning_rate": 1.0306122448979591e-05,
821
- "loss": 0.3051,
822
  "step": 102
823
  },
824
  {
825
  "epoch": 1.0510204081632653,
826
- "grad_norm": 241.93687438964844,
827
  "learning_rate": 1.0408163265306123e-05,
828
- "loss": 0.926,
829
  "step": 103
830
  },
831
  {
832
  "epoch": 1.0612244897959184,
833
- "grad_norm": 13.75313949584961,
834
  "learning_rate": 1.0510204081632654e-05,
835
- "loss": 0.0751,
836
  "step": 104
837
  },
838
  {
839
  "epoch": 1.0714285714285714,
840
- "grad_norm": 1.861573576927185,
841
  "learning_rate": 1.0612244897959186e-05,
842
- "loss": 0.0111,
843
  "step": 105
844
  },
845
  {
846
  "epoch": 1.0816326530612246,
847
- "grad_norm": 1.4446377754211426,
848
  "learning_rate": 1.0714285714285714e-05,
849
- "loss": 0.0072,
850
  "step": 106
851
  },
852
  {
853
  "epoch": 1.0918367346938775,
854
- "grad_norm": 2.217988967895508,
855
  "learning_rate": 1.0816326530612246e-05,
856
- "loss": 0.0107,
857
  "step": 107
858
  },
859
  {
860
  "epoch": 1.1020408163265305,
861
- "grad_norm": 620.331787109375,
862
  "learning_rate": 1.0918367346938776e-05,
863
- "loss": 3.4505,
864
  "step": 108
865
  },
866
  {
867
  "epoch": 1.1122448979591837,
868
- "grad_norm": 1.4038218259811401,
869
  "learning_rate": 1.1020408163265306e-05,
870
- "loss": 0.005,
871
  "step": 109
872
  },
873
  {
874
  "epoch": 1.1224489795918366,
875
- "grad_norm": 46.48203659057617,
876
  "learning_rate": 1.1122448979591838e-05,
877
- "loss": 0.1701,
878
  "step": 110
879
  },
880
  {
881
  "epoch": 1.1326530612244898,
882
- "grad_norm": 6.003911972045898,
883
  "learning_rate": 1.1224489795918367e-05,
884
- "loss": 0.027,
885
  "step": 111
886
  },
887
  {
888
  "epoch": 1.1428571428571428,
889
- "grad_norm": 379.09527587890625,
890
  "learning_rate": 1.1326530612244899e-05,
891
- "loss": 1.3824,
892
  "step": 112
893
  },
894
  {
895
  "epoch": 1.153061224489796,
896
- "grad_norm": 1103.1077880859375,
897
  "learning_rate": 1.1428571428571429e-05,
898
- "loss": 8.1459,
899
  "step": 113
900
  },
901
  {
902
  "epoch": 1.163265306122449,
903
- "grad_norm": 29.499439239501953,
904
  "learning_rate": 1.1530612244897961e-05,
905
- "loss": 0.0917,
906
  "step": 114
907
  },
908
  {
909
  "epoch": 1.1734693877551021,
910
- "grad_norm": 0.06352390348911285,
911
  "learning_rate": 1.1632653061224491e-05,
912
- "loss": 0.0003,
913
  "step": 115
914
  },
915
  {
916
  "epoch": 1.183673469387755,
917
- "grad_norm": 111.55418395996094,
918
  "learning_rate": 1.1734693877551021e-05,
919
- "loss": 0.3716,
920
  "step": 116
921
  },
922
  {
923
  "epoch": 1.193877551020408,
924
- "grad_norm": 32.166500091552734,
925
  "learning_rate": 1.1836734693877552e-05,
926
- "loss": 0.1704,
927
  "step": 117
928
  },
929
  {
930
  "epoch": 1.2040816326530612,
931
- "grad_norm": 870.0745239257812,
932
  "learning_rate": 1.1938775510204084e-05,
933
- "loss": 9.8059,
934
  "step": 118
935
  },
936
  {
937
  "epoch": 1.2142857142857142,
938
- "grad_norm": 139.17662048339844,
939
  "learning_rate": 1.2040816326530614e-05,
940
- "loss": 0.5882,
941
  "step": 119
942
  },
943
  {
944
  "epoch": 1.2244897959183674,
945
- "grad_norm": 28.489713668823242,
946
  "learning_rate": 1.2142857142857142e-05,
947
- "loss": 0.0531,
948
  "step": 120
949
  },
950
  {
951
  "epoch": 1.2346938775510203,
952
- "grad_norm": 0.18062859773635864,
953
  "learning_rate": 1.2244897959183674e-05,
954
- "loss": 0.0005,
955
  "step": 121
956
  },
957
  {
958
  "epoch": 1.2448979591836735,
959
- "grad_norm": 8.26645565032959,
960
  "learning_rate": 1.2346938775510204e-05,
961
- "loss": 0.0314,
962
  "step": 122
963
  },
964
  {
965
  "epoch": 1.2551020408163265,
966
- "grad_norm": 64.67955017089844,
967
  "learning_rate": 1.2448979591836736e-05,
968
- "loss": 0.1811,
969
  "step": 123
970
  },
971
  {
972
  "epoch": 1.2653061224489797,
973
- "grad_norm": 420.44439697265625,
974
  "learning_rate": 1.2551020408163267e-05,
975
- "loss": 2.6136,
976
  "step": 124
977
  },
978
  {
979
  "epoch": 1.2755102040816326,
980
- "grad_norm": 3.5323660373687744,
981
  "learning_rate": 1.2653061224489798e-05,
982
- "loss": 0.0087,
983
  "step": 125
984
  },
985
  {
986
  "epoch": 1.2857142857142856,
987
- "grad_norm": 52.854801177978516,
988
  "learning_rate": 1.2755102040816327e-05,
989
- "loss": 0.1269,
990
  "step": 126
991
  },
992
  {
993
  "epoch": 1.2959183673469388,
994
- "grad_norm": 4.583413124084473,
995
  "learning_rate": 1.2857142857142859e-05,
996
- "loss": 0.0091,
997
  "step": 127
998
  },
999
  {
1000
  "epoch": 1.306122448979592,
1001
- "grad_norm": 17.20958137512207,
1002
  "learning_rate": 1.2959183673469389e-05,
1003
- "loss": 0.0467,
1004
  "step": 128
1005
  },
1006
  {
1007
  "epoch": 1.316326530612245,
1008
- "grad_norm": 8.821357727050781,
1009
  "learning_rate": 1.3061224489795918e-05,
1010
- "loss": 0.0282,
1011
  "step": 129
1012
  },
1013
  {
1014
  "epoch": 1.3265306122448979,
1015
- "grad_norm": 0.3024923503398895,
1016
  "learning_rate": 1.316326530612245e-05,
1017
- "loss": 0.0012,
1018
  "step": 130
1019
  },
1020
  {
1021
  "epoch": 1.336734693877551,
1022
- "grad_norm": 1110.76513671875,
1023
  "learning_rate": 1.326530612244898e-05,
1024
- "loss": 3.5135,
1025
  "step": 131
1026
  },
1027
  {
1028
  "epoch": 1.346938775510204,
1029
- "grad_norm": 4.655632495880127,
1030
  "learning_rate": 1.3367346938775512e-05,
1031
- "loss": 0.0186,
1032
  "step": 132
1033
  },
1034
  {
1035
  "epoch": 1.3571428571428572,
1036
- "grad_norm": 641.764404296875,
1037
  "learning_rate": 1.3469387755102042e-05,
1038
- "loss": 3.2599,
1039
  "step": 133
1040
  },
1041
  {
1042
  "epoch": 1.3673469387755102,
1043
- "grad_norm": 1076.8260498046875,
1044
  "learning_rate": 1.3571428571428574e-05,
1045
- "loss": 5.5417,
1046
  "step": 134
1047
  },
1048
  {
1049
  "epoch": 1.3775510204081631,
1050
- "grad_norm": 0.5416738390922546,
1051
  "learning_rate": 1.3673469387755102e-05,
1052
- "loss": 0.0019,
1053
  "step": 135
1054
  },
1055
  {
1056
  "epoch": 1.3877551020408163,
1057
- "grad_norm": 200.03311157226562,
1058
  "learning_rate": 1.3775510204081634e-05,
1059
- "loss": 0.5649,
1060
  "step": 136
1061
  },
1062
  {
1063
  "epoch": 1.3979591836734695,
1064
- "grad_norm": 35.22038650512695,
1065
  "learning_rate": 1.3877551020408165e-05,
1066
- "loss": 0.084,
1067
  "step": 137
1068
  },
1069
  {
1070
  "epoch": 1.4081632653061225,
1071
- "grad_norm": 141.9106903076172,
1072
  "learning_rate": 1.3979591836734696e-05,
1073
- "loss": 0.6062,
1074
  "step": 138
1075
  },
1076
  {
1077
  "epoch": 1.4183673469387754,
1078
- "grad_norm": 15.920783996582031,
1079
  "learning_rate": 1.4081632653061225e-05,
1080
- "loss": 0.0639,
1081
  "step": 139
1082
  },
1083
  {
1084
  "epoch": 1.4285714285714286,
1085
- "grad_norm": 206.33274841308594,
1086
  "learning_rate": 1.4183673469387755e-05,
1087
- "loss": 0.4069,
1088
  "step": 140
1089
  },
1090
  {
1091
  "epoch": 1.4387755102040816,
1092
- "grad_norm": 51.149173736572266,
1093
  "learning_rate": 1.4285714285714287e-05,
1094
- "loss": 0.2462,
1095
  "step": 141
1096
  },
1097
  {
1098
  "epoch": 1.4489795918367347,
1099
- "grad_norm": 658.653564453125,
1100
  "learning_rate": 1.4387755102040817e-05,
1101
- "loss": 4.9288,
1102
  "step": 142
1103
  },
1104
  {
1105
  "epoch": 1.4591836734693877,
1106
- "grad_norm": 63.49065399169922,
1107
  "learning_rate": 1.448979591836735e-05,
1108
- "loss": 0.1852,
1109
  "step": 143
1110
  },
1111
  {
1112
  "epoch": 1.469387755102041,
1113
- "grad_norm": 1453.699462890625,
1114
  "learning_rate": 1.4591836734693878e-05,
1115
- "loss": 3.0971,
1116
  "step": 144
1117
  },
1118
  {
1119
  "epoch": 1.4795918367346939,
1120
- "grad_norm": 499.0628662109375,
1121
  "learning_rate": 1.469387755102041e-05,
1122
- "loss": 3.787,
1123
  "step": 145
1124
  },
1125
  {
1126
  "epoch": 1.489795918367347,
1127
- "grad_norm": 253.33152770996094,
1128
  "learning_rate": 1.479591836734694e-05,
1129
- "loss": 0.8474,
1130
  "step": 146
1131
  },
1132
  {
1133
  "epoch": 1.5,
1134
- "grad_norm": 0.8343175649642944,
1135
  "learning_rate": 1.4897959183673472e-05,
1136
- "loss": 0.0028,
1137
  "step": 147
1138
  },
1139
  {
1140
  "epoch": 1.510204081632653,
1141
- "grad_norm": 38.5785026550293,
1142
  "learning_rate": 1.5000000000000002e-05,
1143
- "loss": 0.0931,
1144
  "step": 148
1145
  },
1146
  {
1147
  "epoch": 1.5204081632653061,
1148
- "grad_norm": 563.4974365234375,
1149
  "learning_rate": 1.510204081632653e-05,
1150
- "loss": 1.8378,
1151
  "step": 149
1152
  },
1153
  {
1154
  "epoch": 1.5306122448979593,
1155
- "grad_norm": 749.0945434570312,
1156
  "learning_rate": 1.5204081632653063e-05,
1157
- "loss": 2.6074,
1158
  "step": 150
1159
  },
1160
  {
1161
  "epoch": 1.5408163265306123,
1162
- "grad_norm": 62.52786636352539,
1163
  "learning_rate": 1.530612244897959e-05,
1164
- "loss": 0.1441,
1165
  "step": 151
1166
  },
1167
  {
1168
  "epoch": 1.5510204081632653,
1169
- "grad_norm": 281.54400634765625,
1170
  "learning_rate": 1.5408163265306123e-05,
1171
- "loss": 0.5622,
1172
  "step": 152
1173
  },
1174
  {
1175
  "epoch": 1.5612244897959182,
1176
- "grad_norm": 1.1233166456222534,
1177
  "learning_rate": 1.5510204081632655e-05,
1178
- "loss": 0.0049,
1179
  "step": 153
1180
  },
1181
  {
1182
  "epoch": 1.5714285714285714,
1183
- "grad_norm": 9.458003044128418,
1184
  "learning_rate": 1.5612244897959187e-05,
1185
- "loss": 0.0268,
1186
  "step": 154
1187
  },
1188
  {
1189
  "epoch": 1.5816326530612246,
1190
- "grad_norm": 7.9042439460754395,
1191
  "learning_rate": 1.5714285714285715e-05,
1192
- "loss": 0.0281,
1193
  "step": 155
1194
  },
1195
  {
1196
  "epoch": 1.5918367346938775,
1197
- "grad_norm": 402.8667907714844,
1198
  "learning_rate": 1.5816326530612247e-05,
1199
- "loss": 2.9755,
1200
  "step": 156
1201
  },
1202
  {
1203
  "epoch": 1.6020408163265305,
1204
- "grad_norm": 359.3101806640625,
1205
  "learning_rate": 1.5918367346938776e-05,
1206
- "loss": 1.0982,
1207
  "step": 157
1208
  },
1209
  {
1210
  "epoch": 1.6122448979591837,
1211
- "grad_norm": 26.466707229614258,
1212
  "learning_rate": 1.6020408163265308e-05,
1213
- "loss": 0.0621,
1214
  "step": 158
1215
  },
1216
  {
1217
  "epoch": 1.6224489795918369,
1218
- "grad_norm": 472.1581726074219,
1219
  "learning_rate": 1.612244897959184e-05,
1220
- "loss": 6.9631,
1221
  "step": 159
1222
  },
1223
  {
1224
  "epoch": 1.6326530612244898,
1225
- "grad_norm": 812.54638671875,
1226
  "learning_rate": 1.6224489795918368e-05,
1227
- "loss": 4.7216,
1228
  "step": 160
1229
  },
1230
  {
1231
  "epoch": 1.6428571428571428,
1232
- "grad_norm": 252.12796020507812,
1233
  "learning_rate": 1.63265306122449e-05,
1234
- "loss": 0.848,
1235
  "step": 161
1236
  },
1237
  {
1238
  "epoch": 1.6530612244897958,
1239
- "grad_norm": 1087.48828125,
1240
  "learning_rate": 1.642857142857143e-05,
1241
- "loss": 5.6006,
1242
  "step": 162
1243
  },
1244
  {
1245
  "epoch": 1.663265306122449,
1246
- "grad_norm": 280.405517578125,
1247
  "learning_rate": 1.653061224489796e-05,
1248
- "loss": 4.299,
1249
  "step": 163
1250
  },
1251
  {
1252
  "epoch": 1.6734693877551021,
1253
- "grad_norm": 457.81494140625,
1254
  "learning_rate": 1.6632653061224492e-05,
1255
- "loss": 2.042,
1256
  "step": 164
1257
  },
1258
  {
1259
  "epoch": 1.683673469387755,
1260
- "grad_norm": 511.0380859375,
1261
  "learning_rate": 1.673469387755102e-05,
1262
- "loss": 2.4823,
1263
  "step": 165
1264
  },
1265
  {
1266
  "epoch": 1.693877551020408,
1267
- "grad_norm": 7.505221366882324,
1268
  "learning_rate": 1.6836734693877553e-05,
1269
- "loss": 0.0189,
1270
  "step": 166
1271
  },
1272
  {
1273
  "epoch": 1.7040816326530612,
1274
- "grad_norm": 1.01173734664917,
1275
  "learning_rate": 1.6938775510204085e-05,
1276
- "loss": 0.0039,
1277
  "step": 167
1278
  },
1279
  {
1280
  "epoch": 1.7142857142857144,
1281
- "grad_norm": 0.5971992015838623,
1282
  "learning_rate": 1.7040816326530613e-05,
1283
- "loss": 0.0024,
1284
  "step": 168
1285
  },
1286
  {
1287
  "epoch": 1.7244897959183674,
1288
- "grad_norm": 505.6401672363281,
1289
  "learning_rate": 1.7142857142857142e-05,
1290
- "loss": 2.0453,
1291
  "step": 169
1292
  },
1293
  {
1294
  "epoch": 1.7346938775510203,
1295
- "grad_norm": 4.466002464294434,
1296
  "learning_rate": 1.7244897959183674e-05,
1297
- "loss": 0.0092,
1298
  "step": 170
1299
  },
1300
  {
1301
  "epoch": 1.7448979591836735,
1302
- "grad_norm": 1.1195125579833984,
1303
  "learning_rate": 1.7346938775510206e-05,
1304
- "loss": 0.0029,
1305
  "step": 171
1306
  },
1307
  {
1308
  "epoch": 1.7551020408163265,
1309
- "grad_norm": 104.82202911376953,
1310
  "learning_rate": 1.7448979591836738e-05,
1311
- "loss": 0.3271,
1312
  "step": 172
1313
  },
1314
  {
1315
  "epoch": 1.7653061224489797,
1316
- "grad_norm": 1.860406756401062,
1317
  "learning_rate": 1.7551020408163266e-05,
1318
- "loss": 0.0054,
1319
  "step": 173
1320
  },
1321
  {
1322
  "epoch": 1.7755102040816326,
1323
- "grad_norm": 0.044311508536338806,
1324
  "learning_rate": 1.7653061224489798e-05,
1325
- "loss": 0.0002,
1326
  "step": 174
1327
  },
1328
  {
1329
  "epoch": 1.7857142857142856,
1330
- "grad_norm": 40.70656204223633,
1331
  "learning_rate": 1.7755102040816327e-05,
1332
- "loss": 0.0685,
1333
  "step": 175
1334
  },
1335
  {
1336
  "epoch": 1.7959183673469388,
1337
- "grad_norm": 395.348388671875,
1338
  "learning_rate": 1.785714285714286e-05,
1339
- "loss": 1.3097,
1340
  "step": 176
1341
  },
1342
  {
1343
  "epoch": 1.806122448979592,
1344
- "grad_norm": 326.2778015136719,
1345
  "learning_rate": 1.795918367346939e-05,
1346
- "loss": 1.8817,
1347
  "step": 177
1348
  },
1349
  {
1350
  "epoch": 1.816326530612245,
1351
- "grad_norm": 41.05072784423828,
1352
  "learning_rate": 1.806122448979592e-05,
1353
- "loss": 0.2497,
1354
  "step": 178
1355
  },
1356
  {
1357
  "epoch": 1.8265306122448979,
1358
- "grad_norm": 121.29589080810547,
1359
  "learning_rate": 1.816326530612245e-05,
1360
- "loss": 0.5822,
1361
  "step": 179
1362
  },
1363
  {
1364
  "epoch": 1.836734693877551,
1365
- "grad_norm": 711.2618408203125,
1366
  "learning_rate": 1.826530612244898e-05,
1367
- "loss": 1.8103,
1368
  "step": 180
1369
  },
1370
  {
1371
  "epoch": 1.8469387755102042,
1372
- "grad_norm": 500.7347106933594,
1373
  "learning_rate": 1.836734693877551e-05,
1374
- "loss": 1.5506,
1375
  "step": 181
1376
  },
1377
  {
1378
  "epoch": 1.8571428571428572,
1379
- "grad_norm": 252.05322265625,
1380
  "learning_rate": 1.8469387755102043e-05,
1381
- "loss": 1.281,
1382
  "step": 182
1383
  },
1384
  {
1385
  "epoch": 1.8673469387755102,
1386
- "grad_norm": 370.9935302734375,
1387
  "learning_rate": 1.8571428571428575e-05,
1388
- "loss": 2.8616,
1389
  "step": 183
1390
  },
1391
  {
1392
  "epoch": 1.8775510204081631,
1393
- "grad_norm": 4.682647705078125,
1394
  "learning_rate": 1.8673469387755104e-05,
1395
- "loss": 0.0118,
1396
  "step": 184
1397
  },
1398
  {
1399
  "epoch": 1.8877551020408163,
1400
- "grad_norm": 2.143557548522949,
1401
  "learning_rate": 1.8775510204081636e-05,
1402
- "loss": 0.0038,
1403
  "step": 185
1404
  },
1405
  {
1406
  "epoch": 1.8979591836734695,
1407
- "grad_norm": 6.499508857727051,
1408
  "learning_rate": 1.8877551020408164e-05,
1409
- "loss": 0.0331,
1410
  "step": 186
1411
  },
1412
  {
1413
  "epoch": 1.9081632653061225,
1414
- "grad_norm": 7.2162089347839355,
1415
  "learning_rate": 1.8979591836734696e-05,
1416
- "loss": 0.0273,
1417
  "step": 187
1418
  },
1419
  {
1420
  "epoch": 1.9183673469387754,
1421
- "grad_norm": 23.073841094970703,
1422
  "learning_rate": 1.9081632653061225e-05,
1423
- "loss": 0.1026,
1424
  "step": 188
1425
  },
1426
  {
1427
  "epoch": 1.9285714285714286,
1428
- "grad_norm": 48.74525833129883,
1429
  "learning_rate": 1.9183673469387756e-05,
1430
- "loss": 0.1942,
1431
  "step": 189
1432
  },
1433
  {
1434
  "epoch": 1.9387755102040818,
1435
- "grad_norm": 384.64678955078125,
1436
  "learning_rate": 1.928571428571429e-05,
1437
- "loss": 3.4886,
1438
  "step": 190
1439
  },
1440
  {
1441
  "epoch": 1.9489795918367347,
1442
- "grad_norm": 103.53422546386719,
1443
  "learning_rate": 1.9387755102040817e-05,
1444
- "loss": 0.628,
1445
  "step": 191
1446
  },
1447
  {
1448
  "epoch": 1.9591836734693877,
1449
- "grad_norm": 42.5008544921875,
1450
  "learning_rate": 1.948979591836735e-05,
1451
- "loss": 0.1967,
1452
  "step": 192
1453
  },
1454
  {
1455
  "epoch": 1.9693877551020407,
1456
- "grad_norm": 145.1553955078125,
1457
  "learning_rate": 1.9591836734693877e-05,
1458
- "loss": 3.9822,
1459
  "step": 193
1460
  },
1461
  {
1462
  "epoch": 1.9795918367346939,
1463
- "grad_norm": 0.07428821176290512,
1464
  "learning_rate": 1.969387755102041e-05,
1465
- "loss": 0.0003,
1466
  "step": 194
1467
  },
1468
  {
1469
  "epoch": 1.989795918367347,
1470
- "grad_norm": 545.6088256835938,
1471
  "learning_rate": 1.979591836734694e-05,
1472
- "loss": 3.7309,
1473
  "step": 195
1474
  },
1475
  {
1476
  "epoch": 2.0,
1477
- "grad_norm": 0.5490627288818359,
1478
  "learning_rate": 1.9897959183673473e-05,
1479
- "loss": 0.0024,
1480
  "step": 196
1481
  },
1482
  {
1483
  "epoch": 2.0,
1484
- "eval_dim_1024_cosine_accuracy@1": 0.32522407170294493,
1485
- "eval_dim_1024_cosine_accuracy@10": 0.3969270166453265,
1486
- "eval_dim_1024_cosine_accuracy@3": 0.33290653008962867,
1487
- "eval_dim_1024_cosine_accuracy@5": 0.36043533930857874,
1488
- "eval_dim_1024_cosine_map@100": 0.4164888021641558,
1489
- "eval_dim_1024_cosine_mrr@10": 0.33769460195516493,
1490
- "eval_dim_1024_cosine_ndcg@10": 0.34986350069216465,
1491
- "eval_dim_1024_cosine_precision@1": 0.32522407170294493,
1492
- "eval_dim_1024_cosine_precision@10": 0.28361075544174136,
1493
- "eval_dim_1024_cosine_precision@3": 0.3254374733247973,
1494
- "eval_dim_1024_cosine_precision@5": 0.31626120358514725,
1495
- "eval_dim_1024_cosine_recall@1": 0.04113491331982186,
1496
- "eval_dim_1024_cosine_recall@10": 0.2664549051060991,
1497
- "eval_dim_1024_cosine_recall@3": 0.12080229545561262,
1498
- "eval_dim_1024_cosine_recall@5": 0.18183789253196145,
1499
- "eval_dim_128_cosine_accuracy@1": 0.30217669654289375,
1500
- "eval_dim_128_cosine_accuracy@10": 0.3546734955185659,
1501
- "eval_dim_128_cosine_accuracy@3": 0.3072983354673495,
1502
- "eval_dim_128_cosine_accuracy@5": 0.3265044814340589,
1503
- "eval_dim_128_cosine_map@100": 0.38014172959059034,
1504
- "eval_dim_128_cosine_mrr@10": 0.3112729406743488,
1505
- "eval_dim_128_cosine_ndcg@10": 0.32071443787836906,
1506
- "eval_dim_128_cosine_precision@1": 0.30217669654289375,
1507
- "eval_dim_128_cosine_precision@10": 0.26312419974391804,
1508
- "eval_dim_128_cosine_precision@3": 0.30239009816474605,
1509
- "eval_dim_128_cosine_precision@5": 0.29359795134443023,
1510
- "eval_dim_128_cosine_recall@1": 0.03603846894598867,
1511
- "eval_dim_128_cosine_recall@10": 0.23664446759855584,
1512
- "eval_dim_128_cosine_recall@3": 0.10607255532328354,
1513
- "eval_dim_128_cosine_recall@5": 0.15998840334482403,
1514
- "eval_dim_256_cosine_accuracy@1": 0.31049935979513443,
1515
- "eval_dim_256_cosine_accuracy@10": 0.3725992317541613,
1516
- "eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
1517
- "eval_dim_256_cosine_accuracy@5": 0.34571062740076824,
1518
- "eval_dim_256_cosine_map@100": 0.3940538127924734,
1519
- "eval_dim_256_cosine_mrr@10": 0.3219094872263883,
1520
- "eval_dim_256_cosine_ndcg@10": 0.33365785011470184,
1521
- "eval_dim_256_cosine_precision@1": 0.31049935979513443,
1522
- "eval_dim_256_cosine_precision@10": 0.2727272727272727,
1523
- "eval_dim_256_cosine_precision@3": 0.3109261630388391,
1524
- "eval_dim_256_cosine_precision@5": 0.3035851472471191,
1525
- "eval_dim_256_cosine_recall@1": 0.0379038673811849,
1526
- "eval_dim_256_cosine_recall@10": 0.25061548215235363,
1527
- "eval_dim_256_cosine_recall@3": 0.11184662439829526,
1528
- "eval_dim_256_cosine_recall@5": 0.16972372403865282,
1529
- "eval_dim_512_cosine_accuracy@1": 0.32842509603072984,
1530
- "eval_dim_512_cosine_accuracy@10": 0.39564660691421255,
1531
- "eval_dim_512_cosine_accuracy@3": 0.33418693982074266,
1532
- "eval_dim_512_cosine_accuracy@5": 0.36555697823303457,
1533
- "eval_dim_512_cosine_map@100": 0.4125328284000196,
1534
- "eval_dim_512_cosine_mrr@10": 0.34027168058858154,
1535
- "eval_dim_512_cosine_ndcg@10": 0.3525488928748249,
1536
- "eval_dim_512_cosine_precision@1": 0.32842509603072984,
1537
- "eval_dim_512_cosine_precision@10": 0.28693982074263763,
1538
- "eval_dim_512_cosine_precision@3": 0.3282116944088775,
1539
- "eval_dim_512_cosine_precision@5": 0.31997439180537773,
1540
- "eval_dim_512_cosine_recall@1": 0.04071091183465321,
1541
- "eval_dim_512_cosine_recall@10": 0.2638449444559509,
1542
- "eval_dim_512_cosine_recall@3": 0.11970757850133786,
1543
- "eval_dim_512_cosine_recall@5": 0.1806811237454132,
1544
- "eval_dim_64_cosine_accuracy@1": 0.28040973111395645,
1545
- "eval_dim_64_cosine_accuracy@10": 0.3348271446862996,
1546
- "eval_dim_64_cosine_accuracy@3": 0.28297055057618437,
1547
- "eval_dim_64_cosine_accuracy@5": 0.3072983354673495,
1548
- "eval_dim_64_cosine_map@100": 0.35085623648833997,
1549
- "eval_dim_64_cosine_mrr@10": 0.28944678170030247,
1550
- "eval_dim_64_cosine_ndcg@10": 0.2991224720529457,
1551
- "eval_dim_64_cosine_precision@1": 0.28040973111395645,
1552
- "eval_dim_64_cosine_precision@10": 0.24878361075544175,
1553
- "eval_dim_64_cosine_precision@3": 0.27955612462654716,
1554
- "eval_dim_64_cosine_precision@5": 0.27247119078105,
1555
- "eval_dim_64_cosine_recall@1": 0.03187808455878807,
1556
- "eval_dim_64_cosine_recall@10": 0.2128007008801171,
1557
- "eval_dim_64_cosine_recall@3": 0.09363361347149868,
1558
- "eval_dim_64_cosine_recall@5": 0.14192536615474802,
1559
- "eval_dim_768_cosine_accuracy@1": 0.32970550576184376,
1560
- "eval_dim_768_cosine_accuracy@10": 0.3994878361075544,
1561
- "eval_dim_768_cosine_accuracy@3": 0.33418693982074266,
1562
- "eval_dim_768_cosine_accuracy@5": 0.36427656850192064,
1563
- "eval_dim_768_cosine_map@100": 0.4160652625925415,
1564
- "eval_dim_768_cosine_mrr@10": 0.3415124585899229,
1565
- "eval_dim_768_cosine_ndcg@10": 0.35370573856938964,
1566
- "eval_dim_768_cosine_precision@1": 0.32970550576184376,
1567
- "eval_dim_768_cosine_precision@10": 0.2877720870678617,
1568
- "eval_dim_768_cosine_precision@3": 0.3288518992744345,
1569
- "eval_dim_768_cosine_precision@5": 0.31997439180537773,
1570
- "eval_dim_768_cosine_recall@1": 0.040955758827011135,
1571
- "eval_dim_768_cosine_recall@10": 0.26685683005601735,
1572
- "eval_dim_768_cosine_recall@3": 0.12009305539695316,
1573
- "eval_dim_768_cosine_recall@5": 0.18142212378067016,
1574
- "eval_runtime": 99.167,
1575
  "eval_samples_per_second": 0.0,
1576
- "eval_sequential_score": 0.2991224720529457,
1577
  "eval_steps_per_second": 0.0,
1578
  "step": 196
1579
  }
 
1
  {
2
  "best_global_step": 98,
3
+ "best_metric": 0.3299991425713933,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
+ "grad_norm": 196.63563537597656,
15
  "learning_rate": 0.0,
16
+ "loss": 4.0658,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
+ "grad_norm": 184.93710327148438,
22
  "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 5.2785,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
+ "grad_norm": 179.60655212402344,
29
  "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 4.349,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
+ "grad_norm": 163.9447479248047,
36
  "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 3.805,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
+ "grad_norm": 164.29776000976562,
43
  "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 3.1683,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
+ "grad_norm": 209.0354766845703,
50
  "learning_rate": 5.102040816326531e-07,
51
+ "loss": 5.3989,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
+ "grad_norm": 262.14971923828125,
57
  "learning_rate": 6.122448979591837e-07,
58
+ "loss": 8.211,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
+ "grad_norm": 188.68443298339844,
64
  "learning_rate": 7.142857142857143e-07,
65
+ "loss": 5.3598,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
+ "grad_norm": 216.530517578125,
71
  "learning_rate": 8.163265306122449e-07,
72
+ "loss": 5.0522,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
+ "grad_norm": 231.66384887695312,
78
  "learning_rate": 9.183673469387756e-07,
79
+ "loss": 4.4736,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
+ "grad_norm": 329.2440490722656,
85
  "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 8.3251,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
+ "grad_norm": 102.95913696289062,
92
  "learning_rate": 1.122448979591837e-06,
93
+ "loss": 2.5822,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
+ "grad_norm": 116.47322845458984,
99
  "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 2.8464,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
+ "grad_norm": 226.98976135253906,
106
  "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 5.8915,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
+ "grad_norm": 192.4533233642578,
113
  "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 7.2637,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
+ "grad_norm": 269.7630920410156,
120
  "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 7.4234,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
+ "grad_norm": 111.28227233886719,
127
  "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 2.3787,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
+ "grad_norm": 165.55792236328125,
134
  "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 3.1947,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
+ "grad_norm": 152.33682250976562,
141
  "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 3.4787,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
+ "grad_norm": 159.368408203125,
148
  "learning_rate": 1.938775510204082e-06,
149
+ "loss": 5.1418,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
+ "grad_norm": 112.97805786132812,
155
  "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 2.3042,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
+ "grad_norm": 199.43443298339844,
162
  "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 6.6786,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
+ "grad_norm": 241.95591735839844,
169
  "learning_rate": 2.244897959183674e-06,
170
+ "loss": 6.6721,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
+ "grad_norm": 249.65122985839844,
176
  "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 4.4896,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
+ "grad_norm": 183.51483154296875,
183
  "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 3.4416,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
+ "grad_norm": 286.1512756347656,
190
  "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 7.5134,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
+ "grad_norm": 98.32283782958984,
197
  "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 1.9577,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
+ "grad_norm": 274.64178466796875,
204
  "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 4.9552,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
+ "grad_norm": 142.77537536621094,
211
  "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 2.5202,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
+ "grad_norm": 127.23304748535156,
218
  "learning_rate": 2.959183673469388e-06,
219
+ "loss": 2.4486,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
+ "grad_norm": 99.88568878173828,
225
  "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 1.9923,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
+ "grad_norm": 151.1445770263672,
232
  "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 2.8301,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
+ "grad_norm": 203.54248046875,
239
  "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 3.7414,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
+ "grad_norm": 443.4117126464844,
246
  "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 5.2738,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
+ "grad_norm": 178.9974822998047,
253
  "learning_rate": 3.469387755102041e-06,
254
+ "loss": 3.791,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
+ "grad_norm": 122.32801818847656,
260
  "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.9081,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
+ "grad_norm": 189.6477813720703,
267
  "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 2.4172,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
+ "grad_norm": 222.67959594726562,
274
  "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 4.0417,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
+ "grad_norm": 160.97071838378906,
281
  "learning_rate": 3.877551020408164e-06,
282
+ "loss": 3.5591,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
+ "grad_norm": 178.01609802246094,
288
  "learning_rate": 3.979591836734694e-06,
289
+ "loss": 3.0139,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
+ "grad_norm": 142.32168579101562,
295
  "learning_rate": 4.081632653061225e-06,
296
+ "loss": 2.4836,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
+ "grad_norm": 148.1731719970703,
302
  "learning_rate": 4.183673469387755e-06,
303
+ "loss": 2.807,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
+ "grad_norm": 152.1929931640625,
309
  "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 1.9753,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
+ "grad_norm": 219.5394287109375,
316
  "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 0.9764,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
+ "grad_norm": 95.7768783569336,
323
  "learning_rate": 4.489795918367348e-06,
324
+ "loss": 1.1398,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
+ "grad_norm": 320.86529541015625,
330
  "learning_rate": 4.591836734693878e-06,
331
+ "loss": 6.7812,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
+ "grad_norm": 18.277860641479492,
337
  "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.2479,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
+ "grad_norm": 405.69024658203125,
344
  "learning_rate": 4.795918367346939e-06,
345
+ "loss": 6.041,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
+ "grad_norm": 104.91180419921875,
351
  "learning_rate": 4.897959183673469e-06,
352
+ "loss": 1.1839,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
+ "grad_norm": 110.48990631103516,
358
  "learning_rate": 5e-06,
359
+ "loss": 2.0933,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
+ "grad_norm": 135.2900390625,
365
  "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 1.8613,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
+ "grad_norm": 146.744140625,
372
  "learning_rate": 5.204081632653062e-06,
373
+ "loss": 2.9359,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
+ "grad_norm": 103.08831787109375,
379
  "learning_rate": 5.306122448979593e-06,
380
+ "loss": 1.0129,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
+ "grad_norm": 527.3735961914062,
386
  "learning_rate": 5.408163265306123e-06,
387
+ "loss": 11.8843,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
+ "grad_norm": 374.4621887207031,
393
  "learning_rate": 5.510204081632653e-06,
394
+ "loss": 5.8523,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
+ "grad_norm": 71.62016296386719,
400
  "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.832,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
+ "grad_norm": 287.0186462402344,
407
  "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 3.1778,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
+ "grad_norm": 31.022693634033203,
414
  "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.3919,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
+ "grad_norm": 412.99176025390625,
421
  "learning_rate": 5.918367346938776e-06,
422
+ "loss": 7.2526,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
+ "grad_norm": 40.534244537353516,
428
  "learning_rate": 6.020408163265307e-06,
429
+ "loss": 0.578,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
+ "grad_norm": 226.0243682861328,
435
  "learning_rate": 6.122448979591837e-06,
436
+ "loss": 2.5233,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
+ "grad_norm": 113.55558013916016,
442
  "learning_rate": 6.224489795918368e-06,
443
+ "loss": 1.5694,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
+ "grad_norm": 190.6112518310547,
449
  "learning_rate": 6.326530612244899e-06,
450
+ "loss": 2.2332,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
+ "grad_norm": 391.7434387207031,
456
  "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 4.5545,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
+ "grad_norm": 214.495361328125,
463
  "learning_rate": 6.530612244897959e-06,
464
+ "loss": 1.4804,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
+ "grad_norm": 62.52560806274414,
470
  "learning_rate": 6.63265306122449e-06,
471
+ "loss": 0.4391,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
+ "grad_norm": 298.97808837890625,
477
  "learning_rate": 6.734693877551021e-06,
478
+ "loss": 3.4715,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
+ "grad_norm": 483.84796142578125,
484
  "learning_rate": 6.836734693877551e-06,
485
+ "loss": 8.5808,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
+ "grad_norm": 482.55743408203125,
491
  "learning_rate": 6.938775510204082e-06,
492
+ "loss": 5.6959,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
+ "grad_norm": 198.6812744140625,
498
  "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 3.8277,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
+ "grad_norm": 293.4190673828125,
505
  "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 2.1832,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
+ "grad_norm": 13.164139747619629,
512
  "learning_rate": 7.244897959183675e-06,
513
+ "loss": 0.1244,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
+ "grad_norm": 163.4252166748047,
519
  "learning_rate": 7.346938775510205e-06,
520
+ "loss": 0.7707,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
+ "grad_norm": 193.64401245117188,
526
  "learning_rate": 7.448979591836736e-06,
527
+ "loss": 3.4828,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
+ "grad_norm": 178.31982421875,
533
  "learning_rate": 7.551020408163265e-06,
534
+ "loss": 2.9645,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
+ "grad_norm": 28.57689666748047,
540
  "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.2948,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
+ "grad_norm": 608.8088989257812,
547
  "learning_rate": 7.755102040816327e-06,
548
+ "loss": 12.6456,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
+ "grad_norm": 123.08556365966797,
554
  "learning_rate": 7.857142857142858e-06,
555
+ "loss": 1.2493,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
+ "grad_norm": 225.3292694091797,
561
  "learning_rate": 7.959183673469388e-06,
562
+ "loss": 2.6675,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
+ "grad_norm": 57.49665069580078,
568
  "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.5642,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
+ "grad_norm": 247.52210998535156,
575
  "learning_rate": 8.16326530612245e-06,
576
+ "loss": 1.6008,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
+ "grad_norm": 309.60382080078125,
582
  "learning_rate": 8.26530612244898e-06,
583
+ "loss": 3.257,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
+ "grad_norm": 183.82882690429688,
589
  "learning_rate": 8.36734693877551e-06,
590
+ "loss": 2.8086,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
+ "grad_norm": 88.08740234375,
596
  "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.4056,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
+ "grad_norm": 515.5130615234375,
603
  "learning_rate": 8.571428571428571e-06,
604
+ "loss": 3.711,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
+ "grad_norm": 2.946629285812378,
610
  "learning_rate": 8.673469387755103e-06,
611
+ "loss": 0.0253,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
+ "grad_norm": 31.143714904785156,
617
  "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.168,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
+ "grad_norm": 535.6795043945312,
624
  "learning_rate": 8.877551020408163e-06,
625
+ "loss": 5.0992,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
+ "grad_norm": 577.0897216796875,
631
  "learning_rate": 8.979591836734695e-06,
632
+ "loss": 5.3724,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
+ "grad_norm": 102.31855773925781,
638
  "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.9172,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
+ "grad_norm": 306.1739196777344,
645
  "learning_rate": 9.183673469387756e-06,
646
+ "loss": 3.1239,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
+ "grad_norm": 191.27415466308594,
652
  "learning_rate": 9.285714285714288e-06,
653
+ "loss": 1.4121,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
+ "grad_norm": 67.19822692871094,
659
  "learning_rate": 9.387755102040818e-06,
660
+ "loss": 0.2599,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
+ "grad_norm": 17.93955421447754,
666
  "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.1166,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
+ "grad_norm": 23.839630126953125,
673
  "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.1938,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
+ "grad_norm": 1459.8140869140625,
680
  "learning_rate": 9.693877551020408e-06,
681
+ "loss": 18.5143,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
+ "grad_norm": 670.869140625,
687
  "learning_rate": 9.795918367346939e-06,
688
+ "loss": 6.1932,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
+ "grad_norm": 942.95849609375,
694
  "learning_rate": 9.89795918367347e-06,
695
+ "loss": 20.3042,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.3719590268886043,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.43982074263764404,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.37836107554417414,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.4020486555697823,
704
+ "eval_dim_1024_cosine_map@100": 0.4604070214987707,
705
+ "eval_dim_1024_cosine_mrr@10": 0.383397099770339,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.3947688545057553,
707
+ "eval_dim_1024_cosine_precision@1": 0.3719590268886043,
708
+ "eval_dim_1024_cosine_precision@10": 0.3265044814340589,
709
+ "eval_dim_1024_cosine_precision@3": 0.37153222364489963,
710
+ "eval_dim_1024_cosine_precision@5": 0.3613316261203585,
711
+ "eval_dim_1024_cosine_recall@1": 0.04338670134208909,
712
+ "eval_dim_1024_cosine_recall@10": 0.28262195979320087,
713
+ "eval_dim_1024_cosine_recall@3": 0.1268773565773867,
714
+ "eval_dim_1024_cosine_recall@5": 0.19083511167371434,
715
+ "eval_dim_128_cosine_accuracy@1": 0.3053777208706786,
716
+ "eval_dim_128_cosine_accuracy@10": 0.37836107554417414,
717
+ "eval_dim_128_cosine_accuracy@3": 0.3111395646606914,
718
  "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
+ "eval_dim_128_cosine_map@100": 0.3904168259576031,
720
+ "eval_dim_128_cosine_mrr@10": 0.3174377070503828,
721
+ "eval_dim_128_cosine_ndcg@10": 0.3299991425713933,
722
+ "eval_dim_128_cosine_precision@1": 0.3053777208706786,
723
+ "eval_dim_128_cosine_precision@10": 0.271830985915493,
724
+ "eval_dim_128_cosine_precision@3": 0.30495091762697396,
725
+ "eval_dim_128_cosine_precision@5": 0.29641485275288093,
726
+ "eval_dim_128_cosine_recall@1": 0.03688049871840266,
727
+ "eval_dim_128_cosine_recall@10": 0.24260019246216608,
728
+ "eval_dim_128_cosine_recall@3": 0.10779952005618963,
729
+ "eval_dim_128_cosine_recall@5": 0.16176912684922656,
730
+ "eval_dim_256_cosine_accuracy@1": 0.324583866837388,
731
+ "eval_dim_256_cosine_accuracy@10": 0.4058898847631242,
732
+ "eval_dim_256_cosine_accuracy@3": 0.33290653008962867,
733
+ "eval_dim_256_cosine_accuracy@5": 0.3649167733674776,
734
+ "eval_dim_256_cosine_map@100": 0.4232469199200366,
735
+ "eval_dim_256_cosine_mrr@10": 0.33872299453285326,
736
+ "eval_dim_256_cosine_ndcg@10": 0.35307499975694673,
737
+ "eval_dim_256_cosine_precision@1": 0.324583866837388,
738
+ "eval_dim_256_cosine_precision@10": 0.2935339308578745,
739
+ "eval_dim_256_cosine_precision@3": 0.324583866837388,
740
+ "eval_dim_256_cosine_precision@5": 0.31792573623559534,
741
+ "eval_dim_256_cosine_recall@1": 0.038313787861467184,
742
+ "eval_dim_256_cosine_recall@10": 0.2549878568107636,
743
+ "eval_dim_256_cosine_recall@3": 0.11223891931505588,
744
+ "eval_dim_256_cosine_recall@5": 0.1697408782100328,
745
+ "eval_dim_512_cosine_accuracy@1": 0.34827144686299616,
746
+ "eval_dim_512_cosine_accuracy@10": 0.44302176696542894,
747
+ "eval_dim_512_cosine_accuracy@3": 0.3553137003841229,
748
+ "eval_dim_512_cosine_accuracy@5": 0.39436619718309857,
749
+ "eval_dim_512_cosine_map@100": 0.44994622162234726,
750
+ "eval_dim_512_cosine_mrr@10": 0.3644033392272826,
751
+ "eval_dim_512_cosine_ndcg@10": 0.3807642678190648,
752
+ "eval_dim_512_cosine_precision@1": 0.34827144686299616,
753
+ "eval_dim_512_cosine_precision@10": 0.31798975672215113,
754
+ "eval_dim_512_cosine_precision@3": 0.34827144686299616,
755
+ "eval_dim_512_cosine_precision@5": 0.3418693982074264,
756
+ "eval_dim_512_cosine_recall@1": 0.04125738861359979,
757
+ "eval_dim_512_cosine_recall@10": 0.2752162089385945,
758
+ "eval_dim_512_cosine_recall@3": 0.12077279112247459,
759
+ "eval_dim_512_cosine_recall@5": 0.18268801127884626,
760
+ "eval_dim_64_cosine_accuracy@1": 0.23175416133162613,
761
+ "eval_dim_64_cosine_accuracy@10": 0.29833546734955185,
762
+ "eval_dim_64_cosine_accuracy@3": 0.23879641485275288,
763
+ "eval_dim_64_cosine_accuracy@5": 0.2612035851472471,
764
+ "eval_dim_64_cosine_map@100": 0.3151829220617657,
765
+ "eval_dim_64_cosine_mrr@10": 0.24303802613661746,
766
+ "eval_dim_64_cosine_ndcg@10": 0.2548721998123125,
767
+ "eval_dim_64_cosine_precision@1": 0.23175416133162613,
768
+ "eval_dim_64_cosine_precision@10": 0.20864276568501922,
769
+ "eval_dim_64_cosine_precision@3": 0.23218096457533077,
770
+ "eval_dim_64_cosine_precision@5": 0.22740076824583869,
771
+ "eval_dim_64_cosine_recall@1": 0.028384798943475897,
772
+ "eval_dim_64_cosine_recall@10": 0.19540887275051927,
773
+ "eval_dim_64_cosine_recall@3": 0.08325514613360847,
774
+ "eval_dim_64_cosine_recall@5": 0.12720688223912358,
775
+ "eval_dim_768_cosine_accuracy@1": 0.36619718309859156,
776
+ "eval_dim_768_cosine_accuracy@10": 0.44302176696542894,
777
+ "eval_dim_768_cosine_accuracy@3": 0.37516005121638923,
778
+ "eval_dim_768_cosine_accuracy@5": 0.4014084507042254,
779
+ "eval_dim_768_cosine_map@100": 0.4592162636155952,
780
+ "eval_dim_768_cosine_mrr@10": 0.37943672133812956,
781
+ "eval_dim_768_cosine_ndcg@10": 0.3923099208699586,
782
+ "eval_dim_768_cosine_precision@1": 0.36619718309859156,
783
+ "eval_dim_768_cosine_precision@10": 0.32541613316261203,
784
+ "eval_dim_768_cosine_precision@3": 0.3666239863422962,
785
+ "eval_dim_768_cosine_precision@5": 0.35761843790012804,
786
+ "eval_dim_768_cosine_recall@1": 0.042908107176418055,
787
+ "eval_dim_768_cosine_recall@10": 0.2828362934197418,
788
+ "eval_dim_768_cosine_recall@3": 0.12573828441229515,
789
+ "eval_dim_768_cosine_recall@5": 0.18984022934199501,
790
+ "eval_runtime": 98.8905,
791
  "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.2548721998123125,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  },
796
  {
797
  "epoch": 1.010204081632653,
798
+ "grad_norm": 1352.0535888671875,
799
  "learning_rate": 1e-05,
800
+ "loss": 2.3413,
801
  "step": 99
802
  },
803
  {
804
  "epoch": 1.0204081632653061,
805
+ "grad_norm": 852.8284301757812,
806
  "learning_rate": 1.0102040816326531e-05,
807
+ "loss": 9.0759,
808
  "step": 100
809
  },
810
  {
811
  "epoch": 1.030612244897959,
812
+ "grad_norm": 13.654555320739746,
813
  "learning_rate": 1.0204081632653063e-05,
814
+ "loss": 0.0652,
815
  "step": 101
816
  },
817
  {
818
  "epoch": 1.0408163265306123,
819
+ "grad_norm": 725.5890502929688,
820
  "learning_rate": 1.0306122448979591e-05,
821
+ "loss": 9.3494,
822
  "step": 102
823
  },
824
  {
825
  "epoch": 1.0510204081632653,
826
+ "grad_norm": 42.87101364135742,
827
  "learning_rate": 1.0408163265306123e-05,
828
+ "loss": 0.1636,
829
  "step": 103
830
  },
831
  {
832
  "epoch": 1.0612244897959184,
833
+ "grad_norm": 366.9559631347656,
834
  "learning_rate": 1.0510204081632654e-05,
835
+ "loss": 2.4237,
836
  "step": 104
837
  },
838
  {
839
  "epoch": 1.0714285714285714,
840
+ "grad_norm": 332.9723815917969,
841
  "learning_rate": 1.0612244897959186e-05,
842
+ "loss": 1.9761,
843
  "step": 105
844
  },
845
  {
846
  "epoch": 1.0816326530612246,
847
+ "grad_norm": 512.0596923828125,
848
  "learning_rate": 1.0714285714285714e-05,
849
+ "loss": 3.5402,
850
  "step": 106
851
  },
852
  {
853
  "epoch": 1.0918367346938775,
854
+ "grad_norm": 199.25193786621094,
855
  "learning_rate": 1.0816326530612246e-05,
856
+ "loss": 1.3885,
857
  "step": 107
858
  },
859
  {
860
  "epoch": 1.1020408163265305,
861
+ "grad_norm": 300.2743225097656,
862
  "learning_rate": 1.0918367346938776e-05,
863
+ "loss": 1.5145,
864
  "step": 108
865
  },
866
  {
867
  "epoch": 1.1122448979591837,
868
+ "grad_norm": 382.1838684082031,
869
  "learning_rate": 1.1020408163265306e-05,
870
+ "loss": 3.3087,
871
  "step": 109
872
  },
873
  {
874
  "epoch": 1.1224489795918366,
875
+ "grad_norm": 164.595703125,
876
  "learning_rate": 1.1122448979591838e-05,
877
+ "loss": 0.576,
878
  "step": 110
879
  },
880
  {
881
  "epoch": 1.1326530612244898,
882
+ "grad_norm": 184.7035369873047,
883
  "learning_rate": 1.1224489795918367e-05,
884
+ "loss": 1.6206,
885
  "step": 111
886
  },
887
  {
888
  "epoch": 1.1428571428571428,
889
+ "grad_norm": 811.6741333007812,
890
  "learning_rate": 1.1326530612244899e-05,
891
+ "loss": 10.2227,
892
  "step": 112
893
  },
894
  {
895
  "epoch": 1.153061224489796,
896
+ "grad_norm": 470.61212158203125,
897
  "learning_rate": 1.1428571428571429e-05,
898
+ "loss": 3.1477,
899
  "step": 113
900
  },
901
  {
902
  "epoch": 1.163265306122449,
903
+ "grad_norm": 656.7257690429688,
904
  "learning_rate": 1.1530612244897961e-05,
905
+ "loss": 2.6782,
906
  "step": 114
907
  },
908
  {
909
  "epoch": 1.1734693877551021,
910
+ "grad_norm": 198.0335235595703,
911
  "learning_rate": 1.1632653061224491e-05,
912
+ "loss": 1.013,
913
  "step": 115
914
  },
915
  {
916
  "epoch": 1.183673469387755,
917
+ "grad_norm": 793.2557373046875,
918
  "learning_rate": 1.1734693877551021e-05,
919
+ "loss": 5.486,
920
  "step": 116
921
  },
922
  {
923
  "epoch": 1.193877551020408,
924
+ "grad_norm": 40.89669418334961,
925
  "learning_rate": 1.1836734693877552e-05,
926
+ "loss": 0.2245,
927
  "step": 117
928
  },
929
  {
930
  "epoch": 1.2040816326530612,
931
+ "grad_norm": 215.04872131347656,
932
  "learning_rate": 1.1938775510204084e-05,
933
+ "loss": 1.3157,
934
  "step": 118
935
  },
936
  {
937
  "epoch": 1.2142857142857142,
938
+ "grad_norm": 10.823179244995117,
939
  "learning_rate": 1.2040816326530614e-05,
940
+ "loss": 0.0276,
941
  "step": 119
942
  },
943
  {
944
  "epoch": 1.2244897959183674,
945
+ "grad_norm": 150.44537353515625,
946
  "learning_rate": 1.2142857142857142e-05,
947
+ "loss": 1.1907,
948
  "step": 120
949
  },
950
  {
951
  "epoch": 1.2346938775510203,
952
+ "grad_norm": 21.41417694091797,
953
  "learning_rate": 1.2244897959183674e-05,
954
+ "loss": 0.0694,
955
  "step": 121
956
  },
957
  {
958
  "epoch": 1.2448979591836735,
959
+ "grad_norm": 185.08053588867188,
960
  "learning_rate": 1.2346938775510204e-05,
961
+ "loss": 1.3295,
962
  "step": 122
963
  },
964
  {
965
  "epoch": 1.2551020408163265,
966
+ "grad_norm": 123.85110473632812,
967
  "learning_rate": 1.2448979591836736e-05,
968
+ "loss": 0.501,
969
  "step": 123
970
  },
971
  {
972
  "epoch": 1.2653061224489797,
973
+ "grad_norm": 337.702392578125,
974
  "learning_rate": 1.2551020408163267e-05,
975
+ "loss": 2.1739,
976
  "step": 124
977
  },
978
  {
979
  "epoch": 1.2755102040816326,
980
+ "grad_norm": 682.425048828125,
981
  "learning_rate": 1.2653061224489798e-05,
982
+ "loss": 5.5839,
983
  "step": 125
984
  },
985
  {
986
  "epoch": 1.2857142857142856,
987
+ "grad_norm": 182.4869384765625,
988
  "learning_rate": 1.2755102040816327e-05,
989
+ "loss": 0.9169,
990
  "step": 126
991
  },
992
  {
993
  "epoch": 1.2959183673469388,
994
+ "grad_norm": 87.18023681640625,
995
  "learning_rate": 1.2857142857142859e-05,
996
+ "loss": 0.7417,
997
  "step": 127
998
  },
999
  {
1000
  "epoch": 1.306122448979592,
1001
+ "grad_norm": 290.62518310546875,
1002
  "learning_rate": 1.2959183673469389e-05,
1003
+ "loss": 4.4272,
1004
  "step": 128
1005
  },
1006
  {
1007
  "epoch": 1.316326530612245,
1008
+ "grad_norm": 123.35594940185547,
1009
  "learning_rate": 1.3061224489795918e-05,
1010
+ "loss": 1.1165,
1011
  "step": 129
1012
  },
1013
  {
1014
  "epoch": 1.3265306122448979,
1015
+ "grad_norm": 530.7383422851562,
1016
  "learning_rate": 1.316326530612245e-05,
1017
+ "loss": 4.3749,
1018
  "step": 130
1019
  },
1020
  {
1021
  "epoch": 1.336734693877551,
1022
+ "grad_norm": 488.7727966308594,
1023
  "learning_rate": 1.326530612244898e-05,
1024
+ "loss": 4.8529,
1025
  "step": 131
1026
  },
1027
  {
1028
  "epoch": 1.346938775510204,
1029
+ "grad_norm": 912.1464233398438,
1030
  "learning_rate": 1.3367346938775512e-05,
1031
+ "loss": 5.3515,
1032
  "step": 132
1033
  },
1034
  {
1035
  "epoch": 1.3571428571428572,
1036
+ "grad_norm": 3.6914279460906982,
1037
  "learning_rate": 1.3469387755102042e-05,
1038
+ "loss": 0.0201,
1039
  "step": 133
1040
  },
1041
  {
1042
  "epoch": 1.3673469387755102,
1043
+ "grad_norm": 1.16976797580719,
1044
  "learning_rate": 1.3571428571428574e-05,
1045
+ "loss": 0.0088,
1046
  "step": 134
1047
  },
1048
  {
1049
  "epoch": 1.3775510204081631,
1050
+ "grad_norm": 839.4262084960938,
1051
  "learning_rate": 1.3673469387755102e-05,
1052
+ "loss": 2.198,
1053
  "step": 135
1054
  },
1055
  {
1056
  "epoch": 1.3877551020408163,
1057
+ "grad_norm": 2.5965521335601807,
1058
  "learning_rate": 1.3775510204081634e-05,
1059
+ "loss": 0.0158,
1060
  "step": 136
1061
  },
1062
  {
1063
  "epoch": 1.3979591836734695,
1064
+ "grad_norm": 5.312564373016357,
1065
  "learning_rate": 1.3877551020408165e-05,
1066
+ "loss": 0.0304,
1067
  "step": 137
1068
  },
1069
  {
1070
  "epoch": 1.4081632653061225,
1071
+ "grad_norm": 3.9951765537261963,
1072
  "learning_rate": 1.3979591836734696e-05,
1073
+ "loss": 0.0176,
1074
  "step": 138
1075
  },
1076
  {
1077
  "epoch": 1.4183673469387754,
1078
+ "grad_norm": 499.74761962890625,
1079
  "learning_rate": 1.4081632653061225e-05,
1080
+ "loss": 1.9166,
1081
  "step": 139
1082
  },
1083
  {
1084
  "epoch": 1.4285714285714286,
1085
+ "grad_norm": 550.5098266601562,
1086
  "learning_rate": 1.4183673469387755e-05,
1087
+ "loss": 5.8026,
1088
  "step": 140
1089
  },
1090
  {
1091
  "epoch": 1.4387755102040816,
1092
+ "grad_norm": 465.0825500488281,
1093
  "learning_rate": 1.4285714285714287e-05,
1094
+ "loss": 6.2584,
1095
  "step": 141
1096
  },
1097
  {
1098
  "epoch": 1.4489795918367347,
1099
+ "grad_norm": 152.85641479492188,
1100
  "learning_rate": 1.4387755102040817e-05,
1101
+ "loss": 0.6994,
1102
  "step": 142
1103
  },
1104
  {
1105
  "epoch": 1.4591836734693877,
1106
+ "grad_norm": 108.09862518310547,
1107
  "learning_rate": 1.448979591836735e-05,
1108
+ "loss": 0.6583,
1109
  "step": 143
1110
  },
1111
  {
1112
  "epoch": 1.469387755102041,
1113
+ "grad_norm": 9.0764799118042,
1114
  "learning_rate": 1.4591836734693878e-05,
1115
+ "loss": 0.0464,
1116
  "step": 144
1117
  },
1118
  {
1119
  "epoch": 1.4795918367346939,
1120
+ "grad_norm": 162.13211059570312,
1121
  "learning_rate": 1.469387755102041e-05,
1122
+ "loss": 0.8106,
1123
  "step": 145
1124
  },
1125
  {
1126
  "epoch": 1.489795918367347,
1127
+ "grad_norm": 1029.7261962890625,
1128
  "learning_rate": 1.479591836734694e-05,
1129
+ "loss": 14.6794,
1130
  "step": 146
1131
  },
1132
  {
1133
  "epoch": 1.5,
1134
+ "grad_norm": 321.685791015625,
1135
  "learning_rate": 1.4897959183673472e-05,
1136
+ "loss": 1.5998,
1137
  "step": 147
1138
  },
1139
  {
1140
  "epoch": 1.510204081632653,
1141
+ "grad_norm": 22.866291046142578,
1142
  "learning_rate": 1.5000000000000002e-05,
1143
+ "loss": 0.0695,
1144
  "step": 148
1145
  },
1146
  {
1147
  "epoch": 1.5204081632653061,
1148
+ "grad_norm": 188.6576690673828,
1149
  "learning_rate": 1.510204081632653e-05,
1150
+ "loss": 0.9395,
1151
  "step": 149
1152
  },
1153
  {
1154
  "epoch": 1.5306122448979593,
1155
+ "grad_norm": 1417.1654052734375,
1156
  "learning_rate": 1.5204081632653063e-05,
1157
+ "loss": 29.9231,
1158
  "step": 150
1159
  },
1160
  {
1161
  "epoch": 1.5408163265306123,
1162
+ "grad_norm": 1.7937166690826416,
1163
  "learning_rate": 1.530612244897959e-05,
1164
+ "loss": 0.0107,
1165
  "step": 151
1166
  },
1167
  {
1168
  "epoch": 1.5510204081632653,
1169
+ "grad_norm": 6.340953350067139,
1170
  "learning_rate": 1.5408163265306123e-05,
1171
+ "loss": 0.0184,
1172
  "step": 152
1173
  },
1174
  {
1175
  "epoch": 1.5612244897959182,
1176
+ "grad_norm": 11.218599319458008,
1177
  "learning_rate": 1.5510204081632655e-05,
1178
+ "loss": 0.0373,
1179
  "step": 153
1180
  },
1181
  {
1182
  "epoch": 1.5714285714285714,
1183
+ "grad_norm": 164.22007751464844,
1184
  "learning_rate": 1.5612244897959187e-05,
1185
+ "loss": 0.7196,
1186
  "step": 154
1187
  },
1188
  {
1189
  "epoch": 1.5816326530612246,
1190
+ "grad_norm": 173.1319580078125,
1191
  "learning_rate": 1.5714285714285715e-05,
1192
+ "loss": 0.9456,
1193
  "step": 155
1194
  },
1195
  {
1196
  "epoch": 1.5918367346938775,
1197
+ "grad_norm": 337.6502380371094,
1198
  "learning_rate": 1.5816326530612247e-05,
1199
+ "loss": 0.8104,
1200
  "step": 156
1201
  },
1202
  {
1203
  "epoch": 1.6020408163265305,
1204
+ "grad_norm": 429.2533264160156,
1205
  "learning_rate": 1.5918367346938776e-05,
1206
+ "loss": 1.9366,
1207
  "step": 157
1208
  },
1209
  {
1210
  "epoch": 1.6122448979591837,
1211
+ "grad_norm": 877.1956787109375,
1212
  "learning_rate": 1.6020408163265308e-05,
1213
+ "loss": 7.7652,
1214
  "step": 158
1215
  },
1216
  {
1217
  "epoch": 1.6224489795918369,
1218
+ "grad_norm": 113.66088104248047,
1219
  "learning_rate": 1.612244897959184e-05,
1220
+ "loss": 0.521,
1221
  "step": 159
1222
  },
1223
  {
1224
  "epoch": 1.6326530612244898,
1225
+ "grad_norm": 11.550941467285156,
1226
  "learning_rate": 1.6224489795918368e-05,
1227
+ "loss": 0.0238,
1228
  "step": 160
1229
  },
1230
  {
1231
  "epoch": 1.6428571428571428,
1232
+ "grad_norm": 52.35281753540039,
1233
  "learning_rate": 1.63265306122449e-05,
1234
+ "loss": 0.1139,
1235
  "step": 161
1236
  },
1237
  {
1238
  "epoch": 1.6530612244897958,
1239
+ "grad_norm": 137.2505645751953,
1240
  "learning_rate": 1.642857142857143e-05,
1241
+ "loss": 0.5996,
1242
  "step": 162
1243
  },
1244
  {
1245
  "epoch": 1.663265306122449,
1246
+ "grad_norm": 0.7507250905036926,
1247
  "learning_rate": 1.653061224489796e-05,
1248
+ "loss": 0.0036,
1249
  "step": 163
1250
  },
1251
  {
1252
  "epoch": 1.6734693877551021,
1253
+ "grad_norm": 27.53925323486328,
1254
  "learning_rate": 1.6632653061224492e-05,
1255
+ "loss": 0.1499,
1256
  "step": 164
1257
  },
1258
  {
1259
  "epoch": 1.683673469387755,
1260
+ "grad_norm": 169.983154296875,
1261
  "learning_rate": 1.673469387755102e-05,
1262
+ "loss": 0.4617,
1263
  "step": 165
1264
  },
1265
  {
1266
  "epoch": 1.693877551020408,
1267
+ "grad_norm": 30.274919509887695,
1268
  "learning_rate": 1.6836734693877553e-05,
1269
+ "loss": 0.0959,
1270
  "step": 166
1271
  },
1272
  {
1273
  "epoch": 1.7040816326530612,
1274
+ "grad_norm": 38.02618408203125,
1275
  "learning_rate": 1.6938775510204085e-05,
1276
+ "loss": 0.1291,
1277
  "step": 167
1278
  },
1279
  {
1280
  "epoch": 1.7142857142857144,
1281
+ "grad_norm": 0.5495009422302246,
1282
  "learning_rate": 1.7040816326530613e-05,
1283
+ "loss": 0.0018,
1284
  "step": 168
1285
  },
1286
  {
1287
  "epoch": 1.7244897959183674,
1288
+ "grad_norm": 1.4790436029434204,
1289
  "learning_rate": 1.7142857142857142e-05,
1290
+ "loss": 0.0054,
1291
  "step": 169
1292
  },
1293
  {
1294
  "epoch": 1.7346938775510203,
1295
+ "grad_norm": 1496.3338623046875,
1296
  "learning_rate": 1.7244897959183674e-05,
1297
+ "loss": 31.5747,
1298
  "step": 170
1299
  },
1300
  {
1301
  "epoch": 1.7448979591836735,
1302
+ "grad_norm": 88.00984191894531,
1303
  "learning_rate": 1.7346938775510206e-05,
1304
+ "loss": 0.3231,
1305
  "step": 171
1306
  },
1307
  {
1308
  "epoch": 1.7551020408163265,
1309
+ "grad_norm": 436.733642578125,
1310
  "learning_rate": 1.7448979591836738e-05,
1311
+ "loss": 2.5639,
1312
  "step": 172
1313
  },
1314
  {
1315
  "epoch": 1.7653061224489797,
1316
+ "grad_norm": 1169.6356201171875,
1317
  "learning_rate": 1.7551020408163266e-05,
1318
+ "loss": 8.2977,
1319
  "step": 173
1320
  },
1321
  {
1322
  "epoch": 1.7755102040816326,
1323
+ "grad_norm": 97.72107696533203,
1324
  "learning_rate": 1.7653061224489798e-05,
1325
+ "loss": 0.4316,
1326
  "step": 174
1327
  },
1328
  {
1329
  "epoch": 1.7857142857142856,
1330
+ "grad_norm": 12.1145601272583,
1331
  "learning_rate": 1.7755102040816327e-05,
1332
+ "loss": 0.0233,
1333
  "step": 175
1334
  },
1335
  {
1336
  "epoch": 1.7959183673469388,
1337
+ "grad_norm": 203.28208923339844,
1338
  "learning_rate": 1.785714285714286e-05,
1339
+ "loss": 1.0278,
1340
  "step": 176
1341
  },
1342
  {
1343
  "epoch": 1.806122448979592,
1344
+ "grad_norm": 0.141877681016922,
1345
  "learning_rate": 1.795918367346939e-05,
1346
+ "loss": 0.0005,
1347
  "step": 177
1348
  },
1349
  {
1350
  "epoch": 1.816326530612245,
1351
+ "grad_norm": 21.75454330444336,
1352
  "learning_rate": 1.806122448979592e-05,
1353
+ "loss": 0.1203,
1354
  "step": 178
1355
  },
1356
  {
1357
  "epoch": 1.8265306122448979,
1358
+ "grad_norm": 98.08448028564453,
1359
  "learning_rate": 1.816326530612245e-05,
1360
+ "loss": 0.2268,
1361
  "step": 179
1362
  },
1363
  {
1364
  "epoch": 1.836734693877551,
1365
+ "grad_norm": 279.0924377441406,
1366
  "learning_rate": 1.826530612244898e-05,
1367
+ "loss": 3.2014,
1368
  "step": 180
1369
  },
1370
  {
1371
  "epoch": 1.8469387755102042,
1372
+ "grad_norm": 348.4731750488281,
1373
  "learning_rate": 1.836734693877551e-05,
1374
+ "loss": 3.6799,
1375
  "step": 181
1376
  },
1377
  {
1378
  "epoch": 1.8571428571428572,
1379
+ "grad_norm": 77.99913024902344,
1380
  "learning_rate": 1.8469387755102043e-05,
1381
+ "loss": 0.2033,
1382
  "step": 182
1383
  },
1384
  {
1385
  "epoch": 1.8673469387755102,
1386
+ "grad_norm": 144.7059783935547,
1387
  "learning_rate": 1.8571428571428575e-05,
1388
+ "loss": 0.6791,
1389
  "step": 183
1390
  },
1391
  {
1392
  "epoch": 1.8775510204081631,
1393
+ "grad_norm": 914.4169921875,
1394
  "learning_rate": 1.8673469387755104e-05,
1395
+ "loss": 3.14,
1396
  "step": 184
1397
  },
1398
  {
1399
  "epoch": 1.8877551020408163,
1400
+ "grad_norm": 397.94769287109375,
1401
  "learning_rate": 1.8775510204081636e-05,
1402
+ "loss": 0.9216,
1403
  "step": 185
1404
  },
1405
  {
1406
  "epoch": 1.8979591836734695,
1407
+ "grad_norm": 5.423056125640869,
1408
  "learning_rate": 1.8877551020408164e-05,
1409
+ "loss": 0.0194,
1410
  "step": 186
1411
  },
1412
  {
1413
  "epoch": 1.9081632653061225,
1414
+ "grad_norm": 124.5344467163086,
1415
  "learning_rate": 1.8979591836734696e-05,
1416
+ "loss": 0.3126,
1417
  "step": 187
1418
  },
1419
  {
1420
  "epoch": 1.9183673469387754,
1421
+ "grad_norm": 136.97732543945312,
1422
  "learning_rate": 1.9081632653061225e-05,
1423
+ "loss": 0.3414,
1424
  "step": 188
1425
  },
1426
  {
1427
  "epoch": 1.9285714285714286,
1428
+ "grad_norm": 29.086313247680664,
1429
  "learning_rate": 1.9183673469387756e-05,
1430
+ "loss": 0.0684,
1431
  "step": 189
1432
  },
1433
  {
1434
  "epoch": 1.9387755102040818,
1435
+ "grad_norm": 4.842129707336426,
1436
  "learning_rate": 1.928571428571429e-05,
1437
+ "loss": 0.017,
1438
  "step": 190
1439
  },
1440
  {
1441
  "epoch": 1.9489795918367347,
1442
+ "grad_norm": 0.2148096263408661,
1443
  "learning_rate": 1.9387755102040817e-05,
1444
+ "loss": 0.0007,
1445
  "step": 191
1446
  },
1447
  {
1448
  "epoch": 1.9591836734693877,
1449
+ "grad_norm": 63.7159538269043,
1450
  "learning_rate": 1.948979591836735e-05,
1451
+ "loss": 0.2351,
1452
  "step": 192
1453
  },
1454
  {
1455
  "epoch": 1.9693877551020407,
1456
+ "grad_norm": 313.18817138671875,
1457
  "learning_rate": 1.9591836734693877e-05,
1458
+ "loss": 1.9321,
1459
  "step": 193
1460
  },
1461
  {
1462
  "epoch": 1.9795918367346939,
1463
+ "grad_norm": 1025.732177734375,
1464
  "learning_rate": 1.969387755102041e-05,
1465
+ "loss": 5.5642,
1466
  "step": 194
1467
  },
1468
  {
1469
  "epoch": 1.989795918367347,
1470
+ "grad_norm": 603.5271606445312,
1471
  "learning_rate": 1.979591836734694e-05,
1472
+ "loss": 1.3926,
1473
  "step": 195
1474
  },
1475
  {
1476
  "epoch": 2.0,
1477
+ "grad_norm": 6.151113033294678,
1478
  "learning_rate": 1.9897959183673473e-05,
1479
+ "loss": 0.015,
1480
  "step": 196
1481
  },
1482
  {
1483
  "epoch": 2.0,
1484
+ "eval_dim_1024_cosine_accuracy@1": 0.3591549295774648,
1485
+ "eval_dim_1024_cosine_accuracy@10": 0.42445582586427655,
1486
+ "eval_dim_1024_cosine_accuracy@3": 0.3649167733674776,
1487
+ "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
1488
+ "eval_dim_1024_cosine_map@100": 0.4442656203645836,
1489
+ "eval_dim_1024_cosine_mrr@10": 0.37061688311688284,
1490
+ "eval_dim_1024_cosine_ndcg@10": 0.38277564232489586,
1491
+ "eval_dim_1024_cosine_precision@1": 0.3591549295774648,
1492
+ "eval_dim_1024_cosine_precision@10": 0.31914212548015364,
1493
+ "eval_dim_1024_cosine_precision@3": 0.35830132309005547,
1494
+ "eval_dim_1024_cosine_precision@5": 0.3495518565941101,
1495
+ "eval_dim_1024_cosine_recall@1": 0.040925664980934,
1496
+ "eval_dim_1024_cosine_recall@10": 0.2663878590041381,
1497
+ "eval_dim_1024_cosine_recall@3": 0.11912044022990924,
1498
+ "eval_dim_1024_cosine_recall@5": 0.17907504115268522,
1499
+ "eval_dim_128_cosine_accuracy@1": 0.3060179257362356,
1500
+ "eval_dim_128_cosine_accuracy@10": 0.3687580025608195,
1501
+ "eval_dim_128_cosine_accuracy@3": 0.31049935979513443,
1502
+ "eval_dim_128_cosine_accuracy@5": 0.3322663252240717,
1503
+ "eval_dim_128_cosine_map@100": 0.38523839229741214,
1504
+ "eval_dim_128_cosine_mrr@10": 0.3164926427250368,
1505
+ "eval_dim_128_cosine_ndcg@10": 0.3273120368593549,
1506
+ "eval_dim_128_cosine_precision@1": 0.3060179257362356,
1507
+ "eval_dim_128_cosine_precision@10": 0.27099871959026883,
1508
+ "eval_dim_128_cosine_precision@3": 0.3053777208706786,
1509
+ "eval_dim_128_cosine_precision@5": 0.29679897567221514,
1510
+ "eval_dim_128_cosine_recall@1": 0.035453426807775947,
1511
+ "eval_dim_128_cosine_recall@10": 0.23154232181544257,
1512
+ "eval_dim_128_cosine_recall@3": 0.10361683841932247,
1513
+ "eval_dim_128_cosine_recall@5": 0.15549008066651587,
1514
+ "eval_dim_256_cosine_accuracy@1": 0.3290653008962868,
1515
+ "eval_dim_256_cosine_accuracy@10": 0.3969270166453265,
1516
+ "eval_dim_256_cosine_accuracy@3": 0.33354673495518566,
1517
+ "eval_dim_256_cosine_accuracy@5": 0.36235595390524966,
1518
+ "eval_dim_256_cosine_map@100": 0.4130430492367209,
1519
+ "eval_dim_256_cosine_mrr@10": 0.3407241428368185,
1520
+ "eval_dim_256_cosine_ndcg@10": 0.3532462927680357,
1521
+ "eval_dim_256_cosine_precision@1": 0.3290653008962868,
1522
+ "eval_dim_256_cosine_precision@10": 0.29423815620998717,
1523
+ "eval_dim_256_cosine_precision@3": 0.3282116944088775,
1524
+ "eval_dim_256_cosine_precision@5": 0.3209987195902689,
1525
+ "eval_dim_256_cosine_recall@1": 0.03735817465789262,
1526
+ "eval_dim_256_cosine_recall@10": 0.24868649598286982,
1527
+ "eval_dim_256_cosine_recall@3": 0.10921822534273136,
1528
+ "eval_dim_256_cosine_recall@5": 0.165931214027115,
1529
+ "eval_dim_512_cosine_accuracy@1": 0.3578745198463508,
1530
+ "eval_dim_512_cosine_accuracy@10": 0.42765685019206146,
1531
+ "eval_dim_512_cosine_accuracy@3": 0.36299615877080665,
1532
+ "eval_dim_512_cosine_accuracy@5": 0.3892445582586428,
1533
+ "eval_dim_512_cosine_map@100": 0.44173422420832625,
1534
+ "eval_dim_512_cosine_mrr@10": 0.36958849053919446,
1535
+ "eval_dim_512_cosine_ndcg@10": 0.3812404238586874,
1536
+ "eval_dim_512_cosine_precision@1": 0.3578745198463508,
1537
+ "eval_dim_512_cosine_precision@10": 0.3177336747759283,
1538
+ "eval_dim_512_cosine_precision@3": 0.35680751173708913,
1539
+ "eval_dim_512_cosine_precision@5": 0.34724711907810496,
1540
+ "eval_dim_512_cosine_recall@1": 0.04087028201741599,
1541
+ "eval_dim_512_cosine_recall@10": 0.2641908975177659,
1542
+ "eval_dim_512_cosine_recall@3": 0.11887426573116062,
1543
+ "eval_dim_512_cosine_recall@5": 0.17823551058609727,
1544
+ "eval_dim_64_cosine_accuracy@1": 0.26504481434058896,
1545
+ "eval_dim_64_cosine_accuracy@10": 0.3322663252240717,
1546
+ "eval_dim_64_cosine_accuracy@3": 0.26952624839948786,
1547
+ "eval_dim_64_cosine_accuracy@5": 0.29257362355953903,
1548
+ "eval_dim_64_cosine_map@100": 0.3471195034779171,
1549
+ "eval_dim_64_cosine_mrr@10": 0.27606167306871515,
1550
+ "eval_dim_64_cosine_ndcg@10": 0.28738876488409937,
1551
+ "eval_dim_64_cosine_precision@1": 0.26504481434058896,
1552
+ "eval_dim_64_cosine_precision@10": 0.2385403329065301,
1553
+ "eval_dim_64_cosine_precision@3": 0.2646180110968843,
1554
+ "eval_dim_64_cosine_precision@5": 0.25813060179257363,
1555
+ "eval_dim_64_cosine_recall@1": 0.030859353047043094,
1556
+ "eval_dim_64_cosine_recall@10": 0.20564514911963047,
1557
+ "eval_dim_64_cosine_recall@3": 0.09025134722734952,
1558
+ "eval_dim_64_cosine_recall@5": 0.13616521781869687,
1559
+ "eval_dim_768_cosine_accuracy@1": 0.3546734955185659,
1560
+ "eval_dim_768_cosine_accuracy@10": 0.4186939820742638,
1561
+ "eval_dim_768_cosine_accuracy@3": 0.36107554417413573,
1562
+ "eval_dim_768_cosine_accuracy@5": 0.3860435339308579,
1563
+ "eval_dim_768_cosine_map@100": 0.4380682423691407,
1564
+ "eval_dim_768_cosine_mrr@10": 0.36586844298111887,
1565
+ "eval_dim_768_cosine_ndcg@10": 0.3776871349900537,
1566
+ "eval_dim_768_cosine_precision@1": 0.3546734955185659,
1567
+ "eval_dim_768_cosine_precision@10": 0.3146606914212548,
1568
+ "eval_dim_768_cosine_precision@3": 0.354033290653009,
1569
+ "eval_dim_768_cosine_precision@5": 0.34494238156209983,
1570
+ "eval_dim_768_cosine_recall@1": 0.04034211902742892,
1571
+ "eval_dim_768_cosine_recall@10": 0.2626701106236,
1572
+ "eval_dim_768_cosine_recall@3": 0.11739647757212558,
1573
+ "eval_dim_768_cosine_recall@5": 0.17629643014791566,
1574
+ "eval_runtime": 98.6366,
1575
  "eval_samples_per_second": 0.0,
1576
+ "eval_sequential_score": 0.28738876488409937,
1577
  "eval_steps_per_second": 0.0,
1578
  "step": 196
1579
  }
checkpoint-196/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
checkpoint-294/README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-294/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43c4fc720dc2e3977a3cc3e3497ee115851c08f432984ab97bfaf724a6b3d666
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78c216aabdfb5d2e5bf950f1cd5a80335096dea547e29ed3b49b85d0858df394
3
  size 2239607176
checkpoint-294/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0ddd633ecdd2f1e6c3de318b7ceb44fe2f43af2a873c9089a62cd07aaeb5c74
3
  size 4471067142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cb9db2c86db21e09c6e695d166b2b80f61223909d1f30787b93eb60455ace8
3
  size 4471067142
checkpoint-294/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bdc8f1e2d846953d00ba606f4cf92976f5653cd22fea2aacf347840fdb304ea
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:318bad8325e8be8bdf0a5b272a40b3c1e9ce3b69291a451ca0c924f7dacab8bc
3
  size 14645
checkpoint-294/tokenizer_config.json CHANGED
@@ -47,16 +47,9 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
- "max_length": 512,
51
  "model_max_length": 512,
52
- "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
- "pad_token_type_id": 0,
55
- "padding_side": "right",
56
  "sep_token": "</s>",
57
- "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
- "truncation_side": "right",
60
- "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
checkpoint-294/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-294/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
checkpoint-98/README.md CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-98/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8b8678a200e1ec3a97ec08f700f81cc6660e581d09862b47b576834736c0668
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad96896c31a7a57d2d2591979a3de4018f6e0a81c8eb47198774b1b88cd52c55
3
  size 2239607176
checkpoint-98/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43981d3b1c8c7efd9d147726925594fec137b1d2137148a81f15c7a1d493486a
3
  size 4471067142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20282c36c7a9cbb242a2f093dea3da6be7e335cb5a7077742e22a6e220d97616
3
  size 4471067142
checkpoint-98/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4f05f697e2a026dbb8be0397c5f3215957e05bbf5897dea20c686e5f8917f13
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42beaf0575196ccbd002c1b61b2ae1f21df3e7e108abc6b5b6e4c2fc22e71b1d
3
  size 14645
checkpoint-98/tokenizer_config.json CHANGED
@@ -47,16 +47,9 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
- "max_length": 512,
51
  "model_max_length": 512,
52
- "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
- "pad_token_type_id": 0,
55
- "padding_side": "right",
56
  "sep_token": "</s>",
57
- "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
- "truncation_side": "right",
60
- "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
checkpoint-98/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 98,
3
- "best_metric": 0.3312285498294292,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,785 +11,785 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
- "grad_norm": 973.273681640625,
15
  "learning_rate": 0.0,
16
- "loss": 15.8588,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
- "grad_norm": 1016.8517456054688,
22
  "learning_rate": 1.0204081632653061e-07,
23
- "loss": 10.7411,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
- "grad_norm": 166.88465881347656,
29
  "learning_rate": 2.0408163265306121e-07,
30
- "loss": 1.3873,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
- "grad_norm": 108.06741333007812,
36
  "learning_rate": 3.0612244897959183e-07,
37
- "loss": 0.9088,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
- "grad_norm": 1.1959134340286255,
43
  "learning_rate": 4.0816326530612243e-07,
44
- "loss": 0.0077,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
- "grad_norm": 130.83908081054688,
50
  "learning_rate": 5.102040816326531e-07,
51
- "loss": 0.6016,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
- "grad_norm": 318.3863525390625,
57
  "learning_rate": 6.122448979591837e-07,
58
- "loss": 1.6714,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
- "grad_norm": 74.26002502441406,
64
  "learning_rate": 7.142857142857143e-07,
65
- "loss": 0.4211,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
- "grad_norm": 32.4500846862793,
71
  "learning_rate": 8.163265306122449e-07,
72
- "loss": 0.1996,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
- "grad_norm": 41.27345275878906,
78
  "learning_rate": 9.183673469387756e-07,
79
- "loss": 0.1895,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
- "grad_norm": 27.35291862487793,
85
  "learning_rate": 1.0204081632653063e-06,
86
- "loss": 0.1358,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
- "grad_norm": 103.75244903564453,
92
  "learning_rate": 1.122448979591837e-06,
93
- "loss": 0.5552,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
- "grad_norm": 155.97923278808594,
99
  "learning_rate": 1.2244897959183673e-06,
100
- "loss": 0.5141,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
- "grad_norm": 53.757484436035156,
106
  "learning_rate": 1.3265306122448982e-06,
107
- "loss": 0.1955,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
- "grad_norm": 175.17491149902344,
113
  "learning_rate": 1.4285714285714286e-06,
114
- "loss": 1.9114,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
- "grad_norm": 49.02252197265625,
120
  "learning_rate": 1.5306122448979593e-06,
121
- "loss": 0.2645,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
- "grad_norm": 999.3756103515625,
127
  "learning_rate": 1.6326530612244897e-06,
128
- "loss": 7.5545,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
- "grad_norm": 149.2627410888672,
134
  "learning_rate": 1.7346938775510206e-06,
135
- "loss": 0.4297,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
- "grad_norm": 204.95181274414062,
141
  "learning_rate": 1.8367346938775512e-06,
142
- "loss": 0.678,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
- "grad_norm": 103.94851684570312,
148
  "learning_rate": 1.938775510204082e-06,
149
- "loss": 0.4634,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
- "grad_norm": 536.7100219726562,
155
  "learning_rate": 2.0408163265306125e-06,
156
- "loss": 4.2252,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
- "grad_norm": 444.44805908203125,
162
  "learning_rate": 2.1428571428571427e-06,
163
- "loss": 3.9985,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
- "grad_norm": 170.50369262695312,
169
  "learning_rate": 2.244897959183674e-06,
170
- "loss": 1.9242,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
- "grad_norm": 626.5487060546875,
176
  "learning_rate": 2.3469387755102044e-06,
177
- "loss": 3.2716,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
- "grad_norm": 51.353050231933594,
183
  "learning_rate": 2.4489795918367347e-06,
184
- "loss": 0.123,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
- "grad_norm": 108.25341796875,
190
  "learning_rate": 2.5510204081632657e-06,
191
- "loss": 1.0011,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
- "grad_norm": 322.83502197265625,
197
  "learning_rate": 2.6530612244897964e-06,
198
- "loss": 3.5846,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
- "grad_norm": 203.38458251953125,
204
  "learning_rate": 2.7551020408163266e-06,
205
- "loss": 1.1365,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
- "grad_norm": 127.78427124023438,
211
  "learning_rate": 2.8571428571428573e-06,
212
- "loss": 0.7149,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
- "grad_norm": 283.67645263671875,
218
  "learning_rate": 2.959183673469388e-06,
219
- "loss": 1.2629,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
- "grad_norm": 82.65542602539062,
225
  "learning_rate": 3.0612244897959185e-06,
226
- "loss": 0.6459,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
- "grad_norm": 42.66185760498047,
232
  "learning_rate": 3.1632653061224496e-06,
233
- "loss": 0.1934,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
- "grad_norm": 212.1294708251953,
239
  "learning_rate": 3.2653061224489794e-06,
240
- "loss": 1.4897,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
- "grad_norm": 188.0417022705078,
246
  "learning_rate": 3.3673469387755105e-06,
247
- "loss": 0.8561,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
- "grad_norm": 2.0467610359191895,
253
  "learning_rate": 3.469387755102041e-06,
254
- "loss": 0.0128,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
- "grad_norm": 283.3966979980469,
260
  "learning_rate": 3.5714285714285718e-06,
261
- "loss": 1.4952,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
- "grad_norm": 60.74869155883789,
267
  "learning_rate": 3.6734693877551024e-06,
268
- "loss": 0.3181,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
- "grad_norm": 824.6165771484375,
274
  "learning_rate": 3.7755102040816327e-06,
275
- "loss": 6.3681,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
- "grad_norm": 231.1636962890625,
281
  "learning_rate": 3.877551020408164e-06,
282
- "loss": 1.4487,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
- "grad_norm": 26.46611785888672,
288
  "learning_rate": 3.979591836734694e-06,
289
- "loss": 0.1702,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
- "grad_norm": 75.88525390625,
295
  "learning_rate": 4.081632653061225e-06,
296
- "loss": 0.2513,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
- "grad_norm": 465.83392333984375,
302
  "learning_rate": 4.183673469387755e-06,
303
- "loss": 4.1595,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
- "grad_norm": 306.2772521972656,
309
  "learning_rate": 4.2857142857142855e-06,
310
- "loss": 2.7347,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
- "grad_norm": 488.9759521484375,
316
  "learning_rate": 4.3877551020408165e-06,
317
- "loss": 2.3182,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
- "grad_norm": 355.1698913574219,
323
  "learning_rate": 4.489795918367348e-06,
324
- "loss": 1.3285,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
- "grad_norm": 263.558349609375,
330
  "learning_rate": 4.591836734693878e-06,
331
- "loss": 2.1155,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
- "grad_norm": 9.667963981628418,
337
  "learning_rate": 4.693877551020409e-06,
338
- "loss": 0.0645,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
- "grad_norm": 957.79345703125,
344
  "learning_rate": 4.795918367346939e-06,
345
- "loss": 7.1283,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
- "grad_norm": 160.0965118408203,
351
  "learning_rate": 4.897959183673469e-06,
352
- "loss": 0.711,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
- "grad_norm": 93.697265625,
358
  "learning_rate": 5e-06,
359
- "loss": 0.4716,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
- "grad_norm": 292.9518737792969,
365
  "learning_rate": 5.1020408163265315e-06,
366
- "loss": 2.2895,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
- "grad_norm": 335.4564514160156,
372
  "learning_rate": 5.204081632653062e-06,
373
- "loss": 1.9235,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
- "grad_norm": 138.63575744628906,
379
  "learning_rate": 5.306122448979593e-06,
380
- "loss": 0.8777,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
- "grad_norm": 1.011594533920288,
386
  "learning_rate": 5.408163265306123e-06,
387
- "loss": 0.0038,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
- "grad_norm": 506.25152587890625,
393
  "learning_rate": 5.510204081632653e-06,
394
- "loss": 1.5598,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
- "grad_norm": 2.2550530433654785,
400
  "learning_rate": 5.6122448979591834e-06,
401
- "loss": 0.0177,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
- "grad_norm": 13.93323802947998,
407
  "learning_rate": 5.7142857142857145e-06,
408
- "loss": 0.0837,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
- "grad_norm": 7.279649257659912,
414
  "learning_rate": 5.816326530612246e-06,
415
- "loss": 0.0429,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
- "grad_norm": 0.9923371076583862,
421
  "learning_rate": 5.918367346938776e-06,
422
- "loss": 0.0071,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
- "grad_norm": 743.8301391601562,
428
  "learning_rate": 6.020408163265307e-06,
429
- "loss": 2.7217,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
- "grad_norm": 227.04403686523438,
435
  "learning_rate": 6.122448979591837e-06,
436
- "loss": 3.9013,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
- "grad_norm": 193.12701416015625,
442
  "learning_rate": 6.224489795918368e-06,
443
- "loss": 1.417,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
- "grad_norm": 642.7814331054688,
449
  "learning_rate": 6.326530612244899e-06,
450
- "loss": 3.5854,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
- "grad_norm": 1007.544189453125,
456
  "learning_rate": 6.4285714285714295e-06,
457
- "loss": 12.918,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
- "grad_norm": 1310.942138671875,
463
  "learning_rate": 6.530612244897959e-06,
464
- "loss": 7.1566,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
- "grad_norm": 810.1301879882812,
470
  "learning_rate": 6.63265306122449e-06,
471
- "loss": 3.9897,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
- "grad_norm": 513.1759643554688,
477
  "learning_rate": 6.734693877551021e-06,
478
- "loss": 8.1139,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
- "grad_norm": 1414.8878173828125,
484
  "learning_rate": 6.836734693877551e-06,
485
- "loss": 5.7005,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
- "grad_norm": 31.607126235961914,
491
  "learning_rate": 6.938775510204082e-06,
492
- "loss": 0.1219,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
- "grad_norm": 799.9751586914062,
498
  "learning_rate": 7.0408163265306125e-06,
499
- "loss": 5.7849,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
- "grad_norm": 132.71778869628906,
505
  "learning_rate": 7.1428571428571436e-06,
506
- "loss": 1.0726,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
- "grad_norm": 256.61041259765625,
512
  "learning_rate": 7.244897959183675e-06,
513
- "loss": 1.2599,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
- "grad_norm": 192.0435333251953,
519
  "learning_rate": 7.346938775510205e-06,
520
- "loss": 0.6473,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
- "grad_norm": 293.7915954589844,
526
  "learning_rate": 7.448979591836736e-06,
527
- "loss": 1.0397,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
- "grad_norm": 312.2645263671875,
533
  "learning_rate": 7.551020408163265e-06,
534
- "loss": 1.5555,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
- "grad_norm": 1.417815923690796,
540
  "learning_rate": 7.653061224489796e-06,
541
- "loss": 0.0078,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
- "grad_norm": 1.4391653537750244,
547
  "learning_rate": 7.755102040816327e-06,
548
- "loss": 0.0048,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
- "grad_norm": 5.628185749053955,
554
  "learning_rate": 7.857142857142858e-06,
555
- "loss": 0.0323,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
- "grad_norm": 264.5353698730469,
561
  "learning_rate": 7.959183673469388e-06,
562
- "loss": 1.7425,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
- "grad_norm": 1.5278851985931396,
568
  "learning_rate": 8.06122448979592e-06,
569
- "loss": 0.0035,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
- "grad_norm": 932.3336181640625,
575
  "learning_rate": 8.16326530612245e-06,
576
- "loss": 6.4849,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
- "grad_norm": 635.4749145507812,
582
  "learning_rate": 8.26530612244898e-06,
583
- "loss": 4.3767,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
- "grad_norm": 8.875201225280762,
589
  "learning_rate": 8.36734693877551e-06,
590
- "loss": 0.0186,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
- "grad_norm": 0.15500876307487488,
596
  "learning_rate": 8.469387755102042e-06,
597
- "loss": 0.0008,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
- "grad_norm": 269.5357666015625,
603
  "learning_rate": 8.571428571428571e-06,
604
- "loss": 0.8354,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
- "grad_norm": 5.054287910461426,
610
  "learning_rate": 8.673469387755103e-06,
611
- "loss": 0.0162,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
- "grad_norm": 84.90735626220703,
617
  "learning_rate": 8.775510204081633e-06,
618
- "loss": 0.1282,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
- "grad_norm": 81.53719329833984,
624
  "learning_rate": 8.877551020408163e-06,
625
- "loss": 0.4514,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
- "grad_norm": 547.4005126953125,
631
  "learning_rate": 8.979591836734695e-06,
632
- "loss": 4.9103,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
- "grad_norm": 25.792213439941406,
638
  "learning_rate": 9.081632653061225e-06,
639
- "loss": 0.0762,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
- "grad_norm": 10.455421447753906,
645
  "learning_rate": 9.183673469387756e-06,
646
- "loss": 0.0444,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
- "grad_norm": 472.54376220703125,
652
  "learning_rate": 9.285714285714288e-06,
653
- "loss": 1.8609,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
- "grad_norm": 31.092357635498047,
659
  "learning_rate": 9.387755102040818e-06,
660
- "loss": 0.1489,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
- "grad_norm": 231.94151306152344,
666
  "learning_rate": 9.489795918367348e-06,
667
- "loss": 0.5926,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
- "grad_norm": 211.05117797851562,
673
  "learning_rate": 9.591836734693878e-06,
674
- "loss": 0.5344,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
- "grad_norm": 217.01339721679688,
680
  "learning_rate": 9.693877551020408e-06,
681
- "loss": 0.4693,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
- "grad_norm": 1123.96484375,
687
  "learning_rate": 9.795918367346939e-06,
688
- "loss": 9.2282,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
- "grad_norm": 741.597412109375,
694
  "learning_rate": 9.89795918367347e-06,
695
- "loss": 4.6238,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
- "eval_dim_1024_cosine_accuracy@1": 0.36235595390524966,
701
- "eval_dim_1024_cosine_accuracy@10": 0.4334186939820743,
702
- "eval_dim_1024_cosine_accuracy@3": 0.3681177976952625,
703
- "eval_dim_1024_cosine_accuracy@5": 0.39308578745198464,
704
- "eval_dim_1024_cosine_map@100": 0.45394800707643057,
705
- "eval_dim_1024_cosine_mrr@10": 0.37430415828303115,
706
- "eval_dim_1024_cosine_ndcg@10": 0.3858809020056271,
707
- "eval_dim_1024_cosine_precision@1": 0.36235595390524966,
708
- "eval_dim_1024_cosine_precision@10": 0.3176696542893726,
709
- "eval_dim_1024_cosine_precision@3": 0.36192915066154496,
710
- "eval_dim_1024_cosine_precision@5": 0.35172855313700385,
711
- "eval_dim_1024_cosine_recall@1": 0.04346309464734114,
712
- "eval_dim_1024_cosine_recall@10": 0.28096984500258326,
713
- "eval_dim_1024_cosine_recall@3": 0.12757812796185336,
714
- "eval_dim_1024_cosine_recall@5": 0.19200836801442767,
715
- "eval_dim_128_cosine_accuracy@1": 0.3085787451984635,
716
- "eval_dim_128_cosine_accuracy@10": 0.37964148527528807,
717
- "eval_dim_128_cosine_accuracy@3": 0.31241997439180536,
718
  "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
- "eval_dim_128_cosine_map@100": 0.3963095303049961,
720
- "eval_dim_128_cosine_mrr@10": 0.3199812511432227,
721
- "eval_dim_128_cosine_ndcg@10": 0.3312285498294292,
722
- "eval_dim_128_cosine_precision@1": 0.3085787451984635,
723
- "eval_dim_128_cosine_precision@10": 0.2752880921895006,
724
- "eval_dim_128_cosine_precision@3": 0.3079385403329065,
725
- "eval_dim_128_cosine_precision@5": 0.29961587708066584,
726
- "eval_dim_128_cosine_recall@1": 0.036297623853982414,
727
- "eval_dim_128_cosine_recall@10": 0.24000960695821508,
728
- "eval_dim_128_cosine_recall@3": 0.10638786483158841,
729
- "eval_dim_128_cosine_recall@5": 0.16032639984514846,
730
- "eval_dim_256_cosine_accuracy@1": 0.3437900128040973,
731
- "eval_dim_256_cosine_accuracy@10": 0.41101152368758004,
732
- "eval_dim_256_cosine_accuracy@3": 0.34763124199743917,
733
- "eval_dim_256_cosine_accuracy@5": 0.3764404609475032,
734
- "eval_dim_256_cosine_map@100": 0.4298669852983799,
735
- "eval_dim_256_cosine_mrr@10": 0.3551361197487955,
736
- "eval_dim_256_cosine_ndcg@10": 0.3670052960875804,
737
- "eval_dim_256_cosine_precision@1": 0.3437900128040973,
738
- "eval_dim_256_cosine_precision@10": 0.3040973111395647,
739
- "eval_dim_256_cosine_precision@3": 0.342936406316688,
740
- "eval_dim_256_cosine_precision@5": 0.33457106274007686,
741
- "eval_dim_256_cosine_recall@1": 0.04013102608834382,
742
- "eval_dim_256_cosine_recall@10": 0.2648598688529433,
743
- "eval_dim_256_cosine_recall@3": 0.11771735023719074,
744
- "eval_dim_256_cosine_recall@5": 0.17837935755014916,
745
- "eval_dim_512_cosine_accuracy@1": 0.35979513444302175,
746
- "eval_dim_512_cosine_accuracy@10": 0.4334186939820743,
747
- "eval_dim_512_cosine_accuracy@3": 0.36555697823303457,
748
- "eval_dim_512_cosine_accuracy@5": 0.3911651728553137,
749
- "eval_dim_512_cosine_map@100": 0.4476805587612892,
750
- "eval_dim_512_cosine_mrr@10": 0.37212542934373866,
751
- "eval_dim_512_cosine_ndcg@10": 0.3843750966464458,
752
- "eval_dim_512_cosine_precision@1": 0.35979513444302175,
753
- "eval_dim_512_cosine_precision@10": 0.3173495518565941,
754
- "eval_dim_512_cosine_precision@3": 0.35936833119931705,
755
- "eval_dim_512_cosine_precision@5": 0.34967989756722156,
756
- "eval_dim_512_cosine_recall@1": 0.04265405128130224,
757
- "eval_dim_512_cosine_recall@10": 0.2781876565001863,
758
- "eval_dim_512_cosine_recall@3": 0.12523102347193127,
759
- "eval_dim_512_cosine_recall@5": 0.18912519336740205,
760
- "eval_dim_64_cosine_accuracy@1": 0.2740076824583867,
761
- "eval_dim_64_cosine_accuracy@10": 0.3354673495518566,
762
- "eval_dim_64_cosine_accuracy@3": 0.27848911651728553,
763
- "eval_dim_64_cosine_accuracy@5": 0.30153649167733676,
764
- "eval_dim_64_cosine_map@100": 0.3539045084602349,
765
- "eval_dim_64_cosine_mrr@10": 0.28429414873076814,
766
- "eval_dim_64_cosine_ndcg@10": 0.29402896525927075,
767
- "eval_dim_64_cosine_precision@1": 0.2740076824583867,
768
- "eval_dim_64_cosine_precision@10": 0.24571062740076827,
769
- "eval_dim_64_cosine_precision@3": 0.27315407597097735,
770
- "eval_dim_64_cosine_precision@5": 0.2670934699103713,
771
- "eval_dim_64_cosine_recall@1": 0.03167890172057568,
772
- "eval_dim_64_cosine_recall@10": 0.21092883720941633,
773
- "eval_dim_64_cosine_recall@3": 0.09267023360511464,
774
- "eval_dim_64_cosine_recall@5": 0.14048625468314752,
775
- "eval_dim_768_cosine_accuracy@1": 0.3591549295774648,
776
- "eval_dim_768_cosine_accuracy@10": 0.4334186939820743,
777
- "eval_dim_768_cosine_accuracy@3": 0.3649167733674776,
778
- "eval_dim_768_cosine_accuracy@5": 0.3892445582586428,
779
- "eval_dim_768_cosine_map@100": 0.4493001842217619,
780
- "eval_dim_768_cosine_mrr@10": 0.37149335406377615,
781
- "eval_dim_768_cosine_ndcg@10": 0.38308181752122755,
782
- "eval_dim_768_cosine_precision@1": 0.3591549295774648,
783
- "eval_dim_768_cosine_precision@10": 0.31670934699103714,
784
- "eval_dim_768_cosine_precision@3": 0.3587281263337601,
785
- "eval_dim_768_cosine_precision@5": 0.34852752880921894,
786
- "eval_dim_768_cosine_recall@1": 0.04250079684114586,
787
- "eval_dim_768_cosine_recall@10": 0.27695909667507057,
788
- "eval_dim_768_cosine_recall@3": 0.12462187901616553,
789
- "eval_dim_768_cosine_recall@5": 0.1875478484365334,
790
- "eval_runtime": 99.0843,
791
  "eval_samples_per_second": 0.0,
792
- "eval_sequential_score": 0.29402896525927075,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  }
 
1
  {
2
  "best_global_step": 98,
3
+ "best_metric": 0.3299991425713933,
4
  "best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.01020408163265306,
14
+ "grad_norm": 196.63563537597656,
15
  "learning_rate": 0.0,
16
+ "loss": 4.0658,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.02040816326530612,
21
+ "grad_norm": 184.93710327148438,
22
  "learning_rate": 1.0204081632653061e-07,
23
+ "loss": 5.2785,
24
  "step": 2
25
  },
26
  {
27
  "epoch": 0.030612244897959183,
28
+ "grad_norm": 179.60655212402344,
29
  "learning_rate": 2.0408163265306121e-07,
30
+ "loss": 4.349,
31
  "step": 3
32
  },
33
  {
34
  "epoch": 0.04081632653061224,
35
+ "grad_norm": 163.9447479248047,
36
  "learning_rate": 3.0612244897959183e-07,
37
+ "loss": 3.805,
38
  "step": 4
39
  },
40
  {
41
  "epoch": 0.05102040816326531,
42
+ "grad_norm": 164.29776000976562,
43
  "learning_rate": 4.0816326530612243e-07,
44
+ "loss": 3.1683,
45
  "step": 5
46
  },
47
  {
48
  "epoch": 0.061224489795918366,
49
+ "grad_norm": 209.0354766845703,
50
  "learning_rate": 5.102040816326531e-07,
51
+ "loss": 5.3989,
52
  "step": 6
53
  },
54
  {
55
  "epoch": 0.07142857142857142,
56
+ "grad_norm": 262.14971923828125,
57
  "learning_rate": 6.122448979591837e-07,
58
+ "loss": 8.211,
59
  "step": 7
60
  },
61
  {
62
  "epoch": 0.08163265306122448,
63
+ "grad_norm": 188.68443298339844,
64
  "learning_rate": 7.142857142857143e-07,
65
+ "loss": 5.3598,
66
  "step": 8
67
  },
68
  {
69
  "epoch": 0.09183673469387756,
70
+ "grad_norm": 216.530517578125,
71
  "learning_rate": 8.163265306122449e-07,
72
+ "loss": 5.0522,
73
  "step": 9
74
  },
75
  {
76
  "epoch": 0.10204081632653061,
77
+ "grad_norm": 231.66384887695312,
78
  "learning_rate": 9.183673469387756e-07,
79
+ "loss": 4.4736,
80
  "step": 10
81
  },
82
  {
83
  "epoch": 0.11224489795918367,
84
+ "grad_norm": 329.2440490722656,
85
  "learning_rate": 1.0204081632653063e-06,
86
+ "loss": 8.3251,
87
  "step": 11
88
  },
89
  {
90
  "epoch": 0.12244897959183673,
91
+ "grad_norm": 102.95913696289062,
92
  "learning_rate": 1.122448979591837e-06,
93
+ "loss": 2.5822,
94
  "step": 12
95
  },
96
  {
97
  "epoch": 0.1326530612244898,
98
+ "grad_norm": 116.47322845458984,
99
  "learning_rate": 1.2244897959183673e-06,
100
+ "loss": 2.8464,
101
  "step": 13
102
  },
103
  {
104
  "epoch": 0.14285714285714285,
105
+ "grad_norm": 226.98976135253906,
106
  "learning_rate": 1.3265306122448982e-06,
107
+ "loss": 5.8915,
108
  "step": 14
109
  },
110
  {
111
  "epoch": 0.15306122448979592,
112
+ "grad_norm": 192.4533233642578,
113
  "learning_rate": 1.4285714285714286e-06,
114
+ "loss": 7.2637,
115
  "step": 15
116
  },
117
  {
118
  "epoch": 0.16326530612244897,
119
+ "grad_norm": 269.7630920410156,
120
  "learning_rate": 1.5306122448979593e-06,
121
+ "loss": 7.4234,
122
  "step": 16
123
  },
124
  {
125
  "epoch": 0.17346938775510204,
126
+ "grad_norm": 111.28227233886719,
127
  "learning_rate": 1.6326530612244897e-06,
128
+ "loss": 2.3787,
129
  "step": 17
130
  },
131
  {
132
  "epoch": 0.1836734693877551,
133
+ "grad_norm": 165.55792236328125,
134
  "learning_rate": 1.7346938775510206e-06,
135
+ "loss": 3.1947,
136
  "step": 18
137
  },
138
  {
139
  "epoch": 0.19387755102040816,
140
+ "grad_norm": 152.33682250976562,
141
  "learning_rate": 1.8367346938775512e-06,
142
+ "loss": 3.4787,
143
  "step": 19
144
  },
145
  {
146
  "epoch": 0.20408163265306123,
147
+ "grad_norm": 159.368408203125,
148
  "learning_rate": 1.938775510204082e-06,
149
+ "loss": 5.1418,
150
  "step": 20
151
  },
152
  {
153
  "epoch": 0.21428571428571427,
154
+ "grad_norm": 112.97805786132812,
155
  "learning_rate": 2.0408163265306125e-06,
156
+ "loss": 2.3042,
157
  "step": 21
158
  },
159
  {
160
  "epoch": 0.22448979591836735,
161
+ "grad_norm": 199.43443298339844,
162
  "learning_rate": 2.1428571428571427e-06,
163
+ "loss": 6.6786,
164
  "step": 22
165
  },
166
  {
167
  "epoch": 0.23469387755102042,
168
+ "grad_norm": 241.95591735839844,
169
  "learning_rate": 2.244897959183674e-06,
170
+ "loss": 6.6721,
171
  "step": 23
172
  },
173
  {
174
  "epoch": 0.24489795918367346,
175
+ "grad_norm": 249.65122985839844,
176
  "learning_rate": 2.3469387755102044e-06,
177
+ "loss": 4.4896,
178
  "step": 24
179
  },
180
  {
181
  "epoch": 0.25510204081632654,
182
+ "grad_norm": 183.51483154296875,
183
  "learning_rate": 2.4489795918367347e-06,
184
+ "loss": 3.4416,
185
  "step": 25
186
  },
187
  {
188
  "epoch": 0.2653061224489796,
189
+ "grad_norm": 286.1512756347656,
190
  "learning_rate": 2.5510204081632657e-06,
191
+ "loss": 7.5134,
192
  "step": 26
193
  },
194
  {
195
  "epoch": 0.2755102040816326,
196
+ "grad_norm": 98.32283782958984,
197
  "learning_rate": 2.6530612244897964e-06,
198
+ "loss": 1.9577,
199
  "step": 27
200
  },
201
  {
202
  "epoch": 0.2857142857142857,
203
+ "grad_norm": 274.64178466796875,
204
  "learning_rate": 2.7551020408163266e-06,
205
+ "loss": 4.9552,
206
  "step": 28
207
  },
208
  {
209
  "epoch": 0.29591836734693877,
210
+ "grad_norm": 142.77537536621094,
211
  "learning_rate": 2.8571428571428573e-06,
212
+ "loss": 2.5202,
213
  "step": 29
214
  },
215
  {
216
  "epoch": 0.30612244897959184,
217
+ "grad_norm": 127.23304748535156,
218
  "learning_rate": 2.959183673469388e-06,
219
+ "loss": 2.4486,
220
  "step": 30
221
  },
222
  {
223
  "epoch": 0.3163265306122449,
224
+ "grad_norm": 99.88568878173828,
225
  "learning_rate": 3.0612244897959185e-06,
226
+ "loss": 1.9923,
227
  "step": 31
228
  },
229
  {
230
  "epoch": 0.32653061224489793,
231
+ "grad_norm": 151.1445770263672,
232
  "learning_rate": 3.1632653061224496e-06,
233
+ "loss": 2.8301,
234
  "step": 32
235
  },
236
  {
237
  "epoch": 0.336734693877551,
238
+ "grad_norm": 203.54248046875,
239
  "learning_rate": 3.2653061224489794e-06,
240
+ "loss": 3.7414,
241
  "step": 33
242
  },
243
  {
244
  "epoch": 0.3469387755102041,
245
+ "grad_norm": 443.4117126464844,
246
  "learning_rate": 3.3673469387755105e-06,
247
+ "loss": 5.2738,
248
  "step": 34
249
  },
250
  {
251
  "epoch": 0.35714285714285715,
252
+ "grad_norm": 178.9974822998047,
253
  "learning_rate": 3.469387755102041e-06,
254
+ "loss": 3.791,
255
  "step": 35
256
  },
257
  {
258
  "epoch": 0.3673469387755102,
259
+ "grad_norm": 122.32801818847656,
260
  "learning_rate": 3.5714285714285718e-06,
261
+ "loss": 1.9081,
262
  "step": 36
263
  },
264
  {
265
  "epoch": 0.37755102040816324,
266
+ "grad_norm": 189.6477813720703,
267
  "learning_rate": 3.6734693877551024e-06,
268
+ "loss": 2.4172,
269
  "step": 37
270
  },
271
  {
272
  "epoch": 0.3877551020408163,
273
+ "grad_norm": 222.67959594726562,
274
  "learning_rate": 3.7755102040816327e-06,
275
+ "loss": 4.0417,
276
  "step": 38
277
  },
278
  {
279
  "epoch": 0.3979591836734694,
280
+ "grad_norm": 160.97071838378906,
281
  "learning_rate": 3.877551020408164e-06,
282
+ "loss": 3.5591,
283
  "step": 39
284
  },
285
  {
286
  "epoch": 0.40816326530612246,
287
+ "grad_norm": 178.01609802246094,
288
  "learning_rate": 3.979591836734694e-06,
289
+ "loss": 3.0139,
290
  "step": 40
291
  },
292
  {
293
  "epoch": 0.41836734693877553,
294
+ "grad_norm": 142.32168579101562,
295
  "learning_rate": 4.081632653061225e-06,
296
+ "loss": 2.4836,
297
  "step": 41
298
  },
299
  {
300
  "epoch": 0.42857142857142855,
301
+ "grad_norm": 148.1731719970703,
302
  "learning_rate": 4.183673469387755e-06,
303
+ "loss": 2.807,
304
  "step": 42
305
  },
306
  {
307
  "epoch": 0.4387755102040816,
308
+ "grad_norm": 152.1929931640625,
309
  "learning_rate": 4.2857142857142855e-06,
310
+ "loss": 1.9753,
311
  "step": 43
312
  },
313
  {
314
  "epoch": 0.4489795918367347,
315
+ "grad_norm": 219.5394287109375,
316
  "learning_rate": 4.3877551020408165e-06,
317
+ "loss": 0.9764,
318
  "step": 44
319
  },
320
  {
321
  "epoch": 0.45918367346938777,
322
+ "grad_norm": 95.7768783569336,
323
  "learning_rate": 4.489795918367348e-06,
324
+ "loss": 1.1398,
325
  "step": 45
326
  },
327
  {
328
  "epoch": 0.46938775510204084,
329
+ "grad_norm": 320.86529541015625,
330
  "learning_rate": 4.591836734693878e-06,
331
+ "loss": 6.7812,
332
  "step": 46
333
  },
334
  {
335
  "epoch": 0.47959183673469385,
336
+ "grad_norm": 18.277860641479492,
337
  "learning_rate": 4.693877551020409e-06,
338
+ "loss": 0.2479,
339
  "step": 47
340
  },
341
  {
342
  "epoch": 0.4897959183673469,
343
+ "grad_norm": 405.69024658203125,
344
  "learning_rate": 4.795918367346939e-06,
345
+ "loss": 6.041,
346
  "step": 48
347
  },
348
  {
349
  "epoch": 0.5,
350
+ "grad_norm": 104.91180419921875,
351
  "learning_rate": 4.897959183673469e-06,
352
+ "loss": 1.1839,
353
  "step": 49
354
  },
355
  {
356
  "epoch": 0.5102040816326531,
357
+ "grad_norm": 110.48990631103516,
358
  "learning_rate": 5e-06,
359
+ "loss": 2.0933,
360
  "step": 50
361
  },
362
  {
363
  "epoch": 0.5204081632653061,
364
+ "grad_norm": 135.2900390625,
365
  "learning_rate": 5.1020408163265315e-06,
366
+ "loss": 1.8613,
367
  "step": 51
368
  },
369
  {
370
  "epoch": 0.5306122448979592,
371
+ "grad_norm": 146.744140625,
372
  "learning_rate": 5.204081632653062e-06,
373
+ "loss": 2.9359,
374
  "step": 52
375
  },
376
  {
377
  "epoch": 0.5408163265306123,
378
+ "grad_norm": 103.08831787109375,
379
  "learning_rate": 5.306122448979593e-06,
380
+ "loss": 1.0129,
381
  "step": 53
382
  },
383
  {
384
  "epoch": 0.5510204081632653,
385
+ "grad_norm": 527.3735961914062,
386
  "learning_rate": 5.408163265306123e-06,
387
+ "loss": 11.8843,
388
  "step": 54
389
  },
390
  {
391
  "epoch": 0.5612244897959183,
392
+ "grad_norm": 374.4621887207031,
393
  "learning_rate": 5.510204081632653e-06,
394
+ "loss": 5.8523,
395
  "step": 55
396
  },
397
  {
398
  "epoch": 0.5714285714285714,
399
+ "grad_norm": 71.62016296386719,
400
  "learning_rate": 5.6122448979591834e-06,
401
+ "loss": 0.832,
402
  "step": 56
403
  },
404
  {
405
  "epoch": 0.5816326530612245,
406
+ "grad_norm": 287.0186462402344,
407
  "learning_rate": 5.7142857142857145e-06,
408
+ "loss": 3.1778,
409
  "step": 57
410
  },
411
  {
412
  "epoch": 0.5918367346938775,
413
+ "grad_norm": 31.022693634033203,
414
  "learning_rate": 5.816326530612246e-06,
415
+ "loss": 0.3919,
416
  "step": 58
417
  },
418
  {
419
  "epoch": 0.6020408163265306,
420
+ "grad_norm": 412.99176025390625,
421
  "learning_rate": 5.918367346938776e-06,
422
+ "loss": 7.2526,
423
  "step": 59
424
  },
425
  {
426
  "epoch": 0.6122448979591837,
427
+ "grad_norm": 40.534244537353516,
428
  "learning_rate": 6.020408163265307e-06,
429
+ "loss": 0.578,
430
  "step": 60
431
  },
432
  {
433
  "epoch": 0.6224489795918368,
434
+ "grad_norm": 226.0243682861328,
435
  "learning_rate": 6.122448979591837e-06,
436
+ "loss": 2.5233,
437
  "step": 61
438
  },
439
  {
440
  "epoch": 0.6326530612244898,
441
+ "grad_norm": 113.55558013916016,
442
  "learning_rate": 6.224489795918368e-06,
443
+ "loss": 1.5694,
444
  "step": 62
445
  },
446
  {
447
  "epoch": 0.6428571428571429,
448
+ "grad_norm": 190.6112518310547,
449
  "learning_rate": 6.326530612244899e-06,
450
+ "loss": 2.2332,
451
  "step": 63
452
  },
453
  {
454
  "epoch": 0.6530612244897959,
455
+ "grad_norm": 391.7434387207031,
456
  "learning_rate": 6.4285714285714295e-06,
457
+ "loss": 4.5545,
458
  "step": 64
459
  },
460
  {
461
  "epoch": 0.6632653061224489,
462
+ "grad_norm": 214.495361328125,
463
  "learning_rate": 6.530612244897959e-06,
464
+ "loss": 1.4804,
465
  "step": 65
466
  },
467
  {
468
  "epoch": 0.673469387755102,
469
+ "grad_norm": 62.52560806274414,
470
  "learning_rate": 6.63265306122449e-06,
471
+ "loss": 0.4391,
472
  "step": 66
473
  },
474
  {
475
  "epoch": 0.6836734693877551,
476
+ "grad_norm": 298.97808837890625,
477
  "learning_rate": 6.734693877551021e-06,
478
+ "loss": 3.4715,
479
  "step": 67
480
  },
481
  {
482
  "epoch": 0.6938775510204082,
483
+ "grad_norm": 483.84796142578125,
484
  "learning_rate": 6.836734693877551e-06,
485
+ "loss": 8.5808,
486
  "step": 68
487
  },
488
  {
489
  "epoch": 0.7040816326530612,
490
+ "grad_norm": 482.55743408203125,
491
  "learning_rate": 6.938775510204082e-06,
492
+ "loss": 5.6959,
493
  "step": 69
494
  },
495
  {
496
  "epoch": 0.7142857142857143,
497
+ "grad_norm": 198.6812744140625,
498
  "learning_rate": 7.0408163265306125e-06,
499
+ "loss": 3.8277,
500
  "step": 70
501
  },
502
  {
503
  "epoch": 0.7244897959183674,
504
+ "grad_norm": 293.4190673828125,
505
  "learning_rate": 7.1428571428571436e-06,
506
+ "loss": 2.1832,
507
  "step": 71
508
  },
509
  {
510
  "epoch": 0.7346938775510204,
511
+ "grad_norm": 13.164139747619629,
512
  "learning_rate": 7.244897959183675e-06,
513
+ "loss": 0.1244,
514
  "step": 72
515
  },
516
  {
517
  "epoch": 0.7448979591836735,
518
+ "grad_norm": 163.4252166748047,
519
  "learning_rate": 7.346938775510205e-06,
520
+ "loss": 0.7707,
521
  "step": 73
522
  },
523
  {
524
  "epoch": 0.7551020408163265,
525
+ "grad_norm": 193.64401245117188,
526
  "learning_rate": 7.448979591836736e-06,
527
+ "loss": 3.4828,
528
  "step": 74
529
  },
530
  {
531
  "epoch": 0.7653061224489796,
532
+ "grad_norm": 178.31982421875,
533
  "learning_rate": 7.551020408163265e-06,
534
+ "loss": 2.9645,
535
  "step": 75
536
  },
537
  {
538
  "epoch": 0.7755102040816326,
539
+ "grad_norm": 28.57689666748047,
540
  "learning_rate": 7.653061224489796e-06,
541
+ "loss": 0.2948,
542
  "step": 76
543
  },
544
  {
545
  "epoch": 0.7857142857142857,
546
+ "grad_norm": 608.8088989257812,
547
  "learning_rate": 7.755102040816327e-06,
548
+ "loss": 12.6456,
549
  "step": 77
550
  },
551
  {
552
  "epoch": 0.7959183673469388,
553
+ "grad_norm": 123.08556365966797,
554
  "learning_rate": 7.857142857142858e-06,
555
+ "loss": 1.2493,
556
  "step": 78
557
  },
558
  {
559
  "epoch": 0.8061224489795918,
560
+ "grad_norm": 225.3292694091797,
561
  "learning_rate": 7.959183673469388e-06,
562
+ "loss": 2.6675,
563
  "step": 79
564
  },
565
  {
566
  "epoch": 0.8163265306122449,
567
+ "grad_norm": 57.49665069580078,
568
  "learning_rate": 8.06122448979592e-06,
569
+ "loss": 0.5642,
570
  "step": 80
571
  },
572
  {
573
  "epoch": 0.826530612244898,
574
+ "grad_norm": 247.52210998535156,
575
  "learning_rate": 8.16326530612245e-06,
576
+ "loss": 1.6008,
577
  "step": 81
578
  },
579
  {
580
  "epoch": 0.8367346938775511,
581
+ "grad_norm": 309.60382080078125,
582
  "learning_rate": 8.26530612244898e-06,
583
+ "loss": 3.257,
584
  "step": 82
585
  },
586
  {
587
  "epoch": 0.8469387755102041,
588
+ "grad_norm": 183.82882690429688,
589
  "learning_rate": 8.36734693877551e-06,
590
+ "loss": 2.8086,
591
  "step": 83
592
  },
593
  {
594
  "epoch": 0.8571428571428571,
595
+ "grad_norm": 88.08740234375,
596
  "learning_rate": 8.469387755102042e-06,
597
+ "loss": 0.4056,
598
  "step": 84
599
  },
600
  {
601
  "epoch": 0.8673469387755102,
602
+ "grad_norm": 515.5130615234375,
603
  "learning_rate": 8.571428571428571e-06,
604
+ "loss": 3.711,
605
  "step": 85
606
  },
607
  {
608
  "epoch": 0.8775510204081632,
609
+ "grad_norm": 2.946629285812378,
610
  "learning_rate": 8.673469387755103e-06,
611
+ "loss": 0.0253,
612
  "step": 86
613
  },
614
  {
615
  "epoch": 0.8877551020408163,
616
+ "grad_norm": 31.143714904785156,
617
  "learning_rate": 8.775510204081633e-06,
618
+ "loss": 0.168,
619
  "step": 87
620
  },
621
  {
622
  "epoch": 0.8979591836734694,
623
+ "grad_norm": 535.6795043945312,
624
  "learning_rate": 8.877551020408163e-06,
625
+ "loss": 5.0992,
626
  "step": 88
627
  },
628
  {
629
  "epoch": 0.9081632653061225,
630
+ "grad_norm": 577.0897216796875,
631
  "learning_rate": 8.979591836734695e-06,
632
+ "loss": 5.3724,
633
  "step": 89
634
  },
635
  {
636
  "epoch": 0.9183673469387755,
637
+ "grad_norm": 102.31855773925781,
638
  "learning_rate": 9.081632653061225e-06,
639
+ "loss": 0.9172,
640
  "step": 90
641
  },
642
  {
643
  "epoch": 0.9285714285714286,
644
+ "grad_norm": 306.1739196777344,
645
  "learning_rate": 9.183673469387756e-06,
646
+ "loss": 3.1239,
647
  "step": 91
648
  },
649
  {
650
  "epoch": 0.9387755102040817,
651
+ "grad_norm": 191.27415466308594,
652
  "learning_rate": 9.285714285714288e-06,
653
+ "loss": 1.4121,
654
  "step": 92
655
  },
656
  {
657
  "epoch": 0.9489795918367347,
658
+ "grad_norm": 67.19822692871094,
659
  "learning_rate": 9.387755102040818e-06,
660
+ "loss": 0.2599,
661
  "step": 93
662
  },
663
  {
664
  "epoch": 0.9591836734693877,
665
+ "grad_norm": 17.93955421447754,
666
  "learning_rate": 9.489795918367348e-06,
667
+ "loss": 0.1166,
668
  "step": 94
669
  },
670
  {
671
  "epoch": 0.9693877551020408,
672
+ "grad_norm": 23.839630126953125,
673
  "learning_rate": 9.591836734693878e-06,
674
+ "loss": 0.1938,
675
  "step": 95
676
  },
677
  {
678
  "epoch": 0.9795918367346939,
679
+ "grad_norm": 1459.8140869140625,
680
  "learning_rate": 9.693877551020408e-06,
681
+ "loss": 18.5143,
682
  "step": 96
683
  },
684
  {
685
  "epoch": 0.9897959183673469,
686
+ "grad_norm": 670.869140625,
687
  "learning_rate": 9.795918367346939e-06,
688
+ "loss": 6.1932,
689
  "step": 97
690
  },
691
  {
692
  "epoch": 1.0,
693
+ "grad_norm": 942.95849609375,
694
  "learning_rate": 9.89795918367347e-06,
695
+ "loss": 20.3042,
696
  "step": 98
697
  },
698
  {
699
  "epoch": 1.0,
700
+ "eval_dim_1024_cosine_accuracy@1": 0.3719590268886043,
701
+ "eval_dim_1024_cosine_accuracy@10": 0.43982074263764404,
702
+ "eval_dim_1024_cosine_accuracy@3": 0.37836107554417414,
703
+ "eval_dim_1024_cosine_accuracy@5": 0.4020486555697823,
704
+ "eval_dim_1024_cosine_map@100": 0.4604070214987707,
705
+ "eval_dim_1024_cosine_mrr@10": 0.383397099770339,
706
+ "eval_dim_1024_cosine_ndcg@10": 0.3947688545057553,
707
+ "eval_dim_1024_cosine_precision@1": 0.3719590268886043,
708
+ "eval_dim_1024_cosine_precision@10": 0.3265044814340589,
709
+ "eval_dim_1024_cosine_precision@3": 0.37153222364489963,
710
+ "eval_dim_1024_cosine_precision@5": 0.3613316261203585,
711
+ "eval_dim_1024_cosine_recall@1": 0.04338670134208909,
712
+ "eval_dim_1024_cosine_recall@10": 0.28262195979320087,
713
+ "eval_dim_1024_cosine_recall@3": 0.1268773565773867,
714
+ "eval_dim_1024_cosine_recall@5": 0.19083511167371434,
715
+ "eval_dim_128_cosine_accuracy@1": 0.3053777208706786,
716
+ "eval_dim_128_cosine_accuracy@10": 0.37836107554417414,
717
+ "eval_dim_128_cosine_accuracy@3": 0.3111395646606914,
718
  "eval_dim_128_cosine_accuracy@5": 0.3361075544174136,
719
+ "eval_dim_128_cosine_map@100": 0.3904168259576031,
720
+ "eval_dim_128_cosine_mrr@10": 0.3174377070503828,
721
+ "eval_dim_128_cosine_ndcg@10": 0.3299991425713933,
722
+ "eval_dim_128_cosine_precision@1": 0.3053777208706786,
723
+ "eval_dim_128_cosine_precision@10": 0.271830985915493,
724
+ "eval_dim_128_cosine_precision@3": 0.30495091762697396,
725
+ "eval_dim_128_cosine_precision@5": 0.29641485275288093,
726
+ "eval_dim_128_cosine_recall@1": 0.03688049871840266,
727
+ "eval_dim_128_cosine_recall@10": 0.24260019246216608,
728
+ "eval_dim_128_cosine_recall@3": 0.10779952005618963,
729
+ "eval_dim_128_cosine_recall@5": 0.16176912684922656,
730
+ "eval_dim_256_cosine_accuracy@1": 0.324583866837388,
731
+ "eval_dim_256_cosine_accuracy@10": 0.4058898847631242,
732
+ "eval_dim_256_cosine_accuracy@3": 0.33290653008962867,
733
+ "eval_dim_256_cosine_accuracy@5": 0.3649167733674776,
734
+ "eval_dim_256_cosine_map@100": 0.4232469199200366,
735
+ "eval_dim_256_cosine_mrr@10": 0.33872299453285326,
736
+ "eval_dim_256_cosine_ndcg@10": 0.35307499975694673,
737
+ "eval_dim_256_cosine_precision@1": 0.324583866837388,
738
+ "eval_dim_256_cosine_precision@10": 0.2935339308578745,
739
+ "eval_dim_256_cosine_precision@3": 0.324583866837388,
740
+ "eval_dim_256_cosine_precision@5": 0.31792573623559534,
741
+ "eval_dim_256_cosine_recall@1": 0.038313787861467184,
742
+ "eval_dim_256_cosine_recall@10": 0.2549878568107636,
743
+ "eval_dim_256_cosine_recall@3": 0.11223891931505588,
744
+ "eval_dim_256_cosine_recall@5": 0.1697408782100328,
745
+ "eval_dim_512_cosine_accuracy@1": 0.34827144686299616,
746
+ "eval_dim_512_cosine_accuracy@10": 0.44302176696542894,
747
+ "eval_dim_512_cosine_accuracy@3": 0.3553137003841229,
748
+ "eval_dim_512_cosine_accuracy@5": 0.39436619718309857,
749
+ "eval_dim_512_cosine_map@100": 0.44994622162234726,
750
+ "eval_dim_512_cosine_mrr@10": 0.3644033392272826,
751
+ "eval_dim_512_cosine_ndcg@10": 0.3807642678190648,
752
+ "eval_dim_512_cosine_precision@1": 0.34827144686299616,
753
+ "eval_dim_512_cosine_precision@10": 0.31798975672215113,
754
+ "eval_dim_512_cosine_precision@3": 0.34827144686299616,
755
+ "eval_dim_512_cosine_precision@5": 0.3418693982074264,
756
+ "eval_dim_512_cosine_recall@1": 0.04125738861359979,
757
+ "eval_dim_512_cosine_recall@10": 0.2752162089385945,
758
+ "eval_dim_512_cosine_recall@3": 0.12077279112247459,
759
+ "eval_dim_512_cosine_recall@5": 0.18268801127884626,
760
+ "eval_dim_64_cosine_accuracy@1": 0.23175416133162613,
761
+ "eval_dim_64_cosine_accuracy@10": 0.29833546734955185,
762
+ "eval_dim_64_cosine_accuracy@3": 0.23879641485275288,
763
+ "eval_dim_64_cosine_accuracy@5": 0.2612035851472471,
764
+ "eval_dim_64_cosine_map@100": 0.3151829220617657,
765
+ "eval_dim_64_cosine_mrr@10": 0.24303802613661746,
766
+ "eval_dim_64_cosine_ndcg@10": 0.2548721998123125,
767
+ "eval_dim_64_cosine_precision@1": 0.23175416133162613,
768
+ "eval_dim_64_cosine_precision@10": 0.20864276568501922,
769
+ "eval_dim_64_cosine_precision@3": 0.23218096457533077,
770
+ "eval_dim_64_cosine_precision@5": 0.22740076824583869,
771
+ "eval_dim_64_cosine_recall@1": 0.028384798943475897,
772
+ "eval_dim_64_cosine_recall@10": 0.19540887275051927,
773
+ "eval_dim_64_cosine_recall@3": 0.08325514613360847,
774
+ "eval_dim_64_cosine_recall@5": 0.12720688223912358,
775
+ "eval_dim_768_cosine_accuracy@1": 0.36619718309859156,
776
+ "eval_dim_768_cosine_accuracy@10": 0.44302176696542894,
777
+ "eval_dim_768_cosine_accuracy@3": 0.37516005121638923,
778
+ "eval_dim_768_cosine_accuracy@5": 0.4014084507042254,
779
+ "eval_dim_768_cosine_map@100": 0.4592162636155952,
780
+ "eval_dim_768_cosine_mrr@10": 0.37943672133812956,
781
+ "eval_dim_768_cosine_ndcg@10": 0.3923099208699586,
782
+ "eval_dim_768_cosine_precision@1": 0.36619718309859156,
783
+ "eval_dim_768_cosine_precision@10": 0.32541613316261203,
784
+ "eval_dim_768_cosine_precision@3": 0.3666239863422962,
785
+ "eval_dim_768_cosine_precision@5": 0.35761843790012804,
786
+ "eval_dim_768_cosine_recall@1": 0.042908107176418055,
787
+ "eval_dim_768_cosine_recall@10": 0.2828362934197418,
788
+ "eval_dim_768_cosine_recall@3": 0.12573828441229515,
789
+ "eval_dim_768_cosine_recall@5": 0.18984022934199501,
790
+ "eval_runtime": 98.8905,
791
  "eval_samples_per_second": 0.0,
792
+ "eval_sequential_score": 0.2548721998123125,
793
  "eval_steps_per_second": 0.0,
794
  "step": 98
795
  }
checkpoint-98/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097
eval/Information-Retrieval_evaluation_dim_1024_results.csv CHANGED
@@ -1,7 +1,4 @@
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
- 1.0,98,0.3758002560819462,0.38092189500640206,0.42189500640204863,0.46350832266325226,0.3758002560819462,0.04194926351085856,0.3758002560819462,0.12414468143510038,0.3700384122919334,0.19046123021012357,0.3443661971830986,0.2917779043677444,0.390878350913562,0.40707429228028374,0.470393135872763
3
- 2.0,196,0.323943661971831,0.3348271446862996,0.3725992317541613,0.41165172855313703,0.323943661971831,0.03729875848331919,0.32565087494664957,0.11010828228304573,0.32215108834827144,0.1691095157664461,0.30121638924455824,0.25941286480210374,0.3400178850883073,0.35730245744576905,0.42245999663664574
4
- 3.0,294,0.30217669654289375,0.3111395646606914,0.3444302176696543,0.382842509603073,0.30217669654289375,0.03408811614897108,0.3034571062740077,0.1010394072515041,0.2997439180537772,0.15530224859949704,0.2768886043533931,0.23384842264767727,0.31628610044102934,0.33035731141222535,0.39467218596398806
5
- 1.0,98,0.36235595390524966,0.3681177976952625,0.39308578745198464,0.4334186939820743,0.36235595390524966,0.04346309464734114,0.36192915066154496,0.12757812796185336,0.35172855313700385,0.19200836801442767,0.3176696542893726,0.28096984500258326,0.37430415828303115,0.3858809020056271,0.45394800707643057
6
- 2.0,196,0.32522407170294493,0.33290653008962867,0.36043533930857874,0.3969270166453265,0.32522407170294493,0.04113491331982186,0.3254374733247973,0.12080229545561262,0.31626120358514725,0.18183789253196145,0.28361075544174136,0.2664549051060991,0.33769460195516493,0.34986350069216465,0.4164888021641558
7
- 3.0,294,0.30985915492957744,0.31498079385403327,0.34699103713188223,0.37836107554417414,0.30985915492957744,0.03936027574360421,0.30900554844216815,0.11544349976954149,0.30115236875800255,0.17456487753074904,0.2694622279129321,0.2548212686119806,0.32174181452350414,0.33364818903542787,0.3968421394024028
 
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
+ 1.0,98,0.3719590268886043,0.37836107554417414,0.4020486555697823,0.43982074263764404,0.3719590268886043,0.04338670134208909,0.37153222364489963,0.1268773565773867,0.3613316261203585,0.19083511167371434,0.3265044814340589,0.28262195979320087,0.383397099770339,0.3947688545057553,0.4604070214987707
3
+ 2.0,196,0.3591549295774648,0.3649167733674776,0.39308578745198464,0.42445582586427655,0.3591549295774648,0.040925664980934,0.35830132309005547,0.11912044022990924,0.3495518565941101,0.17907504115268522,0.31914212548015364,0.2663878590041381,0.37061688311688284,0.38277564232489586,0.4442656203645836
4
+ 3.0,294,0.3354673495518566,0.3405889884763124,0.3725992317541613,0.41357234314980795,0.3354673495518566,0.0398374526951408,0.33418693982074266,0.11613369449549121,0.3261203585147247,0.1745427554610417,0.2996798975672215,0.26082122731297214,0.34860069507956787,0.3620561774122382,0.4292335776181432
 
 
 
eval/Information-Retrieval_evaluation_dim_128_results.csv CHANGED
@@ -1,7 +1,4 @@
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
- 1.0,98,0.3034571062740077,0.31049935979513443,0.3418693982074264,0.38092189500640206,0.3034571062740077,0.033684401658902394,0.3038839095177123,0.0993687488012139,0.2994878361075545,0.1530597677971524,0.27951344430217673,0.23623301896460966,0.31698549885169575,0.3316022948596431,0.3880699945060264
3
- 2.0,196,0.26312419974391804,0.27208706786171577,0.3040973111395647,0.3437900128040973,0.26312419974391804,0.030292339319323316,0.264404609475032,0.08932760883813833,0.2622279129321383,0.13813514940643304,0.24705505761843788,0.21401864973389503,0.27745640509724984,0.2925461220092893,0.35443528477119834
4
- 3.0,294,0.2528809218950064,0.2573623559539053,0.2855313700384123,0.31562099871959026,0.2528809218950064,0.026569332449478945,0.2528809218950064,0.07825317283023206,0.24942381562099875,0.11986554677533047,0.23405889884763126,0.18209399283843894,0.26369479503282284,0.274763189652588,0.32691762851003603
5
- 1.0,98,0.3085787451984635,0.31241997439180536,0.3361075544174136,0.37964148527528807,0.3085787451984635,0.036297623853982414,0.3079385403329065,0.10638786483158841,0.29961587708066584,0.16032639984514846,0.2752880921895006,0.24000960695821508,0.3199812511432227,0.3312285498294292,0.3963095303049961
6
- 2.0,196,0.30217669654289375,0.3072983354673495,0.3265044814340589,0.3546734955185659,0.30217669654289375,0.03603846894598867,0.30239009816474605,0.10607255532328354,0.29359795134443023,0.15998840334482403,0.26312419974391804,0.23664446759855584,0.3112729406743488,0.32071443787836906,0.38014172959059034
7
- 3.0,294,0.2765685019206146,0.2887323943661972,0.31882202304737517,0.3501920614596671,0.2765685019206146,0.03462627857091171,0.2787025181391378,0.10222485929387912,0.27413572343149806,0.15567435868523452,0.24916773367477596,0.23219859983003413,0.29051937889966023,0.3054184027921396,0.36208318391000843
 
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
+ 1.0,98,0.3053777208706786,0.3111395646606914,0.3361075544174136,0.37836107554417414,0.3053777208706786,0.03688049871840266,0.30495091762697396,0.10779952005618963,0.29641485275288093,0.16176912684922656,0.271830985915493,0.24260019246216608,0.3174377070503828,0.3299991425713933,0.3904168259576031
3
+ 2.0,196,0.3060179257362356,0.31049935979513443,0.3322663252240717,0.3687580025608195,0.3060179257362356,0.035453426807775947,0.3053777208706786,0.10361683841932247,0.29679897567221514,0.15549008066651587,0.27099871959026883,0.23154232181544257,0.3164926427250368,0.3273120368593549,0.38523839229741214
4
+ 3.0,294,0.2874519846350832,0.2919334186939821,0.3207426376440461,0.36299615877080665,0.2874519846350832,0.03347207506231567,0.28638497652582157,0.09769125063098655,0.2796414852752881,0.14652568796520726,0.260179257362356,0.2200069729736681,0.29988542365302884,0.3121618176452898,0.37222007780985544
 
 
 
eval/Information-Retrieval_evaluation_dim_256_results.csv CHANGED
@@ -1,7 +1,4 @@
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
- 1.0,98,0.3412291933418694,0.34635083226632524,0.38348271446862997,0.42573623559539053,0.3412291933418694,0.03848278551561358,0.34101579172001706,0.11327945737089931,0.3354673495518566,0.1731366734431563,0.31241997439180536,0.2637956260522792,0.35570315834400307,0.3711945274033381,0.43658178532295944
3
- 2.0,196,0.2797695262483995,0.293854033290653,0.32842509603072984,0.37451984635083224,0.2797695262483995,0.032498552866695056,0.2825437473324798,0.09600029320659498,0.2809218950064021,0.1479519425167412,0.26677336747759284,0.23047217237905318,0.29707690587972235,0.31483989393569145,0.38160012953782213
4
- 3.0,294,0.2740076824583867,0.28169014084507044,0.31562099871959026,0.34763124199743917,0.2740076824583867,0.02872973938731268,0.2750746905676483,0.08491580875997423,0.2736235595390525,0.13128424269244784,0.25832266325224074,0.2042482031495929,0.28722029144564304,0.30137677496002707,0.355446615033308
5
- 1.0,98,0.3437900128040973,0.34763124199743917,0.3764404609475032,0.41101152368758004,0.3437900128040973,0.04013102608834382,0.342936406316688,0.11771735023719074,0.33457106274007686,0.17837935755014916,0.3040973111395647,0.2648598688529433,0.3551361197487955,0.3670052960875804,0.4298669852983799
6
- 2.0,196,0.31049935979513443,0.31882202304737517,0.34571062740076824,0.3725992317541613,0.31049935979513443,0.0379038673811849,0.3109261630388391,0.11184662439829526,0.3035851472471191,0.16972372403865282,0.2727272727272727,0.25061548215235363,0.3219094872263883,0.33365785011470184,0.3940538127924734
7
- 3.0,294,0.29577464788732394,0.3047375160051216,0.33098591549295775,0.3585147247119078,0.29577464788732394,0.03692836080135826,0.2968416559965856,0.1089192018057998,0.2898847631241997,0.16530160845995479,0.25845070422535216,0.24162273030445708,0.3073963985935813,0.31951819898251643,0.377358622211706
 
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
+ 1.0,98,0.324583866837388,0.33290653008962867,0.3649167733674776,0.4058898847631242,0.324583866837388,0.038313787861467184,0.324583866837388,0.11223891931505588,0.31792573623559534,0.1697408782100328,0.2935339308578745,0.2549878568107636,0.33872299453285326,0.35307499975694673,0.4232469199200366
3
+ 2.0,196,0.3290653008962868,0.33354673495518566,0.36235595390524966,0.3969270166453265,0.3290653008962868,0.03735817465789262,0.3282116944088775,0.10921822534273136,0.3209987195902689,0.165931214027115,0.29423815620998717,0.24868649598286982,0.3407241428368185,0.3532462927680357,0.4130430492367209
4
+ 3.0,294,0.3213828425096031,0.32842509603072984,0.3559539052496799,0.38988476312419973,0.3213828425096031,0.03683491421575636,0.32159624413145543,0.10829890857523781,0.31459667093469906,0.16421046118001698,0.28732394366197184,0.2447394908113676,0.3334143751397268,0.34585160474489407,0.40656810490109624
 
 
 
eval/Information-Retrieval_evaluation_dim_512_results.csv CHANGED
@@ -1,7 +1,4 @@
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
- 1.0,98,0.354033290653009,0.36043533930857874,0.4033290653008963,0.4468629961587708,0.354033290653009,0.040213322698113355,0.3542466922748612,0.11849364024378686,0.349807938540333,0.181197723266763,0.32816901408450705,0.27659287824292283,0.3702490193687374,0.3875074092346156,0.4580788132793331
3
- 2.0,196,0.29897567221510885,0.31306017925736235,0.34699103713188223,0.3879641485275288,0.29897567221510885,0.03524581966276957,0.3013230900554844,0.1041081313129689,0.29833546734955185,0.1595546072366398,0.27925736235595394,0.24463022936769843,0.31555926467898265,0.3326922254093368,0.39986627638770844
4
- 3.0,294,0.2912932138284251,0.3002560819462228,0.32842509603072984,0.3706786171574904,0.2912932138284251,0.031548557747378574,0.2927870251813914,0.09357095555684627,0.2891165172855314,0.1435753152370639,0.26939820742637643,0.21891302574546295,0.3048952807755618,0.3179744532097737,0.3772797561888447
5
- 1.0,98,0.35979513444302175,0.36555697823303457,0.3911651728553137,0.4334186939820743,0.35979513444302175,0.04265405128130224,0.35936833119931705,0.12523102347193127,0.34967989756722156,0.18912519336740205,0.3173495518565941,0.2781876565001863,0.37212542934373866,0.3843750966464458,0.4476805587612892
6
- 2.0,196,0.32842509603072984,0.33418693982074266,0.36555697823303457,0.39564660691421255,0.32842509603072984,0.04071091183465321,0.3282116944088775,0.11970757850133786,0.31997439180537773,0.1806811237454132,0.28693982074263763,0.2638449444559509,0.34027168058858154,0.3525488928748249,0.4125328284000196
7
- 3.0,294,0.3047375160051216,0.31049935979513443,0.34507042253521125,0.3719590268886043,0.3047375160051216,0.03902184942619328,0.30431071276141697,0.11440062517351587,0.29756722151088344,0.17317031567103489,0.2661331626120359,0.2526764166009778,0.31690623539215046,0.3300149893720946,0.39281877553256617
 
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
+ 1.0,98,0.34827144686299616,0.3553137003841229,0.39436619718309857,0.44302176696542894,0.34827144686299616,0.04125738861359979,0.34827144686299616,0.12077279112247459,0.3418693982074264,0.18268801127884626,0.31798975672215113,0.2752162089385945,0.3644033392272826,0.3807642678190648,0.44994622162234726
3
+ 2.0,196,0.3578745198463508,0.36299615877080665,0.3892445582586428,0.42765685019206146,0.3578745198463508,0.04087028201741599,0.35680751173708913,0.11887426573116062,0.34724711907810496,0.17823551058609727,0.3177336747759283,0.2641908975177659,0.36958849053919446,0.3812404238586874,0.44173422420832625
4
+ 3.0,294,0.33162612035851474,0.3405889884763124,0.3681177976952625,0.4039692701664533,0.33162612035851474,0.038588929588341196,0.3322663252240717,0.11297571247042615,0.3250960307298335,0.17031603260342357,0.29769526248399486,0.2554519940509474,0.3444845842733163,0.3580070743078956,0.42093121870079336
 
 
 
eval/Information-Retrieval_evaluation_dim_64_results.csv CHANGED
@@ -1,7 +1,4 @@
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
- 1.0,98,0.25480153649167736,0.2605633802816901,0.29257362355953903,0.33034571062740076,0.25480153649167736,0.027436091074072376,0.2554417413572343,0.08147833022110194,0.25416133162612037,0.12742618628634134,0.241101152368758,0.20345713326771228,0.268041582830315,0.28207944797157253,0.34000696782787143
3
- 2.0,196,0.24007682458386684,0.2471190781049936,0.26632522407170295,0.30985915492957744,0.24007682458386684,0.02752517367039453,0.24093043107127612,0.08099552823400773,0.2367477592829705,0.12372044634475811,0.22099871959026893,0.18855203634297554,0.25156494522691686,0.26238472694967735,0.3270997667027699
4
- 3.0,294,0.2119078104993598,0.21638924455825864,0.23623559539052497,0.26312419974391804,0.2119078104993598,0.021909848472067953,0.21233461374306442,0.06487593892879456,0.20947503201024328,0.09950245391323564,0.19590268886043533,0.14975275404193011,0.22060240229254297,0.22921012374813438,0.2778029194657925
5
- 1.0,98,0.2740076824583867,0.27848911651728553,0.30153649167733676,0.3354673495518566,0.2740076824583867,0.03167890172057568,0.27315407597097735,0.09267023360511464,0.2670934699103713,0.14048625468314752,0.24571062740076827,0.21092883720941633,0.28429414873076814,0.29402896525927075,0.3539045084602349
6
- 2.0,196,0.28040973111395645,0.28297055057618437,0.3072983354673495,0.3348271446862996,0.28040973111395645,0.03187808455878807,0.27955612462654716,0.09363361347149868,0.27247119078105,0.14192536615474802,0.24878361075544175,0.2128007008801171,0.28944678170030247,0.2991224720529457,0.35085623648833997
7
- 3.0,294,0.25480153649167736,0.2605633802816901,0.2906530089628681,0.323303457106274,0.25480153649167736,0.031011767980561305,0.25480153649167736,0.09100224310580617,0.25006402048655574,0.13823759538062028,0.23079385403329064,0.21011380307216662,0.266841960856045,0.28022682237950125,0.3332689262079475
 
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
+ 1.0,98,0.23175416133162613,0.23879641485275288,0.2612035851472471,0.29833546734955185,0.23175416133162613,0.028384798943475897,0.23218096457533077,0.08325514613360847,0.22740076824583869,0.12720688223912358,0.20864276568501922,0.19540887275051927,0.24303802613661746,0.2548721998123125,0.3151829220617657
3
+ 2.0,196,0.26504481434058896,0.26952624839948786,0.29257362355953903,0.3322663252240717,0.26504481434058896,0.030859353047043094,0.2646180110968843,0.09025134722734952,0.25813060179257363,0.13616521781869687,0.2385403329065301,0.20564514911963047,0.27606167306871515,0.28738876488409937,0.3471195034779171
4
+ 3.0,294,0.25096030729833546,0.2560819462227913,0.2900128040973111,0.32970550576184376,0.25096030729833546,0.0281516535128601,0.2507469056764831,0.08222758111049663,0.24788732394366192,0.12591556967755266,0.23604353393085786,0.19940598657336947,0.2645008942544151,0.27958348757665724,0.33353078928131286
 
 
 
eval/Information-Retrieval_evaluation_dim_768_results.csv CHANGED
@@ -1,7 +1,4 @@
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
- 1.0,98,0.3674775928297055,0.3738796414852753,0.41165172855313703,0.4558258642765685,0.3674775928297055,0.04090312467372361,0.3681177976952625,0.12089217713355141,0.3624839948783611,0.18502530356129382,0.3385403329065301,0.28508929243050846,0.3827741702741699,0.3991780077769589,0.46526581757220226
3
- 2.0,196,0.3201024327784891,0.33034571062740076,0.36555697823303457,0.40781049935979513,0.3201024327784891,0.0368742412270932,0.3215962441314554,0.1088101072503079,0.31766965428937255,0.16683683168769528,0.29769526248399486,0.2575427865947166,0.33594394650732656,0.3531117937333453,0.41567574230937915
4
- 3.0,294,0.3002560819462228,0.3085787451984635,0.3405889884763124,0.38348271446862997,0.3002560819462228,0.03354719734205534,0.30153649167733676,0.09937908091364656,0.29769526248399486,0.15284430642322494,0.27567221510883483,0.23085365176940398,0.31442747393451553,0.3279302500611031,0.3911512630662361
5
- 1.0,98,0.3591549295774648,0.3649167733674776,0.3892445582586428,0.4334186939820743,0.3591549295774648,0.04250079684114586,0.3587281263337601,0.12462187901616553,0.34852752880921894,0.1875478484365334,0.31670934699103714,0.27695909667507057,0.37149335406377615,0.38308181752122755,0.4493001842217619
6
- 2.0,196,0.32970550576184376,0.33418693982074266,0.36427656850192064,0.3994878361075544,0.32970550576184376,0.040955758827011135,0.3288518992744345,0.12009305539695316,0.31997439180537773,0.18142212378067016,0.2877720870678617,0.26685683005601735,0.3415124585899229,0.35370573856938964,0.4160652625925415
7
- 3.0,294,0.3072983354673495,0.31049935979513443,0.3444302176696543,0.37451984635083224,0.3072983354673495,0.03940235994624546,0.3060179257362356,0.11527075559959522,0.29795134443021765,0.17393586357387436,0.26677336747759284,0.2544826642178083,0.3188075422230347,0.3310954692046881,0.3944113472988561
 
1
  epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100
2
+ 1.0,98,0.36619718309859156,0.37516005121638923,0.4014084507042254,0.44302176696542894,0.36619718309859156,0.042908107176418055,0.3666239863422962,0.12573828441229515,0.35761843790012804,0.18984022934199501,0.32541613316261203,0.2828362934197418,0.37943672133812956,0.3923099208699586,0.4592162636155952
3
+ 2.0,196,0.3546734955185659,0.36107554417413573,0.3860435339308579,0.4186939820742638,0.3546734955185659,0.04034211902742892,0.354033290653009,0.11739647757212558,0.34494238156209983,0.17629643014791566,0.3146606914212548,0.2626701106236,0.36586844298111887,0.3776871349900537,0.4380682423691407
4
+ 3.0,294,0.32842509603072984,0.3361075544174136,0.36299615877080665,0.4039692701664533,0.32842509603072984,0.03923116896195945,0.32863849765258213,0.1147561273639918,0.3199743918053777,0.17206648000872884,0.2928297055057618,0.25514859248143046,0.3415119504908234,0.35462738075585315,0.4227568005919578
 
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8b8678a200e1ec3a97ec08f700f81cc6660e581d09862b47b576834736c0668
3
  size 2239607176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad96896c31a7a57d2d2591979a3de4018f6e0a81c8eb47198774b1b88cd52c55
3
  size 2239607176
tokenizer_config.json CHANGED
@@ -47,16 +47,9 @@
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
- "max_length": 512,
51
  "model_max_length": 512,
52
- "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
- "pad_token_type_id": 0,
55
- "padding_side": "right",
56
  "sep_token": "</s>",
57
- "stride": 0,
58
  "tokenizer_class": "XLMRobertaTokenizer",
59
- "truncation_side": "right",
60
- "truncation_strategy": "longest_first",
61
  "unk_token": "<unk>"
62
  }
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
 
50
  "model_max_length": 512,
 
51
  "pad_token": "<pad>",
 
 
52
  "sep_token": "</s>",
 
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
54
  "unk_token": "<unk>"
55
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:739b4a0a62fdf782034d2ababe5e5ea588023ed6263f2604e31385fc77a8faab
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0318feaae2d8aeaebeec5835b4bc77eb79a026385dfb908828064d2f4fd294
3
  size 6097