IoannisKat1's picture
Add finetuned model
8d5b4f3 verified
{
"best_global_step": 816,
"best_metric": 0.5113687589504219,
"best_model_checkpoint": "modernbert-embed-base/checkpoint-816",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 816,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00980392156862745,
"grad_norm": 129.86514282226562,
"learning_rate": 0.0,
"loss": 17.4853,
"step": 1
},
{
"epoch": 0.0196078431372549,
"grad_norm": 100.67037200927734,
"learning_rate": 1.9607843137254904e-07,
"loss": 15.5822,
"step": 2
},
{
"epoch": 0.029411764705882353,
"grad_norm": 100.6810073852539,
"learning_rate": 3.921568627450981e-07,
"loss": 8.6936,
"step": 3
},
{
"epoch": 0.0392156862745098,
"grad_norm": 88.11703491210938,
"learning_rate": 5.882352941176471e-07,
"loss": 12.4725,
"step": 4
},
{
"epoch": 0.049019607843137254,
"grad_norm": 87.48355102539062,
"learning_rate": 7.843137254901962e-07,
"loss": 8.4815,
"step": 5
},
{
"epoch": 0.058823529411764705,
"grad_norm": 84.57987213134766,
"learning_rate": 9.80392156862745e-07,
"loss": 10.6456,
"step": 6
},
{
"epoch": 0.06862745098039216,
"grad_norm": 78.86805725097656,
"learning_rate": 1.1764705882352942e-06,
"loss": 14.3675,
"step": 7
},
{
"epoch": 0.0784313725490196,
"grad_norm": 94.04440307617188,
"learning_rate": 1.3725490196078434e-06,
"loss": 10.4737,
"step": 8
},
{
"epoch": 0.08823529411764706,
"grad_norm": 71.46792602539062,
"learning_rate": 1.5686274509803923e-06,
"loss": 10.21,
"step": 9
},
{
"epoch": 0.09803921568627451,
"grad_norm": 86.01820373535156,
"learning_rate": 1.7647058823529414e-06,
"loss": 8.6732,
"step": 10
},
{
"epoch": 0.10784313725490197,
"grad_norm": 78.19393157958984,
"learning_rate": 1.96078431372549e-06,
"loss": 6.8418,
"step": 11
},
{
"epoch": 0.11764705882352941,
"grad_norm": 80.1185302734375,
"learning_rate": 2.1568627450980393e-06,
"loss": 8.9665,
"step": 12
},
{
"epoch": 0.12745098039215685,
"grad_norm": 79.87296295166016,
"learning_rate": 2.3529411764705885e-06,
"loss": 7.3845,
"step": 13
},
{
"epoch": 0.13725490196078433,
"grad_norm": 64.95475769042969,
"learning_rate": 2.549019607843137e-06,
"loss": 7.4307,
"step": 14
},
{
"epoch": 0.14705882352941177,
"grad_norm": 73.19120788574219,
"learning_rate": 2.7450980392156867e-06,
"loss": 8.574,
"step": 15
},
{
"epoch": 0.1568627450980392,
"grad_norm": 124.22490692138672,
"learning_rate": 2.9411764705882355e-06,
"loss": 13.5512,
"step": 16
},
{
"epoch": 0.16666666666666666,
"grad_norm": 283.95123291015625,
"learning_rate": 3.1372549019607846e-06,
"loss": 9.0797,
"step": 17
},
{
"epoch": 0.17647058823529413,
"grad_norm": 73.035888671875,
"learning_rate": 3.3333333333333333e-06,
"loss": 10.7027,
"step": 18
},
{
"epoch": 0.18627450980392157,
"grad_norm": 80.89883422851562,
"learning_rate": 3.529411764705883e-06,
"loss": 8.1399,
"step": 19
},
{
"epoch": 0.19607843137254902,
"grad_norm": 97.07267761230469,
"learning_rate": 3.7254901960784316e-06,
"loss": 5.2519,
"step": 20
},
{
"epoch": 0.20588235294117646,
"grad_norm": 81.92726135253906,
"learning_rate": 3.92156862745098e-06,
"loss": 5.7496,
"step": 21
},
{
"epoch": 0.21568627450980393,
"grad_norm": 98.03091430664062,
"learning_rate": 4.11764705882353e-06,
"loss": 6.6065,
"step": 22
},
{
"epoch": 0.22549019607843138,
"grad_norm": 75.11953735351562,
"learning_rate": 4.313725490196079e-06,
"loss": 7.2297,
"step": 23
},
{
"epoch": 0.23529411764705882,
"grad_norm": 118.70673370361328,
"learning_rate": 4.509803921568628e-06,
"loss": 9.7108,
"step": 24
},
{
"epoch": 0.24509803921568626,
"grad_norm": 85.62348175048828,
"learning_rate": 4.705882352941177e-06,
"loss": 6.0649,
"step": 25
},
{
"epoch": 0.2549019607843137,
"grad_norm": 109.30182647705078,
"learning_rate": 4.901960784313726e-06,
"loss": 12.5883,
"step": 26
},
{
"epoch": 0.2647058823529412,
"grad_norm": 56.858070373535156,
"learning_rate": 5.098039215686274e-06,
"loss": 3.3575,
"step": 27
},
{
"epoch": 0.27450980392156865,
"grad_norm": 81.4185791015625,
"learning_rate": 5.294117647058824e-06,
"loss": 8.5852,
"step": 28
},
{
"epoch": 0.28431372549019607,
"grad_norm": 79.82296752929688,
"learning_rate": 5.4901960784313735e-06,
"loss": 11.2424,
"step": 29
},
{
"epoch": 0.29411764705882354,
"grad_norm": 83.92647552490234,
"learning_rate": 5.686274509803922e-06,
"loss": 3.9034,
"step": 30
},
{
"epoch": 0.30392156862745096,
"grad_norm": 95.46015167236328,
"learning_rate": 5.882352941176471e-06,
"loss": 4.0761,
"step": 31
},
{
"epoch": 0.3137254901960784,
"grad_norm": 67.38555145263672,
"learning_rate": 6.07843137254902e-06,
"loss": 6.7761,
"step": 32
},
{
"epoch": 0.3235294117647059,
"grad_norm": 87.96136474609375,
"learning_rate": 6.274509803921569e-06,
"loss": 11.8273,
"step": 33
},
{
"epoch": 0.3333333333333333,
"grad_norm": 214.4466094970703,
"learning_rate": 6.470588235294119e-06,
"loss": 9.8458,
"step": 34
},
{
"epoch": 0.3431372549019608,
"grad_norm": 83.86022186279297,
"learning_rate": 6.666666666666667e-06,
"loss": 10.1052,
"step": 35
},
{
"epoch": 0.35294117647058826,
"grad_norm": 97.6032485961914,
"learning_rate": 6.862745098039216e-06,
"loss": 12.6555,
"step": 36
},
{
"epoch": 0.3627450980392157,
"grad_norm": 66.56285858154297,
"learning_rate": 7.058823529411766e-06,
"loss": 7.6164,
"step": 37
},
{
"epoch": 0.37254901960784315,
"grad_norm": 83.6164779663086,
"learning_rate": 7.2549019607843145e-06,
"loss": 6.2306,
"step": 38
},
{
"epoch": 0.38235294117647056,
"grad_norm": 104.8736801147461,
"learning_rate": 7.450980392156863e-06,
"loss": 9.7449,
"step": 39
},
{
"epoch": 0.39215686274509803,
"grad_norm": 88.20790100097656,
"learning_rate": 7.647058823529411e-06,
"loss": 9.503,
"step": 40
},
{
"epoch": 0.4019607843137255,
"grad_norm": 53.291019439697266,
"learning_rate": 7.84313725490196e-06,
"loss": 3.9503,
"step": 41
},
{
"epoch": 0.4117647058823529,
"grad_norm": 78.73179626464844,
"learning_rate": 8.03921568627451e-06,
"loss": 9.0781,
"step": 42
},
{
"epoch": 0.4215686274509804,
"grad_norm": 79.14136505126953,
"learning_rate": 8.23529411764706e-06,
"loss": 7.6947,
"step": 43
},
{
"epoch": 0.43137254901960786,
"grad_norm": 75.01749420166016,
"learning_rate": 8.43137254901961e-06,
"loss": 7.4759,
"step": 44
},
{
"epoch": 0.4411764705882353,
"grad_norm": 72.87895965576172,
"learning_rate": 8.627450980392157e-06,
"loss": 8.9653,
"step": 45
},
{
"epoch": 0.45098039215686275,
"grad_norm": 73.91986846923828,
"learning_rate": 8.823529411764707e-06,
"loss": 5.9928,
"step": 46
},
{
"epoch": 0.46078431372549017,
"grad_norm": 168.424072265625,
"learning_rate": 9.019607843137256e-06,
"loss": 7.7841,
"step": 47
},
{
"epoch": 0.47058823529411764,
"grad_norm": 59.69188690185547,
"learning_rate": 9.215686274509804e-06,
"loss": 4.9347,
"step": 48
},
{
"epoch": 0.4803921568627451,
"grad_norm": 75.39530181884766,
"learning_rate": 9.411764705882354e-06,
"loss": 8.5892,
"step": 49
},
{
"epoch": 0.49019607843137253,
"grad_norm": 57.832862854003906,
"learning_rate": 9.607843137254903e-06,
"loss": 7.7227,
"step": 50
},
{
"epoch": 0.5,
"grad_norm": 84.35389709472656,
"learning_rate": 9.803921568627451e-06,
"loss": 8.8423,
"step": 51
},
{
"epoch": 0.5098039215686274,
"grad_norm": 90.03999328613281,
"learning_rate": 1e-05,
"loss": 7.9743,
"step": 52
},
{
"epoch": 0.5196078431372549,
"grad_norm": 68.16461944580078,
"learning_rate": 1.0196078431372549e-05,
"loss": 6.0536,
"step": 53
},
{
"epoch": 0.5294117647058824,
"grad_norm": 84.21912384033203,
"learning_rate": 1.03921568627451e-05,
"loss": 6.2513,
"step": 54
},
{
"epoch": 0.5392156862745098,
"grad_norm": 191.86093139648438,
"learning_rate": 1.0588235294117648e-05,
"loss": 3.7778,
"step": 55
},
{
"epoch": 0.5490196078431373,
"grad_norm": 63.44783020019531,
"learning_rate": 1.0784313725490196e-05,
"loss": 9.3877,
"step": 56
},
{
"epoch": 0.5588235294117647,
"grad_norm": 77.16984558105469,
"learning_rate": 1.0980392156862747e-05,
"loss": 9.3963,
"step": 57
},
{
"epoch": 0.5686274509803921,
"grad_norm": 77.03890991210938,
"learning_rate": 1.1176470588235295e-05,
"loss": 5.6104,
"step": 58
},
{
"epoch": 0.5784313725490197,
"grad_norm": 68.74703216552734,
"learning_rate": 1.1372549019607844e-05,
"loss": 5.8724,
"step": 59
},
{
"epoch": 0.5882352941176471,
"grad_norm": 58.074546813964844,
"learning_rate": 1.1568627450980394e-05,
"loss": 3.8029,
"step": 60
},
{
"epoch": 0.5980392156862745,
"grad_norm": 101.86833190917969,
"learning_rate": 1.1764705882352942e-05,
"loss": 8.6739,
"step": 61
},
{
"epoch": 0.6078431372549019,
"grad_norm": 68.621826171875,
"learning_rate": 1.1960784313725491e-05,
"loss": 4.371,
"step": 62
},
{
"epoch": 0.6176470588235294,
"grad_norm": 88.57069396972656,
"learning_rate": 1.215686274509804e-05,
"loss": 10.0285,
"step": 63
},
{
"epoch": 0.6274509803921569,
"grad_norm": 71.93238067626953,
"learning_rate": 1.235294117647059e-05,
"loss": 9.1923,
"step": 64
},
{
"epoch": 0.6372549019607843,
"grad_norm": 101.66337585449219,
"learning_rate": 1.2549019607843138e-05,
"loss": 5.1715,
"step": 65
},
{
"epoch": 0.6470588235294118,
"grad_norm": 60.6130485534668,
"learning_rate": 1.2745098039215686e-05,
"loss": 6.2528,
"step": 66
},
{
"epoch": 0.6568627450980392,
"grad_norm": 62.68037033081055,
"learning_rate": 1.2941176470588238e-05,
"loss": 5.3587,
"step": 67
},
{
"epoch": 0.6666666666666666,
"grad_norm": 67.31169128417969,
"learning_rate": 1.3137254901960785e-05,
"loss": 8.1243,
"step": 68
},
{
"epoch": 0.6764705882352942,
"grad_norm": 65.39720916748047,
"learning_rate": 1.3333333333333333e-05,
"loss": 5.5494,
"step": 69
},
{
"epoch": 0.6862745098039216,
"grad_norm": 54.92850875854492,
"learning_rate": 1.3529411764705885e-05,
"loss": 4.4475,
"step": 70
},
{
"epoch": 0.696078431372549,
"grad_norm": 57.63169479370117,
"learning_rate": 1.3725490196078432e-05,
"loss": 4.8783,
"step": 71
},
{
"epoch": 0.7058823529411765,
"grad_norm": 61.44647979736328,
"learning_rate": 1.392156862745098e-05,
"loss": 5.4655,
"step": 72
},
{
"epoch": 0.7156862745098039,
"grad_norm": 28.529205322265625,
"learning_rate": 1.4117647058823532e-05,
"loss": 1.4754,
"step": 73
},
{
"epoch": 0.7254901960784313,
"grad_norm": 64.21969604492188,
"learning_rate": 1.431372549019608e-05,
"loss": 6.2656,
"step": 74
},
{
"epoch": 0.7352941176470589,
"grad_norm": 72.76560974121094,
"learning_rate": 1.4509803921568629e-05,
"loss": 8.3554,
"step": 75
},
{
"epoch": 0.7450980392156863,
"grad_norm": 73.29241943359375,
"learning_rate": 1.4705882352941179e-05,
"loss": 6.1232,
"step": 76
},
{
"epoch": 0.7549019607843137,
"grad_norm": 37.65147399902344,
"learning_rate": 1.4901960784313726e-05,
"loss": 2.2596,
"step": 77
},
{
"epoch": 0.7647058823529411,
"grad_norm": 60.9337158203125,
"learning_rate": 1.5098039215686276e-05,
"loss": 4.9636,
"step": 78
},
{
"epoch": 0.7745098039215687,
"grad_norm": 67.36978149414062,
"learning_rate": 1.5294117647058822e-05,
"loss": 5.6401,
"step": 79
},
{
"epoch": 0.7843137254901961,
"grad_norm": 59.556007385253906,
"learning_rate": 1.5490196078431373e-05,
"loss": 5.5852,
"step": 80
},
{
"epoch": 0.7941176470588235,
"grad_norm": 79.2451171875,
"learning_rate": 1.568627450980392e-05,
"loss": 8.55,
"step": 81
},
{
"epoch": 0.803921568627451,
"grad_norm": 71.37342834472656,
"learning_rate": 1.5882352941176473e-05,
"loss": 5.2085,
"step": 82
},
{
"epoch": 0.8137254901960784,
"grad_norm": 83.74903869628906,
"learning_rate": 1.607843137254902e-05,
"loss": 5.7077,
"step": 83
},
{
"epoch": 0.8235294117647058,
"grad_norm": 64.63584899902344,
"learning_rate": 1.627450980392157e-05,
"loss": 3.9988,
"step": 84
},
{
"epoch": 0.8333333333333334,
"grad_norm": 85.18892669677734,
"learning_rate": 1.647058823529412e-05,
"loss": 8.3305,
"step": 85
},
{
"epoch": 0.8431372549019608,
"grad_norm": 72.52571868896484,
"learning_rate": 1.6666666666666667e-05,
"loss": 7.063,
"step": 86
},
{
"epoch": 0.8529411764705882,
"grad_norm": 70.59456634521484,
"learning_rate": 1.686274509803922e-05,
"loss": 6.9146,
"step": 87
},
{
"epoch": 0.8627450980392157,
"grad_norm": 72.51757049560547,
"learning_rate": 1.7058823529411767e-05,
"loss": 7.1729,
"step": 88
},
{
"epoch": 0.8725490196078431,
"grad_norm": 75.5470962524414,
"learning_rate": 1.7254901960784314e-05,
"loss": 5.6916,
"step": 89
},
{
"epoch": 0.8823529411764706,
"grad_norm": 53.650146484375,
"learning_rate": 1.7450980392156866e-05,
"loss": 4.689,
"step": 90
},
{
"epoch": 0.8921568627450981,
"grad_norm": 130.48622131347656,
"learning_rate": 1.7647058823529414e-05,
"loss": 10.2449,
"step": 91
},
{
"epoch": 0.9019607843137255,
"grad_norm": 64.78260803222656,
"learning_rate": 1.7843137254901965e-05,
"loss": 4.4491,
"step": 92
},
{
"epoch": 0.9117647058823529,
"grad_norm": 74.58740234375,
"learning_rate": 1.8039215686274513e-05,
"loss": 7.1342,
"step": 93
},
{
"epoch": 0.9215686274509803,
"grad_norm": 72.59771728515625,
"learning_rate": 1.823529411764706e-05,
"loss": 6.8294,
"step": 94
},
{
"epoch": 0.9313725490196079,
"grad_norm": 76.36857604980469,
"learning_rate": 1.843137254901961e-05,
"loss": 6.429,
"step": 95
},
{
"epoch": 0.9411764705882353,
"grad_norm": 43.28879165649414,
"learning_rate": 1.862745098039216e-05,
"loss": 2.6789,
"step": 96
},
{
"epoch": 0.9509803921568627,
"grad_norm": 66.45750427246094,
"learning_rate": 1.8823529411764708e-05,
"loss": 5.7232,
"step": 97
},
{
"epoch": 0.9607843137254902,
"grad_norm": 53.644187927246094,
"learning_rate": 1.9019607843137255e-05,
"loss": 4.0619,
"step": 98
},
{
"epoch": 0.9705882352941176,
"grad_norm": 88.7365951538086,
"learning_rate": 1.9215686274509807e-05,
"loss": 4.7323,
"step": 99
},
{
"epoch": 0.9803921568627451,
"grad_norm": 234.25643920898438,
"learning_rate": 1.9411764705882355e-05,
"loss": 5.403,
"step": 100
},
{
"epoch": 0.9901960784313726,
"grad_norm": 94.92127990722656,
"learning_rate": 1.9607843137254903e-05,
"loss": 7.4416,
"step": 101
},
{
"epoch": 1.0,
"grad_norm": 49.972049713134766,
"learning_rate": 1.9803921568627454e-05,
"loss": 2.2006,
"step": 102
},
{
"epoch": 1.0,
"eval_dim_128_cosine_accuracy@1": 0.36117936117936117,
"eval_dim_128_cosine_accuracy@10": 0.4963144963144963,
"eval_dim_128_cosine_accuracy@3": 0.3955773955773956,
"eval_dim_128_cosine_accuracy@5": 0.4275184275184275,
"eval_dim_128_cosine_map@100": 0.4895004701043925,
"eval_dim_128_cosine_mrr@10": 0.3885242385242385,
"eval_dim_128_cosine_ndcg@10": 0.4167443951294149,
"eval_dim_128_cosine_precision@1": 0.36117936117936117,
"eval_dim_128_cosine_precision@10": 0.28624078624078625,
"eval_dim_128_cosine_precision@3": 0.3472563472563472,
"eval_dim_128_cosine_precision@5": 0.32383292383292384,
"eval_dim_128_cosine_recall@1": 0.07788842857349605,
"eval_dim_128_cosine_recall@10": 0.3584844315819464,
"eval_dim_128_cosine_recall@3": 0.19455169660689905,
"eval_dim_128_cosine_recall@5": 0.258844960906662,
"eval_dim_256_cosine_accuracy@1": 0.4275184275184275,
"eval_dim_256_cosine_accuracy@10": 0.5552825552825553,
"eval_dim_256_cosine_accuracy@3": 0.47665847665847666,
"eval_dim_256_cosine_accuracy@5": 0.5085995085995086,
"eval_dim_256_cosine_map@100": 0.5428508610938695,
"eval_dim_256_cosine_mrr@10": 0.4582280332280333,
"eval_dim_256_cosine_ndcg@10": 0.48999200434646606,
"eval_dim_256_cosine_precision@1": 0.4275184275184275,
"eval_dim_256_cosine_precision@10": 0.3518427518427518,
"eval_dim_256_cosine_precision@3": 0.42588042588042585,
"eval_dim_256_cosine_precision@5": 0.4019656019656019,
"eval_dim_256_cosine_recall@1": 0.07932221334609162,
"eval_dim_256_cosine_recall@10": 0.4034245970803078,
"eval_dim_256_cosine_recall@3": 0.21260446467609953,
"eval_dim_256_cosine_recall@5": 0.2889879213800401,
"eval_dim_512_cosine_accuracy@1": 0.42997542997543,
"eval_dim_512_cosine_accuracy@10": 0.5528255528255528,
"eval_dim_512_cosine_accuracy@3": 0.4692874692874693,
"eval_dim_512_cosine_accuracy@5": 0.5061425061425061,
"eval_dim_512_cosine_map@100": 0.5508810990559729,
"eval_dim_512_cosine_mrr@10": 0.45731835731835724,
"eval_dim_512_cosine_ndcg@10": 0.4837752441144749,
"eval_dim_512_cosine_precision@1": 0.42997542997543,
"eval_dim_512_cosine_precision@10": 0.343980343980344,
"eval_dim_512_cosine_precision@3": 0.4201474201474201,
"eval_dim_512_cosine_precision@5": 0.3945945945945945,
"eval_dim_512_cosine_recall@1": 0.08343785701085553,
"eval_dim_512_cosine_recall@10": 0.3843687245273678,
"eval_dim_512_cosine_recall@3": 0.21509818581718135,
"eval_dim_512_cosine_recall@5": 0.28983590270089526,
"eval_dim_64_cosine_accuracy@1": 0.31203931203931207,
"eval_dim_64_cosine_accuracy@10": 0.4103194103194103,
"eval_dim_64_cosine_accuracy@3": 0.3316953316953317,
"eval_dim_64_cosine_accuracy@5": 0.36855036855036855,
"eval_dim_64_cosine_map@100": 0.4146467456393331,
"eval_dim_64_cosine_mrr@10": 0.33203658203658204,
"eval_dim_64_cosine_ndcg@10": 0.355673399321963,
"eval_dim_64_cosine_precision@1": 0.31203931203931207,
"eval_dim_64_cosine_precision@10": 0.24496314496314495,
"eval_dim_64_cosine_precision@3": 0.29975429975429974,
"eval_dim_64_cosine_precision@5": 0.28058968058968053,
"eval_dim_64_cosine_recall@1": 0.06266071012060316,
"eval_dim_64_cosine_recall@10": 0.29885112416397924,
"eval_dim_64_cosine_recall@3": 0.15793980445741465,
"eval_dim_64_cosine_recall@5": 0.21634287969250168,
"eval_dim_768_cosine_accuracy@1": 0.44717444717444715,
"eval_dim_768_cosine_accuracy@10": 0.5773955773955773,
"eval_dim_768_cosine_accuracy@3": 0.49385749385749383,
"eval_dim_768_cosine_accuracy@5": 0.5307125307125307,
"eval_dim_768_cosine_map@100": 0.5683678203840881,
"eval_dim_768_cosine_mrr@10": 0.477099177099177,
"eval_dim_768_cosine_ndcg@10": 0.5075181383767897,
"eval_dim_768_cosine_precision@1": 0.44717444717444715,
"eval_dim_768_cosine_precision@10": 0.36363636363636365,
"eval_dim_768_cosine_precision@3": 0.4398034398034398,
"eval_dim_768_cosine_precision@5": 0.41326781326781326,
"eval_dim_768_cosine_recall@1": 0.08596712954012806,
"eval_dim_768_cosine_recall@10": 0.4072103916010098,
"eval_dim_768_cosine_recall@3": 0.22479572965265626,
"eval_dim_768_cosine_recall@5": 0.30023063914578846,
"eval_runtime": 113.7011,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.355673399321963,
"eval_steps_per_second": 0.0,
"step": 102
},
{
"epoch": 1.0098039215686274,
"grad_norm": 47.996768951416016,
"learning_rate": 2e-05,
"loss": 2.523,
"step": 103
},
{
"epoch": 1.0196078431372548,
"grad_norm": 29.821338653564453,
"learning_rate": 1.9999941442306328e-05,
"loss": 1.4913,
"step": 104
},
{
"epoch": 1.0294117647058822,
"grad_norm": 92.14228820800781,
"learning_rate": 1.9999765769911108e-05,
"loss": 9.9224,
"step": 105
},
{
"epoch": 1.0392156862745099,
"grad_norm": 128.05392456054688,
"learning_rate": 1.9999472984871734e-05,
"loss": 7.2427,
"step": 106
},
{
"epoch": 1.0490196078431373,
"grad_norm": 78.10018920898438,
"learning_rate": 1.999906309061717e-05,
"loss": 3.8076,
"step": 107
},
{
"epoch": 1.0588235294117647,
"grad_norm": 98.82392120361328,
"learning_rate": 1.9998536091947907e-05,
"loss": 3.5538,
"step": 108
},
{
"epoch": 1.0686274509803921,
"grad_norm": 35.518714904785156,
"learning_rate": 1.9997891995035914e-05,
"loss": 2.9958,
"step": 109
},
{
"epoch": 1.0784313725490196,
"grad_norm": 65.9166030883789,
"learning_rate": 1.9997130807424556e-05,
"loss": 5.0938,
"step": 110
},
{
"epoch": 1.088235294117647,
"grad_norm": 29.002384185791016,
"learning_rate": 1.999625253802851e-05,
"loss": 1.6151,
"step": 111
},
{
"epoch": 1.0980392156862746,
"grad_norm": 43.29655075073242,
"learning_rate": 1.999525719713366e-05,
"loss": 2.4825,
"step": 112
},
{
"epoch": 1.107843137254902,
"grad_norm": 45.45580291748047,
"learning_rate": 1.9994144796396985e-05,
"loss": 2.6052,
"step": 113
},
{
"epoch": 1.1176470588235294,
"grad_norm": 52.51070785522461,
"learning_rate": 1.9992915348846403e-05,
"loss": 3.2484,
"step": 114
},
{
"epoch": 1.1274509803921569,
"grad_norm": 47.346988677978516,
"learning_rate": 1.999156886888064e-05,
"loss": 1.7134,
"step": 115
},
{
"epoch": 1.1372549019607843,
"grad_norm": 67.60808563232422,
"learning_rate": 1.999010537226905e-05,
"loss": 4.5488,
"step": 116
},
{
"epoch": 1.1470588235294117,
"grad_norm": 57.041481018066406,
"learning_rate": 1.9988524876151425e-05,
"loss": 2.9845,
"step": 117
},
{
"epoch": 1.156862745098039,
"grad_norm": 65.12330627441406,
"learning_rate": 1.998682739903781e-05,
"loss": 3.6352,
"step": 118
},
{
"epoch": 1.1666666666666667,
"grad_norm": 74.98400115966797,
"learning_rate": 1.9985012960808275e-05,
"loss": 5.681,
"step": 119
},
{
"epoch": 1.1764705882352942,
"grad_norm": 38.95284652709961,
"learning_rate": 1.9983081582712684e-05,
"loss": 2.7269,
"step": 120
},
{
"epoch": 1.1862745098039216,
"grad_norm": 66.9459228515625,
"learning_rate": 1.9981033287370443e-05,
"loss": 3.1317,
"step": 121
},
{
"epoch": 1.196078431372549,
"grad_norm": 84.69365692138672,
"learning_rate": 1.9978868098770244e-05,
"loss": 8.6548,
"step": 122
},
{
"epoch": 1.2058823529411764,
"grad_norm": 26.863765716552734,
"learning_rate": 1.9976586042269776e-05,
"loss": 1.2277,
"step": 123
},
{
"epoch": 1.215686274509804,
"grad_norm": 78.60285186767578,
"learning_rate": 1.9974187144595433e-05,
"loss": 4.8203,
"step": 124
},
{
"epoch": 1.2254901960784315,
"grad_norm": 58.215972900390625,
"learning_rate": 1.9971671433842e-05,
"loss": 5.0602,
"step": 125
},
{
"epoch": 1.2352941176470589,
"grad_norm": 59.439632415771484,
"learning_rate": 1.9969038939472315e-05,
"loss": 5.9304,
"step": 126
},
{
"epoch": 1.2450980392156863,
"grad_norm": 61.38117218017578,
"learning_rate": 1.9966289692316944e-05,
"loss": 3.8992,
"step": 127
},
{
"epoch": 1.2549019607843137,
"grad_norm": 93.6502456665039,
"learning_rate": 1.99634237245738e-05,
"loss": 4.6071,
"step": 128
},
{
"epoch": 1.2647058823529411,
"grad_norm": 82.45580291748047,
"learning_rate": 1.9960441069807778e-05,
"loss": 7.071,
"step": 129
},
{
"epoch": 1.2745098039215685,
"grad_norm": 72.7511978149414,
"learning_rate": 1.9957341762950346e-05,
"loss": 2.796,
"step": 130
},
{
"epoch": 1.284313725490196,
"grad_norm": 82.5091323852539,
"learning_rate": 1.9954125840299165e-05,
"loss": 4.1005,
"step": 131
},
{
"epoch": 1.2941176470588236,
"grad_norm": 58.769676208496094,
"learning_rate": 1.9950793339517632e-05,
"loss": 2.4508,
"step": 132
},
{
"epoch": 1.303921568627451,
"grad_norm": 182.6251678466797,
"learning_rate": 1.9947344299634464e-05,
"loss": 3.0313,
"step": 133
},
{
"epoch": 1.3137254901960784,
"grad_norm": 45.02217483520508,
"learning_rate": 1.9943778761043223e-05,
"loss": 1.6569,
"step": 134
},
{
"epoch": 1.3235294117647058,
"grad_norm": 91.53306579589844,
"learning_rate": 1.994009676550185e-05,
"loss": 5.6474,
"step": 135
},
{
"epoch": 1.3333333333333333,
"grad_norm": 66.25041961669922,
"learning_rate": 1.993629835613218e-05,
"loss": 5.0485,
"step": 136
},
{
"epoch": 1.343137254901961,
"grad_norm": 59.17399597167969,
"learning_rate": 1.9932383577419432e-05,
"loss": 5.342,
"step": 137
},
{
"epoch": 1.3529411764705883,
"grad_norm": 48.39958953857422,
"learning_rate": 1.992835247521169e-05,
"loss": 2.1806,
"step": 138
},
{
"epoch": 1.3627450980392157,
"grad_norm": 36.56830596923828,
"learning_rate": 1.992420509671936e-05,
"loss": 2.3089,
"step": 139
},
{
"epoch": 1.3725490196078431,
"grad_norm": 55.99013137817383,
"learning_rate": 1.991994149051463e-05,
"loss": 2.0881,
"step": 140
},
{
"epoch": 1.3823529411764706,
"grad_norm": 34.761634826660156,
"learning_rate": 1.9915561706530882e-05,
"loss": 1.2435,
"step": 141
},
{
"epoch": 1.392156862745098,
"grad_norm": 35.68510818481445,
"learning_rate": 1.9911065796062137e-05,
"loss": 2.3912,
"step": 142
},
{
"epoch": 1.4019607843137254,
"grad_norm": 74.37715148925781,
"learning_rate": 1.9906453811762415e-05,
"loss": 1.7524,
"step": 143
},
{
"epoch": 1.4117647058823528,
"grad_norm": 77.04254150390625,
"learning_rate": 1.9901725807645154e-05,
"loss": 5.1758,
"step": 144
},
{
"epoch": 1.4215686274509804,
"grad_norm": 60.880775451660156,
"learning_rate": 1.9896881839082554e-05,
"loss": 1.9937,
"step": 145
},
{
"epoch": 1.4313725490196079,
"grad_norm": 38.05253219604492,
"learning_rate": 1.9891921962804942e-05,
"loss": 3.3948,
"step": 146
},
{
"epoch": 1.4411764705882353,
"grad_norm": 69.32609558105469,
"learning_rate": 1.9886846236900102e-05,
"loss": 4.8789,
"step": 147
},
{
"epoch": 1.4509803921568627,
"grad_norm": 40.149715423583984,
"learning_rate": 1.9881654720812594e-05,
"loss": 1.9967,
"step": 148
},
{
"epoch": 1.4607843137254901,
"grad_norm": 54.33725357055664,
"learning_rate": 1.9876347475343062e-05,
"loss": 1.9438,
"step": 149
},
{
"epoch": 1.4705882352941178,
"grad_norm": 79.5066909790039,
"learning_rate": 1.9870924562647512e-05,
"loss": 5.8335,
"step": 150
},
{
"epoch": 1.4803921568627452,
"grad_norm": 66.37535095214844,
"learning_rate": 1.9865386046236597e-05,
"loss": 3.2073,
"step": 151
},
{
"epoch": 1.4901960784313726,
"grad_norm": 117.20733642578125,
"learning_rate": 1.9859731990974867e-05,
"loss": 8.3916,
"step": 152
},
{
"epoch": 1.5,
"grad_norm": 39.88603591918945,
"learning_rate": 1.9853962463080013e-05,
"loss": 1.6447,
"step": 153
},
{
"epoch": 1.5098039215686274,
"grad_norm": 37.4543571472168,
"learning_rate": 1.9848077530122083e-05,
"loss": 2.7262,
"step": 154
},
{
"epoch": 1.5196078431372548,
"grad_norm": 55.9295768737793,
"learning_rate": 1.984207726102269e-05,
"loss": 4.0002,
"step": 155
},
{
"epoch": 1.5294117647058822,
"grad_norm": 36.691829681396484,
"learning_rate": 1.9835961726054228e-05,
"loss": 2.0588,
"step": 156
},
{
"epoch": 1.5392156862745097,
"grad_norm": 43.383121490478516,
"learning_rate": 1.982973099683902e-05,
"loss": 1.9514,
"step": 157
},
{
"epoch": 1.5490196078431373,
"grad_norm": 56.915279388427734,
"learning_rate": 1.9823385146348485e-05,
"loss": 2.0048,
"step": 158
},
{
"epoch": 1.5588235294117647,
"grad_norm": 101.29805755615234,
"learning_rate": 1.9816924248902304e-05,
"loss": 4.8991,
"step": 159
},
{
"epoch": 1.5686274509803921,
"grad_norm": 68.61731719970703,
"learning_rate": 1.9810348380167527e-05,
"loss": 5.2414,
"step": 160
},
{
"epoch": 1.5784313725490198,
"grad_norm": 106.38484191894531,
"learning_rate": 1.9803657617157693e-05,
"loss": 2.193,
"step": 161
},
{
"epoch": 1.5882352941176472,
"grad_norm": 51.97545623779297,
"learning_rate": 1.9796852038231932e-05,
"loss": 4.6859,
"step": 162
},
{
"epoch": 1.5980392156862746,
"grad_norm": 361.40118408203125,
"learning_rate": 1.9789931723094046e-05,
"loss": 3.1137,
"step": 163
},
{
"epoch": 1.607843137254902,
"grad_norm": 63.9984245300293,
"learning_rate": 1.9782896752791576e-05,
"loss": 2.8398,
"step": 164
},
{
"epoch": 1.6176470588235294,
"grad_norm": 67.82437133789062,
"learning_rate": 1.9775747209714847e-05,
"loss": 4.6547,
"step": 165
},
{
"epoch": 1.6274509803921569,
"grad_norm": 77.33601379394531,
"learning_rate": 1.9768483177596008e-05,
"loss": 4.1404,
"step": 166
},
{
"epoch": 1.6372549019607843,
"grad_norm": 85.53030395507812,
"learning_rate": 1.976110474150806e-05,
"loss": 5.2769,
"step": 167
},
{
"epoch": 1.6470588235294117,
"grad_norm": 67.6707763671875,
"learning_rate": 1.975361198786383e-05,
"loss": 3.6466,
"step": 168
},
{
"epoch": 1.656862745098039,
"grad_norm": 30.49669075012207,
"learning_rate": 1.9746005004415004e-05,
"loss": 1.2928,
"step": 169
},
{
"epoch": 1.6666666666666665,
"grad_norm": 88.84416961669922,
"learning_rate": 1.973828388025106e-05,
"loss": 7.6842,
"step": 170
},
{
"epoch": 1.6764705882352942,
"grad_norm": 68.43903350830078,
"learning_rate": 1.973044870579824e-05,
"loss": 3.6167,
"step": 171
},
{
"epoch": 1.6862745098039216,
"grad_norm": 47.34165573120117,
"learning_rate": 1.9722499572818496e-05,
"loss": 1.5441,
"step": 172
},
{
"epoch": 1.696078431372549,
"grad_norm": 54.96414566040039,
"learning_rate": 1.9714436574408408e-05,
"loss": 4.6245,
"step": 173
},
{
"epoch": 1.7058823529411766,
"grad_norm": 57.44639587402344,
"learning_rate": 1.9706259804998093e-05,
"loss": 3.4359,
"step": 174
},
{
"epoch": 1.715686274509804,
"grad_norm": 441.9508361816406,
"learning_rate": 1.9697969360350098e-05,
"loss": 5.561,
"step": 175
},
{
"epoch": 1.7254901960784315,
"grad_norm": 106.98697662353516,
"learning_rate": 1.968956533755829e-05,
"loss": 9.2408,
"step": 176
},
{
"epoch": 1.7352941176470589,
"grad_norm": 89.20357513427734,
"learning_rate": 1.9681047835046708e-05,
"loss": 3.4619,
"step": 177
},
{
"epoch": 1.7450980392156863,
"grad_norm": 141.26058959960938,
"learning_rate": 1.9672416952568416e-05,
"loss": 0.7945,
"step": 178
},
{
"epoch": 1.7549019607843137,
"grad_norm": 38.98542022705078,
"learning_rate": 1.9663672791204328e-05,
"loss": 1.4854,
"step": 179
},
{
"epoch": 1.7647058823529411,
"grad_norm": 80.01353454589844,
"learning_rate": 1.9654815453362016e-05,
"loss": 4.4899,
"step": 180
},
{
"epoch": 1.7745098039215685,
"grad_norm": 74.00617218017578,
"learning_rate": 1.9645845042774555e-05,
"loss": 2.9133,
"step": 181
},
{
"epoch": 1.784313725490196,
"grad_norm": 62.659385681152344,
"learning_rate": 1.963676166449924e-05,
"loss": 2.2408,
"step": 182
},
{
"epoch": 1.7941176470588234,
"grad_norm": 61.16650390625,
"learning_rate": 1.962756542491641e-05,
"loss": 3.7768,
"step": 183
},
{
"epoch": 1.803921568627451,
"grad_norm": 41.40045928955078,
"learning_rate": 1.961825643172819e-05,
"loss": 3.2455,
"step": 184
},
{
"epoch": 1.8137254901960784,
"grad_norm": 88.19357299804688,
"learning_rate": 1.960883479395721e-05,
"loss": 3.9414,
"step": 185
},
{
"epoch": 1.8235294117647058,
"grad_norm": 49.906280517578125,
"learning_rate": 1.959930062194534e-05,
"loss": 2.1961,
"step": 186
},
{
"epoch": 1.8333333333333335,
"grad_norm": 43.06416320800781,
"learning_rate": 1.9589654027352412e-05,
"loss": 2.4825,
"step": 187
},
{
"epoch": 1.843137254901961,
"grad_norm": 71.20182800292969,
"learning_rate": 1.957989512315489e-05,
"loss": 3.2995,
"step": 188
},
{
"epoch": 1.8529411764705883,
"grad_norm": 66.23078918457031,
"learning_rate": 1.957002402364456e-05,
"loss": 2.8202,
"step": 189
},
{
"epoch": 1.8627450980392157,
"grad_norm": 76.61695098876953,
"learning_rate": 1.956004084442718e-05,
"loss": 6.1953,
"step": 190
},
{
"epoch": 1.8725490196078431,
"grad_norm": 65.12879180908203,
"learning_rate": 1.9549945702421144e-05,
"loss": 3.3925,
"step": 191
},
{
"epoch": 1.8823529411764706,
"grad_norm": 55.46363067626953,
"learning_rate": 1.95397387158561e-05,
"loss": 3.3051,
"step": 192
},
{
"epoch": 1.892156862745098,
"grad_norm": 52.92805099487305,
"learning_rate": 1.9529420004271568e-05,
"loss": 4.141,
"step": 193
},
{
"epoch": 1.9019607843137254,
"grad_norm": 94.87969970703125,
"learning_rate": 1.9518989688515533e-05,
"loss": 8.7842,
"step": 194
},
{
"epoch": 1.9117647058823528,
"grad_norm": 56.655792236328125,
"learning_rate": 1.950844789074305e-05,
"loss": 2.0724,
"step": 195
},
{
"epoch": 1.9215686274509802,
"grad_norm": 72.97640228271484,
"learning_rate": 1.9497794734414782e-05,
"loss": 5.1611,
"step": 196
},
{
"epoch": 1.9313725490196079,
"grad_norm": 42.993202209472656,
"learning_rate": 1.9487030344295586e-05,
"loss": 5.0744,
"step": 197
},
{
"epoch": 1.9411764705882353,
"grad_norm": 59.05615997314453,
"learning_rate": 1.9476154846453037e-05,
"loss": 1.7611,
"step": 198
},
{
"epoch": 1.9509803921568627,
"grad_norm": 43.90251541137695,
"learning_rate": 1.9465168368255946e-05,
"loss": 1.9447,
"step": 199
},
{
"epoch": 1.9607843137254903,
"grad_norm": 26.028093338012695,
"learning_rate": 1.945407103837288e-05,
"loss": 1.0533,
"step": 200
},
{
"epoch": 1.9705882352941178,
"grad_norm": 71.4136734008789,
"learning_rate": 1.9442862986770645e-05,
"loss": 6.2447,
"step": 201
},
{
"epoch": 1.9803921568627452,
"grad_norm": 37.935176849365234,
"learning_rate": 1.9431544344712776e-05,
"loss": 1.6885,
"step": 202
},
{
"epoch": 1.9901960784313726,
"grad_norm": 45.75434494018555,
"learning_rate": 1.9420115244757985e-05,
"loss": 2.0872,
"step": 203
},
{
"epoch": 2.0,
"grad_norm": 116.2767105102539,
"learning_rate": 1.9408575820758616e-05,
"loss": 4.7202,
"step": 204
},
{
"epoch": 2.0,
"eval_dim_128_cosine_accuracy@1": 0.4176904176904177,
"eval_dim_128_cosine_accuracy@10": 0.5356265356265356,
"eval_dim_128_cosine_accuracy@3": 0.44717444717444715,
"eval_dim_128_cosine_accuracy@5": 0.48157248157248156,
"eval_dim_128_cosine_map@100": 0.5283196469578106,
"eval_dim_128_cosine_mrr@10": 0.4427976677976677,
"eval_dim_128_cosine_ndcg@10": 0.4677195424317774,
"eval_dim_128_cosine_precision@1": 0.4176904176904177,
"eval_dim_128_cosine_precision@10": 0.3405405405405405,
"eval_dim_128_cosine_precision@3": 0.407043407043407,
"eval_dim_128_cosine_precision@5": 0.38378378378378375,
"eval_dim_128_cosine_recall@1": 0.0764634023360966,
"eval_dim_128_cosine_recall@10": 0.37167610923736266,
"eval_dim_128_cosine_recall@3": 0.1950757226938055,
"eval_dim_128_cosine_recall@5": 0.2653784346282696,
"eval_dim_256_cosine_accuracy@1": 0.4692874692874693,
"eval_dim_256_cosine_accuracy@10": 0.5823095823095823,
"eval_dim_256_cosine_accuracy@3": 0.5036855036855037,
"eval_dim_256_cosine_accuracy@5": 0.5356265356265356,
"eval_dim_256_cosine_map@100": 0.5757345895348797,
"eval_dim_256_cosine_mrr@10": 0.49419289419289414,
"eval_dim_256_cosine_ndcg@10": 0.5181510100009289,
"eval_dim_256_cosine_precision@1": 0.4692874692874693,
"eval_dim_256_cosine_precision@10": 0.38255528255528254,
"eval_dim_256_cosine_precision@3": 0.4594594594594595,
"eval_dim_256_cosine_precision@5": 0.4329238329238329,
"eval_dim_256_cosine_recall@1": 0.08186779696509015,
"eval_dim_256_cosine_recall@10": 0.40096566829469704,
"eval_dim_256_cosine_recall@3": 0.21723143666124728,
"eval_dim_256_cosine_recall@5": 0.29338456581068695,
"eval_dim_512_cosine_accuracy@1": 0.47174447174447176,
"eval_dim_512_cosine_accuracy@10": 0.5749385749385749,
"eval_dim_512_cosine_accuracy@3": 0.5012285012285013,
"eval_dim_512_cosine_accuracy@5": 0.542997542997543,
"eval_dim_512_cosine_map@100": 0.5829895521285842,
"eval_dim_512_cosine_mrr@10": 0.49543894543894534,
"eval_dim_512_cosine_ndcg@10": 0.5188017558826996,
"eval_dim_512_cosine_precision@1": 0.47174447174447176,
"eval_dim_512_cosine_precision@10": 0.3766584766584767,
"eval_dim_512_cosine_precision@3": 0.457002457002457,
"eval_dim_512_cosine_precision@5": 0.4309582309582309,
"eval_dim_512_cosine_recall@1": 0.0885958586931519,
"eval_dim_512_cosine_recall@10": 0.3993905646668074,
"eval_dim_512_cosine_recall@3": 0.2234614078912185,
"eval_dim_512_cosine_recall@5": 0.2992478654239865,
"eval_dim_64_cosine_accuracy@1": 0.33415233415233414,
"eval_dim_64_cosine_accuracy@10": 0.4348894348894349,
"eval_dim_64_cosine_accuracy@3": 0.36855036855036855,
"eval_dim_64_cosine_accuracy@5": 0.41277641277641275,
"eval_dim_64_cosine_map@100": 0.4456413066511673,
"eval_dim_64_cosine_mrr@10": 0.3588959088959089,
"eval_dim_64_cosine_ndcg@10": 0.38293193435604433,
"eval_dim_64_cosine_precision@1": 0.33415233415233414,
"eval_dim_64_cosine_precision@10": 0.27764127764127766,
"eval_dim_64_cosine_precision@3": 0.33005733005733,
"eval_dim_64_cosine_precision@5": 0.3164619164619165,
"eval_dim_64_cosine_recall@1": 0.05986407848776464,
"eval_dim_64_cosine_recall@10": 0.30059000614504944,
"eval_dim_64_cosine_recall@3": 0.1600282971493556,
"eval_dim_64_cosine_recall@5": 0.2246468814016758,
"eval_dim_768_cosine_accuracy@1": 0.4742014742014742,
"eval_dim_768_cosine_accuracy@10": 0.5823095823095823,
"eval_dim_768_cosine_accuracy@3": 0.5135135135135135,
"eval_dim_768_cosine_accuracy@5": 0.538083538083538,
"eval_dim_768_cosine_map@100": 0.5832191164703833,
"eval_dim_768_cosine_mrr@10": 0.49936332436332426,
"eval_dim_768_cosine_ndcg@10": 0.5224041192438582,
"eval_dim_768_cosine_precision@1": 0.4742014742014742,
"eval_dim_768_cosine_precision@10": 0.37862407862407865,
"eval_dim_768_cosine_precision@3": 0.466011466011466,
"eval_dim_768_cosine_precision@5": 0.4368550368550368,
"eval_dim_768_cosine_recall@1": 0.08615877386970341,
"eval_dim_768_cosine_recall@10": 0.40446624702254846,
"eval_dim_768_cosine_recall@3": 0.22654634506706478,
"eval_dim_768_cosine_recall@5": 0.30265535064965354,
"eval_runtime": 112.0679,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.38293193435604433,
"eval_steps_per_second": 0.0,
"step": 204
},
{
"epoch": 2.0098039215686274,
"grad_norm": 40.668514251708984,
"learning_rate": 1.9396926207859085e-05,
"loss": 1.8304,
"step": 205
},
{
"epoch": 2.019607843137255,
"grad_norm": 29.127620697021484,
"learning_rate": 1.938516654249428e-05,
"loss": 0.9245,
"step": 206
},
{
"epoch": 2.0294117647058822,
"grad_norm": 40.05576705932617,
"learning_rate": 1.9373296962387988e-05,
"loss": 2.1126,
"step": 207
},
{
"epoch": 2.0392156862745097,
"grad_norm": 62.072296142578125,
"learning_rate": 1.936131760655124e-05,
"loss": 5.1247,
"step": 208
},
{
"epoch": 2.049019607843137,
"grad_norm": 39.36087417602539,
"learning_rate": 1.9349228615280736e-05,
"loss": 1.9362,
"step": 209
},
{
"epoch": 2.0588235294117645,
"grad_norm": 85.72864532470703,
"learning_rate": 1.9337030130157166e-05,
"loss": 2.6958,
"step": 210
},
{
"epoch": 2.0686274509803924,
"grad_norm": 31.34132957458496,
"learning_rate": 1.932472229404356e-05,
"loss": 2.4759,
"step": 211
},
{
"epoch": 2.0784313725490198,
"grad_norm": 38.84762954711914,
"learning_rate": 1.9312305251083613e-05,
"loss": 2.092,
"step": 212
},
{
"epoch": 2.088235294117647,
"grad_norm": 60.889225006103516,
"learning_rate": 1.929977914670001e-05,
"loss": 4.3632,
"step": 213
},
{
"epoch": 2.0980392156862746,
"grad_norm": 69.78909301757812,
"learning_rate": 1.9287144127592704e-05,
"loss": 2.8144,
"step": 214
},
{
"epoch": 2.107843137254902,
"grad_norm": 18.469602584838867,
"learning_rate": 1.9274400341737214e-05,
"loss": 0.6525,
"step": 215
},
{
"epoch": 2.1176470588235294,
"grad_norm": 21.518314361572266,
"learning_rate": 1.926154793838288e-05,
"loss": 0.7783,
"step": 216
},
{
"epoch": 2.127450980392157,
"grad_norm": 54.2549934387207,
"learning_rate": 1.924858706805112e-05,
"loss": 3.2555,
"step": 217
},
{
"epoch": 2.1372549019607843,
"grad_norm": 62.788028717041016,
"learning_rate": 1.923551788253367e-05,
"loss": 2.5865,
"step": 218
},
{
"epoch": 2.1470588235294117,
"grad_norm": 49.17964553833008,
"learning_rate": 1.9222340534890803e-05,
"loss": 3.927,
"step": 219
},
{
"epoch": 2.156862745098039,
"grad_norm": 17.153976440429688,
"learning_rate": 1.920905517944954e-05,
"loss": 0.5981,
"step": 220
},
{
"epoch": 2.1666666666666665,
"grad_norm": 76.39110565185547,
"learning_rate": 1.9195661971801825e-05,
"loss": 5.5659,
"step": 221
},
{
"epoch": 2.176470588235294,
"grad_norm": 58.603973388671875,
"learning_rate": 1.9182161068802742e-05,
"loss": 2.2788,
"step": 222
},
{
"epoch": 2.186274509803922,
"grad_norm": 28.089658737182617,
"learning_rate": 1.9168552628568632e-05,
"loss": 1.8267,
"step": 223
},
{
"epoch": 2.196078431372549,
"grad_norm": 47.15828323364258,
"learning_rate": 1.9154836810475266e-05,
"loss": 2.0744,
"step": 224
},
{
"epoch": 2.2058823529411766,
"grad_norm": 56.87709426879883,
"learning_rate": 1.914101377515599e-05,
"loss": 3.8103,
"step": 225
},
{
"epoch": 2.215686274509804,
"grad_norm": 26.599693298339844,
"learning_rate": 1.9127083684499805e-05,
"loss": 1.1361,
"step": 226
},
{
"epoch": 2.2254901960784315,
"grad_norm": 68.64309692382812,
"learning_rate": 1.9113046701649517e-05,
"loss": 3.3677,
"step": 227
},
{
"epoch": 2.235294117647059,
"grad_norm": 61.36454772949219,
"learning_rate": 1.909890299099979e-05,
"loss": 3.0295,
"step": 228
},
{
"epoch": 2.2450980392156863,
"grad_norm": 35.946556091308594,
"learning_rate": 1.9084652718195237e-05,
"loss": 1.5912,
"step": 229
},
{
"epoch": 2.2549019607843137,
"grad_norm": 59.93580627441406,
"learning_rate": 1.9070296050128486e-05,
"loss": 4.2332,
"step": 230
},
{
"epoch": 2.264705882352941,
"grad_norm": 55.56387710571289,
"learning_rate": 1.9055833154938208e-05,
"loss": 3.0785,
"step": 231
},
{
"epoch": 2.2745098039215685,
"grad_norm": 59.931373596191406,
"learning_rate": 1.9041264202007158e-05,
"loss": 2.137,
"step": 232
},
{
"epoch": 2.284313725490196,
"grad_norm": 71.74024200439453,
"learning_rate": 1.90265893619602e-05,
"loss": 3.521,
"step": 233
},
{
"epoch": 2.2941176470588234,
"grad_norm": 103.71435546875,
"learning_rate": 1.901180880666228e-05,
"loss": 5.2255,
"step": 234
},
{
"epoch": 2.303921568627451,
"grad_norm": 73.98539733886719,
"learning_rate": 1.8996922709216456e-05,
"loss": 5.3743,
"step": 235
},
{
"epoch": 2.313725490196078,
"grad_norm": 41.58651351928711,
"learning_rate": 1.8981931243961823e-05,
"loss": 2.6036,
"step": 236
},
{
"epoch": 2.323529411764706,
"grad_norm": 15.649252891540527,
"learning_rate": 1.8966834586471517e-05,
"loss": 0.571,
"step": 237
},
{
"epoch": 2.3333333333333335,
"grad_norm": 19.277177810668945,
"learning_rate": 1.8951632913550625e-05,
"loss": 0.5066,
"step": 238
},
{
"epoch": 2.343137254901961,
"grad_norm": 44.847564697265625,
"learning_rate": 1.8936326403234125e-05,
"loss": 2.6968,
"step": 239
},
{
"epoch": 2.3529411764705883,
"grad_norm": 29.89764976501465,
"learning_rate": 1.8920915234784805e-05,
"loss": 1.0818,
"step": 240
},
{
"epoch": 2.3627450980392157,
"grad_norm": 29.265735626220703,
"learning_rate": 1.8905399588691165e-05,
"loss": 0.9833,
"step": 241
},
{
"epoch": 2.372549019607843,
"grad_norm": 33.96104431152344,
"learning_rate": 1.888977964666529e-05,
"loss": 0.8127,
"step": 242
},
{
"epoch": 2.3823529411764706,
"grad_norm": 26.53548240661621,
"learning_rate": 1.8874055591640746e-05,
"loss": 0.9684,
"step": 243
},
{
"epoch": 2.392156862745098,
"grad_norm": 75.08963012695312,
"learning_rate": 1.8858227607770398e-05,
"loss": 4.3469,
"step": 244
},
{
"epoch": 2.4019607843137254,
"grad_norm": 56.277896881103516,
"learning_rate": 1.8842295880424305e-05,
"loss": 3.7872,
"step": 245
},
{
"epoch": 2.411764705882353,
"grad_norm": 21.697683334350586,
"learning_rate": 1.8826260596187505e-05,
"loss": 0.6947,
"step": 246
},
{
"epoch": 2.4215686274509802,
"grad_norm": 31.832931518554688,
"learning_rate": 1.8810121942857848e-05,
"loss": 1.0844,
"step": 247
},
{
"epoch": 2.431372549019608,
"grad_norm": 13.941960334777832,
"learning_rate": 1.8793880109443797e-05,
"loss": 0.4574,
"step": 248
},
{
"epoch": 2.4411764705882355,
"grad_norm": 73.8248062133789,
"learning_rate": 1.8777535286162217e-05,
"loss": 2.5933,
"step": 249
},
{
"epoch": 2.450980392156863,
"grad_norm": 64.75690460205078,
"learning_rate": 1.8761087664436137e-05,
"loss": 1.6238,
"step": 250
},
{
"epoch": 2.4607843137254903,
"grad_norm": 52.90013122558594,
"learning_rate": 1.8744537436892517e-05,
"loss": 1.5579,
"step": 251
},
{
"epoch": 2.4705882352941178,
"grad_norm": 65.30809783935547,
"learning_rate": 1.8727884797359984e-05,
"loss": 3.1798,
"step": 252
},
{
"epoch": 2.480392156862745,
"grad_norm": 41.322776794433594,
"learning_rate": 1.8711129940866577e-05,
"loss": 1.3299,
"step": 253
},
{
"epoch": 2.4901960784313726,
"grad_norm": 37.93022537231445,
"learning_rate": 1.8694273063637444e-05,
"loss": 1.431,
"step": 254
},
{
"epoch": 2.5,
"grad_norm": 36.55287170410156,
"learning_rate": 1.8677314363092555e-05,
"loss": 1.0556,
"step": 255
},
{
"epoch": 2.5098039215686274,
"grad_norm": 43.371185302734375,
"learning_rate": 1.866025403784439e-05,
"loss": 2.3683,
"step": 256
},
{
"epoch": 2.519607843137255,
"grad_norm": 67.2099380493164,
"learning_rate": 1.8643092287695604e-05,
"loss": 3.6157,
"step": 257
},
{
"epoch": 2.5294117647058822,
"grad_norm": 65.53031158447266,
"learning_rate": 1.8625829313636707e-05,
"loss": 1.5859,
"step": 258
},
{
"epoch": 2.5392156862745097,
"grad_norm": 38.635250091552734,
"learning_rate": 1.860846531784368e-05,
"loss": 1.2728,
"step": 259
},
{
"epoch": 2.549019607843137,
"grad_norm": 45.58648681640625,
"learning_rate": 1.8591000503675635e-05,
"loss": 2.0595,
"step": 260
},
{
"epoch": 2.5588235294117645,
"grad_norm": 56.70792007446289,
"learning_rate": 1.8573435075672422e-05,
"loss": 2.7455,
"step": 261
},
{
"epoch": 2.568627450980392,
"grad_norm": 47.19007110595703,
"learning_rate": 1.8555769239552232e-05,
"loss": 1.3221,
"step": 262
},
{
"epoch": 2.5784313725490198,
"grad_norm": 69.4935302734375,
"learning_rate": 1.8538003202209186e-05,
"loss": 1.7831,
"step": 263
},
{
"epoch": 2.588235294117647,
"grad_norm": 61.42034149169922,
"learning_rate": 1.8520137171710923e-05,
"loss": 1.8362,
"step": 264
},
{
"epoch": 2.5980392156862746,
"grad_norm": 17.980220794677734,
"learning_rate": 1.8502171357296144e-05,
"loss": 0.4301,
"step": 265
},
{
"epoch": 2.607843137254902,
"grad_norm": 278.0427551269531,
"learning_rate": 1.8484105969372184e-05,
"loss": 1.4383,
"step": 266
},
{
"epoch": 2.6176470588235294,
"grad_norm": 64.51187133789062,
"learning_rate": 1.8465941219512533e-05,
"loss": 3.6068,
"step": 267
},
{
"epoch": 2.627450980392157,
"grad_norm": 47.54879379272461,
"learning_rate": 1.8447677320454367e-05,
"loss": 3.2374,
"step": 268
},
{
"epoch": 2.6372549019607843,
"grad_norm": 25.81728744506836,
"learning_rate": 1.8429314486096042e-05,
"loss": 1.1956,
"step": 269
},
{
"epoch": 2.6470588235294117,
"grad_norm": 58.41852951049805,
"learning_rate": 1.8410852931494606e-05,
"loss": 3.1378,
"step": 270
},
{
"epoch": 2.656862745098039,
"grad_norm": 54.03165817260742,
"learning_rate": 1.839229287286327e-05,
"loss": 2.8349,
"step": 271
},
{
"epoch": 2.6666666666666665,
"grad_norm": 43.03903579711914,
"learning_rate": 1.8373634527568877e-05,
"loss": 1.4831,
"step": 272
},
{
"epoch": 2.6764705882352944,
"grad_norm": 83.86200714111328,
"learning_rate": 1.8354878114129368e-05,
"loss": 2.628,
"step": 273
},
{
"epoch": 2.686274509803922,
"grad_norm": 69.86614990234375,
"learning_rate": 1.8336023852211197e-05,
"loss": 1.4708,
"step": 274
},
{
"epoch": 2.696078431372549,
"grad_norm": 23.16594696044922,
"learning_rate": 1.831707196262679e-05,
"loss": 0.8406,
"step": 275
},
{
"epoch": 2.7058823529411766,
"grad_norm": 108.72282409667969,
"learning_rate": 1.829802266733193e-05,
"loss": 1.0961,
"step": 276
},
{
"epoch": 2.715686274509804,
"grad_norm": 38.8856086730957,
"learning_rate": 1.827887618942318e-05,
"loss": 0.8955,
"step": 277
},
{
"epoch": 2.7254901960784315,
"grad_norm": 40.05035400390625,
"learning_rate": 1.8259632753135257e-05,
"loss": 2.2775,
"step": 278
},
{
"epoch": 2.735294117647059,
"grad_norm": 84.37309265136719,
"learning_rate": 1.824029258383841e-05,
"loss": 4.0415,
"step": 279
},
{
"epoch": 2.7450980392156863,
"grad_norm": 60.279048919677734,
"learning_rate": 1.8220855908035783e-05,
"loss": 3.2129,
"step": 280
},
{
"epoch": 2.7549019607843137,
"grad_norm": 33.11183166503906,
"learning_rate": 1.8201322953360758e-05,
"loss": 1.4543,
"step": 281
},
{
"epoch": 2.764705882352941,
"grad_norm": 66.42842102050781,
"learning_rate": 1.8181693948574285e-05,
"loss": 3.2836,
"step": 282
},
{
"epoch": 2.7745098039215685,
"grad_norm": 402.927490234375,
"learning_rate": 1.816196912356222e-05,
"loss": 1.9991,
"step": 283
},
{
"epoch": 2.784313725490196,
"grad_norm": 53.51690673828125,
"learning_rate": 1.814214870933261e-05,
"loss": 1.7477,
"step": 284
},
{
"epoch": 2.7941176470588234,
"grad_norm": 60.276546478271484,
"learning_rate": 1.812223293801301e-05,
"loss": 2.853,
"step": 285
},
{
"epoch": 2.803921568627451,
"grad_norm": 12.18474292755127,
"learning_rate": 1.8102222042847735e-05,
"loss": 0.4566,
"step": 286
},
{
"epoch": 2.813725490196078,
"grad_norm": 32.41611099243164,
"learning_rate": 1.8082116258195173e-05,
"loss": 0.9655,
"step": 287
},
{
"epoch": 2.8235294117647056,
"grad_norm": 46.4759407043457,
"learning_rate": 1.8061915819524995e-05,
"loss": 1.6009,
"step": 288
},
{
"epoch": 2.8333333333333335,
"grad_norm": 38.24691390991211,
"learning_rate": 1.8041620963415418e-05,
"loss": 2.776,
"step": 289
},
{
"epoch": 2.843137254901961,
"grad_norm": 9.454116821289062,
"learning_rate": 1.802123192755044e-05,
"loss": 0.1765,
"step": 290
},
{
"epoch": 2.8529411764705883,
"grad_norm": 28.283329010009766,
"learning_rate": 1.800074895071704e-05,
"loss": 0.9924,
"step": 291
},
{
"epoch": 2.8627450980392157,
"grad_norm": 53.15471649169922,
"learning_rate": 1.7980172272802398e-05,
"loss": 2.1822,
"step": 292
},
{
"epoch": 2.872549019607843,
"grad_norm": 31.239439010620117,
"learning_rate": 1.795950213479107e-05,
"loss": 1.5509,
"step": 293
},
{
"epoch": 2.8823529411764706,
"grad_norm": 30.74629020690918,
"learning_rate": 1.7938738778762182e-05,
"loss": 0.8738,
"step": 294
},
{
"epoch": 2.892156862745098,
"grad_norm": 38.464263916015625,
"learning_rate": 1.7917882447886585e-05,
"loss": 1.1838,
"step": 295
},
{
"epoch": 2.9019607843137254,
"grad_norm": 38.54100036621094,
"learning_rate": 1.7896933386423998e-05,
"loss": 0.6173,
"step": 296
},
{
"epoch": 2.911764705882353,
"grad_norm": 51.80077362060547,
"learning_rate": 1.787589183972017e-05,
"loss": 1.8889,
"step": 297
},
{
"epoch": 2.9215686274509802,
"grad_norm": 60.93544006347656,
"learning_rate": 1.785475805420399e-05,
"loss": 3.8679,
"step": 298
},
{
"epoch": 2.931372549019608,
"grad_norm": 54.1665153503418,
"learning_rate": 1.7833532277384607e-05,
"loss": 1.7225,
"step": 299
},
{
"epoch": 2.9411764705882355,
"grad_norm": 59.37535858154297,
"learning_rate": 1.7812214757848523e-05,
"loss": 2.289,
"step": 300
},
{
"epoch": 2.950980392156863,
"grad_norm": 76.9103012084961,
"learning_rate": 1.7790805745256703e-05,
"loss": 3.0041,
"step": 301
},
{
"epoch": 2.9607843137254903,
"grad_norm": 36.257568359375,
"learning_rate": 1.7769305490341623e-05,
"loss": 0.7329,
"step": 302
},
{
"epoch": 2.9705882352941178,
"grad_norm": 63.820068359375,
"learning_rate": 1.7747714244904348e-05,
"loss": 2.8791,
"step": 303
},
{
"epoch": 2.980392156862745,
"grad_norm": 64.49067687988281,
"learning_rate": 1.772603226181159e-05,
"loss": 3.0804,
"step": 304
},
{
"epoch": 2.9901960784313726,
"grad_norm": 35.46078109741211,
"learning_rate": 1.7704259794992734e-05,
"loss": 1.1065,
"step": 305
},
{
"epoch": 3.0,
"grad_norm": 9.00590991973877,
"learning_rate": 1.768239709943686e-05,
"loss": 0.2322,
"step": 306
},
{
"epoch": 3.0,
"eval_dim_128_cosine_accuracy@1": 0.43734643734643736,
"eval_dim_128_cosine_accuracy@10": 0.5208845208845209,
"eval_dim_128_cosine_accuracy@3": 0.4619164619164619,
"eval_dim_128_cosine_accuracy@5": 0.48894348894348894,
"eval_dim_128_cosine_map@100": 0.5392590111297522,
"eval_dim_128_cosine_mrr@10": 0.45547268047268047,
"eval_dim_128_cosine_ndcg@10": 0.47408205891922484,
"eval_dim_128_cosine_precision@1": 0.43734643734643736,
"eval_dim_128_cosine_precision@10": 0.3393120393120393,
"eval_dim_128_cosine_precision@3": 0.42178542178542183,
"eval_dim_128_cosine_precision@5": 0.3921375921375922,
"eval_dim_128_cosine_recall@1": 0.08189001580025773,
"eval_dim_128_cosine_recall@10": 0.37398642308884233,
"eval_dim_128_cosine_recall@3": 0.20626313299385876,
"eval_dim_128_cosine_recall@5": 0.2755675051187147,
"eval_dim_256_cosine_accuracy@1": 0.4742014742014742,
"eval_dim_256_cosine_accuracy@10": 0.5675675675675675,
"eval_dim_256_cosine_accuracy@3": 0.5036855036855037,
"eval_dim_256_cosine_accuracy@5": 0.5282555282555282,
"eval_dim_256_cosine_map@100": 0.575661339960839,
"eval_dim_256_cosine_mrr@10": 0.49422116922116915,
"eval_dim_256_cosine_ndcg@10": 0.5128919275568166,
"eval_dim_256_cosine_precision@1": 0.4742014742014742,
"eval_dim_256_cosine_precision@10": 0.3707616707616708,
"eval_dim_256_cosine_precision@3": 0.4578214578214578,
"eval_dim_256_cosine_precision@5": 0.428009828009828,
"eval_dim_256_cosine_recall@1": 0.08790827345644642,
"eval_dim_256_cosine_recall@10": 0.3967473256381461,
"eval_dim_256_cosine_recall@3": 0.2229928796373985,
"eval_dim_256_cosine_recall@5": 0.29837100236186714,
"eval_dim_512_cosine_accuracy@1": 0.4692874692874693,
"eval_dim_512_cosine_accuracy@10": 0.5700245700245701,
"eval_dim_512_cosine_accuracy@3": 0.4987714987714988,
"eval_dim_512_cosine_accuracy@5": 0.5307125307125307,
"eval_dim_512_cosine_map@100": 0.5798713119018684,
"eval_dim_512_cosine_mrr@10": 0.4916744666744665,
"eval_dim_512_cosine_ndcg@10": 0.5125729638170266,
"eval_dim_512_cosine_precision@1": 0.4692874692874693,
"eval_dim_512_cosine_precision@10": 0.3690417690417691,
"eval_dim_512_cosine_precision@3": 0.4553644553644553,
"eval_dim_512_cosine_precision@5": 0.42653562653562654,
"eval_dim_512_cosine_recall@1": 0.08723764664945596,
"eval_dim_512_cosine_recall@10": 0.398982369605165,
"eval_dim_512_cosine_recall@3": 0.22349567673110468,
"eval_dim_512_cosine_recall@5": 0.3012767319721422,
"eval_dim_64_cosine_accuracy@1": 0.3783783783783784,
"eval_dim_64_cosine_accuracy@10": 0.4742014742014742,
"eval_dim_64_cosine_accuracy@3": 0.40786240786240785,
"eval_dim_64_cosine_accuracy@5": 0.44963144963144963,
"eval_dim_64_cosine_map@100": 0.4832282201923836,
"eval_dim_64_cosine_mrr@10": 0.4007975507975507,
"eval_dim_64_cosine_ndcg@10": 0.4247869932057438,
"eval_dim_64_cosine_precision@1": 0.3783783783783784,
"eval_dim_64_cosine_precision@10": 0.3122850122850123,
"eval_dim_64_cosine_precision@3": 0.3693693693693693,
"eval_dim_64_cosine_precision@5": 0.3538083538083538,
"eval_dim_64_cosine_recall@1": 0.06567789731541197,
"eval_dim_64_cosine_recall@10": 0.32596052392657954,
"eval_dim_64_cosine_recall@3": 0.16867167722058482,
"eval_dim_64_cosine_recall@5": 0.23705500240166655,
"eval_dim_768_cosine_accuracy@1": 0.4742014742014742,
"eval_dim_768_cosine_accuracy@10": 0.5847665847665847,
"eval_dim_768_cosine_accuracy@3": 0.4987714987714988,
"eval_dim_768_cosine_accuracy@5": 0.5331695331695332,
"eval_dim_768_cosine_map@100": 0.5832479691314802,
"eval_dim_768_cosine_mrr@10": 0.49626087126087104,
"eval_dim_768_cosine_ndcg@10": 0.51702577778776,
"eval_dim_768_cosine_precision@1": 0.4742014742014742,
"eval_dim_768_cosine_precision@10": 0.3719901719901719,
"eval_dim_768_cosine_precision@3": 0.45782145782145783,
"eval_dim_768_cosine_precision@5": 0.42604422604422604,
"eval_dim_768_cosine_recall@1": 0.08712841415385984,
"eval_dim_768_cosine_recall@10": 0.40559805323520803,
"eval_dim_768_cosine_recall@3": 0.22590773198406905,
"eval_dim_768_cosine_recall@5": 0.30092748730471575,
"eval_runtime": 112.1245,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4247869932057438,
"eval_steps_per_second": 0.0,
"step": 306
},
{
"epoch": 3.0098039215686274,
"grad_norm": 33.71516036987305,
"learning_rate": 1.766044443118978e-05,
"loss": 1.5574,
"step": 307
},
{
"epoch": 3.019607843137255,
"grad_norm": 51.304203033447266,
"learning_rate": 1.7638402047351025e-05,
"loss": 1.3758,
"step": 308
},
{
"epoch": 3.0294117647058822,
"grad_norm": 16.059232711791992,
"learning_rate": 1.7616270206070814e-05,
"loss": 0.6382,
"step": 309
},
{
"epoch": 3.0392156862745097,
"grad_norm": 53.8888053894043,
"learning_rate": 1.7594049166547073e-05,
"loss": 1.8904,
"step": 310
},
{
"epoch": 3.049019607843137,
"grad_norm": 30.433692932128906,
"learning_rate": 1.7571739189022365e-05,
"loss": 2.7908,
"step": 311
},
{
"epoch": 3.0588235294117645,
"grad_norm": 73.848388671875,
"learning_rate": 1.7549340534780852e-05,
"loss": 4.3568,
"step": 312
},
{
"epoch": 3.0686274509803924,
"grad_norm": 49.22679901123047,
"learning_rate": 1.7526853466145248e-05,
"loss": 1.591,
"step": 313
},
{
"epoch": 3.0784313725490198,
"grad_norm": 51.269676208496094,
"learning_rate": 1.750427824647372e-05,
"loss": 2.5855,
"step": 314
},
{
"epoch": 3.088235294117647,
"grad_norm": 34.83403396606445,
"learning_rate": 1.7481615140156837e-05,
"loss": 1.7845,
"step": 315
},
{
"epoch": 3.0980392156862746,
"grad_norm": 60.68478775024414,
"learning_rate": 1.7458864412614436e-05,
"loss": 2.7024,
"step": 316
},
{
"epoch": 3.107843137254902,
"grad_norm": 12.775145530700684,
"learning_rate": 1.743602633029255e-05,
"loss": 0.1437,
"step": 317
},
{
"epoch": 3.1176470588235294,
"grad_norm": 26.30499267578125,
"learning_rate": 1.7413101160660267e-05,
"loss": 0.8981,
"step": 318
},
{
"epoch": 3.127450980392157,
"grad_norm": 59.40461349487305,
"learning_rate": 1.7390089172206594e-05,
"loss": 0.8955,
"step": 319
},
{
"epoch": 3.1372549019607843,
"grad_norm": 37.89866638183594,
"learning_rate": 1.7366990634437328e-05,
"loss": 1.0776,
"step": 320
},
{
"epoch": 3.1470588235294117,
"grad_norm": 65.3907241821289,
"learning_rate": 1.7343805817871885e-05,
"loss": 1.8035,
"step": 321
},
{
"epoch": 3.156862745098039,
"grad_norm": 41.77218246459961,
"learning_rate": 1.7320534994040148e-05,
"loss": 1.1962,
"step": 322
},
{
"epoch": 3.1666666666666665,
"grad_norm": 26.18130111694336,
"learning_rate": 1.729717843547927e-05,
"loss": 1.0252,
"step": 323
},
{
"epoch": 3.176470588235294,
"grad_norm": 23.27877426147461,
"learning_rate": 1.7273736415730488e-05,
"loss": 0.6757,
"step": 324
},
{
"epoch": 3.186274509803922,
"grad_norm": 20.14743995666504,
"learning_rate": 1.725020920933593e-05,
"loss": 0.4822,
"step": 325
},
{
"epoch": 3.196078431372549,
"grad_norm": 18.556365966796875,
"learning_rate": 1.7226597091835377e-05,
"loss": 1.0597,
"step": 326
},
{
"epoch": 3.2058823529411766,
"grad_norm": 45.01679229736328,
"learning_rate": 1.7202900339763066e-05,
"loss": 2.4075,
"step": 327
},
{
"epoch": 3.215686274509804,
"grad_norm": 22.377981185913086,
"learning_rate": 1.717911923064442e-05,
"loss": 0.8851,
"step": 328
},
{
"epoch": 3.2254901960784315,
"grad_norm": 34.32155227661133,
"learning_rate": 1.7155254042992827e-05,
"loss": 1.4165,
"step": 329
},
{
"epoch": 3.235294117647059,
"grad_norm": 70.21348571777344,
"learning_rate": 1.713130505630635e-05,
"loss": 3.2401,
"step": 330
},
{
"epoch": 3.2450980392156863,
"grad_norm": 24.261335372924805,
"learning_rate": 1.710727255106447e-05,
"loss": 0.455,
"step": 331
},
{
"epoch": 3.2549019607843137,
"grad_norm": 59.5804328918457,
"learning_rate": 1.7083156808724817e-05,
"loss": 2.5575,
"step": 332
},
{
"epoch": 3.264705882352941,
"grad_norm": 16.754976272583008,
"learning_rate": 1.7058958111719836e-05,
"loss": 0.397,
"step": 333
},
{
"epoch": 3.2745098039215685,
"grad_norm": 53.39505386352539,
"learning_rate": 1.70346767434535e-05,
"loss": 2.365,
"step": 334
},
{
"epoch": 3.284313725490196,
"grad_norm": 44.01158142089844,
"learning_rate": 1.7010312988297993e-05,
"loss": 1.2017,
"step": 335
},
{
"epoch": 3.2941176470588234,
"grad_norm": 20.175052642822266,
"learning_rate": 1.6985867131590383e-05,
"loss": 0.5282,
"step": 336
},
{
"epoch": 3.303921568627451,
"grad_norm": 79.74894714355469,
"learning_rate": 1.696133945962927e-05,
"loss": 3.37,
"step": 337
},
{
"epoch": 3.313725490196078,
"grad_norm": 38.84457778930664,
"learning_rate": 1.6936730259671423e-05,
"loss": 1.1749,
"step": 338
},
{
"epoch": 3.323529411764706,
"grad_norm": 48.577674865722656,
"learning_rate": 1.691203981992845e-05,
"loss": 2.494,
"step": 339
},
{
"epoch": 3.3333333333333335,
"grad_norm": 23.741260528564453,
"learning_rate": 1.6887268429563387e-05,
"loss": 1.3695,
"step": 340
},
{
"epoch": 3.343137254901961,
"grad_norm": 45.17987060546875,
"learning_rate": 1.686241637868734e-05,
"loss": 2.1702,
"step": 341
},
{
"epoch": 3.3529411764705883,
"grad_norm": 51.08975601196289,
"learning_rate": 1.6837483958356054e-05,
"loss": 1.7424,
"step": 342
},
{
"epoch": 3.3627450980392157,
"grad_norm": 114.2376708984375,
"learning_rate": 1.681247146056654e-05,
"loss": 2.481,
"step": 343
},
{
"epoch": 3.372549019607843,
"grad_norm": 6.545116424560547,
"learning_rate": 1.6787379178253642e-05,
"loss": 0.195,
"step": 344
},
{
"epoch": 3.3823529411764706,
"grad_norm": 281.9471130371094,
"learning_rate": 1.676220740528659e-05,
"loss": 0.5217,
"step": 345
},
{
"epoch": 3.392156862745098,
"grad_norm": 38.844215393066406,
"learning_rate": 1.6736956436465573e-05,
"loss": 1.0893,
"step": 346
},
{
"epoch": 3.4019607843137254,
"grad_norm": 34.51204299926758,
"learning_rate": 1.67116265675183e-05,
"loss": 0.606,
"step": 347
},
{
"epoch": 3.411764705882353,
"grad_norm": 53.36712646484375,
"learning_rate": 1.6686218095096506e-05,
"loss": 1.5417,
"step": 348
},
{
"epoch": 3.4215686274509802,
"grad_norm": 73.62438201904297,
"learning_rate": 1.6660731316772503e-05,
"loss": 2.3694,
"step": 349
},
{
"epoch": 3.431372549019608,
"grad_norm": 29.111061096191406,
"learning_rate": 1.663516653103568e-05,
"loss": 0.7988,
"step": 350
},
{
"epoch": 3.4411764705882355,
"grad_norm": 54.98247146606445,
"learning_rate": 1.660952403728902e-05,
"loss": 1.2099,
"step": 351
},
{
"epoch": 3.450980392156863,
"grad_norm": 57.59906768798828,
"learning_rate": 1.6583804135845582e-05,
"loss": 0.9519,
"step": 352
},
{
"epoch": 3.4607843137254903,
"grad_norm": 31.186080932617188,
"learning_rate": 1.655800712792498e-05,
"loss": 1.0354,
"step": 353
},
{
"epoch": 3.4705882352941178,
"grad_norm": 19.18726921081543,
"learning_rate": 1.653213331564987e-05,
"loss": 0.4518,
"step": 354
},
{
"epoch": 3.480392156862745,
"grad_norm": 83.21920776367188,
"learning_rate": 1.650618300204242e-05,
"loss": 3.0758,
"step": 355
},
{
"epoch": 3.4901960784313726,
"grad_norm": 83.59344482421875,
"learning_rate": 1.648015649102073e-05,
"loss": 0.9814,
"step": 356
},
{
"epoch": 3.5,
"grad_norm": 70.87894439697266,
"learning_rate": 1.6454054087395284e-05,
"loss": 2.4242,
"step": 357
},
{
"epoch": 3.5098039215686274,
"grad_norm": 78.1067886352539,
"learning_rate": 1.6427876096865394e-05,
"loss": 3.3301,
"step": 358
},
{
"epoch": 3.519607843137255,
"grad_norm": 41.97034454345703,
"learning_rate": 1.6401622826015616e-05,
"loss": 1.4931,
"step": 359
},
{
"epoch": 3.5294117647058822,
"grad_norm": 30.62147331237793,
"learning_rate": 1.637529458231215e-05,
"loss": 0.8788,
"step": 360
},
{
"epoch": 3.5392156862745097,
"grad_norm": 35.48588943481445,
"learning_rate": 1.634889167409923e-05,
"loss": 1.056,
"step": 361
},
{
"epoch": 3.549019607843137,
"grad_norm": 43.44130325317383,
"learning_rate": 1.6322414410595548e-05,
"loss": 1.3501,
"step": 362
},
{
"epoch": 3.5588235294117645,
"grad_norm": 80.56116485595703,
"learning_rate": 1.6295863101890603e-05,
"loss": 3.3744,
"step": 363
},
{
"epoch": 3.568627450980392,
"grad_norm": 181.51242065429688,
"learning_rate": 1.626923805894107e-05,
"loss": 7.6844,
"step": 364
},
{
"epoch": 3.5784313725490198,
"grad_norm": 43.1163330078125,
"learning_rate": 1.624253959356717e-05,
"loss": 1.9189,
"step": 365
},
{
"epoch": 3.588235294117647,
"grad_norm": 34.17588806152344,
"learning_rate": 1.6215768018449015e-05,
"loss": 1.2354,
"step": 366
},
{
"epoch": 3.5980392156862746,
"grad_norm": 51.04001998901367,
"learning_rate": 1.6188923647122946e-05,
"loss": 1.1185,
"step": 367
},
{
"epoch": 3.607843137254902,
"grad_norm": 71.40681457519531,
"learning_rate": 1.6162006793977858e-05,
"loss": 1.4144,
"step": 368
},
{
"epoch": 3.6176470588235294,
"grad_norm": 18.94059944152832,
"learning_rate": 1.613501777425152e-05,
"loss": 0.4259,
"step": 369
},
{
"epoch": 3.627450980392157,
"grad_norm": 23.881540298461914,
"learning_rate": 1.610795690402688e-05,
"loss": 0.2264,
"step": 370
},
{
"epoch": 3.6372549019607843,
"grad_norm": 18.20306396484375,
"learning_rate": 1.6080824500228367e-05,
"loss": 0.7256,
"step": 371
},
{
"epoch": 3.6470588235294117,
"grad_norm": 65.94014739990234,
"learning_rate": 1.605362088061818e-05,
"loss": 2.6337,
"step": 372
},
{
"epoch": 3.656862745098039,
"grad_norm": 47.62442398071289,
"learning_rate": 1.6026346363792565e-05,
"loss": 1.2556,
"step": 373
},
{
"epoch": 3.6666666666666665,
"grad_norm": 45.08142852783203,
"learning_rate": 1.5999001269178082e-05,
"loss": 2.3852,
"step": 374
},
{
"epoch": 3.6764705882352944,
"grad_norm": 46.066654205322266,
"learning_rate": 1.5971585917027864e-05,
"loss": 0.4105,
"step": 375
},
{
"epoch": 3.686274509803922,
"grad_norm": 104.716552734375,
"learning_rate": 1.594410062841787e-05,
"loss": 1.7846,
"step": 376
},
{
"epoch": 3.696078431372549,
"grad_norm": 90.9739761352539,
"learning_rate": 1.5916545725243124e-05,
"loss": 6.6395,
"step": 377
},
{
"epoch": 3.7058823529411766,
"grad_norm": 28.02090072631836,
"learning_rate": 1.5888921530213938e-05,
"loss": 0.7761,
"step": 378
},
{
"epoch": 3.715686274509804,
"grad_norm": 55.797698974609375,
"learning_rate": 1.5861228366852148e-05,
"loss": 1.6567,
"step": 379
},
{
"epoch": 3.7254901960784315,
"grad_norm": 57.91788864135742,
"learning_rate": 1.5833466559487305e-05,
"loss": 2.2471,
"step": 380
},
{
"epoch": 3.735294117647059,
"grad_norm": 25.998655319213867,
"learning_rate": 1.5805636433252892e-05,
"loss": 0.8596,
"step": 381
},
{
"epoch": 3.7450980392156863,
"grad_norm": 16.920719146728516,
"learning_rate": 1.5777738314082514e-05,
"loss": 0.3693,
"step": 382
},
{
"epoch": 3.7549019607843137,
"grad_norm": 33.89975357055664,
"learning_rate": 1.574977252870607e-05,
"loss": 0.8207,
"step": 383
},
{
"epoch": 3.764705882352941,
"grad_norm": 53.536048889160156,
"learning_rate": 1.5721739404645937e-05,
"loss": 2.9248,
"step": 384
},
{
"epoch": 3.7745098039215685,
"grad_norm": 30.637815475463867,
"learning_rate": 1.5693639270213138e-05,
"loss": 1.4509,
"step": 385
},
{
"epoch": 3.784313725490196,
"grad_norm": 90.1275863647461,
"learning_rate": 1.5665472454503484e-05,
"loss": 2.2966,
"step": 386
},
{
"epoch": 3.7941176470588234,
"grad_norm": 83.83513641357422,
"learning_rate": 1.5637239287393725e-05,
"loss": 3.726,
"step": 387
},
{
"epoch": 3.803921568627451,
"grad_norm": 51.7756233215332,
"learning_rate": 1.56089400995377e-05,
"loss": 1.7707,
"step": 388
},
{
"epoch": 3.813725490196078,
"grad_norm": 40.58710861206055,
"learning_rate": 1.5580575222362435e-05,
"loss": 0.9623,
"step": 389
},
{
"epoch": 3.8235294117647056,
"grad_norm": 21.709218978881836,
"learning_rate": 1.5552144988064292e-05,
"loss": 0.7915,
"step": 390
},
{
"epoch": 3.8333333333333335,
"grad_norm": 5.2088541984558105,
"learning_rate": 1.552364972960506e-05,
"loss": 0.1255,
"step": 391
},
{
"epoch": 3.843137254901961,
"grad_norm": 53.78287887573242,
"learning_rate": 1.5495089780708062e-05,
"loss": 1.8356,
"step": 392
},
{
"epoch": 3.8529411764705883,
"grad_norm": 43.29266357421875,
"learning_rate": 1.5466465475854246e-05,
"loss": 2.0525,
"step": 393
},
{
"epoch": 3.8627450980392157,
"grad_norm": 63.14021682739258,
"learning_rate": 1.5437777150278268e-05,
"loss": 1.7096,
"step": 394
},
{
"epoch": 3.872549019607843,
"grad_norm": 48.62488555908203,
"learning_rate": 1.540902513996456e-05,
"loss": 1.7327,
"step": 395
},
{
"epoch": 3.8823529411764706,
"grad_norm": 46.965576171875,
"learning_rate": 1.538020978164341e-05,
"loss": 2.4524,
"step": 396
},
{
"epoch": 3.892156862745098,
"grad_norm": 24.237958908081055,
"learning_rate": 1.5351331412787004e-05,
"loss": 0.8552,
"step": 397
},
{
"epoch": 3.9019607843137254,
"grad_norm": 59.1135368347168,
"learning_rate": 1.5322390371605473e-05,
"loss": 2.2829,
"step": 398
},
{
"epoch": 3.911764705882353,
"grad_norm": 68.53660583496094,
"learning_rate": 1.5293386997042943e-05,
"loss": 1.7359,
"step": 399
},
{
"epoch": 3.9215686274509802,
"grad_norm": 20.409564971923828,
"learning_rate": 1.526432162877356e-05,
"loss": 0.761,
"step": 400
},
{
"epoch": 3.931372549019608,
"grad_norm": 32.15175247192383,
"learning_rate": 1.5235194607197508e-05,
"loss": 1.1795,
"step": 401
},
{
"epoch": 3.9411764705882355,
"grad_norm": 22.11779022216797,
"learning_rate": 1.5206006273437031e-05,
"loss": 0.6309,
"step": 402
},
{
"epoch": 3.950980392156863,
"grad_norm": 29.106136322021484,
"learning_rate": 1.5176756969332428e-05,
"loss": 1.5526,
"step": 403
},
{
"epoch": 3.9607843137254903,
"grad_norm": 45.77709197998047,
"learning_rate": 1.5147447037438055e-05,
"loss": 1.5281,
"step": 404
},
{
"epoch": 3.9705882352941178,
"grad_norm": 45.066898345947266,
"learning_rate": 1.5118076821018322e-05,
"loss": 1.1863,
"step": 405
},
{
"epoch": 3.980392156862745,
"grad_norm": 46.726715087890625,
"learning_rate": 1.5088646664043652e-05,
"loss": 2.1151,
"step": 406
},
{
"epoch": 3.9901960784313726,
"grad_norm": 40.37775802612305,
"learning_rate": 1.5059156911186465e-05,
"loss": 1.4431,
"step": 407
},
{
"epoch": 4.0,
"grad_norm": 30.91987419128418,
"learning_rate": 1.502960790781715e-05,
"loss": 0.604,
"step": 408
},
{
"epoch": 4.0,
"eval_dim_128_cosine_accuracy@1": 0.4152334152334152,
"eval_dim_128_cosine_accuracy@10": 0.547911547911548,
"eval_dim_128_cosine_accuracy@3": 0.4520884520884521,
"eval_dim_128_cosine_accuracy@5": 0.4963144963144963,
"eval_dim_128_cosine_map@100": 0.5352022650376457,
"eval_dim_128_cosine_mrr@10": 0.4439540189540188,
"eval_dim_128_cosine_ndcg@10": 0.476022616462926,
"eval_dim_128_cosine_precision@1": 0.4152334152334152,
"eval_dim_128_cosine_precision@10": 0.3503685503685504,
"eval_dim_128_cosine_precision@3": 0.40458640458640455,
"eval_dim_128_cosine_precision@5": 0.3862407862407862,
"eval_dim_128_cosine_recall@1": 0.07576985729373557,
"eval_dim_128_cosine_recall@10": 0.38250304397177337,
"eval_dim_128_cosine_recall@3": 0.19164666121829604,
"eval_dim_128_cosine_recall@5": 0.2648431710989262,
"eval_dim_256_cosine_accuracy@1": 0.4643734643734644,
"eval_dim_256_cosine_accuracy@10": 0.5749385749385749,
"eval_dim_256_cosine_accuracy@3": 0.5110565110565111,
"eval_dim_256_cosine_accuracy@5": 0.538083538083538,
"eval_dim_256_cosine_map@100": 0.5769248711893793,
"eval_dim_256_cosine_mrr@10": 0.49037966537966543,
"eval_dim_256_cosine_ndcg@10": 0.5151839021725609,
"eval_dim_256_cosine_precision@1": 0.4643734643734644,
"eval_dim_256_cosine_precision@10": 0.37371007371007375,
"eval_dim_256_cosine_precision@3": 0.4553644553644554,
"eval_dim_256_cosine_precision@5": 0.43243243243243246,
"eval_dim_256_cosine_recall@1": 0.08413544479725411,
"eval_dim_256_cosine_recall@10": 0.40271483033559735,
"eval_dim_256_cosine_recall@3": 0.21740852439395233,
"eval_dim_256_cosine_recall@5": 0.30018741213282235,
"eval_dim_512_cosine_accuracy@1": 0.47174447174447176,
"eval_dim_512_cosine_accuracy@10": 0.5921375921375921,
"eval_dim_512_cosine_accuracy@3": 0.5061425061425061,
"eval_dim_512_cosine_accuracy@5": 0.547911547911548,
"eval_dim_512_cosine_map@100": 0.5827439062036934,
"eval_dim_512_cosine_mrr@10": 0.49824109824109813,
"eval_dim_512_cosine_ndcg@10": 0.5246424542329371,
"eval_dim_512_cosine_precision@1": 0.47174447174447176,
"eval_dim_512_cosine_precision@10": 0.38108108108108113,
"eval_dim_512_cosine_precision@3": 0.4586404586404586,
"eval_dim_512_cosine_precision@5": 0.4348894348894349,
"eval_dim_512_cosine_recall@1": 0.08653854806242635,
"eval_dim_512_cosine_recall@10": 0.4102651355191313,
"eval_dim_512_cosine_recall@3": 0.22027117898074489,
"eval_dim_512_cosine_recall@5": 0.3003373003704849,
"eval_dim_64_cosine_accuracy@1": 0.3857493857493858,
"eval_dim_64_cosine_accuracy@10": 0.4668304668304668,
"eval_dim_64_cosine_accuracy@3": 0.41277641277641275,
"eval_dim_64_cosine_accuracy@5": 0.44717444717444715,
"eval_dim_64_cosine_map@100": 0.4898542710662095,
"eval_dim_64_cosine_mrr@10": 0.405001755001755,
"eval_dim_64_cosine_ndcg@10": 0.4248125110618003,
"eval_dim_64_cosine_precision@1": 0.3857493857493858,
"eval_dim_64_cosine_precision@10": 0.3078624078624078,
"eval_dim_64_cosine_precision@3": 0.3759213759213759,
"eval_dim_64_cosine_precision@5": 0.3572481572481573,
"eval_dim_64_cosine_recall@1": 0.06840941459604949,
"eval_dim_64_cosine_recall@10": 0.3229858166703473,
"eval_dim_64_cosine_recall@3": 0.17394674375664843,
"eval_dim_64_cosine_recall@5": 0.2428196811392192,
"eval_dim_768_cosine_accuracy@1": 0.48157248157248156,
"eval_dim_768_cosine_accuracy@10": 0.6044226044226044,
"eval_dim_768_cosine_accuracy@3": 0.5257985257985258,
"eval_dim_768_cosine_accuracy@5": 0.5651105651105651,
"eval_dim_768_cosine_map@100": 0.5932894665948539,
"eval_dim_768_cosine_mrr@10": 0.5104325104325105,
"eval_dim_768_cosine_ndcg@10": 0.5387104935062083,
"eval_dim_768_cosine_precision@1": 0.48157248157248156,
"eval_dim_768_cosine_precision@10": 0.39336609336609335,
"eval_dim_768_cosine_precision@3": 0.4725634725634726,
"eval_dim_768_cosine_precision@5": 0.4501228501228501,
"eval_dim_768_cosine_recall@1": 0.08566107518652087,
"eval_dim_768_cosine_recall@10": 0.41755530449425327,
"eval_dim_768_cosine_recall@3": 0.22490812348446051,
"eval_dim_768_cosine_recall@5": 0.30947544835267676,
"eval_runtime": 112.1091,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4248125110618003,
"eval_steps_per_second": 0.0,
"step": 408
},
{
"epoch": 4.009803921568627,
"grad_norm": 13.785823822021484,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.5226,
"step": 409
},
{
"epoch": 4.019607843137255,
"grad_norm": 17.55457878112793,
"learning_rate": 1.4970333534489179e-05,
"loss": 0.4814,
"step": 410
},
{
"epoch": 4.029411764705882,
"grad_norm": 21.303373336791992,
"learning_rate": 1.494060885872464e-05,
"loss": 0.7015,
"step": 411
},
{
"epoch": 4.03921568627451,
"grad_norm": 15.431344032287598,
"learning_rate": 1.4910826320828085e-05,
"loss": 0.9108,
"step": 412
},
{
"epoch": 4.049019607843137,
"grad_norm": 49.46926498413086,
"learning_rate": 1.488098626959885e-05,
"loss": 1.8203,
"step": 413
},
{
"epoch": 4.0588235294117645,
"grad_norm": 50.32418441772461,
"learning_rate": 1.4851089054509852e-05,
"loss": 1.2907,
"step": 414
},
{
"epoch": 4.068627450980392,
"grad_norm": 3.7227249145507812,
"learning_rate": 1.4821135025703491e-05,
"loss": 0.1011,
"step": 415
},
{
"epoch": 4.078431372549019,
"grad_norm": 17.569326400756836,
"learning_rate": 1.4791124533987529e-05,
"loss": 0.6205,
"step": 416
},
{
"epoch": 4.088235294117647,
"grad_norm": 7.294639587402344,
"learning_rate": 1.4761057930831002e-05,
"loss": 0.2523,
"step": 417
},
{
"epoch": 4.098039215686274,
"grad_norm": 38.585670471191406,
"learning_rate": 1.4730935568360103e-05,
"loss": 1.7956,
"step": 418
},
{
"epoch": 4.107843137254902,
"grad_norm": 31.60200309753418,
"learning_rate": 1.470075779935404e-05,
"loss": 1.3579,
"step": 419
},
{
"epoch": 4.117647058823529,
"grad_norm": 46.006561279296875,
"learning_rate": 1.4670524977240929e-05,
"loss": 0.6554,
"step": 420
},
{
"epoch": 4.127450980392156,
"grad_norm": 6.237908363342285,
"learning_rate": 1.4640237456093636e-05,
"loss": 0.1167,
"step": 421
},
{
"epoch": 4.137254901960785,
"grad_norm": 10.552518844604492,
"learning_rate": 1.4609895590625635e-05,
"loss": 0.3729,
"step": 422
},
{
"epoch": 4.147058823529412,
"grad_norm": 38.46894073486328,
"learning_rate": 1.4579499736186864e-05,
"loss": 1.2643,
"step": 423
},
{
"epoch": 4.1568627450980395,
"grad_norm": 18.761796951293945,
"learning_rate": 1.4549050248759546e-05,
"loss": 0.3683,
"step": 424
},
{
"epoch": 4.166666666666667,
"grad_norm": 24.26785659790039,
"learning_rate": 1.4518547484954033e-05,
"loss": 0.8441,
"step": 425
},
{
"epoch": 4.176470588235294,
"grad_norm": 48.49263000488281,
"learning_rate": 1.4487991802004625e-05,
"loss": 1.7266,
"step": 426
},
{
"epoch": 4.186274509803922,
"grad_norm": 14.525657653808594,
"learning_rate": 1.4457383557765385e-05,
"loss": 0.4744,
"step": 427
},
{
"epoch": 4.196078431372549,
"grad_norm": 30.405841827392578,
"learning_rate": 1.442672311070595e-05,
"loss": 0.5775,
"step": 428
},
{
"epoch": 4.205882352941177,
"grad_norm": 28.066011428833008,
"learning_rate": 1.439601081990734e-05,
"loss": 0.4439,
"step": 429
},
{
"epoch": 4.215686274509804,
"grad_norm": 47.19637680053711,
"learning_rate": 1.4365247045057732e-05,
"loss": 1.3623,
"step": 430
},
{
"epoch": 4.2254901960784315,
"grad_norm": 9.484166145324707,
"learning_rate": 1.4334432146448272e-05,
"loss": 0.2321,
"step": 431
},
{
"epoch": 4.235294117647059,
"grad_norm": 23.378170013427734,
"learning_rate": 1.4303566484968836e-05,
"loss": 0.6784,
"step": 432
},
{
"epoch": 4.245098039215686,
"grad_norm": 45.757328033447266,
"learning_rate": 1.427265042210381e-05,
"loss": 2.2527,
"step": 433
},
{
"epoch": 4.254901960784314,
"grad_norm": 16.887813568115234,
"learning_rate": 1.4241684319927869e-05,
"loss": 0.2091,
"step": 434
},
{
"epoch": 4.264705882352941,
"grad_norm": 11.387648582458496,
"learning_rate": 1.4210668541101713e-05,
"loss": 0.3422,
"step": 435
},
{
"epoch": 4.2745098039215685,
"grad_norm": 18.770689010620117,
"learning_rate": 1.4179603448867836e-05,
"loss": 0.7188,
"step": 436
},
{
"epoch": 4.284313725490196,
"grad_norm": 11.738122940063477,
"learning_rate": 1.4148489407046274e-05,
"loss": 0.4749,
"step": 437
},
{
"epoch": 4.294117647058823,
"grad_norm": 45.838897705078125,
"learning_rate": 1.411732678003033e-05,
"loss": 0.9337,
"step": 438
},
{
"epoch": 4.303921568627451,
"grad_norm": 14.99522876739502,
"learning_rate": 1.4086115932782316e-05,
"loss": 0.2575,
"step": 439
},
{
"epoch": 4.313725490196078,
"grad_norm": 22.09161949157715,
"learning_rate": 1.4054857230829284e-05,
"loss": 0.5921,
"step": 440
},
{
"epoch": 4.323529411764706,
"grad_norm": 45.57215881347656,
"learning_rate": 1.4023551040258726e-05,
"loss": 1.2174,
"step": 441
},
{
"epoch": 4.333333333333333,
"grad_norm": 22.06878662109375,
"learning_rate": 1.399219772771431e-05,
"loss": 0.5094,
"step": 442
},
{
"epoch": 4.3431372549019605,
"grad_norm": 48.54280471801758,
"learning_rate": 1.396079766039157e-05,
"loss": 3.8625,
"step": 443
},
{
"epoch": 4.352941176470588,
"grad_norm": 50.120140075683594,
"learning_rate": 1.3929351206033607e-05,
"loss": 1.5764,
"step": 444
},
{
"epoch": 4.362745098039215,
"grad_norm": 69.57695770263672,
"learning_rate": 1.3897858732926794e-05,
"loss": 2.267,
"step": 445
},
{
"epoch": 4.372549019607844,
"grad_norm": 23.35730743408203,
"learning_rate": 1.3866320609896449e-05,
"loss": 0.8361,
"step": 446
},
{
"epoch": 4.382352941176471,
"grad_norm": 76.67755889892578,
"learning_rate": 1.3834737206302519e-05,
"loss": 2.5708,
"step": 447
},
{
"epoch": 4.392156862745098,
"grad_norm": 46.89466857910156,
"learning_rate": 1.3803108892035259e-05,
"loss": 1.0165,
"step": 448
},
{
"epoch": 4.401960784313726,
"grad_norm": 37.923797607421875,
"learning_rate": 1.3771436037510897e-05,
"loss": 0.9901,
"step": 449
},
{
"epoch": 4.411764705882353,
"grad_norm": 53.60729217529297,
"learning_rate": 1.3739719013667297e-05,
"loss": 2.3626,
"step": 450
},
{
"epoch": 4.421568627450981,
"grad_norm": 20.109682083129883,
"learning_rate": 1.3707958191959609e-05,
"loss": 0.4889,
"step": 451
},
{
"epoch": 4.431372549019608,
"grad_norm": 40.12085723876953,
"learning_rate": 1.367615394435593e-05,
"loss": 1.2405,
"step": 452
},
{
"epoch": 4.4411764705882355,
"grad_norm": 16.053781509399414,
"learning_rate": 1.3644306643332939e-05,
"loss": 0.3081,
"step": 453
},
{
"epoch": 4.450980392156863,
"grad_norm": 46.42075729370117,
"learning_rate": 1.3612416661871532e-05,
"loss": 1.2049,
"step": 454
},
{
"epoch": 4.46078431372549,
"grad_norm": 50.46061706542969,
"learning_rate": 1.3580484373452462e-05,
"loss": 1.3629,
"step": 455
},
{
"epoch": 4.470588235294118,
"grad_norm": 11.081779479980469,
"learning_rate": 1.3548510152051963e-05,
"loss": 0.3651,
"step": 456
},
{
"epoch": 4.480392156862745,
"grad_norm": 15.377547264099121,
"learning_rate": 1.3516494372137368e-05,
"loss": 0.3298,
"step": 457
},
{
"epoch": 4.490196078431373,
"grad_norm": 11.707538604736328,
"learning_rate": 1.3484437408662725e-05,
"loss": 0.2576,
"step": 458
},
{
"epoch": 4.5,
"grad_norm": 28.47398567199707,
"learning_rate": 1.34523396370644e-05,
"loss": 0.5005,
"step": 459
},
{
"epoch": 4.509803921568627,
"grad_norm": 44.449241638183594,
"learning_rate": 1.342020143325669e-05,
"loss": 1.3059,
"step": 460
},
{
"epoch": 4.519607843137255,
"grad_norm": 18.449378967285156,
"learning_rate": 1.3388023173627413e-05,
"loss": 0.4972,
"step": 461
},
{
"epoch": 4.529411764705882,
"grad_norm": 20.04595375061035,
"learning_rate": 1.3355805235033503e-05,
"loss": 0.2702,
"step": 462
},
{
"epoch": 4.53921568627451,
"grad_norm": 13.716774940490723,
"learning_rate": 1.3323547994796597e-05,
"loss": 0.4177,
"step": 463
},
{
"epoch": 4.549019607843137,
"grad_norm": 37.53811264038086,
"learning_rate": 1.3291251830698615e-05,
"loss": 1.1491,
"step": 464
},
{
"epoch": 4.5588235294117645,
"grad_norm": 34.22946548461914,
"learning_rate": 1.3258917120977327e-05,
"loss": 0.8601,
"step": 465
},
{
"epoch": 4.568627450980392,
"grad_norm": 15.655835151672363,
"learning_rate": 1.322654424432195e-05,
"loss": 0.3014,
"step": 466
},
{
"epoch": 4.578431372549019,
"grad_norm": 38.49649429321289,
"learning_rate": 1.3194133579868672e-05,
"loss": 0.3109,
"step": 467
},
{
"epoch": 4.588235294117647,
"grad_norm": 19.030349731445312,
"learning_rate": 1.3161685507196251e-05,
"loss": 0.5373,
"step": 468
},
{
"epoch": 4.598039215686274,
"grad_norm": 88.58914947509766,
"learning_rate": 1.3129200406321545e-05,
"loss": 2.7848,
"step": 469
},
{
"epoch": 4.607843137254902,
"grad_norm": 17.441198348999023,
"learning_rate": 1.3096678657695072e-05,
"loss": 0.3327,
"step": 470
},
{
"epoch": 4.617647058823529,
"grad_norm": 9.304110527038574,
"learning_rate": 1.3064120642196549e-05,
"loss": 0.205,
"step": 471
},
{
"epoch": 4.627450980392156,
"grad_norm": 35.4295654296875,
"learning_rate": 1.3031526741130435e-05,
"loss": 0.957,
"step": 472
},
{
"epoch": 4.637254901960784,
"grad_norm": 6.130290985107422,
"learning_rate": 1.299889733622147e-05,
"loss": 0.1345,
"step": 473
},
{
"epoch": 4.647058823529412,
"grad_norm": 18.754501342773438,
"learning_rate": 1.2966232809610189e-05,
"loss": 0.2789,
"step": 474
},
{
"epoch": 4.6568627450980395,
"grad_norm": 34.85990905761719,
"learning_rate": 1.2933533543848462e-05,
"loss": 0.9098,
"step": 475
},
{
"epoch": 4.666666666666667,
"grad_norm": 70.20014953613281,
"learning_rate": 1.2900799921895004e-05,
"loss": 2.7092,
"step": 476
},
{
"epoch": 4.676470588235294,
"grad_norm": 61.75537109375,
"learning_rate": 1.2868032327110904e-05,
"loss": 1.7403,
"step": 477
},
{
"epoch": 4.686274509803922,
"grad_norm": 27.4073543548584,
"learning_rate": 1.283523114325511e-05,
"loss": 0.4427,
"step": 478
},
{
"epoch": 4.696078431372549,
"grad_norm": 25.60388946533203,
"learning_rate": 1.2802396754479958e-05,
"loss": 0.8062,
"step": 479
},
{
"epoch": 4.705882352941177,
"grad_norm": 50.109405517578125,
"learning_rate": 1.2769529545326669e-05,
"loss": 1.1155,
"step": 480
},
{
"epoch": 4.715686274509804,
"grad_norm": 35.40762710571289,
"learning_rate": 1.2736629900720832e-05,
"loss": 0.7681,
"step": 481
},
{
"epoch": 4.7254901960784315,
"grad_norm": 40.7324333190918,
"learning_rate": 1.2703698205967907e-05,
"loss": 0.9159,
"step": 482
},
{
"epoch": 4.735294117647059,
"grad_norm": 32.49546432495117,
"learning_rate": 1.2670734846748717e-05,
"loss": 0.9655,
"step": 483
},
{
"epoch": 4.745098039215686,
"grad_norm": 82.01554870605469,
"learning_rate": 1.2637740209114918e-05,
"loss": 1.2566,
"step": 484
},
{
"epoch": 4.754901960784314,
"grad_norm": 47.905113220214844,
"learning_rate": 1.260471467948449e-05,
"loss": 1.3371,
"step": 485
},
{
"epoch": 4.764705882352941,
"grad_norm": 43.50104522705078,
"learning_rate": 1.25716586446372e-05,
"loss": 0.8586,
"step": 486
},
{
"epoch": 4.7745098039215685,
"grad_norm": 35.68792724609375,
"learning_rate": 1.2538572491710079e-05,
"loss": 0.8426,
"step": 487
},
{
"epoch": 4.784313725490196,
"grad_norm": 11.195624351501465,
"learning_rate": 1.2505456608192889e-05,
"loss": 0.4057,
"step": 488
},
{
"epoch": 4.794117647058823,
"grad_norm": 99.8272933959961,
"learning_rate": 1.247231138192359e-05,
"loss": 1.6484,
"step": 489
},
{
"epoch": 4.803921568627451,
"grad_norm": 31.994155883789062,
"learning_rate": 1.2439137201083772e-05,
"loss": 0.8504,
"step": 490
},
{
"epoch": 4.813725490196078,
"grad_norm": 6.590636730194092,
"learning_rate": 1.2405934454194146e-05,
"loss": 0.1841,
"step": 491
},
{
"epoch": 4.823529411764706,
"grad_norm": 33.11680221557617,
"learning_rate": 1.2372703530109967e-05,
"loss": 0.6473,
"step": 492
},
{
"epoch": 4.833333333333333,
"grad_norm": 6.750977039337158,
"learning_rate": 1.2339444818016488e-05,
"loss": 0.1751,
"step": 493
},
{
"epoch": 4.8431372549019605,
"grad_norm": 20.01805877685547,
"learning_rate": 1.2306158707424402e-05,
"loss": 0.3423,
"step": 494
},
{
"epoch": 4.852941176470588,
"grad_norm": 70.0676498413086,
"learning_rate": 1.227284558816529e-05,
"loss": 0.9846,
"step": 495
},
{
"epoch": 4.862745098039216,
"grad_norm": 43.0538444519043,
"learning_rate": 1.2239505850387032e-05,
"loss": 0.8286,
"step": 496
},
{
"epoch": 4.872549019607844,
"grad_norm": 11.747536659240723,
"learning_rate": 1.220613988454926e-05,
"loss": 0.2899,
"step": 497
},
{
"epoch": 4.882352941176471,
"grad_norm": 25.99458122253418,
"learning_rate": 1.2172748081418775e-05,
"loss": 0.8783,
"step": 498
},
{
"epoch": 4.892156862745098,
"grad_norm": 22.697128295898438,
"learning_rate": 1.2139330832064975e-05,
"loss": 0.7759,
"step": 499
},
{
"epoch": 4.901960784313726,
"grad_norm": 63.33037567138672,
"learning_rate": 1.210588852785527e-05,
"loss": 3.1335,
"step": 500
},
{
"epoch": 4.911764705882353,
"grad_norm": 13.776933670043945,
"learning_rate": 1.2072421560450497e-05,
"loss": 0.4373,
"step": 501
},
{
"epoch": 4.921568627450981,
"grad_norm": 54.81978988647461,
"learning_rate": 1.2038930321800346e-05,
"loss": 1.1926,
"step": 502
},
{
"epoch": 4.931372549019608,
"grad_norm": 75.68399047851562,
"learning_rate": 1.2005415204138753e-05,
"loss": 2.6567,
"step": 503
},
{
"epoch": 4.9411764705882355,
"grad_norm": 67.70237731933594,
"learning_rate": 1.197187659997932e-05,
"loss": 1.9625,
"step": 504
},
{
"epoch": 4.950980392156863,
"grad_norm": 91.70159149169922,
"learning_rate": 1.1938314902110701e-05,
"loss": 2.3935,
"step": 505
},
{
"epoch": 4.96078431372549,
"grad_norm": 55.2404670715332,
"learning_rate": 1.190473050359203e-05,
"loss": 1.3384,
"step": 506
},
{
"epoch": 4.970588235294118,
"grad_norm": 25.485261917114258,
"learning_rate": 1.1871123797748285e-05,
"loss": 0.6214,
"step": 507
},
{
"epoch": 4.980392156862745,
"grad_norm": 8.1741361618042,
"learning_rate": 1.1837495178165706e-05,
"loss": 0.2068,
"step": 508
},
{
"epoch": 4.990196078431373,
"grad_norm": 33.370906829833984,
"learning_rate": 1.1803845038687171e-05,
"loss": 1.1153,
"step": 509
},
{
"epoch": 5.0,
"grad_norm": 9.367639541625977,
"learning_rate": 1.1770173773407594e-05,
"loss": 0.2192,
"step": 510
},
{
"epoch": 5.0,
"eval_dim_128_cosine_accuracy@1": 0.45454545454545453,
"eval_dim_128_cosine_accuracy@10": 0.5675675675675675,
"eval_dim_128_cosine_accuracy@3": 0.49385749385749383,
"eval_dim_128_cosine_accuracy@5": 0.5208845208845209,
"eval_dim_128_cosine_map@100": 0.5603109985351792,
"eval_dim_128_cosine_mrr@10": 0.48035958035958043,
"eval_dim_128_cosine_ndcg@10": 0.5062551057228073,
"eval_dim_128_cosine_precision@1": 0.45454545454545453,
"eval_dim_128_cosine_precision@10": 0.36732186732186733,
"eval_dim_128_cosine_precision@3": 0.443079443079443,
"eval_dim_128_cosine_precision@5": 0.41818181818181815,
"eval_dim_128_cosine_recall@1": 0.08397723550111379,
"eval_dim_128_cosine_recall@10": 0.39422932906796443,
"eval_dim_128_cosine_recall@3": 0.21211236668400155,
"eval_dim_128_cosine_recall@5": 0.287989095608487,
"eval_dim_256_cosine_accuracy@1": 0.47911547911547914,
"eval_dim_256_cosine_accuracy@10": 0.5847665847665847,
"eval_dim_256_cosine_accuracy@3": 0.5282555282555282,
"eval_dim_256_cosine_accuracy@5": 0.5503685503685504,
"eval_dim_256_cosine_map@100": 0.5878980194902607,
"eval_dim_256_cosine_mrr@10": 0.5055097305097306,
"eval_dim_256_cosine_ndcg@10": 0.5325550313814882,
"eval_dim_256_cosine_precision@1": 0.47911547911547914,
"eval_dim_256_cosine_precision@10": 0.38845208845208845,
"eval_dim_256_cosine_precision@3": 0.4692874692874693,
"eval_dim_256_cosine_precision@5": 0.4437346437346437,
"eval_dim_256_cosine_recall@1": 0.08827454382271678,
"eval_dim_256_cosine_recall@10": 0.41109307509753196,
"eval_dim_256_cosine_recall@3": 0.22471764136216021,
"eval_dim_256_cosine_recall@5": 0.3054435206843854,
"eval_dim_512_cosine_accuracy@1": 0.48402948402948404,
"eval_dim_512_cosine_accuracy@10": 0.6191646191646192,
"eval_dim_512_cosine_accuracy@3": 0.5331695331695332,
"eval_dim_512_cosine_accuracy@5": 0.5823095823095823,
"eval_dim_512_cosine_map@100": 0.6050621703698408,
"eval_dim_512_cosine_mrr@10": 0.5165555165555165,
"eval_dim_512_cosine_ndcg@10": 0.5498543196969946,
"eval_dim_512_cosine_precision@1": 0.48402948402948404,
"eval_dim_512_cosine_precision@10": 0.4027027027027027,
"eval_dim_512_cosine_precision@3": 0.47502047502047495,
"eval_dim_512_cosine_precision@5": 0.45454545454545453,
"eval_dim_512_cosine_recall@1": 0.08949432590456784,
"eval_dim_512_cosine_recall@10": 0.4315256936453543,
"eval_dim_512_cosine_recall@3": 0.22986683823549506,
"eval_dim_512_cosine_recall@5": 0.31724461295961565,
"eval_dim_64_cosine_accuracy@1": 0.39803439803439805,
"eval_dim_64_cosine_accuracy@10": 0.4987714987714988,
"eval_dim_64_cosine_accuracy@3": 0.44471744471744473,
"eval_dim_64_cosine_accuracy@5": 0.4742014742014742,
"eval_dim_64_cosine_map@100": 0.5057418600512718,
"eval_dim_64_cosine_mrr@10": 0.42407764907764905,
"eval_dim_64_cosine_ndcg@10": 0.44895723420736994,
"eval_dim_64_cosine_precision@1": 0.39803439803439805,
"eval_dim_64_cosine_precision@10": 0.3287469287469288,
"eval_dim_64_cosine_precision@3": 0.3955773955773956,
"eval_dim_64_cosine_precision@5": 0.3759213759213759,
"eval_dim_64_cosine_recall@1": 0.07035944693401285,
"eval_dim_64_cosine_recall@10": 0.34306780244982527,
"eval_dim_64_cosine_recall@3": 0.18502162724532498,
"eval_dim_64_cosine_recall@5": 0.25450147487430447,
"eval_dim_768_cosine_accuracy@1": 0.48894348894348894,
"eval_dim_768_cosine_accuracy@10": 0.6117936117936118,
"eval_dim_768_cosine_accuracy@3": 0.5405405405405406,
"eval_dim_768_cosine_accuracy@5": 0.5749385749385749,
"eval_dim_768_cosine_map@100": 0.6032680456666982,
"eval_dim_768_cosine_mrr@10": 0.5188867438867439,
"eval_dim_768_cosine_ndcg@10": 0.5473414475281431,
"eval_dim_768_cosine_precision@1": 0.48894348894348894,
"eval_dim_768_cosine_precision@10": 0.39852579852579856,
"eval_dim_768_cosine_precision@3": 0.48157248157248156,
"eval_dim_768_cosine_precision@5": 0.4574938574938575,
"eval_dim_768_cosine_recall@1": 0.0886840657095114,
"eval_dim_768_cosine_recall@10": 0.4266798333915095,
"eval_dim_768_cosine_recall@3": 0.23252746360380064,
"eval_dim_768_cosine_recall@5": 0.31903859916582755,
"eval_runtime": 112.1158,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.44895723420736994,
"eval_steps_per_second": 0.0,
"step": 510
},
{
"epoch": 5.009803921568627,
"grad_norm": 18.287439346313477,
"learning_rate": 1.1736481776669307e-05,
"loss": 0.4635,
"step": 511
},
{
"epoch": 5.019607843137255,
"grad_norm": 16.274803161621094,
"learning_rate": 1.1702769443057425e-05,
"loss": 0.3765,
"step": 512
},
{
"epoch": 5.029411764705882,
"grad_norm": 40.07232666015625,
"learning_rate": 1.1669037167395256e-05,
"loss": 1.3445,
"step": 513
},
{
"epoch": 5.03921568627451,
"grad_norm": 6.797827243804932,
"learning_rate": 1.163528534473965e-05,
"loss": 0.1555,
"step": 514
},
{
"epoch": 5.049019607843137,
"grad_norm": 39.5912971496582,
"learning_rate": 1.160151437037639e-05,
"loss": 1.4591,
"step": 515
},
{
"epoch": 5.0588235294117645,
"grad_norm": 29.114177703857422,
"learning_rate": 1.1567724639815546e-05,
"loss": 0.797,
"step": 516
},
{
"epoch": 5.068627450980392,
"grad_norm": 23.631912231445312,
"learning_rate": 1.1533916548786856e-05,
"loss": 0.5968,
"step": 517
},
{
"epoch": 5.078431372549019,
"grad_norm": 5.112504005432129,
"learning_rate": 1.1500090493235088e-05,
"loss": 0.1294,
"step": 518
},
{
"epoch": 5.088235294117647,
"grad_norm": 57.03156280517578,
"learning_rate": 1.1466246869315407e-05,
"loss": 1.1385,
"step": 519
},
{
"epoch": 5.098039215686274,
"grad_norm": 3.7766454219818115,
"learning_rate": 1.1432386073388718e-05,
"loss": 0.0714,
"step": 520
},
{
"epoch": 5.107843137254902,
"grad_norm": 17.038589477539062,
"learning_rate": 1.1398508502017047e-05,
"loss": 0.2861,
"step": 521
},
{
"epoch": 5.117647058823529,
"grad_norm": 42.3565559387207,
"learning_rate": 1.136461455195888e-05,
"loss": 1.9842,
"step": 522
},
{
"epoch": 5.127450980392156,
"grad_norm": 36.66664123535156,
"learning_rate": 1.133070462016454e-05,
"loss": 1.3137,
"step": 523
},
{
"epoch": 5.137254901960785,
"grad_norm": 41.97998046875,
"learning_rate": 1.129677910377149e-05,
"loss": 2.5049,
"step": 524
},
{
"epoch": 5.147058823529412,
"grad_norm": 12.782203674316406,
"learning_rate": 1.1262838400099733e-05,
"loss": 0.2286,
"step": 525
},
{
"epoch": 5.1568627450980395,
"grad_norm": 21.99018669128418,
"learning_rate": 1.1228882906647142e-05,
"loss": 0.7288,
"step": 526
},
{
"epoch": 5.166666666666667,
"grad_norm": 14.436247825622559,
"learning_rate": 1.119491302108479e-05,
"loss": 0.3438,
"step": 527
},
{
"epoch": 5.176470588235294,
"grad_norm": 45.806724548339844,
"learning_rate": 1.1160929141252303e-05,
"loss": 1.3318,
"step": 528
},
{
"epoch": 5.186274509803922,
"grad_norm": 42.586368560791016,
"learning_rate": 1.1126931665153213e-05,
"loss": 1.1948,
"step": 529
},
{
"epoch": 5.196078431372549,
"grad_norm": 45.92109680175781,
"learning_rate": 1.1092920990950276e-05,
"loss": 1.1211,
"step": 530
},
{
"epoch": 5.205882352941177,
"grad_norm": 24.936033248901367,
"learning_rate": 1.1058897516960817e-05,
"loss": 0.5072,
"step": 531
},
{
"epoch": 5.215686274509804,
"grad_norm": 23.934478759765625,
"learning_rate": 1.102486164165207e-05,
"loss": 0.5509,
"step": 532
},
{
"epoch": 5.2254901960784315,
"grad_norm": 19.33404541015625,
"learning_rate": 1.0990813763636511e-05,
"loss": 0.3087,
"step": 533
},
{
"epoch": 5.235294117647059,
"grad_norm": 5.541775703430176,
"learning_rate": 1.0956754281667182e-05,
"loss": 0.091,
"step": 534
},
{
"epoch": 5.245098039215686,
"grad_norm": 3.9329075813293457,
"learning_rate": 1.092268359463302e-05,
"loss": 0.0683,
"step": 535
},
{
"epoch": 5.254901960784314,
"grad_norm": 5.671967029571533,
"learning_rate": 1.0888602101554202e-05,
"loss": 0.1064,
"step": 536
},
{
"epoch": 5.264705882352941,
"grad_norm": 23.599103927612305,
"learning_rate": 1.0854510201577451e-05,
"loss": 0.2124,
"step": 537
},
{
"epoch": 5.2745098039215685,
"grad_norm": 6.7037034034729,
"learning_rate": 1.082040829397138e-05,
"loss": 0.1575,
"step": 538
},
{
"epoch": 5.284313725490196,
"grad_norm": 9.288305282592773,
"learning_rate": 1.0786296778121787e-05,
"loss": 0.2538,
"step": 539
},
{
"epoch": 5.294117647058823,
"grad_norm": 61.20328140258789,
"learning_rate": 1.0752176053527025e-05,
"loss": 1.2086,
"step": 540
},
{
"epoch": 5.303921568627451,
"grad_norm": 45.325164794921875,
"learning_rate": 1.0718046519793276e-05,
"loss": 0.714,
"step": 541
},
{
"epoch": 5.313725490196078,
"grad_norm": 57.097469329833984,
"learning_rate": 1.06839085766299e-05,
"loss": 1.2357,
"step": 542
},
{
"epoch": 5.323529411764706,
"grad_norm": 27.721464157104492,
"learning_rate": 1.0649762623844733e-05,
"loss": 0.4326,
"step": 543
},
{
"epoch": 5.333333333333333,
"grad_norm": 8.938958168029785,
"learning_rate": 1.0615609061339431e-05,
"loss": 0.162,
"step": 544
},
{
"epoch": 5.3431372549019605,
"grad_norm": 67.71815490722656,
"learning_rate": 1.0581448289104759e-05,
"loss": 3.2438,
"step": 545
},
{
"epoch": 5.352941176470588,
"grad_norm": 21.078720092773438,
"learning_rate": 1.054728070721593e-05,
"loss": 0.3651,
"step": 546
},
{
"epoch": 5.362745098039215,
"grad_norm": 51.107810974121094,
"learning_rate": 1.0513106715827897e-05,
"loss": 1.5591,
"step": 547
},
{
"epoch": 5.372549019607844,
"grad_norm": 48.679931640625,
"learning_rate": 1.0478926715170687e-05,
"loss": 1.2833,
"step": 548
},
{
"epoch": 5.382352941176471,
"grad_norm": 32.35419464111328,
"learning_rate": 1.0444741105544705e-05,
"loss": 1.0673,
"step": 549
},
{
"epoch": 5.392156862745098,
"grad_norm": 48.693363189697266,
"learning_rate": 1.0410550287316035e-05,
"loss": 1.0428,
"step": 550
},
{
"epoch": 5.401960784313726,
"grad_norm": 23.73291778564453,
"learning_rate": 1.0376354660911772e-05,
"loss": 0.2781,
"step": 551
},
{
"epoch": 5.411764705882353,
"grad_norm": 18.309711456298828,
"learning_rate": 1.0342154626815321e-05,
"loss": 0.5207,
"step": 552
},
{
"epoch": 5.421568627450981,
"grad_norm": 6.845706939697266,
"learning_rate": 1.0307950585561705e-05,
"loss": 0.1488,
"step": 553
},
{
"epoch": 5.431372549019608,
"grad_norm": 37.41105651855469,
"learning_rate": 1.0273742937732877e-05,
"loss": 0.8952,
"step": 554
},
{
"epoch": 5.4411764705882355,
"grad_norm": 55.09328842163086,
"learning_rate": 1.0239532083953032e-05,
"loss": 1.8624,
"step": 555
},
{
"epoch": 5.450980392156863,
"grad_norm": 28.785524368286133,
"learning_rate": 1.0205318424883906e-05,
"loss": 0.478,
"step": 556
},
{
"epoch": 5.46078431372549,
"grad_norm": 11.9961519241333,
"learning_rate": 1.0171102361220093e-05,
"loss": 0.4321,
"step": 557
},
{
"epoch": 5.470588235294118,
"grad_norm": 29.817943572998047,
"learning_rate": 1.013688429368435e-05,
"loss": 0.4267,
"step": 558
},
{
"epoch": 5.480392156862745,
"grad_norm": 11.442191123962402,
"learning_rate": 1.01026646230229e-05,
"loss": 0.1908,
"step": 559
},
{
"epoch": 5.490196078431373,
"grad_norm": 21.462247848510742,
"learning_rate": 1.006844375000074e-05,
"loss": 0.4265,
"step": 560
},
{
"epoch": 5.5,
"grad_norm": 15.826882362365723,
"learning_rate": 1.0034222075396954e-05,
"loss": 0.3623,
"step": 561
},
{
"epoch": 5.509803921568627,
"grad_norm": 26.226381301879883,
"learning_rate": 1e-05,
"loss": 0.3901,
"step": 562
},
{
"epoch": 5.519607843137255,
"grad_norm": 8.917034149169922,
"learning_rate": 9.965777924603053e-06,
"loss": 0.2053,
"step": 563
},
{
"epoch": 5.529411764705882,
"grad_norm": 22.18689727783203,
"learning_rate": 9.931556249999262e-06,
"loss": 0.5849,
"step": 564
},
{
"epoch": 5.53921568627451,
"grad_norm": 44.32406997680664,
"learning_rate": 9.897335376977104e-06,
"loss": 1.1978,
"step": 565
},
{
"epoch": 5.549019607843137,
"grad_norm": 42.73876953125,
"learning_rate": 9.863115706315652e-06,
"loss": 1.1177,
"step": 566
},
{
"epoch": 5.5588235294117645,
"grad_norm": 193.61639404296875,
"learning_rate": 9.828897638779909e-06,
"loss": 3.9417,
"step": 567
},
{
"epoch": 5.568627450980392,
"grad_norm": 2.4439382553100586,
"learning_rate": 9.794681575116097e-06,
"loss": 0.0611,
"step": 568
},
{
"epoch": 5.578431372549019,
"grad_norm": 59.247318267822266,
"learning_rate": 9.760467916046971e-06,
"loss": 1.8463,
"step": 569
},
{
"epoch": 5.588235294117647,
"grad_norm": 10.288777351379395,
"learning_rate": 9.726257062267124e-06,
"loss": 0.3208,
"step": 570
},
{
"epoch": 5.598039215686274,
"grad_norm": 10.400925636291504,
"learning_rate": 9.692049414438298e-06,
"loss": 0.2139,
"step": 571
},
{
"epoch": 5.607843137254902,
"grad_norm": 24.203197479248047,
"learning_rate": 9.65784537318468e-06,
"loss": 0.3233,
"step": 572
},
{
"epoch": 5.617647058823529,
"grad_norm": 39.97361373901367,
"learning_rate": 9.62364533908823e-06,
"loss": 1.1404,
"step": 573
},
{
"epoch": 5.627450980392156,
"grad_norm": 19.145709991455078,
"learning_rate": 9.58944971268397e-06,
"loss": 0.5637,
"step": 574
},
{
"epoch": 5.637254901960784,
"grad_norm": 57.55147171020508,
"learning_rate": 9.555258894455298e-06,
"loss": 1.2947,
"step": 575
},
{
"epoch": 5.647058823529412,
"grad_norm": 36.118831634521484,
"learning_rate": 9.521073284829315e-06,
"loss": 0.5029,
"step": 576
},
{
"epoch": 5.6568627450980395,
"grad_norm": 26.942630767822266,
"learning_rate": 9.486893284172103e-06,
"loss": 0.9816,
"step": 577
},
{
"epoch": 5.666666666666667,
"grad_norm": 42.2514762878418,
"learning_rate": 9.452719292784074e-06,
"loss": 1.0183,
"step": 578
},
{
"epoch": 5.676470588235294,
"grad_norm": 30.29510498046875,
"learning_rate": 9.418551710895243e-06,
"loss": 0.9679,
"step": 579
},
{
"epoch": 5.686274509803922,
"grad_norm": 15.582209587097168,
"learning_rate": 9.384390938660572e-06,
"loss": 0.4796,
"step": 580
},
{
"epoch": 5.696078431372549,
"grad_norm": 38.37430191040039,
"learning_rate": 9.350237376155269e-06,
"loss": 1.4002,
"step": 581
},
{
"epoch": 5.705882352941177,
"grad_norm": 19.0704345703125,
"learning_rate": 9.316091423370105e-06,
"loss": 0.4527,
"step": 582
},
{
"epoch": 5.715686274509804,
"grad_norm": 93.1865463256836,
"learning_rate": 9.281953480206725e-06,
"loss": 2.5092,
"step": 583
},
{
"epoch": 5.7254901960784315,
"grad_norm": 62.14712142944336,
"learning_rate": 9.247823946472978e-06,
"loss": 2.203,
"step": 584
},
{
"epoch": 5.735294117647059,
"grad_norm": 27.992765426635742,
"learning_rate": 9.213703221878217e-06,
"loss": 0.4727,
"step": 585
},
{
"epoch": 5.745098039215686,
"grad_norm": 43.58892059326172,
"learning_rate": 9.179591706028626e-06,
"loss": 0.9486,
"step": 586
},
{
"epoch": 5.754901960784314,
"grad_norm": 56.890865325927734,
"learning_rate": 9.14548979842255e-06,
"loss": 1.0363,
"step": 587
},
{
"epoch": 5.764705882352941,
"grad_norm": 2.498443603515625,
"learning_rate": 9.111397898445798e-06,
"loss": 0.0555,
"step": 588
},
{
"epoch": 5.7745098039215685,
"grad_norm": 9.865997314453125,
"learning_rate": 9.07731640536698e-06,
"loss": 0.152,
"step": 589
},
{
"epoch": 5.784313725490196,
"grad_norm": 5.903684616088867,
"learning_rate": 9.043245718332821e-06,
"loss": 0.1018,
"step": 590
},
{
"epoch": 5.794117647058823,
"grad_norm": 39.71007537841797,
"learning_rate": 9.00918623636349e-06,
"loss": 1.252,
"step": 591
},
{
"epoch": 5.803921568627451,
"grad_norm": 34.80263137817383,
"learning_rate": 8.975138358347931e-06,
"loss": 0.6691,
"step": 592
},
{
"epoch": 5.813725490196078,
"grad_norm": 44.590431213378906,
"learning_rate": 8.941102483039188e-06,
"loss": 1.5344,
"step": 593
},
{
"epoch": 5.823529411764706,
"grad_norm": 28.364145278930664,
"learning_rate": 8.907079009049728e-06,
"loss": 0.5273,
"step": 594
},
{
"epoch": 5.833333333333333,
"grad_norm": 8.224447250366211,
"learning_rate": 8.87306833484679e-06,
"loss": 0.1534,
"step": 595
},
{
"epoch": 5.8431372549019605,
"grad_norm": 7.280407905578613,
"learning_rate": 8.839070858747697e-06,
"loss": 0.16,
"step": 596
},
{
"epoch": 5.852941176470588,
"grad_norm": 53.77709197998047,
"learning_rate": 8.805086978915215e-06,
"loss": 1.128,
"step": 597
},
{
"epoch": 5.862745098039216,
"grad_norm": 12.247149467468262,
"learning_rate": 8.771117093352861e-06,
"loss": 0.2065,
"step": 598
},
{
"epoch": 5.872549019607844,
"grad_norm": 16.858009338378906,
"learning_rate": 8.737161599900267e-06,
"loss": 0.3988,
"step": 599
},
{
"epoch": 5.882352941176471,
"grad_norm": 5.127101421356201,
"learning_rate": 8.703220896228515e-06,
"loss": 0.107,
"step": 600
},
{
"epoch": 5.892156862745098,
"grad_norm": 30.363554000854492,
"learning_rate": 8.669295379835467e-06,
"loss": 0.5792,
"step": 601
},
{
"epoch": 5.901960784313726,
"grad_norm": 32.23699951171875,
"learning_rate": 8.63538544804112e-06,
"loss": 0.8125,
"step": 602
},
{
"epoch": 5.911764705882353,
"grad_norm": 9.517292022705078,
"learning_rate": 8.601491497982956e-06,
"loss": 0.19,
"step": 603
},
{
"epoch": 5.921568627450981,
"grad_norm": 33.7909049987793,
"learning_rate": 8.567613926611287e-06,
"loss": 0.2934,
"step": 604
},
{
"epoch": 5.931372549019608,
"grad_norm": 63.455528259277344,
"learning_rate": 8.533753130684596e-06,
"loss": 1.3682,
"step": 605
},
{
"epoch": 5.9411764705882355,
"grad_norm": 20.38621711730957,
"learning_rate": 8.499909506764914e-06,
"loss": 0.4138,
"step": 606
},
{
"epoch": 5.950980392156863,
"grad_norm": 12.571402549743652,
"learning_rate": 8.466083451213145e-06,
"loss": 0.2711,
"step": 607
},
{
"epoch": 5.96078431372549,
"grad_norm": 63.67697525024414,
"learning_rate": 8.432275360184458e-06,
"loss": 0.7787,
"step": 608
},
{
"epoch": 5.970588235294118,
"grad_norm": 19.5760440826416,
"learning_rate": 8.398485629623613e-06,
"loss": 0.4397,
"step": 609
},
{
"epoch": 5.980392156862745,
"grad_norm": 19.238487243652344,
"learning_rate": 8.36471465526035e-06,
"loss": 0.4851,
"step": 610
},
{
"epoch": 5.990196078431373,
"grad_norm": 11.60233211517334,
"learning_rate": 8.330962832604747e-06,
"loss": 0.2568,
"step": 611
},
{
"epoch": 6.0,
"grad_norm": 11.525057792663574,
"learning_rate": 8.29723055694258e-06,
"loss": 0.2262,
"step": 612
},
{
"epoch": 6.0,
"eval_dim_128_cosine_accuracy@1": 0.44963144963144963,
"eval_dim_128_cosine_accuracy@10": 0.5503685503685504,
"eval_dim_128_cosine_accuracy@3": 0.47911547911547914,
"eval_dim_128_cosine_accuracy@5": 0.5184275184275184,
"eval_dim_128_cosine_map@100": 0.5534135693873364,
"eval_dim_128_cosine_mrr@10": 0.4721910221910222,
"eval_dim_128_cosine_ndcg@10": 0.4973319572194222,
"eval_dim_128_cosine_precision@1": 0.44963144963144963,
"eval_dim_128_cosine_precision@10": 0.3626535626535627,
"eval_dim_128_cosine_precision@3": 0.4348894348894348,
"eval_dim_128_cosine_precision@5": 0.4083538083538083,
"eval_dim_128_cosine_recall@1": 0.08310336633179008,
"eval_dim_128_cosine_recall@10": 0.38807458205236783,
"eval_dim_128_cosine_recall@3": 0.2086366583219295,
"eval_dim_128_cosine_recall@5": 0.28017190256402125,
"eval_dim_256_cosine_accuracy@1": 0.4914004914004914,
"eval_dim_256_cosine_accuracy@10": 0.5773955773955773,
"eval_dim_256_cosine_accuracy@3": 0.5331695331695332,
"eval_dim_256_cosine_accuracy@5": 0.5577395577395577,
"eval_dim_256_cosine_map@100": 0.5935019696916838,
"eval_dim_256_cosine_mrr@10": 0.5140653640653642,
"eval_dim_256_cosine_ndcg@10": 0.5369827121386748,
"eval_dim_256_cosine_precision@1": 0.4914004914004914,
"eval_dim_256_cosine_precision@10": 0.3882063882063882,
"eval_dim_256_cosine_precision@3": 0.47911547911547914,
"eval_dim_256_cosine_precision@5": 0.45159705159705155,
"eval_dim_256_cosine_recall@1": 0.09093823598117586,
"eval_dim_256_cosine_recall@10": 0.41377596714891424,
"eval_dim_256_cosine_recall@3": 0.22965403978285936,
"eval_dim_256_cosine_recall@5": 0.3105966895613889,
"eval_dim_512_cosine_accuracy@1": 0.5061425061425061,
"eval_dim_512_cosine_accuracy@10": 0.601965601965602,
"eval_dim_512_cosine_accuracy@3": 0.5356265356265356,
"eval_dim_512_cosine_accuracy@5": 0.5749385749385749,
"eval_dim_512_cosine_map@100": 0.6037847907506382,
"eval_dim_512_cosine_mrr@10": 0.5285109785109784,
"eval_dim_512_cosine_ndcg@10": 0.5517005282425766,
"eval_dim_512_cosine_precision@1": 0.5061425061425061,
"eval_dim_512_cosine_precision@10": 0.3987714987714988,
"eval_dim_512_cosine_precision@3": 0.48894348894348894,
"eval_dim_512_cosine_precision@5": 0.4604422604422604,
"eval_dim_512_cosine_recall@1": 0.0929721919924046,
"eval_dim_512_cosine_recall@10": 0.42689680029768245,
"eval_dim_512_cosine_recall@3": 0.23491550611407494,
"eval_dim_512_cosine_recall@5": 0.3189797777446339,
"eval_dim_64_cosine_accuracy@1": 0.40294840294840295,
"eval_dim_64_cosine_accuracy@10": 0.4914004914004914,
"eval_dim_64_cosine_accuracy@3": 0.4275184275184275,
"eval_dim_64_cosine_accuracy@5": 0.45454545454545453,
"eval_dim_64_cosine_map@100": 0.49756317885655243,
"eval_dim_64_cosine_mrr@10": 0.42249717249717245,
"eval_dim_64_cosine_ndcg@10": 0.4438902727397777,
"eval_dim_64_cosine_precision@1": 0.40294840294840295,
"eval_dim_64_cosine_precision@10": 0.3250614250614251,
"eval_dim_64_cosine_precision@3": 0.3906633906633907,
"eval_dim_64_cosine_precision@5": 0.36756756756756753,
"eval_dim_64_cosine_recall@1": 0.07181295901865549,
"eval_dim_64_cosine_recall@10": 0.3373068468638115,
"eval_dim_64_cosine_recall@3": 0.1812235990906885,
"eval_dim_64_cosine_recall@5": 0.24608925711773946,
"eval_dim_768_cosine_accuracy@1": 0.5110565110565111,
"eval_dim_768_cosine_accuracy@10": 0.6240786240786241,
"eval_dim_768_cosine_accuracy@3": 0.5528255528255528,
"eval_dim_768_cosine_accuracy@5": 0.5847665847665847,
"eval_dim_768_cosine_map@100": 0.6129515423171509,
"eval_dim_768_cosine_mrr@10": 0.5374000624000623,
"eval_dim_768_cosine_ndcg@10": 0.5625970686042534,
"eval_dim_768_cosine_precision@1": 0.5110565110565111,
"eval_dim_768_cosine_precision@10": 0.40786240786240785,
"eval_dim_768_cosine_precision@3": 0.4963144963144963,
"eval_dim_768_cosine_precision@5": 0.4687960687960688,
"eval_dim_768_cosine_recall@1": 0.09526979342793707,
"eval_dim_768_cosine_recall@10": 0.4330089602411642,
"eval_dim_768_cosine_recall@3": 0.2403356648100957,
"eval_dim_768_cosine_recall@5": 0.3243210213617395,
"eval_runtime": 112.2979,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4438902727397777,
"eval_steps_per_second": 0.0,
"step": 612
},
{
"epoch": 6.009803921568627,
"grad_norm": 15.984579086303711,
"learning_rate": 8.263518223330698e-06,
"loss": 0.0581,
"step": 613
},
{
"epoch": 6.019607843137255,
"grad_norm": 1.5031594038009644,
"learning_rate": 8.22982622659241e-06,
"loss": 0.0368,
"step": 614
},
{
"epoch": 6.029411764705882,
"grad_norm": 19.65412712097168,
"learning_rate": 8.19615496131283e-06,
"loss": 0.3975,
"step": 615
},
{
"epoch": 6.03921568627451,
"grad_norm": 64.86492156982422,
"learning_rate": 8.162504821834296e-06,
"loss": 1.859,
"step": 616
},
{
"epoch": 6.049019607843137,
"grad_norm": 11.959246635437012,
"learning_rate": 8.128876202251719e-06,
"loss": 0.2347,
"step": 617
},
{
"epoch": 6.0588235294117645,
"grad_norm": 44.15569305419922,
"learning_rate": 8.095269496407972e-06,
"loss": 1.2704,
"step": 618
},
{
"epoch": 6.068627450980392,
"grad_norm": 15.993767738342285,
"learning_rate": 8.0616850978893e-06,
"loss": 0.4264,
"step": 619
},
{
"epoch": 6.078431372549019,
"grad_norm": 21.32816505432129,
"learning_rate": 8.028123400020686e-06,
"loss": 0.6203,
"step": 620
},
{
"epoch": 6.088235294117647,
"grad_norm": 34.41018295288086,
"learning_rate": 7.994584795861248e-06,
"loss": 0.7693,
"step": 621
},
{
"epoch": 6.098039215686274,
"grad_norm": 71.35839080810547,
"learning_rate": 7.961069678199658e-06,
"loss": 2.2813,
"step": 622
},
{
"epoch": 6.107843137254902,
"grad_norm": 8.495076179504395,
"learning_rate": 7.927578439549506e-06,
"loss": 0.2037,
"step": 623
},
{
"epoch": 6.117647058823529,
"grad_norm": 26.12579345703125,
"learning_rate": 7.894111472144733e-06,
"loss": 0.6719,
"step": 624
},
{
"epoch": 6.127450980392156,
"grad_norm": 41.17097854614258,
"learning_rate": 7.860669167935028e-06,
"loss": 0.8625,
"step": 625
},
{
"epoch": 6.137254901960785,
"grad_norm": 14.122386932373047,
"learning_rate": 7.827251918581225e-06,
"loss": 0.1724,
"step": 626
},
{
"epoch": 6.147058823529412,
"grad_norm": 76.35289764404297,
"learning_rate": 7.793860115450744e-06,
"loss": 1.1075,
"step": 627
},
{
"epoch": 6.1568627450980395,
"grad_norm": 7.909937858581543,
"learning_rate": 7.760494149612971e-06,
"loss": 0.1819,
"step": 628
},
{
"epoch": 6.166666666666667,
"grad_norm": 4.258299827575684,
"learning_rate": 7.727154411834712e-06,
"loss": 0.0981,
"step": 629
},
{
"epoch": 6.176470588235294,
"grad_norm": 4.633931636810303,
"learning_rate": 7.6938412925756e-06,
"loss": 0.0588,
"step": 630
},
{
"epoch": 6.186274509803922,
"grad_norm": 57.823883056640625,
"learning_rate": 7.660555181983517e-06,
"loss": 1.938,
"step": 631
},
{
"epoch": 6.196078431372549,
"grad_norm": 1.4953142404556274,
"learning_rate": 7.6272964698900356e-06,
"loss": 0.0238,
"step": 632
},
{
"epoch": 6.205882352941177,
"grad_norm": 38.66411590576172,
"learning_rate": 7.5940655458058575e-06,
"loss": 0.9169,
"step": 633
},
{
"epoch": 6.215686274509804,
"grad_norm": 15.436481475830078,
"learning_rate": 7.560862798916229e-06,
"loss": 0.4274,
"step": 634
},
{
"epoch": 6.2254901960784315,
"grad_norm": 10.556517601013184,
"learning_rate": 7.527688618076413e-06,
"loss": 0.2964,
"step": 635
},
{
"epoch": 6.235294117647059,
"grad_norm": 3.354306936264038,
"learning_rate": 7.494543391807112e-06,
"loss": 0.0783,
"step": 636
},
{
"epoch": 6.245098039215686,
"grad_norm": 6.201272010803223,
"learning_rate": 7.461427508289922e-06,
"loss": 0.0955,
"step": 637
},
{
"epoch": 6.254901960784314,
"grad_norm": 16.523658752441406,
"learning_rate": 7.428341355362803e-06,
"loss": 0.2783,
"step": 638
},
{
"epoch": 6.264705882352941,
"grad_norm": 9.32617473602295,
"learning_rate": 7.395285320515513e-06,
"loss": 0.2676,
"step": 639
},
{
"epoch": 6.2745098039215685,
"grad_norm": 16.20706558227539,
"learning_rate": 7.36225979088508e-06,
"loss": 0.2932,
"step": 640
},
{
"epoch": 6.284313725490196,
"grad_norm": 72.69991302490234,
"learning_rate": 7.329265153251285e-06,
"loss": 2.0471,
"step": 641
},
{
"epoch": 6.294117647058823,
"grad_norm": 13.719210624694824,
"learning_rate": 7.296301794032097e-06,
"loss": 0.1922,
"step": 642
},
{
"epoch": 6.303921568627451,
"grad_norm": 8.904701232910156,
"learning_rate": 7.263370099279173e-06,
"loss": 0.2343,
"step": 643
},
{
"epoch": 6.313725490196078,
"grad_norm": 12.111551284790039,
"learning_rate": 7.230470454673335e-06,
"loss": 0.3383,
"step": 644
},
{
"epoch": 6.323529411764706,
"grad_norm": 15.141802787780762,
"learning_rate": 7.197603245520042e-06,
"loss": 0.2505,
"step": 645
},
{
"epoch": 6.333333333333333,
"grad_norm": 12.530791282653809,
"learning_rate": 7.164768856744893e-06,
"loss": 0.1997,
"step": 646
},
{
"epoch": 6.3431372549019605,
"grad_norm": 6.673354148864746,
"learning_rate": 7.131967672889101e-06,
"loss": 0.0996,
"step": 647
},
{
"epoch": 6.352941176470588,
"grad_norm": 10.483192443847656,
"learning_rate": 7.099200078104995e-06,
"loss": 0.1974,
"step": 648
},
{
"epoch": 6.362745098039215,
"grad_norm": 6.595174312591553,
"learning_rate": 7.066466456151541e-06,
"loss": 0.1524,
"step": 649
},
{
"epoch": 6.372549019607844,
"grad_norm": 6.1990885734558105,
"learning_rate": 7.033767190389814e-06,
"loss": 0.167,
"step": 650
},
{
"epoch": 6.382352941176471,
"grad_norm": 25.681594848632812,
"learning_rate": 7.001102663778533e-06,
"loss": 0.6196,
"step": 651
},
{
"epoch": 6.392156862745098,
"grad_norm": 21.825302124023438,
"learning_rate": 6.968473258869566e-06,
"loss": 0.4484,
"step": 652
},
{
"epoch": 6.401960784313726,
"grad_norm": 33.6745719909668,
"learning_rate": 6.935879357803453e-06,
"loss": 0.6763,
"step": 653
},
{
"epoch": 6.411764705882353,
"grad_norm": 2.2407619953155518,
"learning_rate": 6.90332134230493e-06,
"loss": 0.0762,
"step": 654
},
{
"epoch": 6.421568627450981,
"grad_norm": 4.6282267570495605,
"learning_rate": 6.870799593678459e-06,
"loss": 0.1122,
"step": 655
},
{
"epoch": 6.431372549019608,
"grad_norm": 63.29195022583008,
"learning_rate": 6.83831449280375e-06,
"loss": 1.5314,
"step": 656
},
{
"epoch": 6.4411764705882355,
"grad_norm": 17.580034255981445,
"learning_rate": 6.80586642013133e-06,
"loss": 0.2948,
"step": 657
},
{
"epoch": 6.450980392156863,
"grad_norm": 28.558643341064453,
"learning_rate": 6.773455755678054e-06,
"loss": 0.5316,
"step": 658
},
{
"epoch": 6.46078431372549,
"grad_norm": 62.772953033447266,
"learning_rate": 6.741082879022671e-06,
"loss": 0.8572,
"step": 659
},
{
"epoch": 6.470588235294118,
"grad_norm": 67.80099487304688,
"learning_rate": 6.708748169301389e-06,
"loss": 1.9052,
"step": 660
},
{
"epoch": 6.480392156862745,
"grad_norm": 10.767579078674316,
"learning_rate": 6.6764520052034054e-06,
"loss": 0.3168,
"step": 661
},
{
"epoch": 6.490196078431373,
"grad_norm": 21.23326301574707,
"learning_rate": 6.644194764966499e-06,
"loss": 0.6483,
"step": 662
},
{
"epoch": 6.5,
"grad_norm": 37.76388931274414,
"learning_rate": 6.61197682637259e-06,
"loss": 1.2313,
"step": 663
},
{
"epoch": 6.509803921568627,
"grad_norm": 31.841718673706055,
"learning_rate": 6.579798566743314e-06,
"loss": 0.5487,
"step": 664
},
{
"epoch": 6.519607843137255,
"grad_norm": 23.495777130126953,
"learning_rate": 6.547660362935603e-06,
"loss": 0.3067,
"step": 665
},
{
"epoch": 6.529411764705882,
"grad_norm": 17.078996658325195,
"learning_rate": 6.515562591337279e-06,
"loss": 0.3117,
"step": 666
},
{
"epoch": 6.53921568627451,
"grad_norm": 11.851016998291016,
"learning_rate": 6.483505627862632e-06,
"loss": 0.2003,
"step": 667
},
{
"epoch": 6.549019607843137,
"grad_norm": 23.34535789489746,
"learning_rate": 6.451489847948039e-06,
"loss": 0.4868,
"step": 668
},
{
"epoch": 6.5588235294117645,
"grad_norm": 28.493072509765625,
"learning_rate": 6.419515626547543e-06,
"loss": 0.3717,
"step": 669
},
{
"epoch": 6.568627450980392,
"grad_norm": 42.109066009521484,
"learning_rate": 6.387583338128471e-06,
"loss": 0.9412,
"step": 670
},
{
"epoch": 6.578431372549019,
"grad_norm": 62.01663589477539,
"learning_rate": 6.3556933566670656e-06,
"loss": 1.6313,
"step": 671
},
{
"epoch": 6.588235294117647,
"grad_norm": 27.55695915222168,
"learning_rate": 6.32384605564407e-06,
"loss": 0.6452,
"step": 672
},
{
"epoch": 6.598039215686274,
"grad_norm": 61.346435546875,
"learning_rate": 6.292041808040393e-06,
"loss": 1.9431,
"step": 673
},
{
"epoch": 6.607843137254902,
"grad_norm": 3.8480262756347656,
"learning_rate": 6.260280986332707e-06,
"loss": 0.1115,
"step": 674
},
{
"epoch": 6.617647058823529,
"grad_norm": 27.856801986694336,
"learning_rate": 6.228563962489106e-06,
"loss": 1.0499,
"step": 675
},
{
"epoch": 6.627450980392156,
"grad_norm": 32.078163146972656,
"learning_rate": 6.196891107964744e-06,
"loss": 0.6338,
"step": 676
},
{
"epoch": 6.637254901960784,
"grad_norm": 33.52442169189453,
"learning_rate": 6.165262793697486e-06,
"loss": 1.5934,
"step": 677
},
{
"epoch": 6.647058823529412,
"grad_norm": 33.4982795715332,
"learning_rate": 6.1336793901035526e-06,
"loss": 0.9806,
"step": 678
},
{
"epoch": 6.6568627450980395,
"grad_norm": 31.796276092529297,
"learning_rate": 6.102141267073207e-06,
"loss": 0.4624,
"step": 679
},
{
"epoch": 6.666666666666667,
"grad_norm": 64.05634307861328,
"learning_rate": 6.070648793966396e-06,
"loss": 1.1895,
"step": 680
},
{
"epoch": 6.676470588235294,
"grad_norm": 10.422757148742676,
"learning_rate": 6.039202339608432e-06,
"loss": 0.3156,
"step": 681
},
{
"epoch": 6.686274509803922,
"grad_norm": 18.19187355041504,
"learning_rate": 6.007802272285693e-06,
"loss": 0.3458,
"step": 682
},
{
"epoch": 6.696078431372549,
"grad_norm": 44.2559700012207,
"learning_rate": 5.9764489597412744e-06,
"loss": 0.8909,
"step": 683
},
{
"epoch": 6.705882352941177,
"grad_norm": 51.00062942504883,
"learning_rate": 5.94514276917072e-06,
"loss": 1.4376,
"step": 684
},
{
"epoch": 6.715686274509804,
"grad_norm": 71.0698013305664,
"learning_rate": 5.913884067217686e-06,
"loss": 2.3231,
"step": 685
},
{
"epoch": 6.7254901960784315,
"grad_norm": 42.56141662597656,
"learning_rate": 5.882673219969673e-06,
"loss": 1.6846,
"step": 686
},
{
"epoch": 6.735294117647059,
"grad_norm": 14.134212493896484,
"learning_rate": 5.851510592953729e-06,
"loss": 0.2639,
"step": 687
},
{
"epoch": 6.745098039215686,
"grad_norm": 3.739373207092285,
"learning_rate": 5.82039655113217e-06,
"loss": 0.1005,
"step": 688
},
{
"epoch": 6.754901960784314,
"grad_norm": 13.384047508239746,
"learning_rate": 5.7893314588982905e-06,
"loss": 0.7306,
"step": 689
},
{
"epoch": 6.764705882352941,
"grad_norm": 32.962947845458984,
"learning_rate": 5.758315680072137e-06,
"loss": 0.5565,
"step": 690
},
{
"epoch": 6.7745098039215685,
"grad_norm": 13.934967041015625,
"learning_rate": 5.727349577896194e-06,
"loss": 0.3282,
"step": 691
},
{
"epoch": 6.784313725490196,
"grad_norm": 13.745176315307617,
"learning_rate": 5.696433515031169e-06,
"loss": 0.1483,
"step": 692
},
{
"epoch": 6.794117647058823,
"grad_norm": 48.74391174316406,
"learning_rate": 5.66556785355173e-06,
"loss": 1.1891,
"step": 693
},
{
"epoch": 6.803921568627451,
"grad_norm": 37.949527740478516,
"learning_rate": 5.634752954942264e-06,
"loss": 0.936,
"step": 694
},
{
"epoch": 6.813725490196078,
"grad_norm": 10.46754264831543,
"learning_rate": 5.603989180092661e-06,
"loss": 0.2507,
"step": 695
},
{
"epoch": 6.823529411764706,
"grad_norm": 48.78800964355469,
"learning_rate": 5.57327688929405e-06,
"loss": 1.0859,
"step": 696
},
{
"epoch": 6.833333333333333,
"grad_norm": 20.261493682861328,
"learning_rate": 5.542616442234618e-06,
"loss": 0.6285,
"step": 697
},
{
"epoch": 6.8431372549019605,
"grad_norm": 19.126684188842773,
"learning_rate": 5.512008197995379e-06,
"loss": 0.208,
"step": 698
},
{
"epoch": 6.852941176470588,
"grad_norm": 15.567009925842285,
"learning_rate": 5.481452515045974e-06,
"loss": 0.2511,
"step": 699
},
{
"epoch": 6.862745098039216,
"grad_norm": 19.60640525817871,
"learning_rate": 5.450949751240456e-06,
"loss": 0.2671,
"step": 700
},
{
"epoch": 6.872549019607844,
"grad_norm": 12.359797477722168,
"learning_rate": 5.420500263813141e-06,
"loss": 0.1619,
"step": 701
},
{
"epoch": 6.882352941176471,
"grad_norm": 15.319607734680176,
"learning_rate": 5.390104409374364e-06,
"loss": 0.1394,
"step": 702
},
{
"epoch": 6.892156862745098,
"grad_norm": 43.92523956298828,
"learning_rate": 5.3597625439063685e-06,
"loss": 0.7688,
"step": 703
},
{
"epoch": 6.901960784313726,
"grad_norm": 43.91411590576172,
"learning_rate": 5.329475022759074e-06,
"loss": 1.8208,
"step": 704
},
{
"epoch": 6.911764705882353,
"grad_norm": 21.796554565429688,
"learning_rate": 5.299242200645959e-06,
"loss": 0.3901,
"step": 705
},
{
"epoch": 6.921568627450981,
"grad_norm": 28.99288558959961,
"learning_rate": 5.269064431639901e-06,
"loss": 1.3879,
"step": 706
},
{
"epoch": 6.931372549019608,
"grad_norm": 8.39719009399414,
"learning_rate": 5.238942069169e-06,
"loss": 0.1595,
"step": 707
},
{
"epoch": 6.9411764705882355,
"grad_norm": 7.552489757537842,
"learning_rate": 5.208875466012475e-06,
"loss": 0.1905,
"step": 708
},
{
"epoch": 6.950980392156863,
"grad_norm": 11.019523620605469,
"learning_rate": 5.178864974296511e-06,
"loss": 0.1992,
"step": 709
},
{
"epoch": 6.96078431372549,
"grad_norm": 26.535831451416016,
"learning_rate": 5.148910945490152e-06,
"loss": 0.1662,
"step": 710
},
{
"epoch": 6.970588235294118,
"grad_norm": 7.696582794189453,
"learning_rate": 5.119013730401152e-06,
"loss": 0.116,
"step": 711
},
{
"epoch": 6.980392156862745,
"grad_norm": 6.992671489715576,
"learning_rate": 5.089173679171922e-06,
"loss": 0.116,
"step": 712
},
{
"epoch": 6.990196078431373,
"grad_norm": 6.2422919273376465,
"learning_rate": 5.059391141275358e-06,
"loss": 0.1566,
"step": 713
},
{
"epoch": 7.0,
"grad_norm": 36.10157775878906,
"learning_rate": 5.029666465510825e-06,
"loss": 0.8007,
"step": 714
},
{
"epoch": 7.0,
"eval_dim_128_cosine_accuracy@1": 0.4643734643734644,
"eval_dim_128_cosine_accuracy@10": 0.5651105651105651,
"eval_dim_128_cosine_accuracy@3": 0.5036855036855037,
"eval_dim_128_cosine_accuracy@5": 0.5257985257985258,
"eval_dim_128_cosine_map@100": 0.564075733812558,
"eval_dim_128_cosine_mrr@10": 0.48757751257751264,
"eval_dim_128_cosine_ndcg@10": 0.5108359029328255,
"eval_dim_128_cosine_precision@1": 0.4643734643734644,
"eval_dim_128_cosine_precision@10": 0.372972972972973,
"eval_dim_128_cosine_precision@3": 0.4537264537264537,
"eval_dim_128_cosine_precision@5": 0.4250614250614251,
"eval_dim_128_cosine_recall@1": 0.0836470460134008,
"eval_dim_128_cosine_recall@10": 0.3926482449171058,
"eval_dim_128_cosine_recall@3": 0.2139970760961404,
"eval_dim_128_cosine_recall@5": 0.28840905024082414,
"eval_dim_256_cosine_accuracy@1": 0.5085995085995086,
"eval_dim_256_cosine_accuracy@10": 0.5823095823095823,
"eval_dim_256_cosine_accuracy@3": 0.5454545454545454,
"eval_dim_256_cosine_accuracy@5": 0.5651105651105651,
"eval_dim_256_cosine_map@100": 0.6021996684970489,
"eval_dim_256_cosine_mrr@10": 0.5282028782028781,
"eval_dim_256_cosine_ndcg@10": 0.5485369477415476,
"eval_dim_256_cosine_precision@1": 0.5085995085995086,
"eval_dim_256_cosine_precision@10": 0.39803439803439805,
"eval_dim_256_cosine_precision@3": 0.493038493038493,
"eval_dim_256_cosine_precision@5": 0.4638820638820638,
"eval_dim_256_cosine_recall@1": 0.0924079374508773,
"eval_dim_256_cosine_recall@10": 0.41789750252044955,
"eval_dim_256_cosine_recall@3": 0.23300639313521274,
"eval_dim_256_cosine_recall@5": 0.3150564752711746,
"eval_dim_512_cosine_accuracy@1": 0.5135135135135135,
"eval_dim_512_cosine_accuracy@10": 0.6167076167076168,
"eval_dim_512_cosine_accuracy@3": 0.547911547911548,
"eval_dim_512_cosine_accuracy@5": 0.5823095823095823,
"eval_dim_512_cosine_map@100": 0.610450473760828,
"eval_dim_512_cosine_mrr@10": 0.5376964626964627,
"eval_dim_512_cosine_ndcg@10": 0.5626252307241971,
"eval_dim_512_cosine_precision@1": 0.5135135135135135,
"eval_dim_512_cosine_precision@10": 0.4098280098280099,
"eval_dim_512_cosine_precision@3": 0.4995904995904996,
"eval_dim_512_cosine_precision@5": 0.47272727272727266,
"eval_dim_512_cosine_recall@1": 0.09177756312119065,
"eval_dim_512_cosine_recall@10": 0.4353105358971801,
"eval_dim_512_cosine_recall@3": 0.23510487327368682,
"eval_dim_512_cosine_recall@5": 0.3221634374244003,
"eval_dim_64_cosine_accuracy@1": 0.40540540540540543,
"eval_dim_64_cosine_accuracy@10": 0.5036855036855037,
"eval_dim_64_cosine_accuracy@3": 0.44226044226044225,
"eval_dim_64_cosine_accuracy@5": 0.4643734643734644,
"eval_dim_64_cosine_map@100": 0.5038636989681976,
"eval_dim_64_cosine_mrr@10": 0.42867672867672857,
"eval_dim_64_cosine_ndcg@10": 0.4514130750769757,
"eval_dim_64_cosine_precision@1": 0.40540540540540543,
"eval_dim_64_cosine_precision@10": 0.33046683046683045,
"eval_dim_64_cosine_precision@3": 0.39803439803439805,
"eval_dim_64_cosine_precision@5": 0.3759213759213759,
"eval_dim_64_cosine_recall@1": 0.07123982719552367,
"eval_dim_64_cosine_recall@10": 0.3470196396411205,
"eval_dim_64_cosine_recall@3": 0.18522550934259877,
"eval_dim_64_cosine_recall@5": 0.2536244771529595,
"eval_dim_768_cosine_accuracy@1": 0.5061425061425061,
"eval_dim_768_cosine_accuracy@10": 0.6167076167076168,
"eval_dim_768_cosine_accuracy@3": 0.5528255528255528,
"eval_dim_768_cosine_accuracy@5": 0.5872235872235873,
"eval_dim_768_cosine_map@100": 0.6139686310087906,
"eval_dim_768_cosine_mrr@10": 0.5337730587730588,
"eval_dim_768_cosine_ndcg@10": 0.5613623506592793,
"eval_dim_768_cosine_precision@1": 0.5061425061425061,
"eval_dim_768_cosine_precision@10": 0.40884520884520886,
"eval_dim_768_cosine_precision@3": 0.4954954954954955,
"eval_dim_768_cosine_precision@5": 0.47027027027027024,
"eval_dim_768_cosine_recall@1": 0.09169628140197787,
"eval_dim_768_cosine_recall@10": 0.43426130796610773,
"eval_dim_768_cosine_recall@3": 0.23924458336167276,
"eval_dim_768_cosine_recall@5": 0.32428362882750705,
"eval_runtime": 112.1147,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4514130750769757,
"eval_steps_per_second": 0.0,
"step": 714
},
{
"epoch": 7.009803921568627,
"grad_norm": 59.34041976928711,
"learning_rate": 5.000000000000003e-06,
"loss": 1.0775,
"step": 715
},
{
"epoch": 7.019607843137255,
"grad_norm": 35.32329559326172,
"learning_rate": 4.970392092182853e-06,
"loss": 0.44,
"step": 716
},
{
"epoch": 7.029411764705882,
"grad_norm": 1.9226537942886353,
"learning_rate": 4.940843088813537e-06,
"loss": 0.0443,
"step": 717
},
{
"epoch": 7.03921568627451,
"grad_norm": 36.416629791259766,
"learning_rate": 4.911353335956353e-06,
"loss": 1.3114,
"step": 718
},
{
"epoch": 7.049019607843137,
"grad_norm": 15.497233390808105,
"learning_rate": 4.881923178981681e-06,
"loss": 0.2947,
"step": 719
},
{
"epoch": 7.0588235294117645,
"grad_norm": 19.8168888092041,
"learning_rate": 4.852552962561946e-06,
"loss": 0.5323,
"step": 720
},
{
"epoch": 7.068627450980392,
"grad_norm": 23.126468658447266,
"learning_rate": 4.823243030667576e-06,
"loss": 0.3138,
"step": 721
},
{
"epoch": 7.078431372549019,
"grad_norm": 41.2757453918457,
"learning_rate": 4.7939937265629725e-06,
"loss": 0.8026,
"step": 722
},
{
"epoch": 7.088235294117647,
"grad_norm": 2.18070125579834,
"learning_rate": 4.764805392802497e-06,
"loss": 0.0669,
"step": 723
},
{
"epoch": 7.098039215686274,
"grad_norm": 3.1881673336029053,
"learning_rate": 4.7356783712264405e-06,
"loss": 0.0805,
"step": 724
},
{
"epoch": 7.107843137254902,
"grad_norm": 5.860881328582764,
"learning_rate": 4.70661300295706e-06,
"loss": 0.0799,
"step": 725
},
{
"epoch": 7.117647058823529,
"grad_norm": 2.0763373374938965,
"learning_rate": 4.677609628394529e-06,
"loss": 0.0574,
"step": 726
},
{
"epoch": 7.127450980392156,
"grad_norm": 30.232059478759766,
"learning_rate": 4.648668587212998e-06,
"loss": 0.4692,
"step": 727
},
{
"epoch": 7.137254901960785,
"grad_norm": 49.929012298583984,
"learning_rate": 4.619790218356589e-06,
"loss": 1.6721,
"step": 728
},
{
"epoch": 7.147058823529412,
"grad_norm": 49.42375564575195,
"learning_rate": 4.5909748600354395e-06,
"loss": 0.9704,
"step": 729
},
{
"epoch": 7.1568627450980395,
"grad_norm": 10.879191398620605,
"learning_rate": 4.562222849721735e-06,
"loss": 0.2699,
"step": 730
},
{
"epoch": 7.166666666666667,
"grad_norm": 36.91472244262695,
"learning_rate": 4.533534524145756e-06,
"loss": 0.6855,
"step": 731
},
{
"epoch": 7.176470588235294,
"grad_norm": 4.611995697021484,
"learning_rate": 4.504910219291941e-06,
"loss": 0.1086,
"step": 732
},
{
"epoch": 7.186274509803922,
"grad_norm": 18.415523529052734,
"learning_rate": 4.476350270394942e-06,
"loss": 0.1754,
"step": 733
},
{
"epoch": 7.196078431372549,
"grad_norm": 48.348392486572266,
"learning_rate": 4.447855011935714e-06,
"loss": 1.2667,
"step": 734
},
{
"epoch": 7.205882352941177,
"grad_norm": 9.687373161315918,
"learning_rate": 4.419424777637565e-06,
"loss": 0.1927,
"step": 735
},
{
"epoch": 7.215686274509804,
"grad_norm": 0.5537658333778381,
"learning_rate": 4.391059900462305e-06,
"loss": 0.0114,
"step": 736
},
{
"epoch": 7.2254901960784315,
"grad_norm": 5.573686122894287,
"learning_rate": 4.362760712606278e-06,
"loss": 0.12,
"step": 737
},
{
"epoch": 7.235294117647059,
"grad_norm": 38.901397705078125,
"learning_rate": 4.334527545496521e-06,
"loss": 0.4625,
"step": 738
},
{
"epoch": 7.245098039215686,
"grad_norm": 2.3081209659576416,
"learning_rate": 4.306360729786867e-06,
"loss": 0.0489,
"step": 739
},
{
"epoch": 7.254901960784314,
"grad_norm": 19.291519165039062,
"learning_rate": 4.278260595354067e-06,
"loss": 0.5556,
"step": 740
},
{
"epoch": 7.264705882352941,
"grad_norm": 4.187754154205322,
"learning_rate": 4.2502274712939355e-06,
"loss": 0.0939,
"step": 741
},
{
"epoch": 7.2745098039215685,
"grad_norm": 67.83704376220703,
"learning_rate": 4.222261685917489e-06,
"loss": 1.6064,
"step": 742
},
{
"epoch": 7.284313725490196,
"grad_norm": 11.588760375976562,
"learning_rate": 4.1943635667471095e-06,
"loss": 0.237,
"step": 743
},
{
"epoch": 7.294117647058823,
"grad_norm": 91.4351806640625,
"learning_rate": 4.166533440512696e-06,
"loss": 1.7594,
"step": 744
},
{
"epoch": 7.303921568627451,
"grad_norm": 24.139951705932617,
"learning_rate": 4.138771633147856e-06,
"loss": 0.374,
"step": 745
},
{
"epoch": 7.313725490196078,
"grad_norm": 28.552005767822266,
"learning_rate": 4.111078469786062e-06,
"loss": 0.6035,
"step": 746
},
{
"epoch": 7.323529411764706,
"grad_norm": 4.377718448638916,
"learning_rate": 4.083454274756881e-06,
"loss": 0.0507,
"step": 747
},
{
"epoch": 7.333333333333333,
"grad_norm": 2.3305182456970215,
"learning_rate": 4.0558993715821335e-06,
"loss": 0.0689,
"step": 748
},
{
"epoch": 7.3431372549019605,
"grad_norm": 7.7289533615112305,
"learning_rate": 4.028414082972141e-06,
"loss": 0.1226,
"step": 749
},
{
"epoch": 7.352941176470588,
"grad_norm": 29.273544311523438,
"learning_rate": 4.000998730821922e-06,
"loss": 0.5325,
"step": 750
},
{
"epoch": 7.362745098039215,
"grad_norm": 36.495277404785156,
"learning_rate": 3.973653636207437e-06,
"loss": 0.7446,
"step": 751
},
{
"epoch": 7.372549019607844,
"grad_norm": 2.182332992553711,
"learning_rate": 3.946379119381822e-06,
"loss": 0.0552,
"step": 752
},
{
"epoch": 7.382352941176471,
"grad_norm": 5.138607978820801,
"learning_rate": 3.919175499771635e-06,
"loss": 0.1349,
"step": 753
},
{
"epoch": 7.392156862745098,
"grad_norm": 4.032210350036621,
"learning_rate": 3.892043095973123e-06,
"loss": 0.1023,
"step": 754
},
{
"epoch": 7.401960784313726,
"grad_norm": 4.79258394241333,
"learning_rate": 3.864982225748481e-06,
"loss": 0.0752,
"step": 755
},
{
"epoch": 7.411764705882353,
"grad_norm": 15.944069862365723,
"learning_rate": 3.837993206022146e-06,
"loss": 0.3072,
"step": 756
},
{
"epoch": 7.421568627450981,
"grad_norm": 42.97219467163086,
"learning_rate": 3.8110763528770543e-06,
"loss": 0.6699,
"step": 757
},
{
"epoch": 7.431372549019608,
"grad_norm": 3.7546660900115967,
"learning_rate": 3.784231981550991e-06,
"loss": 0.082,
"step": 758
},
{
"epoch": 7.4411764705882355,
"grad_norm": 2.975052833557129,
"learning_rate": 3.7574604064328336e-06,
"loss": 0.0643,
"step": 759
},
{
"epoch": 7.450980392156863,
"grad_norm": 215.64663696289062,
"learning_rate": 3.730761941058938e-06,
"loss": 0.5188,
"step": 760
},
{
"epoch": 7.46078431372549,
"grad_norm": 8.11312198638916,
"learning_rate": 3.704136898109403e-06,
"loss": 0.2556,
"step": 761
},
{
"epoch": 7.470588235294118,
"grad_norm": 23.435211181640625,
"learning_rate": 3.6775855894044543e-06,
"loss": 0.4907,
"step": 762
},
{
"epoch": 7.480392156862745,
"grad_norm": 8.580737113952637,
"learning_rate": 3.651108325900773e-06,
"loss": 0.2051,
"step": 763
},
{
"epoch": 7.490196078431373,
"grad_norm": 23.523828506469727,
"learning_rate": 3.624705417687856e-06,
"loss": 0.5969,
"step": 764
},
{
"epoch": 7.5,
"grad_norm": 6.671662330627441,
"learning_rate": 3.5983771739843855e-06,
"loss": 0.1616,
"step": 765
},
{
"epoch": 7.509803921568627,
"grad_norm": 6.877324104309082,
"learning_rate": 3.5721239031346067e-06,
"loss": 0.1203,
"step": 766
},
{
"epoch": 7.519607843137255,
"grad_norm": 63.056732177734375,
"learning_rate": 3.5459459126047226e-06,
"loss": 2.1851,
"step": 767
},
{
"epoch": 7.529411764705882,
"grad_norm": 39.958675384521484,
"learning_rate": 3.5198435089792726e-06,
"loss": 0.7754,
"step": 768
},
{
"epoch": 7.53921568627451,
"grad_norm": 58.4364128112793,
"learning_rate": 3.493816997957582e-06,
"loss": 1.4312,
"step": 769
},
{
"epoch": 7.549019607843137,
"grad_norm": 2.793602228164673,
"learning_rate": 3.4678666843501276e-06,
"loss": 0.067,
"step": 770
},
{
"epoch": 7.5588235294117645,
"grad_norm": 62.26560974121094,
"learning_rate": 3.4419928720750274e-06,
"loss": 1.1101,
"step": 771
},
{
"epoch": 7.568627450980392,
"grad_norm": 63.20686721801758,
"learning_rate": 3.416195864154426e-06,
"loss": 1.7196,
"step": 772
},
{
"epoch": 7.578431372549019,
"grad_norm": 86.96753692626953,
"learning_rate": 3.3904759627109828e-06,
"loss": 1.233,
"step": 773
},
{
"epoch": 7.588235294117647,
"grad_norm": 20.991119384765625,
"learning_rate": 3.3648334689643214e-06,
"loss": 0.4773,
"step": 774
},
{
"epoch": 7.598039215686274,
"grad_norm": 42.48898696899414,
"learning_rate": 3.339268683227499e-06,
"loss": 0.6321,
"step": 775
},
{
"epoch": 7.607843137254902,
"grad_norm": 56.39468765258789,
"learning_rate": 3.3137819049034957e-06,
"loss": 1.1809,
"step": 776
},
{
"epoch": 7.617647058823529,
"grad_norm": 25.71571159362793,
"learning_rate": 3.288373432481703e-06,
"loss": 0.6292,
"step": 777
},
{
"epoch": 7.627450980392156,
"grad_norm": 8.829726219177246,
"learning_rate": 3.2630435635344283e-06,
"loss": 0.224,
"step": 778
},
{
"epoch": 7.637254901960784,
"grad_norm": 2.1662521362304688,
"learning_rate": 3.2377925947134137e-06,
"loss": 0.0644,
"step": 779
},
{
"epoch": 7.647058823529412,
"grad_norm": 2.9743878841400146,
"learning_rate": 3.212620821746362e-06,
"loss": 0.0691,
"step": 780
},
{
"epoch": 7.6568627450980395,
"grad_norm": 31.683246612548828,
"learning_rate": 3.1875285394334575e-06,
"loss": 0.7733,
"step": 781
},
{
"epoch": 7.666666666666667,
"grad_norm": 7.796841621398926,
"learning_rate": 3.1625160416439503e-06,
"loss": 0.1648,
"step": 782
},
{
"epoch": 7.676470588235294,
"grad_norm": 13.790186882019043,
"learning_rate": 3.1375836213126653e-06,
"loss": 0.2088,
"step": 783
},
{
"epoch": 7.686274509803922,
"grad_norm": 13.46403980255127,
"learning_rate": 3.1127315704366144e-06,
"loss": 0.1723,
"step": 784
},
{
"epoch": 7.696078431372549,
"grad_norm": 7.356055736541748,
"learning_rate": 3.087960180071553e-06,
"loss": 0.2156,
"step": 785
},
{
"epoch": 7.705882352941177,
"grad_norm": 0.36267539858818054,
"learning_rate": 3.063269740328579e-06,
"loss": 0.0082,
"step": 786
},
{
"epoch": 7.715686274509804,
"grad_norm": 1.4741358757019043,
"learning_rate": 3.0386605403707347e-06,
"loss": 0.0436,
"step": 787
},
{
"epoch": 7.7254901960784315,
"grad_norm": 4.873252868652344,
"learning_rate": 3.014132868409617e-06,
"loss": 0.1314,
"step": 788
},
{
"epoch": 7.735294117647059,
"grad_norm": 18.598533630371094,
"learning_rate": 2.9896870117020073e-06,
"loss": 0.3727,
"step": 789
},
{
"epoch": 7.745098039215686,
"grad_norm": 7.328352451324463,
"learning_rate": 2.9653232565465017e-06,
"loss": 0.1463,
"step": 790
},
{
"epoch": 7.754901960784314,
"grad_norm": 16.266738891601562,
"learning_rate": 2.9410418882801682e-06,
"loss": 0.3104,
"step": 791
},
{
"epoch": 7.764705882352941,
"grad_norm": 9.812725067138672,
"learning_rate": 2.9168431912751805e-06,
"loss": 0.2729,
"step": 792
},
{
"epoch": 7.7745098039215685,
"grad_norm": 56.76531982421875,
"learning_rate": 2.8927274489355296e-06,
"loss": 1.0571,
"step": 793
},
{
"epoch": 7.784313725490196,
"grad_norm": 4.355117321014404,
"learning_rate": 2.868694943693655e-06,
"loss": 0.1072,
"step": 794
},
{
"epoch": 7.794117647058823,
"grad_norm": 3.7711503505706787,
"learning_rate": 2.844745957007178e-06,
"loss": 0.0875,
"step": 795
},
{
"epoch": 7.803921568627451,
"grad_norm": 41.87392807006836,
"learning_rate": 2.820880769355582e-06,
"loss": 0.5096,
"step": 796
},
{
"epoch": 7.813725490196078,
"grad_norm": 9.530420303344727,
"learning_rate": 2.797099660236937e-06,
"loss": 0.2251,
"step": 797
},
{
"epoch": 7.823529411764706,
"grad_norm": 8.098447799682617,
"learning_rate": 2.773402908164625e-06,
"loss": 0.1296,
"step": 798
},
{
"epoch": 7.833333333333333,
"grad_norm": 20.566387176513672,
"learning_rate": 2.749790790664074e-06,
"loss": 0.3319,
"step": 799
},
{
"epoch": 7.8431372549019605,
"grad_norm": 55.28195571899414,
"learning_rate": 2.726263584269513e-06,
"loss": 1.3618,
"step": 800
},
{
"epoch": 7.852941176470588,
"grad_norm": 17.402467727661133,
"learning_rate": 2.702821564520732e-06,
"loss": 0.3323,
"step": 801
},
{
"epoch": 7.862745098039216,
"grad_norm": 56.373085021972656,
"learning_rate": 2.679465005959856e-06,
"loss": 1.606,
"step": 802
},
{
"epoch": 7.872549019607844,
"grad_norm": 4.239923477172852,
"learning_rate": 2.6561941821281145e-06,
"loss": 0.0946,
"step": 803
},
{
"epoch": 7.882352941176471,
"grad_norm": 32.294925689697266,
"learning_rate": 2.6330093655626777e-06,
"loss": 0.5191,
"step": 804
},
{
"epoch": 7.892156862745098,
"grad_norm": 3.309236526489258,
"learning_rate": 2.6099108277934105e-06,
"loss": 0.0633,
"step": 805
},
{
"epoch": 7.901960784313726,
"grad_norm": 6.212430000305176,
"learning_rate": 2.5868988393397376e-06,
"loss": 0.1488,
"step": 806
},
{
"epoch": 7.911764705882353,
"grad_norm": 58.60629653930664,
"learning_rate": 2.5639736697074525e-06,
"loss": 1.5574,
"step": 807
},
{
"epoch": 7.921568627450981,
"grad_norm": 63.188838958740234,
"learning_rate": 2.5411355873855683e-06,
"loss": 1.4453,
"step": 808
},
{
"epoch": 7.931372549019608,
"grad_norm": 10.800907135009766,
"learning_rate": 2.518384859843168e-06,
"loss": 0.1937,
"step": 809
},
{
"epoch": 7.9411764705882355,
"grad_norm": 2.4388890266418457,
"learning_rate": 2.4957217535262824e-06,
"loss": 0.056,
"step": 810
},
{
"epoch": 7.950980392156863,
"grad_norm": 31.72113800048828,
"learning_rate": 2.4731465338547556e-06,
"loss": 0.5072,
"step": 811
},
{
"epoch": 7.96078431372549,
"grad_norm": 9.861820220947266,
"learning_rate": 2.4506594652191485e-06,
"loss": 0.1725,
"step": 812
},
{
"epoch": 7.970588235294118,
"grad_norm": 85.08750915527344,
"learning_rate": 2.428260810977641e-06,
"loss": 2.6814,
"step": 813
},
{
"epoch": 7.980392156862745,
"grad_norm": 34.63070297241211,
"learning_rate": 2.405950833452928e-06,
"loss": 0.1841,
"step": 814
},
{
"epoch": 7.990196078431373,
"grad_norm": 7.566990852355957,
"learning_rate": 2.3837297939291893e-06,
"loss": 0.1535,
"step": 815
},
{
"epoch": 8.0,
"grad_norm": 5.708738803863525,
"learning_rate": 2.3615979526489773e-06,
"loss": 0.1073,
"step": 816
},
{
"epoch": 8.0,
"eval_dim_128_cosine_accuracy@1": 0.4619164619164619,
"eval_dim_128_cosine_accuracy@10": 0.5626535626535627,
"eval_dim_128_cosine_accuracy@3": 0.5085995085995086,
"eval_dim_128_cosine_accuracy@5": 0.5257985257985258,
"eval_dim_128_cosine_map@100": 0.5639453524845017,
"eval_dim_128_cosine_mrr@10": 0.4866863616863617,
"eval_dim_128_cosine_ndcg@10": 0.5113687589504219,
"eval_dim_128_cosine_precision@1": 0.4619164619164619,
"eval_dim_128_cosine_precision@10": 0.3717444717444717,
"eval_dim_128_cosine_precision@3": 0.45454545454545453,
"eval_dim_128_cosine_precision@5": 0.42653562653562643,
"eval_dim_128_cosine_recall@1": 0.08335640095002847,
"eval_dim_128_cosine_recall@10": 0.39128031312567907,
"eval_dim_128_cosine_recall@3": 0.21448234226322477,
"eval_dim_128_cosine_recall@5": 0.2901411531092906,
"eval_dim_256_cosine_accuracy@1": 0.5085995085995086,
"eval_dim_256_cosine_accuracy@10": 0.5921375921375921,
"eval_dim_256_cosine_accuracy@3": 0.5528255528255528,
"eval_dim_256_cosine_accuracy@5": 0.5724815724815725,
"eval_dim_256_cosine_map@100": 0.6064564762454882,
"eval_dim_256_cosine_mrr@10": 0.530898755898756,
"eval_dim_256_cosine_ndcg@10": 0.5542570977911798,
"eval_dim_256_cosine_precision@1": 0.5085995085995086,
"eval_dim_256_cosine_precision@10": 0.4027027027027027,
"eval_dim_256_cosine_precision@3": 0.4963144963144963,
"eval_dim_256_cosine_precision@5": 0.46830466830466827,
"eval_dim_256_cosine_recall@1": 0.09203117787323799,
"eval_dim_256_cosine_recall@10": 0.4211373173804991,
"eval_dim_256_cosine_recall@3": 0.23375469753087783,
"eval_dim_256_cosine_recall@5": 0.3165647057750063,
"eval_dim_512_cosine_accuracy@1": 0.4987714987714988,
"eval_dim_512_cosine_accuracy@10": 0.6240786240786241,
"eval_dim_512_cosine_accuracy@3": 0.5454545454545454,
"eval_dim_512_cosine_accuracy@5": 0.5872235872235873,
"eval_dim_512_cosine_map@100": 0.6087461605615523,
"eval_dim_512_cosine_mrr@10": 0.5293933543933543,
"eval_dim_512_cosine_ndcg@10": 0.561119482694997,
"eval_dim_512_cosine_precision@1": 0.4987714987714988,
"eval_dim_512_cosine_precision@10": 0.4108108108108108,
"eval_dim_512_cosine_precision@3": 0.4881244881244881,
"eval_dim_512_cosine_precision@5": 0.4668304668304668,
"eval_dim_512_cosine_recall@1": 0.09107814197088972,
"eval_dim_512_cosine_recall@10": 0.4337764747296877,
"eval_dim_512_cosine_recall@3": 0.23345881527498952,
"eval_dim_512_cosine_recall@5": 0.32166235591891995,
"eval_dim_64_cosine_accuracy@1": 0.4201474201474201,
"eval_dim_64_cosine_accuracy@10": 0.5061425061425061,
"eval_dim_64_cosine_accuracy@3": 0.4520884520884521,
"eval_dim_64_cosine_accuracy@5": 0.47665847665847666,
"eval_dim_64_cosine_map@100": 0.5122496827910115,
"eval_dim_64_cosine_mrr@10": 0.4401641901641902,
"eval_dim_64_cosine_ndcg@10": 0.4611545919906432,
"eval_dim_64_cosine_precision@1": 0.4201474201474201,
"eval_dim_64_cosine_precision@10": 0.3393120393120393,
"eval_dim_64_cosine_precision@3": 0.40786240786240785,
"eval_dim_64_cosine_precision@5": 0.38476658476658476,
"eval_dim_64_cosine_recall@1": 0.07391007361577008,
"eval_dim_64_cosine_recall@10": 0.34472604178300653,
"eval_dim_64_cosine_recall@3": 0.18675809837518775,
"eval_dim_64_cosine_recall@5": 0.25280626383474614,
"eval_dim_768_cosine_accuracy@1": 0.5110565110565111,
"eval_dim_768_cosine_accuracy@10": 0.6339066339066339,
"eval_dim_768_cosine_accuracy@3": 0.5626535626535627,
"eval_dim_768_cosine_accuracy@5": 0.5921375921375921,
"eval_dim_768_cosine_map@100": 0.6212691129210207,
"eval_dim_768_cosine_mrr@10": 0.540914940914941,
"eval_dim_768_cosine_ndcg@10": 0.5690704149975347,
"eval_dim_768_cosine_precision@1": 0.5110565110565111,
"eval_dim_768_cosine_precision@10": 0.4157248157248157,
"eval_dim_768_cosine_precision@3": 0.5004095004095004,
"eval_dim_768_cosine_precision@5": 0.4756756756756756,
"eval_dim_768_cosine_recall@1": 0.09300949340067374,
"eval_dim_768_cosine_recall@10": 0.43665873254095156,
"eval_dim_768_cosine_recall@3": 0.24081789199143297,
"eval_dim_768_cosine_recall@5": 0.32824283621413386,
"eval_runtime": 112.0636,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4611545919906432,
"eval_steps_per_second": 0.0,
"step": 816
}
],
"logging_steps": 1,
"max_steps": 1020,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}