IoannisKat1's picture
Add finetuned model
46d92b5 verified
{
"best_global_step": 14,
"best_metric": 0.5685354415901852,
"best_model_checkpoint": "multilingual-e5-large/checkpoint-14",
"epoch": 2.090909090909091,
"eval_steps": 500,
"global_step": 14,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18181818181818182,
"grad_norm": 135.6220703125,
"learning_rate": 0.0,
"loss": 18.029,
"step": 1
},
{
"epoch": 0.36363636363636365,
"grad_norm": 140.5317840576172,
"learning_rate": 4.000000000000001e-06,
"loss": 19.4106,
"step": 2
},
{
"epoch": 0.5454545454545454,
"grad_norm": 142.2399444580078,
"learning_rate": 8.000000000000001e-06,
"loss": 16.6201,
"step": 3
},
{
"epoch": 0.7272727272727273,
"grad_norm": 110.63945770263672,
"learning_rate": 1.2e-05,
"loss": 15.3048,
"step": 4
},
{
"epoch": 0.9090909090909091,
"grad_norm": 129.43089294433594,
"learning_rate": 1.6000000000000003e-05,
"loss": 14.0182,
"step": 5
},
{
"epoch": 1.0,
"grad_norm": 108.25520324707031,
"learning_rate": 2e-05,
"loss": 6.4771,
"step": 6
},
{
"epoch": 1.0909090909090908,
"grad_norm": 95.05782318115234,
"learning_rate": 1.9975640502598243e-05,
"loss": 6.7664,
"step": 7
},
{
"epoch": 1.0909090909090908,
"eval_dim_1024_cosine_accuracy@1": 0.5714285714285714,
"eval_dim_1024_cosine_accuracy@10": 0.6666666666666666,
"eval_dim_1024_cosine_accuracy@3": 0.5714285714285714,
"eval_dim_1024_cosine_accuracy@5": 0.6190476190476191,
"eval_dim_1024_cosine_map@100": 0.6946498352795571,
"eval_dim_1024_cosine_mrr@10": 0.5888888888888888,
"eval_dim_1024_cosine_ndcg@10": 0.6167053425672016,
"eval_dim_1024_cosine_precision@1": 0.5714285714285714,
"eval_dim_1024_cosine_precision@10": 0.4476190476190477,
"eval_dim_1024_cosine_precision@3": 0.5555555555555556,
"eval_dim_1024_cosine_precision@5": 0.5142857142857142,
"eval_dim_1024_cosine_recall@1": 0.10123626373626372,
"eval_dim_1024_cosine_recall@10": 0.5401404151404151,
"eval_dim_1024_cosine_recall@3": 0.2798992673992674,
"eval_dim_1024_cosine_recall@5": 0.3871336996336997,
"eval_dim_128_cosine_accuracy@1": 0.47619047619047616,
"eval_dim_128_cosine_accuracy@10": 0.5714285714285714,
"eval_dim_128_cosine_accuracy@3": 0.47619047619047616,
"eval_dim_128_cosine_accuracy@5": 0.5238095238095238,
"eval_dim_128_cosine_map@100": 0.5954946117864774,
"eval_dim_128_cosine_mrr@10": 0.49251700680272104,
"eval_dim_128_cosine_ndcg@10": 0.5277801377642622,
"eval_dim_128_cosine_precision@1": 0.47619047619047616,
"eval_dim_128_cosine_precision@10": 0.3761904761904762,
"eval_dim_128_cosine_precision@3": 0.4603174603174603,
"eval_dim_128_cosine_precision@5": 0.42857142857142855,
"eval_dim_128_cosine_recall@1": 0.08536324786324785,
"eval_dim_128_cosine_recall@10": 0.4806166056166057,
"eval_dim_128_cosine_recall@3": 0.23228021978021982,
"eval_dim_128_cosine_recall@5": 0.31967338217338215,
"eval_dim_256_cosine_accuracy@1": 0.47619047619047616,
"eval_dim_256_cosine_accuracy@10": 0.5714285714285714,
"eval_dim_256_cosine_accuracy@3": 0.47619047619047616,
"eval_dim_256_cosine_accuracy@5": 0.5238095238095238,
"eval_dim_256_cosine_map@100": 0.6218563539505393,
"eval_dim_256_cosine_mrr@10": 0.4925170068027211,
"eval_dim_256_cosine_ndcg@10": 0.5177339964253599,
"eval_dim_256_cosine_precision@1": 0.47619047619047616,
"eval_dim_256_cosine_precision@10": 0.3476190476190476,
"eval_dim_256_cosine_precision@3": 0.4603174603174603,
"eval_dim_256_cosine_precision@5": 0.419047619047619,
"eval_dim_256_cosine_recall@1": 0.09543650793650793,
"eval_dim_256_cosine_recall@10": 0.47817460317460325,
"eval_dim_256_cosine_recall@3": 0.2625,
"eval_dim_256_cosine_recall@5": 0.35813492063492064,
"eval_dim_512_cosine_accuracy@1": 0.5238095238095238,
"eval_dim_512_cosine_accuracy@10": 0.5714285714285714,
"eval_dim_512_cosine_accuracy@3": 0.5238095238095238,
"eval_dim_512_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_512_cosine_map@100": 0.6608463504287858,
"eval_dim_512_cosine_mrr@10": 0.5333333333333333,
"eval_dim_512_cosine_ndcg@10": 0.5523687509163372,
"eval_dim_512_cosine_precision@1": 0.5238095238095238,
"eval_dim_512_cosine_precision@10": 0.3761904761904762,
"eval_dim_512_cosine_precision@3": 0.5079365079365079,
"eval_dim_512_cosine_precision@5": 0.4666666666666666,
"eval_dim_512_cosine_recall@1": 0.0994047619047619,
"eval_dim_512_cosine_recall@10": 0.5019841269841271,
"eval_dim_512_cosine_recall@3": 0.2744047619047619,
"eval_dim_512_cosine_recall@5": 0.37797619047619047,
"eval_dim_64_cosine_accuracy@1": 0.3333333333333333,
"eval_dim_64_cosine_accuracy@10": 0.5714285714285714,
"eval_dim_64_cosine_accuracy@3": 0.38095238095238093,
"eval_dim_64_cosine_accuracy@5": 0.38095238095238093,
"eval_dim_64_cosine_map@100": 0.5579595177809107,
"eval_dim_64_cosine_mrr@10": 0.3764172335600907,
"eval_dim_64_cosine_ndcg@10": 0.41244392103396355,
"eval_dim_64_cosine_precision@1": 0.3333333333333333,
"eval_dim_64_cosine_precision@10": 0.2619047619047619,
"eval_dim_64_cosine_precision@3": 0.3492063492063492,
"eval_dim_64_cosine_precision@5": 0.3142857142857143,
"eval_dim_64_cosine_recall@1": 0.07063492063492063,
"eval_dim_64_cosine_recall@10": 0.42002442002442003,
"eval_dim_64_cosine_recall@3": 0.2357142857142857,
"eval_dim_64_cosine_recall@5": 0.34523809523809523,
"eval_dim_768_cosine_accuracy@1": 0.5238095238095238,
"eval_dim_768_cosine_accuracy@10": 0.6666666666666666,
"eval_dim_768_cosine_accuracy@3": 0.5238095238095238,
"eval_dim_768_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_768_cosine_map@100": 0.6727109022414055,
"eval_dim_768_cosine_mrr@10": 0.5480725623582765,
"eval_dim_768_cosine_ndcg@10": 0.5820705880762242,
"eval_dim_768_cosine_precision@1": 0.5238095238095238,
"eval_dim_768_cosine_precision@10": 0.41904761904761906,
"eval_dim_768_cosine_precision@3": 0.5079365079365079,
"eval_dim_768_cosine_precision@5": 0.4666666666666666,
"eval_dim_768_cosine_recall@1": 0.09726800976800977,
"eval_dim_768_cosine_recall@10": 0.5163308913308913,
"eval_dim_768_cosine_recall@3": 0.2679945054945055,
"eval_dim_768_cosine_recall@5": 0.36729242979242976,
"eval_runtime": 7.0357,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.41244392103396355,
"eval_steps_per_second": 0.0,
"step": 7
},
{
"epoch": 1.1818181818181819,
"grad_norm": 159.73828125,
"learning_rate": 1.9902680687415704e-05,
"loss": 11.8583,
"step": 8
},
{
"epoch": 1.3636363636363638,
"grad_norm": 134.70603942871094,
"learning_rate": 1.9781476007338058e-05,
"loss": 11.9216,
"step": 9
},
{
"epoch": 1.5454545454545454,
"grad_norm": 139.73654174804688,
"learning_rate": 1.961261695938319e-05,
"loss": 13.3764,
"step": 10
},
{
"epoch": 1.7272727272727273,
"grad_norm": 173.23065185546875,
"learning_rate": 1.9396926207859085e-05,
"loss": 12.9063,
"step": 11
},
{
"epoch": 1.9090909090909092,
"grad_norm": 208.78482055664062,
"learning_rate": 1.913545457642601e-05,
"loss": 13.5984,
"step": 12
},
{
"epoch": 2.0,
"grad_norm": 298.82183837890625,
"learning_rate": 1.8829475928589272e-05,
"loss": 7.8523,
"step": 13
},
{
"epoch": 2.090909090909091,
"grad_norm": 156.5718994140625,
"learning_rate": 1.848048096156426e-05,
"loss": 4.4487,
"step": 14
},
{
"epoch": 2.090909090909091,
"eval_dim_1024_cosine_accuracy@1": 0.5238095238095238,
"eval_dim_1024_cosine_accuracy@10": 0.6666666666666666,
"eval_dim_1024_cosine_accuracy@3": 0.5238095238095238,
"eval_dim_1024_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_1024_cosine_map@100": 0.67423207909377,
"eval_dim_1024_cosine_mrr@10": 0.5480725623582765,
"eval_dim_1024_cosine_ndcg@10": 0.5921167294151266,
"eval_dim_1024_cosine_precision@1": 0.5238095238095238,
"eval_dim_1024_cosine_precision@10": 0.4476190476190477,
"eval_dim_1024_cosine_precision@3": 0.5079365079365079,
"eval_dim_1024_cosine_precision@5": 0.47619047619047616,
"eval_dim_1024_cosine_recall@1": 0.08933150183150182,
"eval_dim_1024_cosine_recall@10": 0.5401404151404151,
"eval_dim_1024_cosine_recall@3": 0.24418498168498168,
"eval_dim_1024_cosine_recall@5": 0.33951465201465203,
"eval_dim_128_cosine_accuracy@1": 0.5238095238095238,
"eval_dim_128_cosine_accuracy@10": 0.6190476190476191,
"eval_dim_128_cosine_accuracy@3": 0.5238095238095238,
"eval_dim_128_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_128_cosine_map@100": 0.6489604480560528,
"eval_dim_128_cosine_mrr@10": 0.5401360544217686,
"eval_dim_128_cosine_ndcg@10": 0.5685354415901852,
"eval_dim_128_cosine_precision@1": 0.5238095238095238,
"eval_dim_128_cosine_precision@10": 0.4238095238095239,
"eval_dim_128_cosine_precision@3": 0.5238095238095238,
"eval_dim_128_cosine_precision@5": 0.5047619047619047,
"eval_dim_128_cosine_recall@1": 0.07345848595848595,
"eval_dim_128_cosine_recall@10": 0.5202991452991453,
"eval_dim_128_cosine_recall@3": 0.2203754578754579,
"eval_dim_128_cosine_recall@5": 0.34745115995116,
"eval_dim_256_cosine_accuracy@1": 0.5238095238095238,
"eval_dim_256_cosine_accuracy@10": 0.6190476190476191,
"eval_dim_256_cosine_accuracy@3": 0.5238095238095238,
"eval_dim_256_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_256_cosine_map@100": 0.651530364911684,
"eval_dim_256_cosine_mrr@10": 0.5401360544217686,
"eval_dim_256_cosine_ndcg@10": 0.5708936958722651,
"eval_dim_256_cosine_precision@1": 0.5238095238095238,
"eval_dim_256_cosine_precision@10": 0.4238095238095239,
"eval_dim_256_cosine_precision@3": 0.5079365079365079,
"eval_dim_256_cosine_precision@5": 0.49523809523809514,
"eval_dim_256_cosine_recall@1": 0.0813949938949939,
"eval_dim_256_cosine_recall@10": 0.5202991452991453,
"eval_dim_256_cosine_recall@3": 0.22037545787545787,
"eval_dim_256_cosine_recall@5": 0.33951465201465203,
"eval_dim_512_cosine_accuracy@1": 0.47619047619047616,
"eval_dim_512_cosine_accuracy@10": 0.6190476190476191,
"eval_dim_512_cosine_accuracy@3": 0.47619047619047616,
"eval_dim_512_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_512_cosine_map@100": 0.6265911712939339,
"eval_dim_512_cosine_mrr@10": 0.5020408163265305,
"eval_dim_512_cosine_ndcg@10": 0.5518338753600308,
"eval_dim_512_cosine_precision@1": 0.47619047619047616,
"eval_dim_512_cosine_precision@10": 0.4238095238095239,
"eval_dim_512_cosine_precision@3": 0.4603174603174603,
"eval_dim_512_cosine_precision@5": 0.45714285714285713,
"eval_dim_512_cosine_recall@1": 0.07345848595848595,
"eval_dim_512_cosine_recall@10": 0.5202991452991453,
"eval_dim_512_cosine_recall@3": 0.19656593406593406,
"eval_dim_512_cosine_recall@5": 0.3077686202686203,
"eval_dim_64_cosine_accuracy@1": 0.42857142857142855,
"eval_dim_64_cosine_accuracy@10": 0.6190476190476191,
"eval_dim_64_cosine_accuracy@3": 0.42857142857142855,
"eval_dim_64_cosine_accuracy@5": 0.47619047619047616,
"eval_dim_64_cosine_map@100": 0.5888462989137369,
"eval_dim_64_cosine_mrr@10": 0.45963718820861665,
"eval_dim_64_cosine_ndcg@10": 0.51131642091388,
"eval_dim_64_cosine_precision@1": 0.42857142857142855,
"eval_dim_64_cosine_precision@10": 0.3999999999999999,
"eval_dim_64_cosine_precision@3": 0.42857142857142855,
"eval_dim_64_cosine_precision@5": 0.42857142857142855,
"eval_dim_64_cosine_recall@1": 0.053617216117216114,
"eval_dim_64_cosine_recall@10": 0.5004578754578755,
"eval_dim_64_cosine_recall@3": 0.16085164835164836,
"eval_dim_64_cosine_recall@5": 0.27205433455433453,
"eval_dim_768_cosine_accuracy@1": 0.5238095238095238,
"eval_dim_768_cosine_accuracy@10": 0.6666666666666666,
"eval_dim_768_cosine_accuracy@3": 0.5238095238095238,
"eval_dim_768_cosine_accuracy@5": 0.5714285714285714,
"eval_dim_768_cosine_map@100": 0.67423207909377,
"eval_dim_768_cosine_mrr@10": 0.5480725623582765,
"eval_dim_768_cosine_ndcg@10": 0.5921167294151266,
"eval_dim_768_cosine_precision@1": 0.5238095238095238,
"eval_dim_768_cosine_precision@10": 0.4476190476190477,
"eval_dim_768_cosine_precision@3": 0.5079365079365079,
"eval_dim_768_cosine_precision@5": 0.47619047619047616,
"eval_dim_768_cosine_recall@1": 0.08933150183150182,
"eval_dim_768_cosine_recall@10": 0.5401404151404151,
"eval_dim_768_cosine_recall@3": 0.24418498168498168,
"eval_dim_768_cosine_recall@5": 0.33951465201465203,
"eval_runtime": 6.9723,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.51131642091388,
"eval_steps_per_second": 0.0,
"step": 14
}
],
"logging_steps": 1,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}