{ "best_global_step": 8000, "best_metric": 0.8765376300834912, "best_model_checkpoint": "E:/ModernBERT_finetune_data/models/reranker-ModernBERT-base-s2orc\\checkpoint-8000", "epoch": 0.23087356786239935, "eval_steps": 4000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.885919598279992e-05, "grad_norm": 7.188746452331543, "learning_rate": 0.0, "loss": 1.0682, "step": 1 }, { "epoch": 0.01442959799139996, "grad_norm": 13.780556678771973, "learning_rate": 2.8793998845931915e-06, "loss": 1.1555, "step": 500 }, { "epoch": 0.02885919598279992, "grad_norm": 52.33120346069336, "learning_rate": 5.76457010963647e-06, "loss": 0.7743, "step": 1000 }, { "epoch": 0.04328879397419988, "grad_norm": 2.325308322906494, "learning_rate": 8.649740334679747e-06, "loss": 0.538, "step": 1500 }, { "epoch": 0.05771839196559984, "grad_norm": 7.630313396453857, "learning_rate": 1.1534910559723025e-05, "loss": 0.5771, "step": 2000 }, { "epoch": 0.0721479899569998, "grad_norm": 0.15915359556674957, "learning_rate": 1.4420080784766301e-05, "loss": 0.5345, "step": 2500 }, { "epoch": 0.08657758794839976, "grad_norm": 45.458744049072266, "learning_rate": 1.730525100980958e-05, "loss": 0.4394, "step": 3000 }, { "epoch": 0.10100718593979972, "grad_norm": 1.0640169382095337, "learning_rate": 1.9978835978835978e-05, "loss": 0.4607, "step": 3500 }, { "epoch": 0.11543678393119967, "grad_norm": 0.10893326252698898, "learning_rate": 1.9658168991502328e-05, "loss": 0.3866, "step": 4000 }, { "epoch": 0.11543678393119967, "eval_NanoBEIR_R100_mean_base_map": 0.4895766320756843, "eval_NanoBEIR_R100_mean_base_mrr@10": 0.4775, "eval_NanoBEIR_R100_mean_base_ndcg@10": 0.5404259879670522, "eval_NanoBEIR_R100_mean_map": 0.49520150363958415, "eval_NanoBEIR_R100_mean_mrr@10": 0.48177777777777775, "eval_NanoBEIR_R100_mean_ndcg@10": 0.5468622576263062, "eval_NanoMSMARCO_R100_base_map": 0.4895766320756843, "eval_NanoMSMARCO_R100_base_mrr@10": 0.4775, "eval_NanoMSMARCO_R100_base_ndcg@10": 0.5404259879670522, "eval_NanoMSMARCO_R100_map": 0.49520150363958415, "eval_NanoMSMARCO_R100_mrr@10": 0.48177777777777775, "eval_NanoMSMARCO_R100_ndcg@10": 0.5468622576263062, "eval_runtime": 6161.5831, "eval_s2orc-dev_base_map": 0.7378771185598135, "eval_s2orc-dev_base_mrr@10": 0.7359269841269841, "eval_s2orc-dev_base_ndcg@10": 0.7659559959785154, "eval_s2orc-dev_map": 0.8617319597069597, "eval_s2orc-dev_mrr@10": 0.8613789682539682, "eval_s2orc-dev_ndcg@10": 0.8684642495142619, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5468622576263062, "eval_steps_per_second": 0.0, "step": 4000 }, { "epoch": 0.12986638192259964, "grad_norm": 3.2822253704071045, "learning_rate": 1.9337502004168674e-05, "loss": 0.4222, "step": 4500 }, { "epoch": 0.1442959799139996, "grad_norm": 0.163385272026062, "learning_rate": 1.9016835016835017e-05, "loss": 0.3734, "step": 5000 }, { "epoch": 0.15872557790539957, "grad_norm": 1.1301782131195068, "learning_rate": 1.8696168029501366e-05, "loss": 0.3558, "step": 5500 }, { "epoch": 0.17315517589679952, "grad_norm": 26.550891876220703, "learning_rate": 1.837550104216771e-05, "loss": 0.3968, "step": 6000 }, { "epoch": 0.18758477388819947, "grad_norm": 52.34437942504883, "learning_rate": 1.8054834054834055e-05, "loss": 0.3203, "step": 6500 }, { "epoch": 0.20201437187959945, "grad_norm": 0.1360524594783783, "learning_rate": 1.77341670675004e-05, "loss": 0.3354, "step": 7000 }, { "epoch": 0.2164439698709994, "grad_norm": 0.3569001853466034, "learning_rate": 1.7413500080166747e-05, "loss": 0.3579, "step": 7500 }, { "epoch": 0.23087356786239935, "grad_norm": 0.017899315804243088, "learning_rate": 1.7092833092833094e-05, "loss": 0.3349, "step": 8000 }, { "epoch": 0.23087356786239935, "eval_NanoBEIR_R100_mean_base_map": 0.4895766320756843, "eval_NanoBEIR_R100_mean_base_mrr@10": 0.4775, "eval_NanoBEIR_R100_mean_base_ndcg@10": 0.5404259879670522, "eval_NanoBEIR_R100_mean_map": 0.49410785900248205, "eval_NanoBEIR_R100_mean_mrr@10": 0.482047619047619, "eval_NanoBEIR_R100_mean_ndcg@10": 0.5528612484142966, "eval_NanoMSMARCO_R100_base_map": 0.4895766320756843, "eval_NanoMSMARCO_R100_base_mrr@10": 0.4775, "eval_NanoMSMARCO_R100_base_ndcg@10": 0.5404259879670522, "eval_NanoMSMARCO_R100_map": 0.49410785900248205, "eval_NanoMSMARCO_R100_mrr@10": 0.482047619047619, "eval_NanoMSMARCO_R100_ndcg@10": 0.5528612484142966, "eval_runtime": 4663.3621, "eval_s2orc-dev_base_map": 0.7378771185598135, "eval_s2orc-dev_base_mrr@10": 0.7359269841269841, "eval_s2orc-dev_base_ndcg@10": 0.7659559959785154, "eval_s2orc-dev_map": 0.8712208333333333, "eval_s2orc-dev_mrr@10": 0.871075, "eval_s2orc-dev_ndcg@10": 0.8765376300834912, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5528612484142966, "eval_steps_per_second": 0.0, "step": 8000 } ], "logging_steps": 500, "max_steps": 34651, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }