query_rewriting / trainer_state.json
whoisltd's picture
Upload 12 files
d798d08
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9180411063181935,
"global_step": 23000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.99788547745919e-05,
"loss": 4.119,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.9957709549183795e-05,
"loss": 2.8616,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 4.9936564323775695e-05,
"loss": 2.2364,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 4.991541909836759e-05,
"loss": 2.5701,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.989427387295949e-05,
"loss": 2.2571,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 4.987312864755139e-05,
"loss": 1.9427,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 4.985198342214328e-05,
"loss": 2.2772,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 4.983083819673518e-05,
"loss": 1.9227,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 4.9809692971327073e-05,
"loss": 1.9526,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 4.978854774591897e-05,
"loss": 1.9265,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 4.9767402520510866e-05,
"loss": 1.6983,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 4.9746257295102766e-05,
"loss": 1.9613,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 4.9725112069694666e-05,
"loss": 1.8949,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 4.9703966844286566e-05,
"loss": 1.9434,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 4.968282161887846e-05,
"loss": 1.9571,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 4.966167639347036e-05,
"loss": 1.8398,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 4.964053116806225e-05,
"loss": 1.7601,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 4.961938594265415e-05,
"loss": 1.9149,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 4.9598240717246044e-05,
"loss": 1.9493,
"step": 190
},
{
"epoch": 0.03,
"learning_rate": 4.9577095491837944e-05,
"loss": 1.6848,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 4.955595026642984e-05,
"loss": 2.1215,
"step": 210
},
{
"epoch": 0.03,
"learning_rate": 4.9534805041021744e-05,
"loss": 1.6608,
"step": 220
},
{
"epoch": 0.03,
"learning_rate": 4.951365981561364e-05,
"loss": 2.0586,
"step": 230
},
{
"epoch": 0.03,
"learning_rate": 4.9492514590205537e-05,
"loss": 1.6336,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 4.947136936479743e-05,
"loss": 1.8664,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 4.945022413938933e-05,
"loss": 1.6932,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 4.942907891398122e-05,
"loss": 1.6981,
"step": 270
},
{
"epoch": 0.04,
"learning_rate": 4.940793368857312e-05,
"loss": 1.8146,
"step": 280
},
{
"epoch": 0.04,
"learning_rate": 4.9386788463165015e-05,
"loss": 1.9239,
"step": 290
},
{
"epoch": 0.04,
"learning_rate": 4.9365643237756915e-05,
"loss": 1.2752,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 4.9344498012348815e-05,
"loss": 1.5619,
"step": 310
},
{
"epoch": 0.04,
"learning_rate": 4.9323352786940715e-05,
"loss": 1.548,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 4.930220756153261e-05,
"loss": 1.3221,
"step": 330
},
{
"epoch": 0.04,
"learning_rate": 4.928106233612451e-05,
"loss": 1.7203,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 4.92599171107164e-05,
"loss": 1.6695,
"step": 350
},
{
"epoch": 0.05,
"learning_rate": 4.92387718853083e-05,
"loss": 1.6246,
"step": 360
},
{
"epoch": 0.05,
"learning_rate": 4.921762665990019e-05,
"loss": 1.6295,
"step": 370
},
{
"epoch": 0.05,
"learning_rate": 4.919648143449209e-05,
"loss": 1.7926,
"step": 380
},
{
"epoch": 0.05,
"learning_rate": 4.9175336209083986e-05,
"loss": 1.7954,
"step": 390
},
{
"epoch": 0.05,
"learning_rate": 4.915419098367589e-05,
"loss": 1.6349,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 4.9133045758267786e-05,
"loss": 1.6618,
"step": 410
},
{
"epoch": 0.05,
"learning_rate": 4.9111900532859686e-05,
"loss": 2.0134,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 4.909075530745158e-05,
"loss": 1.7417,
"step": 430
},
{
"epoch": 0.06,
"learning_rate": 4.906961008204348e-05,
"loss": 1.4424,
"step": 440
},
{
"epoch": 0.06,
"learning_rate": 4.904846485663537e-05,
"loss": 1.8978,
"step": 450
},
{
"epoch": 0.06,
"learning_rate": 4.902731963122727e-05,
"loss": 1.604,
"step": 460
},
{
"epoch": 0.06,
"learning_rate": 4.9006174405819164e-05,
"loss": 1.4609,
"step": 470
},
{
"epoch": 0.06,
"learning_rate": 4.8985029180411064e-05,
"loss": 1.6671,
"step": 480
},
{
"epoch": 0.06,
"learning_rate": 4.8963883955002964e-05,
"loss": 1.7155,
"step": 490
},
{
"epoch": 0.06,
"learning_rate": 4.8942738729594864e-05,
"loss": 1.7697,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 4.892159350418676e-05,
"loss": 1.7196,
"step": 510
},
{
"epoch": 0.07,
"learning_rate": 4.8900448278778656e-05,
"loss": 1.5483,
"step": 520
},
{
"epoch": 0.07,
"learning_rate": 4.887930305337055e-05,
"loss": 1.6332,
"step": 530
},
{
"epoch": 0.07,
"learning_rate": 4.885815782796245e-05,
"loss": 1.446,
"step": 540
},
{
"epoch": 0.07,
"learning_rate": 4.883701260255434e-05,
"loss": 1.538,
"step": 550
},
{
"epoch": 0.07,
"learning_rate": 4.881586737714624e-05,
"loss": 1.5494,
"step": 560
},
{
"epoch": 0.07,
"learning_rate": 4.879472215173814e-05,
"loss": 1.4593,
"step": 570
},
{
"epoch": 0.07,
"learning_rate": 4.877357692633004e-05,
"loss": 1.7192,
"step": 580
},
{
"epoch": 0.07,
"learning_rate": 4.8752431700921935e-05,
"loss": 1.6066,
"step": 590
},
{
"epoch": 0.08,
"learning_rate": 4.8731286475513835e-05,
"loss": 1.5205,
"step": 600
},
{
"epoch": 0.08,
"learning_rate": 4.871014125010573e-05,
"loss": 1.2998,
"step": 610
},
{
"epoch": 0.08,
"learning_rate": 4.868899602469763e-05,
"loss": 1.9734,
"step": 620
},
{
"epoch": 0.08,
"learning_rate": 4.866785079928952e-05,
"loss": 1.6018,
"step": 630
},
{
"epoch": 0.08,
"learning_rate": 4.864670557388142e-05,
"loss": 1.4389,
"step": 640
},
{
"epoch": 0.08,
"learning_rate": 4.862556034847331e-05,
"loss": 1.8656,
"step": 650
},
{
"epoch": 0.08,
"learning_rate": 4.860441512306522e-05,
"loss": 1.3714,
"step": 660
},
{
"epoch": 0.09,
"learning_rate": 4.858326989765711e-05,
"loss": 1.5436,
"step": 670
},
{
"epoch": 0.09,
"learning_rate": 4.856212467224901e-05,
"loss": 1.3216,
"step": 680
},
{
"epoch": 0.09,
"learning_rate": 4.8540979446840906e-05,
"loss": 1.5497,
"step": 690
},
{
"epoch": 0.09,
"learning_rate": 4.8519834221432805e-05,
"loss": 1.5157,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 4.84986889960247e-05,
"loss": 1.5119,
"step": 710
},
{
"epoch": 0.09,
"learning_rate": 4.84775437706166e-05,
"loss": 1.5533,
"step": 720
},
{
"epoch": 0.09,
"learning_rate": 4.845639854520849e-05,
"loss": 1.37,
"step": 730
},
{
"epoch": 0.09,
"learning_rate": 4.843525331980039e-05,
"loss": 1.4398,
"step": 740
},
{
"epoch": 0.1,
"learning_rate": 4.841410809439229e-05,
"loss": 1.6296,
"step": 750
},
{
"epoch": 0.1,
"learning_rate": 4.839296286898419e-05,
"loss": 1.425,
"step": 760
},
{
"epoch": 0.1,
"learning_rate": 4.8371817643576084e-05,
"loss": 2.0899,
"step": 770
},
{
"epoch": 0.1,
"learning_rate": 4.8350672418167984e-05,
"loss": 1.8247,
"step": 780
},
{
"epoch": 0.1,
"learning_rate": 4.8329527192759877e-05,
"loss": 1.5741,
"step": 790
},
{
"epoch": 0.1,
"learning_rate": 4.8308381967351776e-05,
"loss": 1.3762,
"step": 800
},
{
"epoch": 0.1,
"learning_rate": 4.828723674194367e-05,
"loss": 1.5321,
"step": 810
},
{
"epoch": 0.1,
"learning_rate": 4.826609151653557e-05,
"loss": 1.2903,
"step": 820
},
{
"epoch": 0.11,
"learning_rate": 4.824494629112746e-05,
"loss": 1.6131,
"step": 830
},
{
"epoch": 0.11,
"learning_rate": 4.822380106571936e-05,
"loss": 1.1472,
"step": 840
},
{
"epoch": 0.11,
"learning_rate": 4.820265584031126e-05,
"loss": 1.0589,
"step": 850
},
{
"epoch": 0.11,
"learning_rate": 4.8181510614903155e-05,
"loss": 1.2361,
"step": 860
},
{
"epoch": 0.11,
"learning_rate": 4.8160365389495055e-05,
"loss": 1.731,
"step": 870
},
{
"epoch": 0.11,
"learning_rate": 4.8139220164086954e-05,
"loss": 1.5959,
"step": 880
},
{
"epoch": 0.11,
"learning_rate": 4.811807493867885e-05,
"loss": 1.6363,
"step": 890
},
{
"epoch": 0.11,
"learning_rate": 4.809692971327075e-05,
"loss": 1.4594,
"step": 900
},
{
"epoch": 0.12,
"learning_rate": 4.807578448786264e-05,
"loss": 1.6428,
"step": 910
},
{
"epoch": 0.12,
"learning_rate": 4.805463926245454e-05,
"loss": 1.6087,
"step": 920
},
{
"epoch": 0.12,
"learning_rate": 4.803349403704644e-05,
"loss": 1.5491,
"step": 930
},
{
"epoch": 0.12,
"learning_rate": 4.801234881163833e-05,
"loss": 1.5106,
"step": 940
},
{
"epoch": 0.12,
"learning_rate": 4.799120358623023e-05,
"loss": 1.4327,
"step": 950
},
{
"epoch": 0.12,
"learning_rate": 4.7970058360822126e-05,
"loss": 1.5017,
"step": 960
},
{
"epoch": 0.12,
"learning_rate": 4.7948913135414025e-05,
"loss": 1.6125,
"step": 970
},
{
"epoch": 0.12,
"learning_rate": 4.792776791000592e-05,
"loss": 1.3695,
"step": 980
},
{
"epoch": 0.13,
"learning_rate": 4.790662268459782e-05,
"loss": 1.4244,
"step": 990
},
{
"epoch": 0.13,
"learning_rate": 4.788547745918972e-05,
"loss": 1.5736,
"step": 1000
},
{
"epoch": 0.13,
"eval_loss": 1.3872387409210205,
"eval_rouge1": 0.7007008454611825,
"eval_rouge2": 0.5580063310997472,
"eval_rougeL": 0.64700008575482,
"eval_rougeLsum": 0.6466631466693737,
"eval_runtime": 363.0129,
"eval_samples_per_second": 9.449,
"eval_steps_per_second": 2.364,
"step": 1000
},
{
"epoch": 0.13,
"learning_rate": 4.786433223378162e-05,
"loss": 1.4998,
"step": 1010
},
{
"epoch": 0.13,
"learning_rate": 4.784318700837351e-05,
"loss": 1.5913,
"step": 1020
},
{
"epoch": 0.13,
"learning_rate": 4.782204178296541e-05,
"loss": 1.2321,
"step": 1030
},
{
"epoch": 0.13,
"learning_rate": 4.7800896557557304e-05,
"loss": 1.3155,
"step": 1040
},
{
"epoch": 0.13,
"learning_rate": 4.7779751332149204e-05,
"loss": 1.2577,
"step": 1050
},
{
"epoch": 0.13,
"learning_rate": 4.7758606106741097e-05,
"loss": 1.2684,
"step": 1060
},
{
"epoch": 0.14,
"learning_rate": 4.7737460881332996e-05,
"loss": 1.1728,
"step": 1070
},
{
"epoch": 0.14,
"learning_rate": 4.771631565592489e-05,
"loss": 1.1564,
"step": 1080
},
{
"epoch": 0.14,
"learning_rate": 4.769517043051679e-05,
"loss": 1.3332,
"step": 1090
},
{
"epoch": 0.14,
"learning_rate": 4.767402520510869e-05,
"loss": 1.7237,
"step": 1100
},
{
"epoch": 0.14,
"learning_rate": 4.765287997970059e-05,
"loss": 1.4625,
"step": 1110
},
{
"epoch": 0.14,
"learning_rate": 4.763173475429248e-05,
"loss": 1.3741,
"step": 1120
},
{
"epoch": 0.14,
"learning_rate": 4.761058952888438e-05,
"loss": 1.4992,
"step": 1130
},
{
"epoch": 0.14,
"learning_rate": 4.7589444303476275e-05,
"loss": 1.4342,
"step": 1140
},
{
"epoch": 0.15,
"learning_rate": 4.7568299078068174e-05,
"loss": 1.2679,
"step": 1150
},
{
"epoch": 0.15,
"learning_rate": 4.754715385266007e-05,
"loss": 1.5773,
"step": 1160
},
{
"epoch": 0.15,
"learning_rate": 4.752600862725197e-05,
"loss": 1.7924,
"step": 1170
},
{
"epoch": 0.15,
"learning_rate": 4.750486340184386e-05,
"loss": 1.3374,
"step": 1180
},
{
"epoch": 0.15,
"learning_rate": 4.748371817643577e-05,
"loss": 1.311,
"step": 1190
},
{
"epoch": 0.15,
"learning_rate": 4.746257295102766e-05,
"loss": 1.3868,
"step": 1200
},
{
"epoch": 0.15,
"learning_rate": 4.744142772561956e-05,
"loss": 1.2327,
"step": 1210
},
{
"epoch": 0.15,
"learning_rate": 4.742028250021145e-05,
"loss": 1.5561,
"step": 1220
},
{
"epoch": 0.16,
"learning_rate": 4.739913727480335e-05,
"loss": 1.4778,
"step": 1230
},
{
"epoch": 0.16,
"learning_rate": 4.7377992049395246e-05,
"loss": 1.6092,
"step": 1240
},
{
"epoch": 0.16,
"learning_rate": 4.7356846823987145e-05,
"loss": 1.2994,
"step": 1250
},
{
"epoch": 0.16,
"learning_rate": 4.733570159857904e-05,
"loss": 1.3535,
"step": 1260
},
{
"epoch": 0.16,
"learning_rate": 4.7314556373170945e-05,
"loss": 1.3241,
"step": 1270
},
{
"epoch": 0.16,
"learning_rate": 4.729341114776284e-05,
"loss": 1.7227,
"step": 1280
},
{
"epoch": 0.16,
"learning_rate": 4.727226592235474e-05,
"loss": 1.2324,
"step": 1290
},
{
"epoch": 0.16,
"learning_rate": 4.725112069694663e-05,
"loss": 1.2739,
"step": 1300
},
{
"epoch": 0.17,
"learning_rate": 4.722997547153853e-05,
"loss": 1.6905,
"step": 1310
},
{
"epoch": 0.17,
"learning_rate": 4.7208830246130424e-05,
"loss": 1.4948,
"step": 1320
},
{
"epoch": 0.17,
"learning_rate": 4.7187685020722323e-05,
"loss": 1.7112,
"step": 1330
},
{
"epoch": 0.17,
"learning_rate": 4.7166539795314216e-05,
"loss": 1.1413,
"step": 1340
},
{
"epoch": 0.17,
"learning_rate": 4.7145394569906116e-05,
"loss": 1.6368,
"step": 1350
},
{
"epoch": 0.17,
"learning_rate": 4.7124249344498016e-05,
"loss": 1.2071,
"step": 1360
},
{
"epoch": 0.17,
"learning_rate": 4.7103104119089916e-05,
"loss": 1.1763,
"step": 1370
},
{
"epoch": 0.18,
"learning_rate": 4.708195889368181e-05,
"loss": 1.3112,
"step": 1380
},
{
"epoch": 0.18,
"learning_rate": 4.706081366827371e-05,
"loss": 1.4092,
"step": 1390
},
{
"epoch": 0.18,
"learning_rate": 4.70396684428656e-05,
"loss": 1.4449,
"step": 1400
},
{
"epoch": 0.18,
"learning_rate": 4.70185232174575e-05,
"loss": 1.2075,
"step": 1410
},
{
"epoch": 0.18,
"learning_rate": 4.6997377992049395e-05,
"loss": 1.5342,
"step": 1420
},
{
"epoch": 0.18,
"learning_rate": 4.6976232766641294e-05,
"loss": 1.3868,
"step": 1430
},
{
"epoch": 0.18,
"learning_rate": 4.695508754123319e-05,
"loss": 1.4211,
"step": 1440
},
{
"epoch": 0.18,
"learning_rate": 4.6933942315825094e-05,
"loss": 1.6929,
"step": 1450
},
{
"epoch": 0.19,
"learning_rate": 4.691279709041699e-05,
"loss": 1.3511,
"step": 1460
},
{
"epoch": 0.19,
"learning_rate": 4.689165186500889e-05,
"loss": 1.311,
"step": 1470
},
{
"epoch": 0.19,
"learning_rate": 4.687050663960078e-05,
"loss": 1.542,
"step": 1480
},
{
"epoch": 0.19,
"learning_rate": 4.684936141419268e-05,
"loss": 1.4153,
"step": 1490
},
{
"epoch": 0.19,
"learning_rate": 4.682821618878457e-05,
"loss": 1.5839,
"step": 1500
},
{
"epoch": 0.19,
"learning_rate": 4.680707096337647e-05,
"loss": 1.3799,
"step": 1510
},
{
"epoch": 0.19,
"learning_rate": 4.6785925737968365e-05,
"loss": 1.4052,
"step": 1520
},
{
"epoch": 0.19,
"learning_rate": 4.6764780512560265e-05,
"loss": 1.0553,
"step": 1530
},
{
"epoch": 0.2,
"learning_rate": 4.6743635287152165e-05,
"loss": 1.3985,
"step": 1540
},
{
"epoch": 0.2,
"learning_rate": 4.6722490061744065e-05,
"loss": 1.5055,
"step": 1550
},
{
"epoch": 0.2,
"learning_rate": 4.670134483633596e-05,
"loss": 1.4759,
"step": 1560
},
{
"epoch": 0.2,
"learning_rate": 4.668019961092786e-05,
"loss": 1.1107,
"step": 1570
},
{
"epoch": 0.2,
"learning_rate": 4.665905438551975e-05,
"loss": 1.5009,
"step": 1580
},
{
"epoch": 0.2,
"learning_rate": 4.663790916011165e-05,
"loss": 1.3026,
"step": 1590
},
{
"epoch": 0.2,
"learning_rate": 4.6616763934703543e-05,
"loss": 1.4231,
"step": 1600
},
{
"epoch": 0.2,
"learning_rate": 4.659561870929544e-05,
"loss": 1.2311,
"step": 1610
},
{
"epoch": 0.21,
"learning_rate": 4.657447348388734e-05,
"loss": 1.3696,
"step": 1620
},
{
"epoch": 0.21,
"learning_rate": 4.655332825847924e-05,
"loss": 1.5319,
"step": 1630
},
{
"epoch": 0.21,
"learning_rate": 4.6532183033071136e-05,
"loss": 1.3288,
"step": 1640
},
{
"epoch": 0.21,
"learning_rate": 4.6511037807663036e-05,
"loss": 1.166,
"step": 1650
},
{
"epoch": 0.21,
"learning_rate": 4.648989258225493e-05,
"loss": 1.2852,
"step": 1660
},
{
"epoch": 0.21,
"learning_rate": 4.646874735684683e-05,
"loss": 1.1772,
"step": 1670
},
{
"epoch": 0.21,
"learning_rate": 4.644760213143872e-05,
"loss": 1.2661,
"step": 1680
},
{
"epoch": 0.21,
"learning_rate": 4.642645690603062e-05,
"loss": 1.5031,
"step": 1690
},
{
"epoch": 0.22,
"learning_rate": 4.6405311680622514e-05,
"loss": 1.4132,
"step": 1700
},
{
"epoch": 0.22,
"learning_rate": 4.6384166455214414e-05,
"loss": 1.1577,
"step": 1710
},
{
"epoch": 0.22,
"learning_rate": 4.6363021229806314e-05,
"loss": 1.4381,
"step": 1720
},
{
"epoch": 0.22,
"learning_rate": 4.634187600439821e-05,
"loss": 1.3283,
"step": 1730
},
{
"epoch": 0.22,
"learning_rate": 4.632073077899011e-05,
"loss": 1.0422,
"step": 1740
},
{
"epoch": 0.22,
"learning_rate": 4.6299585553582007e-05,
"loss": 1.1599,
"step": 1750
},
{
"epoch": 0.22,
"learning_rate": 4.62784403281739e-05,
"loss": 1.3828,
"step": 1760
},
{
"epoch": 0.22,
"learning_rate": 4.62572951027658e-05,
"loss": 1.0285,
"step": 1770
},
{
"epoch": 0.23,
"learning_rate": 4.623614987735769e-05,
"loss": 1.1036,
"step": 1780
},
{
"epoch": 0.23,
"learning_rate": 4.621500465194959e-05,
"loss": 1.082,
"step": 1790
},
{
"epoch": 0.23,
"learning_rate": 4.619385942654149e-05,
"loss": 1.4295,
"step": 1800
},
{
"epoch": 0.23,
"learning_rate": 4.6172714201133385e-05,
"loss": 1.2377,
"step": 1810
},
{
"epoch": 0.23,
"learning_rate": 4.6151568975725285e-05,
"loss": 1.4399,
"step": 1820
},
{
"epoch": 0.23,
"learning_rate": 4.613042375031718e-05,
"loss": 1.2579,
"step": 1830
},
{
"epoch": 0.23,
"learning_rate": 4.610927852490908e-05,
"loss": 1.368,
"step": 1840
},
{
"epoch": 0.23,
"learning_rate": 4.608813329950097e-05,
"loss": 1.1344,
"step": 1850
},
{
"epoch": 0.24,
"learning_rate": 4.606698807409287e-05,
"loss": 1.3799,
"step": 1860
},
{
"epoch": 0.24,
"learning_rate": 4.604584284868477e-05,
"loss": 1.4212,
"step": 1870
},
{
"epoch": 0.24,
"learning_rate": 4.602469762327666e-05,
"loss": 1.4548,
"step": 1880
},
{
"epoch": 0.24,
"learning_rate": 4.600355239786856e-05,
"loss": 1.1689,
"step": 1890
},
{
"epoch": 0.24,
"learning_rate": 4.598240717246046e-05,
"loss": 1.1899,
"step": 1900
},
{
"epoch": 0.24,
"learning_rate": 4.5961261947052356e-05,
"loss": 1.5761,
"step": 1910
},
{
"epoch": 0.24,
"learning_rate": 4.5940116721644256e-05,
"loss": 1.2126,
"step": 1920
},
{
"epoch": 0.24,
"learning_rate": 4.591897149623615e-05,
"loss": 1.2565,
"step": 1930
},
{
"epoch": 0.25,
"learning_rate": 4.589782627082805e-05,
"loss": 1.2161,
"step": 1940
},
{
"epoch": 0.25,
"learning_rate": 4.587668104541994e-05,
"loss": 1.5288,
"step": 1950
},
{
"epoch": 0.25,
"learning_rate": 4.585553582001184e-05,
"loss": 1.2892,
"step": 1960
},
{
"epoch": 0.25,
"learning_rate": 4.583439059460374e-05,
"loss": 1.5078,
"step": 1970
},
{
"epoch": 0.25,
"learning_rate": 4.581324536919564e-05,
"loss": 1.3179,
"step": 1980
},
{
"epoch": 0.25,
"learning_rate": 4.5792100143787534e-05,
"loss": 1.4715,
"step": 1990
},
{
"epoch": 0.25,
"learning_rate": 4.5770954918379434e-05,
"loss": 1.2001,
"step": 2000
},
{
"epoch": 0.25,
"eval_loss": 1.2268859148025513,
"eval_rouge1": 0.7185718642341807,
"eval_rouge2": 0.5833830493154122,
"eval_rougeL": 0.667448346580727,
"eval_rougeLsum": 0.6670542295133559,
"eval_runtime": 364.8519,
"eval_samples_per_second": 9.401,
"eval_steps_per_second": 2.352,
"step": 2000
},
{
"epoch": 0.26,
"learning_rate": 4.574980969297133e-05,
"loss": 1.6869,
"step": 2010
},
{
"epoch": 0.26,
"learning_rate": 4.572866446756323e-05,
"loss": 1.1123,
"step": 2020
},
{
"epoch": 0.26,
"learning_rate": 4.570751924215512e-05,
"loss": 1.1036,
"step": 2030
},
{
"epoch": 0.26,
"learning_rate": 4.568637401674702e-05,
"loss": 1.2797,
"step": 2040
},
{
"epoch": 0.26,
"learning_rate": 4.566522879133891e-05,
"loss": 1.2701,
"step": 2050
},
{
"epoch": 0.26,
"learning_rate": 4.564408356593082e-05,
"loss": 1.315,
"step": 2060
},
{
"epoch": 0.26,
"learning_rate": 4.562293834052271e-05,
"loss": 1.5713,
"step": 2070
},
{
"epoch": 0.26,
"learning_rate": 4.560179311511461e-05,
"loss": 1.3174,
"step": 2080
},
{
"epoch": 0.27,
"learning_rate": 4.5580647889706505e-05,
"loss": 1.2269,
"step": 2090
},
{
"epoch": 0.27,
"learning_rate": 4.5559502664298405e-05,
"loss": 1.0282,
"step": 2100
},
{
"epoch": 0.27,
"learning_rate": 4.55383574388903e-05,
"loss": 1.408,
"step": 2110
},
{
"epoch": 0.27,
"learning_rate": 4.55172122134822e-05,
"loss": 1.0085,
"step": 2120
},
{
"epoch": 0.27,
"learning_rate": 4.549606698807409e-05,
"loss": 1.1287,
"step": 2130
},
{
"epoch": 0.27,
"learning_rate": 4.547492176266599e-05,
"loss": 1.3968,
"step": 2140
},
{
"epoch": 0.27,
"learning_rate": 4.545377653725789e-05,
"loss": 1.5901,
"step": 2150
},
{
"epoch": 0.27,
"learning_rate": 4.543263131184979e-05,
"loss": 1.6727,
"step": 2160
},
{
"epoch": 0.28,
"learning_rate": 4.541148608644168e-05,
"loss": 1.3168,
"step": 2170
},
{
"epoch": 0.28,
"learning_rate": 4.539034086103358e-05,
"loss": 1.1262,
"step": 2180
},
{
"epoch": 0.28,
"learning_rate": 4.5369195635625476e-05,
"loss": 1.2725,
"step": 2190
},
{
"epoch": 0.28,
"learning_rate": 4.5348050410217376e-05,
"loss": 1.0816,
"step": 2200
},
{
"epoch": 0.28,
"learning_rate": 4.532690518480927e-05,
"loss": 1.3865,
"step": 2210
},
{
"epoch": 0.28,
"learning_rate": 4.530575995940117e-05,
"loss": 1.4186,
"step": 2220
},
{
"epoch": 0.28,
"learning_rate": 4.528461473399307e-05,
"loss": 1.1668,
"step": 2230
},
{
"epoch": 0.28,
"learning_rate": 4.526346950858497e-05,
"loss": 1.4782,
"step": 2240
},
{
"epoch": 0.29,
"learning_rate": 4.524232428317686e-05,
"loss": 0.8944,
"step": 2250
},
{
"epoch": 0.29,
"learning_rate": 4.522117905776876e-05,
"loss": 1.5129,
"step": 2260
},
{
"epoch": 0.29,
"learning_rate": 4.5200033832360654e-05,
"loss": 1.0514,
"step": 2270
},
{
"epoch": 0.29,
"learning_rate": 4.5178888606952554e-05,
"loss": 1.0524,
"step": 2280
},
{
"epoch": 0.29,
"learning_rate": 4.515774338154445e-05,
"loss": 1.3021,
"step": 2290
},
{
"epoch": 0.29,
"learning_rate": 4.5136598156136347e-05,
"loss": 1.2488,
"step": 2300
},
{
"epoch": 0.29,
"learning_rate": 4.511545293072824e-05,
"loss": 1.5133,
"step": 2310
},
{
"epoch": 0.29,
"learning_rate": 4.5094307705320146e-05,
"loss": 1.279,
"step": 2320
},
{
"epoch": 0.3,
"learning_rate": 4.507316247991204e-05,
"loss": 1.3462,
"step": 2330
},
{
"epoch": 0.3,
"learning_rate": 4.505201725450394e-05,
"loss": 1.3372,
"step": 2340
},
{
"epoch": 0.3,
"learning_rate": 4.503087202909583e-05,
"loss": 1.1555,
"step": 2350
},
{
"epoch": 0.3,
"learning_rate": 4.500972680368773e-05,
"loss": 1.2203,
"step": 2360
},
{
"epoch": 0.3,
"learning_rate": 4.4988581578279625e-05,
"loss": 1.2851,
"step": 2370
},
{
"epoch": 0.3,
"learning_rate": 4.4967436352871525e-05,
"loss": 1.6565,
"step": 2380
},
{
"epoch": 0.3,
"learning_rate": 4.494629112746342e-05,
"loss": 1.3498,
"step": 2390
},
{
"epoch": 0.3,
"learning_rate": 4.492514590205532e-05,
"loss": 1.2291,
"step": 2400
},
{
"epoch": 0.31,
"learning_rate": 4.490400067664722e-05,
"loss": 0.9249,
"step": 2410
},
{
"epoch": 0.31,
"learning_rate": 4.488285545123912e-05,
"loss": 0.9291,
"step": 2420
},
{
"epoch": 0.31,
"learning_rate": 4.486171022583101e-05,
"loss": 1.0281,
"step": 2430
},
{
"epoch": 0.31,
"learning_rate": 4.484056500042291e-05,
"loss": 1.3287,
"step": 2440
},
{
"epoch": 0.31,
"learning_rate": 4.48194197750148e-05,
"loss": 1.2805,
"step": 2450
},
{
"epoch": 0.31,
"learning_rate": 4.47982745496067e-05,
"loss": 1.2766,
"step": 2460
},
{
"epoch": 0.31,
"learning_rate": 4.4777129324198596e-05,
"loss": 1.0285,
"step": 2470
},
{
"epoch": 0.31,
"learning_rate": 4.4755984098790495e-05,
"loss": 1.3407,
"step": 2480
},
{
"epoch": 0.32,
"learning_rate": 4.473483887338239e-05,
"loss": 1.3557,
"step": 2490
},
{
"epoch": 0.32,
"learning_rate": 4.4713693647974295e-05,
"loss": 1.2884,
"step": 2500
},
{
"epoch": 0.32,
"learning_rate": 4.469254842256619e-05,
"loss": 1.0209,
"step": 2510
},
{
"epoch": 0.32,
"learning_rate": 4.467140319715809e-05,
"loss": 1.3099,
"step": 2520
},
{
"epoch": 0.32,
"learning_rate": 4.465025797174998e-05,
"loss": 1.3711,
"step": 2530
},
{
"epoch": 0.32,
"learning_rate": 4.462911274634188e-05,
"loss": 1.3035,
"step": 2540
},
{
"epoch": 0.32,
"learning_rate": 4.4607967520933774e-05,
"loss": 1.2402,
"step": 2550
},
{
"epoch": 0.32,
"learning_rate": 4.4586822295525674e-05,
"loss": 1.3765,
"step": 2560
},
{
"epoch": 0.33,
"learning_rate": 4.4565677070117567e-05,
"loss": 1.6022,
"step": 2570
},
{
"epoch": 0.33,
"learning_rate": 4.4544531844709466e-05,
"loss": 1.1193,
"step": 2580
},
{
"epoch": 0.33,
"learning_rate": 4.4523386619301366e-05,
"loss": 1.3808,
"step": 2590
},
{
"epoch": 0.33,
"learning_rate": 4.450224139389326e-05,
"loss": 1.3768,
"step": 2600
},
{
"epoch": 0.33,
"learning_rate": 4.448109616848516e-05,
"loss": 1.1366,
"step": 2610
},
{
"epoch": 0.33,
"learning_rate": 4.445995094307706e-05,
"loss": 0.9823,
"step": 2620
},
{
"epoch": 0.33,
"learning_rate": 4.443880571766895e-05,
"loss": 1.2394,
"step": 2630
},
{
"epoch": 0.33,
"learning_rate": 4.441766049226085e-05,
"loss": 0.8321,
"step": 2640
},
{
"epoch": 0.34,
"learning_rate": 4.4396515266852745e-05,
"loss": 1.2037,
"step": 2650
},
{
"epoch": 0.34,
"learning_rate": 4.4375370041444644e-05,
"loss": 1.2235,
"step": 2660
},
{
"epoch": 0.34,
"learning_rate": 4.4354224816036544e-05,
"loss": 1.2421,
"step": 2670
},
{
"epoch": 0.34,
"learning_rate": 4.433307959062844e-05,
"loss": 1.169,
"step": 2680
},
{
"epoch": 0.34,
"learning_rate": 4.431193436522034e-05,
"loss": 1.0794,
"step": 2690
},
{
"epoch": 0.34,
"learning_rate": 4.429078913981223e-05,
"loss": 1.4677,
"step": 2700
},
{
"epoch": 0.34,
"learning_rate": 4.426964391440413e-05,
"loss": 1.3037,
"step": 2710
},
{
"epoch": 0.35,
"learning_rate": 4.424849868899602e-05,
"loss": 1.127,
"step": 2720
},
{
"epoch": 0.35,
"learning_rate": 4.422735346358792e-05,
"loss": 0.9941,
"step": 2730
},
{
"epoch": 0.35,
"learning_rate": 4.420620823817982e-05,
"loss": 1.2387,
"step": 2740
},
{
"epoch": 0.35,
"learning_rate": 4.4185063012771716e-05,
"loss": 1.186,
"step": 2750
},
{
"epoch": 0.35,
"learning_rate": 4.4163917787363615e-05,
"loss": 1.166,
"step": 2760
},
{
"epoch": 0.35,
"learning_rate": 4.4142772561955515e-05,
"loss": 1.2582,
"step": 2770
},
{
"epoch": 0.35,
"learning_rate": 4.412162733654741e-05,
"loss": 1.3659,
"step": 2780
},
{
"epoch": 0.35,
"learning_rate": 4.410048211113931e-05,
"loss": 1.0463,
"step": 2790
},
{
"epoch": 0.36,
"learning_rate": 4.40793368857312e-05,
"loss": 1.1737,
"step": 2800
},
{
"epoch": 0.36,
"learning_rate": 4.40581916603231e-05,
"loss": 1.3999,
"step": 2810
},
{
"epoch": 0.36,
"learning_rate": 4.4037046434914994e-05,
"loss": 1.2653,
"step": 2820
},
{
"epoch": 0.36,
"learning_rate": 4.4015901209506894e-05,
"loss": 1.2465,
"step": 2830
},
{
"epoch": 0.36,
"learning_rate": 4.399475598409879e-05,
"loss": 1.4998,
"step": 2840
},
{
"epoch": 0.36,
"learning_rate": 4.397361075869069e-05,
"loss": 1.2225,
"step": 2850
},
{
"epoch": 0.36,
"learning_rate": 4.3952465533282586e-05,
"loss": 1.4169,
"step": 2860
},
{
"epoch": 0.36,
"learning_rate": 4.3931320307874486e-05,
"loss": 0.9862,
"step": 2870
},
{
"epoch": 0.37,
"learning_rate": 4.391017508246638e-05,
"loss": 1.2153,
"step": 2880
},
{
"epoch": 0.37,
"learning_rate": 4.388902985705828e-05,
"loss": 1.425,
"step": 2890
},
{
"epoch": 0.37,
"learning_rate": 4.386788463165017e-05,
"loss": 1.4068,
"step": 2900
},
{
"epoch": 0.37,
"learning_rate": 4.384673940624207e-05,
"loss": 1.2928,
"step": 2910
},
{
"epoch": 0.37,
"learning_rate": 4.3825594180833965e-05,
"loss": 1.2136,
"step": 2920
},
{
"epoch": 0.37,
"learning_rate": 4.380444895542587e-05,
"loss": 1.217,
"step": 2930
},
{
"epoch": 0.37,
"learning_rate": 4.3783303730017764e-05,
"loss": 1.0467,
"step": 2940
},
{
"epoch": 0.37,
"learning_rate": 4.3762158504609664e-05,
"loss": 1.131,
"step": 2950
},
{
"epoch": 0.38,
"learning_rate": 4.374101327920156e-05,
"loss": 1.4643,
"step": 2960
},
{
"epoch": 0.38,
"learning_rate": 4.371986805379346e-05,
"loss": 1.2543,
"step": 2970
},
{
"epoch": 0.38,
"learning_rate": 4.369872282838535e-05,
"loss": 0.8942,
"step": 2980
},
{
"epoch": 0.38,
"learning_rate": 4.367757760297725e-05,
"loss": 1.3059,
"step": 2990
},
{
"epoch": 0.38,
"learning_rate": 4.365643237756914e-05,
"loss": 1.3845,
"step": 3000
},
{
"epoch": 0.38,
"eval_loss": 1.1656155586242676,
"eval_rouge1": 0.7103872520621619,
"eval_rouge2": 0.5716049546181405,
"eval_rougeL": 0.6540532381728661,
"eval_rougeLsum": 0.6540470372570277,
"eval_runtime": 365.6396,
"eval_samples_per_second": 9.381,
"eval_steps_per_second": 2.347,
"step": 3000
},
{
"epoch": 0.38,
"learning_rate": 4.363528715216104e-05,
"loss": 1.0015,
"step": 3010
},
{
"epoch": 0.38,
"learning_rate": 4.361414192675294e-05,
"loss": 1.2851,
"step": 3020
},
{
"epoch": 0.38,
"learning_rate": 4.359299670134484e-05,
"loss": 0.9601,
"step": 3030
},
{
"epoch": 0.39,
"learning_rate": 4.3571851475936735e-05,
"loss": 1.2771,
"step": 3040
},
{
"epoch": 0.39,
"learning_rate": 4.3550706250528635e-05,
"loss": 1.508,
"step": 3050
},
{
"epoch": 0.39,
"learning_rate": 4.352956102512053e-05,
"loss": 1.2288,
"step": 3060
},
{
"epoch": 0.39,
"learning_rate": 4.350841579971243e-05,
"loss": 1.3549,
"step": 3070
},
{
"epoch": 0.39,
"learning_rate": 4.348727057430432e-05,
"loss": 0.9656,
"step": 3080
},
{
"epoch": 0.39,
"learning_rate": 4.346612534889622e-05,
"loss": 1.5611,
"step": 3090
},
{
"epoch": 0.39,
"learning_rate": 4.3444980123488114e-05,
"loss": 1.4958,
"step": 3100
},
{
"epoch": 0.39,
"learning_rate": 4.342383489808002e-05,
"loss": 1.1626,
"step": 3110
},
{
"epoch": 0.4,
"learning_rate": 4.340268967267191e-05,
"loss": 1.5752,
"step": 3120
},
{
"epoch": 0.4,
"learning_rate": 4.338154444726381e-05,
"loss": 1.1986,
"step": 3130
},
{
"epoch": 0.4,
"learning_rate": 4.3360399221855706e-05,
"loss": 1.3061,
"step": 3140
},
{
"epoch": 0.4,
"learning_rate": 4.3339253996447606e-05,
"loss": 1.3984,
"step": 3150
},
{
"epoch": 0.4,
"learning_rate": 4.33181087710395e-05,
"loss": 1.1725,
"step": 3160
},
{
"epoch": 0.4,
"learning_rate": 4.32969635456314e-05,
"loss": 1.2187,
"step": 3170
},
{
"epoch": 0.4,
"learning_rate": 4.327581832022329e-05,
"loss": 1.1962,
"step": 3180
},
{
"epoch": 0.4,
"learning_rate": 4.325467309481519e-05,
"loss": 1.3778,
"step": 3190
},
{
"epoch": 0.41,
"learning_rate": 4.323352786940709e-05,
"loss": 1.5384,
"step": 3200
},
{
"epoch": 0.41,
"learning_rate": 4.321238264399899e-05,
"loss": 1.1991,
"step": 3210
},
{
"epoch": 0.41,
"learning_rate": 4.3191237418590884e-05,
"loss": 1.0682,
"step": 3220
},
{
"epoch": 0.41,
"learning_rate": 4.3170092193182784e-05,
"loss": 1.2427,
"step": 3230
},
{
"epoch": 0.41,
"learning_rate": 4.314894696777468e-05,
"loss": 1.2299,
"step": 3240
},
{
"epoch": 0.41,
"learning_rate": 4.312780174236658e-05,
"loss": 1.4328,
"step": 3250
},
{
"epoch": 0.41,
"learning_rate": 4.310665651695847e-05,
"loss": 1.0342,
"step": 3260
},
{
"epoch": 0.41,
"learning_rate": 4.308551129155037e-05,
"loss": 1.2081,
"step": 3270
},
{
"epoch": 0.42,
"learning_rate": 4.306436606614227e-05,
"loss": 1.0993,
"step": 3280
},
{
"epoch": 0.42,
"learning_rate": 4.304322084073417e-05,
"loss": 1.1221,
"step": 3290
},
{
"epoch": 0.42,
"learning_rate": 4.302207561532606e-05,
"loss": 1.1849,
"step": 3300
},
{
"epoch": 0.42,
"learning_rate": 4.300093038991796e-05,
"loss": 1.3926,
"step": 3310
},
{
"epoch": 0.42,
"learning_rate": 4.2979785164509855e-05,
"loss": 1.2432,
"step": 3320
},
{
"epoch": 0.42,
"learning_rate": 4.2958639939101755e-05,
"loss": 1.25,
"step": 3330
},
{
"epoch": 0.42,
"learning_rate": 4.293749471369365e-05,
"loss": 1.4832,
"step": 3340
},
{
"epoch": 0.43,
"learning_rate": 4.291634948828555e-05,
"loss": 1.1415,
"step": 3350
},
{
"epoch": 0.43,
"learning_rate": 4.289520426287744e-05,
"loss": 1.5167,
"step": 3360
},
{
"epoch": 0.43,
"learning_rate": 4.287405903746935e-05,
"loss": 1.2952,
"step": 3370
},
{
"epoch": 0.43,
"learning_rate": 4.285291381206124e-05,
"loss": 1.2499,
"step": 3380
},
{
"epoch": 0.43,
"learning_rate": 4.283176858665314e-05,
"loss": 1.2415,
"step": 3390
},
{
"epoch": 0.43,
"learning_rate": 4.281062336124503e-05,
"loss": 1.1083,
"step": 3400
},
{
"epoch": 0.43,
"learning_rate": 4.278947813583693e-05,
"loss": 1.296,
"step": 3410
},
{
"epoch": 0.43,
"learning_rate": 4.2768332910428826e-05,
"loss": 1.1228,
"step": 3420
},
{
"epoch": 0.44,
"learning_rate": 4.2747187685020726e-05,
"loss": 0.8723,
"step": 3430
},
{
"epoch": 0.44,
"learning_rate": 4.272604245961262e-05,
"loss": 1.0063,
"step": 3440
},
{
"epoch": 0.44,
"learning_rate": 4.270489723420452e-05,
"loss": 1.3274,
"step": 3450
},
{
"epoch": 0.44,
"learning_rate": 4.268375200879642e-05,
"loss": 1.1145,
"step": 3460
},
{
"epoch": 0.44,
"learning_rate": 4.266260678338831e-05,
"loss": 1.1475,
"step": 3470
},
{
"epoch": 0.44,
"learning_rate": 4.264146155798021e-05,
"loss": 1.1821,
"step": 3480
},
{
"epoch": 0.44,
"learning_rate": 4.262031633257211e-05,
"loss": 1.4813,
"step": 3490
},
{
"epoch": 0.44,
"learning_rate": 4.2599171107164004e-05,
"loss": 1.4221,
"step": 3500
},
{
"epoch": 0.45,
"learning_rate": 4.2578025881755904e-05,
"loss": 1.1854,
"step": 3510
},
{
"epoch": 0.45,
"learning_rate": 4.25568806563478e-05,
"loss": 1.4606,
"step": 3520
},
{
"epoch": 0.45,
"learning_rate": 4.25357354309397e-05,
"loss": 1.1125,
"step": 3530
},
{
"epoch": 0.45,
"learning_rate": 4.251459020553159e-05,
"loss": 1.0876,
"step": 3540
},
{
"epoch": 0.45,
"learning_rate": 4.249344498012349e-05,
"loss": 1.1404,
"step": 3550
},
{
"epoch": 0.45,
"learning_rate": 4.247229975471539e-05,
"loss": 1.2617,
"step": 3560
},
{
"epoch": 0.45,
"learning_rate": 4.245115452930728e-05,
"loss": 0.9826,
"step": 3570
},
{
"epoch": 0.45,
"learning_rate": 4.243000930389918e-05,
"loss": 1.095,
"step": 3580
},
{
"epoch": 0.46,
"learning_rate": 4.2408864078491075e-05,
"loss": 1.1982,
"step": 3590
},
{
"epoch": 0.46,
"learning_rate": 4.2387718853082975e-05,
"loss": 1.4963,
"step": 3600
},
{
"epoch": 0.46,
"learning_rate": 4.2366573627674875e-05,
"loss": 1.1172,
"step": 3610
},
{
"epoch": 0.46,
"learning_rate": 4.234542840226677e-05,
"loss": 1.3512,
"step": 3620
},
{
"epoch": 0.46,
"learning_rate": 4.232428317685867e-05,
"loss": 1.3979,
"step": 3630
},
{
"epoch": 0.46,
"learning_rate": 4.230313795145057e-05,
"loss": 1.4475,
"step": 3640
},
{
"epoch": 0.46,
"learning_rate": 4.228199272604246e-05,
"loss": 1.6278,
"step": 3650
},
{
"epoch": 0.46,
"learning_rate": 4.226084750063436e-05,
"loss": 1.0196,
"step": 3660
},
{
"epoch": 0.47,
"learning_rate": 4.223970227522625e-05,
"loss": 1.2891,
"step": 3670
},
{
"epoch": 0.47,
"learning_rate": 4.221855704981815e-05,
"loss": 1.4219,
"step": 3680
},
{
"epoch": 0.47,
"learning_rate": 4.2197411824410046e-05,
"loss": 1.2143,
"step": 3690
},
{
"epoch": 0.47,
"learning_rate": 4.2176266599001946e-05,
"loss": 1.3219,
"step": 3700
},
{
"epoch": 0.47,
"learning_rate": 4.215512137359384e-05,
"loss": 1.386,
"step": 3710
},
{
"epoch": 0.47,
"learning_rate": 4.2133976148185745e-05,
"loss": 1.1574,
"step": 3720
},
{
"epoch": 0.47,
"learning_rate": 4.211283092277764e-05,
"loss": 1.0739,
"step": 3730
},
{
"epoch": 0.47,
"learning_rate": 4.209168569736954e-05,
"loss": 1.4641,
"step": 3740
},
{
"epoch": 0.48,
"learning_rate": 4.207054047196143e-05,
"loss": 1.1227,
"step": 3750
},
{
"epoch": 0.48,
"learning_rate": 4.204939524655333e-05,
"loss": 1.2122,
"step": 3760
},
{
"epoch": 0.48,
"learning_rate": 4.2028250021145224e-05,
"loss": 1.1389,
"step": 3770
},
{
"epoch": 0.48,
"learning_rate": 4.2007104795737124e-05,
"loss": 1.1806,
"step": 3780
},
{
"epoch": 0.48,
"learning_rate": 4.198595957032902e-05,
"loss": 1.0389,
"step": 3790
},
{
"epoch": 0.48,
"learning_rate": 4.196481434492092e-05,
"loss": 1.3382,
"step": 3800
},
{
"epoch": 0.48,
"learning_rate": 4.1943669119512817e-05,
"loss": 1.214,
"step": 3810
},
{
"epoch": 0.48,
"learning_rate": 4.1922523894104716e-05,
"loss": 1.1705,
"step": 3820
},
{
"epoch": 0.49,
"learning_rate": 4.190137866869661e-05,
"loss": 1.463,
"step": 3830
},
{
"epoch": 0.49,
"learning_rate": 4.188023344328851e-05,
"loss": 0.9182,
"step": 3840
},
{
"epoch": 0.49,
"learning_rate": 4.18590882178804e-05,
"loss": 1.2773,
"step": 3850
},
{
"epoch": 0.49,
"learning_rate": 4.18379429924723e-05,
"loss": 1.418,
"step": 3860
},
{
"epoch": 0.49,
"learning_rate": 4.1816797767064195e-05,
"loss": 1.137,
"step": 3870
},
{
"epoch": 0.49,
"learning_rate": 4.1795652541656095e-05,
"loss": 1.246,
"step": 3880
},
{
"epoch": 0.49,
"learning_rate": 4.177450731624799e-05,
"loss": 1.2301,
"step": 3890
},
{
"epoch": 0.49,
"learning_rate": 4.1753362090839894e-05,
"loss": 1.2134,
"step": 3900
},
{
"epoch": 0.5,
"learning_rate": 4.173221686543179e-05,
"loss": 1.2914,
"step": 3910
},
{
"epoch": 0.5,
"learning_rate": 4.171107164002369e-05,
"loss": 0.9645,
"step": 3920
},
{
"epoch": 0.5,
"learning_rate": 4.168992641461558e-05,
"loss": 1.134,
"step": 3930
},
{
"epoch": 0.5,
"learning_rate": 4.166878118920748e-05,
"loss": 1.4191,
"step": 3940
},
{
"epoch": 0.5,
"learning_rate": 4.164763596379937e-05,
"loss": 0.8903,
"step": 3950
},
{
"epoch": 0.5,
"learning_rate": 4.162649073839127e-05,
"loss": 0.9996,
"step": 3960
},
{
"epoch": 0.5,
"learning_rate": 4.1605345512983166e-05,
"loss": 0.9504,
"step": 3970
},
{
"epoch": 0.5,
"learning_rate": 4.158420028757507e-05,
"loss": 1.0273,
"step": 3980
},
{
"epoch": 0.51,
"learning_rate": 4.1563055062166965e-05,
"loss": 1.0828,
"step": 3990
},
{
"epoch": 0.51,
"learning_rate": 4.1541909836758865e-05,
"loss": 1.0505,
"step": 4000
},
{
"epoch": 0.51,
"eval_loss": 1.1716909408569336,
"eval_rouge1": 0.7168570392602125,
"eval_rouge2": 0.5883835667996011,
"eval_rougeL": 0.6687971578565992,
"eval_rougeLsum": 0.668390648966664,
"eval_runtime": 359.281,
"eval_samples_per_second": 9.547,
"eval_steps_per_second": 2.388,
"step": 4000
},
{
"epoch": 0.51,
"learning_rate": 4.152076461135076e-05,
"loss": 1.0908,
"step": 4010
},
{
"epoch": 0.51,
"learning_rate": 4.149961938594266e-05,
"loss": 1.1406,
"step": 4020
},
{
"epoch": 0.51,
"learning_rate": 4.147847416053455e-05,
"loss": 1.1533,
"step": 4030
},
{
"epoch": 0.51,
"learning_rate": 4.145732893512645e-05,
"loss": 0.9042,
"step": 4040
},
{
"epoch": 0.51,
"learning_rate": 4.1436183709718344e-05,
"loss": 1.0875,
"step": 4050
},
{
"epoch": 0.52,
"learning_rate": 4.1415038484310244e-05,
"loss": 1.0614,
"step": 4060
},
{
"epoch": 0.52,
"learning_rate": 4.1393893258902144e-05,
"loss": 1.2316,
"step": 4070
},
{
"epoch": 0.52,
"learning_rate": 4.137274803349404e-05,
"loss": 1.3367,
"step": 4080
},
{
"epoch": 0.52,
"learning_rate": 4.1351602808085936e-05,
"loss": 0.7229,
"step": 4090
},
{
"epoch": 0.52,
"learning_rate": 4.1330457582677836e-05,
"loss": 1.2227,
"step": 4100
},
{
"epoch": 0.52,
"learning_rate": 4.130931235726973e-05,
"loss": 1.3507,
"step": 4110
},
{
"epoch": 0.52,
"learning_rate": 4.128816713186163e-05,
"loss": 1.3589,
"step": 4120
},
{
"epoch": 0.52,
"learning_rate": 4.126702190645352e-05,
"loss": 1.0713,
"step": 4130
},
{
"epoch": 0.53,
"learning_rate": 4.124587668104542e-05,
"loss": 1.2676,
"step": 4140
},
{
"epoch": 0.53,
"learning_rate": 4.1224731455637315e-05,
"loss": 1.1963,
"step": 4150
},
{
"epoch": 0.53,
"learning_rate": 4.120358623022922e-05,
"loss": 1.3276,
"step": 4160
},
{
"epoch": 0.53,
"learning_rate": 4.1182441004821114e-05,
"loss": 1.1873,
"step": 4170
},
{
"epoch": 0.53,
"learning_rate": 4.1161295779413014e-05,
"loss": 1.0131,
"step": 4180
},
{
"epoch": 0.53,
"learning_rate": 4.114015055400491e-05,
"loss": 1.227,
"step": 4190
},
{
"epoch": 0.53,
"learning_rate": 4.111900532859681e-05,
"loss": 1.3973,
"step": 4200
},
{
"epoch": 0.53,
"learning_rate": 4.10978601031887e-05,
"loss": 0.999,
"step": 4210
},
{
"epoch": 0.54,
"learning_rate": 4.10767148777806e-05,
"loss": 1.0566,
"step": 4220
},
{
"epoch": 0.54,
"learning_rate": 4.105556965237249e-05,
"loss": 1.149,
"step": 4230
},
{
"epoch": 0.54,
"learning_rate": 4.103442442696439e-05,
"loss": 1.1064,
"step": 4240
},
{
"epoch": 0.54,
"learning_rate": 4.101327920155629e-05,
"loss": 1.1675,
"step": 4250
},
{
"epoch": 0.54,
"learning_rate": 4.099213397614819e-05,
"loss": 1.0845,
"step": 4260
},
{
"epoch": 0.54,
"learning_rate": 4.0970988750740085e-05,
"loss": 0.9967,
"step": 4270
},
{
"epoch": 0.54,
"learning_rate": 4.0949843525331985e-05,
"loss": 1.6117,
"step": 4280
},
{
"epoch": 0.54,
"learning_rate": 4.092869829992388e-05,
"loss": 0.7909,
"step": 4290
},
{
"epoch": 0.55,
"learning_rate": 4.090755307451578e-05,
"loss": 1.2702,
"step": 4300
},
{
"epoch": 0.55,
"learning_rate": 4.088640784910767e-05,
"loss": 1.259,
"step": 4310
},
{
"epoch": 0.55,
"learning_rate": 4.086526262369957e-05,
"loss": 1.0539,
"step": 4320
},
{
"epoch": 0.55,
"learning_rate": 4.084411739829147e-05,
"loss": 1.3923,
"step": 4330
},
{
"epoch": 0.55,
"learning_rate": 4.0822972172883364e-05,
"loss": 1.013,
"step": 4340
},
{
"epoch": 0.55,
"learning_rate": 4.0801826947475263e-05,
"loss": 1.3298,
"step": 4350
},
{
"epoch": 0.55,
"learning_rate": 4.078068172206716e-05,
"loss": 1.1503,
"step": 4360
},
{
"epoch": 0.55,
"learning_rate": 4.0759536496659056e-05,
"loss": 0.8204,
"step": 4370
},
{
"epoch": 0.56,
"learning_rate": 4.0738391271250956e-05,
"loss": 1.1837,
"step": 4380
},
{
"epoch": 0.56,
"learning_rate": 4.071724604584285e-05,
"loss": 1.1871,
"step": 4390
},
{
"epoch": 0.56,
"learning_rate": 4.069610082043475e-05,
"loss": 1.2964,
"step": 4400
},
{
"epoch": 0.56,
"learning_rate": 4.067495559502664e-05,
"loss": 1.179,
"step": 4410
},
{
"epoch": 0.56,
"learning_rate": 4.065381036961854e-05,
"loss": 1.3017,
"step": 4420
},
{
"epoch": 0.56,
"learning_rate": 4.063266514421044e-05,
"loss": 1.0223,
"step": 4430
},
{
"epoch": 0.56,
"learning_rate": 4.0611519918802335e-05,
"loss": 1.0445,
"step": 4440
},
{
"epoch": 0.56,
"learning_rate": 4.0590374693394234e-05,
"loss": 1.2249,
"step": 4450
},
{
"epoch": 0.57,
"learning_rate": 4.056922946798613e-05,
"loss": 1.1205,
"step": 4460
},
{
"epoch": 0.57,
"learning_rate": 4.054808424257803e-05,
"loss": 1.0702,
"step": 4470
},
{
"epoch": 0.57,
"learning_rate": 4.052693901716993e-05,
"loss": 1.2028,
"step": 4480
},
{
"epoch": 0.57,
"learning_rate": 4.050579379176182e-05,
"loss": 1.3294,
"step": 4490
},
{
"epoch": 0.57,
"learning_rate": 4.048464856635372e-05,
"loss": 1.0825,
"step": 4500
},
{
"epoch": 0.57,
"learning_rate": 4.046350334094562e-05,
"loss": 0.9816,
"step": 4510
},
{
"epoch": 0.57,
"learning_rate": 4.044235811553751e-05,
"loss": 1.1467,
"step": 4520
},
{
"epoch": 0.57,
"learning_rate": 4.042121289012941e-05,
"loss": 1.2722,
"step": 4530
},
{
"epoch": 0.58,
"learning_rate": 4.0400067664721305e-05,
"loss": 1.0321,
"step": 4540
},
{
"epoch": 0.58,
"learning_rate": 4.0378922439313205e-05,
"loss": 1.2237,
"step": 4550
},
{
"epoch": 0.58,
"learning_rate": 4.03577772139051e-05,
"loss": 1.1057,
"step": 4560
},
{
"epoch": 0.58,
"learning_rate": 4.0336631988497e-05,
"loss": 0.9961,
"step": 4570
},
{
"epoch": 0.58,
"learning_rate": 4.031548676308889e-05,
"loss": 1.1292,
"step": 4580
},
{
"epoch": 0.58,
"learning_rate": 4.029434153768079e-05,
"loss": 1.1948,
"step": 4590
},
{
"epoch": 0.58,
"learning_rate": 4.027319631227269e-05,
"loss": 1.0209,
"step": 4600
},
{
"epoch": 0.58,
"learning_rate": 4.025205108686459e-05,
"loss": 1.22,
"step": 4610
},
{
"epoch": 0.59,
"learning_rate": 4.0230905861456483e-05,
"loss": 0.9762,
"step": 4620
},
{
"epoch": 0.59,
"learning_rate": 4.020976063604838e-05,
"loss": 1.2507,
"step": 4630
},
{
"epoch": 0.59,
"learning_rate": 4.0188615410640276e-05,
"loss": 1.1556,
"step": 4640
},
{
"epoch": 0.59,
"learning_rate": 4.0167470185232176e-05,
"loss": 0.9193,
"step": 4650
},
{
"epoch": 0.59,
"learning_rate": 4.014632495982407e-05,
"loss": 1.0562,
"step": 4660
},
{
"epoch": 0.59,
"learning_rate": 4.012517973441597e-05,
"loss": 1.1911,
"step": 4670
},
{
"epoch": 0.59,
"learning_rate": 4.010403450900787e-05,
"loss": 1.1291,
"step": 4680
},
{
"epoch": 0.6,
"learning_rate": 4.008288928359977e-05,
"loss": 1.0281,
"step": 4690
},
{
"epoch": 0.6,
"learning_rate": 4.006174405819166e-05,
"loss": 0.9332,
"step": 4700
},
{
"epoch": 0.6,
"learning_rate": 4.004059883278356e-05,
"loss": 1.0526,
"step": 4710
},
{
"epoch": 0.6,
"learning_rate": 4.0019453607375454e-05,
"loss": 1.054,
"step": 4720
},
{
"epoch": 0.6,
"learning_rate": 3.9998308381967354e-05,
"loss": 1.1358,
"step": 4730
},
{
"epoch": 0.6,
"learning_rate": 3.997716315655925e-05,
"loss": 1.1124,
"step": 4740
},
{
"epoch": 0.6,
"learning_rate": 3.995601793115115e-05,
"loss": 1.0921,
"step": 4750
},
{
"epoch": 0.6,
"learning_rate": 3.993487270574304e-05,
"loss": 1.1695,
"step": 4760
},
{
"epoch": 0.61,
"learning_rate": 3.9913727480334947e-05,
"loss": 0.9014,
"step": 4770
},
{
"epoch": 0.61,
"learning_rate": 3.989258225492684e-05,
"loss": 1.2469,
"step": 4780
},
{
"epoch": 0.61,
"learning_rate": 3.987143702951874e-05,
"loss": 1.0836,
"step": 4790
},
{
"epoch": 0.61,
"learning_rate": 3.985029180411063e-05,
"loss": 1.1077,
"step": 4800
},
{
"epoch": 0.61,
"learning_rate": 3.982914657870253e-05,
"loss": 1.0164,
"step": 4810
},
{
"epoch": 0.61,
"learning_rate": 3.9808001353294425e-05,
"loss": 1.0059,
"step": 4820
},
{
"epoch": 0.61,
"learning_rate": 3.9786856127886325e-05,
"loss": 1.4307,
"step": 4830
},
{
"epoch": 0.61,
"learning_rate": 3.976571090247822e-05,
"loss": 1.0341,
"step": 4840
},
{
"epoch": 0.62,
"learning_rate": 3.974456567707012e-05,
"loss": 1.0022,
"step": 4850
},
{
"epoch": 0.62,
"learning_rate": 3.972342045166202e-05,
"loss": 0.9112,
"step": 4860
},
{
"epoch": 0.62,
"learning_rate": 3.970227522625392e-05,
"loss": 1.2257,
"step": 4870
},
{
"epoch": 0.62,
"learning_rate": 3.968113000084581e-05,
"loss": 1.3451,
"step": 4880
},
{
"epoch": 0.62,
"learning_rate": 3.965998477543771e-05,
"loss": 1.2531,
"step": 4890
},
{
"epoch": 0.62,
"learning_rate": 3.96388395500296e-05,
"loss": 1.5206,
"step": 4900
},
{
"epoch": 0.62,
"learning_rate": 3.96176943246215e-05,
"loss": 1.2895,
"step": 4910
},
{
"epoch": 0.62,
"learning_rate": 3.9596549099213396e-05,
"loss": 1.2964,
"step": 4920
},
{
"epoch": 0.63,
"learning_rate": 3.9575403873805296e-05,
"loss": 1.1312,
"step": 4930
},
{
"epoch": 0.63,
"learning_rate": 3.955425864839719e-05,
"loss": 1.2521,
"step": 4940
},
{
"epoch": 0.63,
"learning_rate": 3.9533113422989096e-05,
"loss": 1.041,
"step": 4950
},
{
"epoch": 0.63,
"learning_rate": 3.951196819758099e-05,
"loss": 1.083,
"step": 4960
},
{
"epoch": 0.63,
"learning_rate": 3.949082297217289e-05,
"loss": 1.082,
"step": 4970
},
{
"epoch": 0.63,
"learning_rate": 3.946967774676478e-05,
"loss": 1.3422,
"step": 4980
},
{
"epoch": 0.63,
"learning_rate": 3.944853252135668e-05,
"loss": 1.0976,
"step": 4990
},
{
"epoch": 0.63,
"learning_rate": 3.9427387295948574e-05,
"loss": 1.3953,
"step": 5000
},
{
"epoch": 0.63,
"eval_loss": 1.0670011043548584,
"eval_rouge1": 0.724391070964709,
"eval_rouge2": 0.5921697395009229,
"eval_rougeL": 0.6733938305439098,
"eval_rougeLsum": 0.6733584253479956,
"eval_runtime": 364.4174,
"eval_samples_per_second": 9.412,
"eval_steps_per_second": 2.354,
"step": 5000
},
{
"epoch": 0.64,
"learning_rate": 3.9406242070540474e-05,
"loss": 1.2544,
"step": 5010
},
{
"epoch": 0.64,
"learning_rate": 3.938509684513237e-05,
"loss": 1.3485,
"step": 5020
},
{
"epoch": 0.64,
"learning_rate": 3.9363951619724274e-05,
"loss": 1.1386,
"step": 5030
},
{
"epoch": 0.64,
"learning_rate": 3.934280639431617e-05,
"loss": 1.0313,
"step": 5040
},
{
"epoch": 0.64,
"learning_rate": 3.9321661168908066e-05,
"loss": 0.8897,
"step": 5050
},
{
"epoch": 0.64,
"learning_rate": 3.930051594349996e-05,
"loss": 0.9057,
"step": 5060
},
{
"epoch": 0.64,
"learning_rate": 3.927937071809186e-05,
"loss": 1.0252,
"step": 5070
},
{
"epoch": 0.64,
"learning_rate": 3.925822549268375e-05,
"loss": 0.8871,
"step": 5080
},
{
"epoch": 0.65,
"learning_rate": 3.923708026727565e-05,
"loss": 0.926,
"step": 5090
},
{
"epoch": 0.65,
"learning_rate": 3.9215935041867545e-05,
"loss": 1.1344,
"step": 5100
},
{
"epoch": 0.65,
"learning_rate": 3.9194789816459445e-05,
"loss": 1.0828,
"step": 5110
},
{
"epoch": 0.65,
"learning_rate": 3.9173644591051345e-05,
"loss": 1.1273,
"step": 5120
},
{
"epoch": 0.65,
"learning_rate": 3.9152499365643245e-05,
"loss": 1.1482,
"step": 5130
},
{
"epoch": 0.65,
"learning_rate": 3.913135414023514e-05,
"loss": 0.9836,
"step": 5140
},
{
"epoch": 0.65,
"learning_rate": 3.911020891482704e-05,
"loss": 1.1523,
"step": 5150
},
{
"epoch": 0.65,
"learning_rate": 3.908906368941893e-05,
"loss": 1.11,
"step": 5160
},
{
"epoch": 0.66,
"learning_rate": 3.906791846401083e-05,
"loss": 1.0053,
"step": 5170
},
{
"epoch": 0.66,
"learning_rate": 3.904677323860272e-05,
"loss": 1.0402,
"step": 5180
},
{
"epoch": 0.66,
"learning_rate": 3.902562801319462e-05,
"loss": 1.0544,
"step": 5190
},
{
"epoch": 0.66,
"learning_rate": 3.9004482787786516e-05,
"loss": 0.9412,
"step": 5200
},
{
"epoch": 0.66,
"learning_rate": 3.8983337562378416e-05,
"loss": 1.3404,
"step": 5210
},
{
"epoch": 0.66,
"learning_rate": 3.8962192336970316e-05,
"loss": 1.422,
"step": 5220
},
{
"epoch": 0.66,
"learning_rate": 3.8941047111562215e-05,
"loss": 1.2905,
"step": 5230
},
{
"epoch": 0.66,
"learning_rate": 3.891990188615411e-05,
"loss": 1.2346,
"step": 5240
},
{
"epoch": 0.67,
"learning_rate": 3.889875666074601e-05,
"loss": 1.3506,
"step": 5250
},
{
"epoch": 0.67,
"learning_rate": 3.88776114353379e-05,
"loss": 1.069,
"step": 5260
},
{
"epoch": 0.67,
"learning_rate": 3.88564662099298e-05,
"loss": 0.9655,
"step": 5270
},
{
"epoch": 0.67,
"learning_rate": 3.8835320984521694e-05,
"loss": 1.0573,
"step": 5280
},
{
"epoch": 0.67,
"learning_rate": 3.8814175759113594e-05,
"loss": 0.9175,
"step": 5290
},
{
"epoch": 0.67,
"learning_rate": 3.8793030533705494e-05,
"loss": 1.1635,
"step": 5300
},
{
"epoch": 0.67,
"learning_rate": 3.877188530829739e-05,
"loss": 1.0855,
"step": 5310
},
{
"epoch": 0.67,
"learning_rate": 3.8750740082889287e-05,
"loss": 1.3718,
"step": 5320
},
{
"epoch": 0.68,
"learning_rate": 3.872959485748118e-05,
"loss": 1.0953,
"step": 5330
},
{
"epoch": 0.68,
"learning_rate": 3.870844963207308e-05,
"loss": 1.0007,
"step": 5340
},
{
"epoch": 0.68,
"learning_rate": 3.868730440666498e-05,
"loss": 1.0583,
"step": 5350
},
{
"epoch": 0.68,
"learning_rate": 3.866615918125687e-05,
"loss": 1.061,
"step": 5360
},
{
"epoch": 0.68,
"learning_rate": 3.864501395584877e-05,
"loss": 1.4168,
"step": 5370
},
{
"epoch": 0.68,
"learning_rate": 3.862386873044067e-05,
"loss": 1.1249,
"step": 5380
},
{
"epoch": 0.68,
"learning_rate": 3.8602723505032565e-05,
"loss": 1.0632,
"step": 5390
},
{
"epoch": 0.69,
"learning_rate": 3.8581578279624465e-05,
"loss": 1.2256,
"step": 5400
},
{
"epoch": 0.69,
"learning_rate": 3.856043305421636e-05,
"loss": 1.0893,
"step": 5410
},
{
"epoch": 0.69,
"learning_rate": 3.853928782880826e-05,
"loss": 1.1871,
"step": 5420
},
{
"epoch": 0.69,
"learning_rate": 3.851814260340015e-05,
"loss": 1.0979,
"step": 5430
},
{
"epoch": 0.69,
"learning_rate": 3.849699737799205e-05,
"loss": 1.1223,
"step": 5440
},
{
"epoch": 0.69,
"learning_rate": 3.847585215258394e-05,
"loss": 0.9904,
"step": 5450
},
{
"epoch": 0.69,
"learning_rate": 3.845470692717584e-05,
"loss": 1.1959,
"step": 5460
},
{
"epoch": 0.69,
"learning_rate": 3.843356170176774e-05,
"loss": 1.1353,
"step": 5470
},
{
"epoch": 0.7,
"learning_rate": 3.841241647635964e-05,
"loss": 0.793,
"step": 5480
},
{
"epoch": 0.7,
"learning_rate": 3.8391271250951536e-05,
"loss": 0.9614,
"step": 5490
},
{
"epoch": 0.7,
"learning_rate": 3.8370126025543435e-05,
"loss": 1.0416,
"step": 5500
},
{
"epoch": 0.7,
"learning_rate": 3.834898080013533e-05,
"loss": 1.237,
"step": 5510
},
{
"epoch": 0.7,
"learning_rate": 3.832783557472723e-05,
"loss": 1.0482,
"step": 5520
},
{
"epoch": 0.7,
"learning_rate": 3.830669034931912e-05,
"loss": 1.3116,
"step": 5530
},
{
"epoch": 0.7,
"learning_rate": 3.828554512391102e-05,
"loss": 1.1759,
"step": 5540
},
{
"epoch": 0.7,
"learning_rate": 3.8264399898502914e-05,
"loss": 1.3059,
"step": 5550
},
{
"epoch": 0.71,
"learning_rate": 3.824325467309482e-05,
"loss": 1.4175,
"step": 5560
},
{
"epoch": 0.71,
"learning_rate": 3.8222109447686714e-05,
"loss": 0.9319,
"step": 5570
},
{
"epoch": 0.71,
"learning_rate": 3.8200964222278614e-05,
"loss": 0.9898,
"step": 5580
},
{
"epoch": 0.71,
"learning_rate": 3.8179818996870507e-05,
"loss": 0.989,
"step": 5590
},
{
"epoch": 0.71,
"learning_rate": 3.8158673771462406e-05,
"loss": 1.3181,
"step": 5600
},
{
"epoch": 0.71,
"learning_rate": 3.81375285460543e-05,
"loss": 1.2316,
"step": 5610
},
{
"epoch": 0.71,
"learning_rate": 3.81163833206462e-05,
"loss": 1.0285,
"step": 5620
},
{
"epoch": 0.71,
"learning_rate": 3.809523809523809e-05,
"loss": 1.2776,
"step": 5630
},
{
"epoch": 0.72,
"learning_rate": 3.807409286982999e-05,
"loss": 1.081,
"step": 5640
},
{
"epoch": 0.72,
"learning_rate": 3.805294764442189e-05,
"loss": 1.1667,
"step": 5650
},
{
"epoch": 0.72,
"learning_rate": 3.803180241901379e-05,
"loss": 1.0676,
"step": 5660
},
{
"epoch": 0.72,
"learning_rate": 3.8010657193605685e-05,
"loss": 1.0905,
"step": 5670
},
{
"epoch": 0.72,
"learning_rate": 3.7989511968197584e-05,
"loss": 0.9536,
"step": 5680
},
{
"epoch": 0.72,
"learning_rate": 3.796836674278948e-05,
"loss": 0.9615,
"step": 5690
},
{
"epoch": 0.72,
"learning_rate": 3.794722151738138e-05,
"loss": 1.1687,
"step": 5700
},
{
"epoch": 0.72,
"learning_rate": 3.792607629197327e-05,
"loss": 1.2227,
"step": 5710
},
{
"epoch": 0.73,
"learning_rate": 3.790493106656517e-05,
"loss": 0.9113,
"step": 5720
},
{
"epoch": 0.73,
"learning_rate": 3.788378584115707e-05,
"loss": 0.8698,
"step": 5730
},
{
"epoch": 0.73,
"learning_rate": 3.786264061574897e-05,
"loss": 1.1083,
"step": 5740
},
{
"epoch": 0.73,
"learning_rate": 3.784149539034086e-05,
"loss": 1.0659,
"step": 5750
},
{
"epoch": 0.73,
"learning_rate": 3.782035016493276e-05,
"loss": 1.0655,
"step": 5760
},
{
"epoch": 0.73,
"learning_rate": 3.7799204939524656e-05,
"loss": 1.1985,
"step": 5770
},
{
"epoch": 0.73,
"learning_rate": 3.7778059714116555e-05,
"loss": 1.0939,
"step": 5780
},
{
"epoch": 0.73,
"learning_rate": 3.775691448870845e-05,
"loss": 1.0749,
"step": 5790
},
{
"epoch": 0.74,
"learning_rate": 3.773576926330035e-05,
"loss": 1.1781,
"step": 5800
},
{
"epoch": 0.74,
"learning_rate": 3.771462403789224e-05,
"loss": 1.0971,
"step": 5810
},
{
"epoch": 0.74,
"learning_rate": 3.769347881248415e-05,
"loss": 1.0722,
"step": 5820
},
{
"epoch": 0.74,
"learning_rate": 3.767233358707604e-05,
"loss": 1.1972,
"step": 5830
},
{
"epoch": 0.74,
"learning_rate": 3.765118836166794e-05,
"loss": 0.8502,
"step": 5840
},
{
"epoch": 0.74,
"learning_rate": 3.7630043136259834e-05,
"loss": 1.1942,
"step": 5850
},
{
"epoch": 0.74,
"learning_rate": 3.7608897910851733e-05,
"loss": 1.1279,
"step": 5860
},
{
"epoch": 0.74,
"learning_rate": 3.7587752685443626e-05,
"loss": 1.1517,
"step": 5870
},
{
"epoch": 0.75,
"learning_rate": 3.7566607460035526e-05,
"loss": 1.2352,
"step": 5880
},
{
"epoch": 0.75,
"learning_rate": 3.754546223462742e-05,
"loss": 0.7588,
"step": 5890
},
{
"epoch": 0.75,
"learning_rate": 3.752431700921932e-05,
"loss": 1.1386,
"step": 5900
},
{
"epoch": 0.75,
"learning_rate": 3.750317178381122e-05,
"loss": 1.0729,
"step": 5910
},
{
"epoch": 0.75,
"learning_rate": 3.748202655840312e-05,
"loss": 1.1586,
"step": 5920
},
{
"epoch": 0.75,
"learning_rate": 3.746088133299501e-05,
"loss": 1.0868,
"step": 5930
},
{
"epoch": 0.75,
"learning_rate": 3.743973610758691e-05,
"loss": 1.1428,
"step": 5940
},
{
"epoch": 0.75,
"learning_rate": 3.7418590882178805e-05,
"loss": 0.9644,
"step": 5950
},
{
"epoch": 0.76,
"learning_rate": 3.7397445656770704e-05,
"loss": 1.2591,
"step": 5960
},
{
"epoch": 0.76,
"learning_rate": 3.73763004313626e-05,
"loss": 0.7665,
"step": 5970
},
{
"epoch": 0.76,
"learning_rate": 3.73551552059545e-05,
"loss": 1.1558,
"step": 5980
},
{
"epoch": 0.76,
"learning_rate": 3.733400998054639e-05,
"loss": 0.8359,
"step": 5990
},
{
"epoch": 0.76,
"learning_rate": 3.73128647551383e-05,
"loss": 0.9234,
"step": 6000
},
{
"epoch": 0.76,
"eval_loss": 1.1246055364608765,
"eval_rouge1": 0.7311888767709918,
"eval_rouge2": 0.6011218380441006,
"eval_rougeL": 0.6839981025502901,
"eval_rougeLsum": 0.6838567052847109,
"eval_runtime": 361.6632,
"eval_samples_per_second": 9.484,
"eval_steps_per_second": 2.372,
"step": 6000
},
{
"epoch": 0.76,
"learning_rate": 3.729171952973019e-05,
"loss": 1.2362,
"step": 6010
},
{
"epoch": 0.76,
"learning_rate": 3.727057430432209e-05,
"loss": 1.1178,
"step": 6020
},
{
"epoch": 0.77,
"learning_rate": 3.724942907891398e-05,
"loss": 1.1162,
"step": 6030
},
{
"epoch": 0.77,
"learning_rate": 3.722828385350588e-05,
"loss": 1.1241,
"step": 6040
},
{
"epoch": 0.77,
"learning_rate": 3.7207138628097775e-05,
"loss": 1.1855,
"step": 6050
},
{
"epoch": 0.77,
"learning_rate": 3.7185993402689675e-05,
"loss": 1.2081,
"step": 6060
},
{
"epoch": 0.77,
"learning_rate": 3.716484817728157e-05,
"loss": 1.0326,
"step": 6070
},
{
"epoch": 0.77,
"learning_rate": 3.714370295187347e-05,
"loss": 1.3032,
"step": 6080
},
{
"epoch": 0.77,
"learning_rate": 3.712255772646537e-05,
"loss": 0.8773,
"step": 6090
},
{
"epoch": 0.77,
"learning_rate": 3.710141250105727e-05,
"loss": 1.2036,
"step": 6100
},
{
"epoch": 0.78,
"learning_rate": 3.708026727564916e-05,
"loss": 1.1022,
"step": 6110
},
{
"epoch": 0.78,
"learning_rate": 3.705912205024106e-05,
"loss": 1.2184,
"step": 6120
},
{
"epoch": 0.78,
"learning_rate": 3.7037976824832953e-05,
"loss": 1.1782,
"step": 6130
},
{
"epoch": 0.78,
"learning_rate": 3.701683159942485e-05,
"loss": 1.0862,
"step": 6140
},
{
"epoch": 0.78,
"learning_rate": 3.6995686374016746e-05,
"loss": 0.8611,
"step": 6150
},
{
"epoch": 0.78,
"learning_rate": 3.6974541148608646e-05,
"loss": 1.0678,
"step": 6160
},
{
"epoch": 0.78,
"learning_rate": 3.6953395923200546e-05,
"loss": 1.2023,
"step": 6170
},
{
"epoch": 0.78,
"learning_rate": 3.693225069779244e-05,
"loss": 1.2114,
"step": 6180
},
{
"epoch": 0.79,
"learning_rate": 3.691110547238434e-05,
"loss": 1.2083,
"step": 6190
},
{
"epoch": 0.79,
"learning_rate": 3.688996024697623e-05,
"loss": 1.244,
"step": 6200
},
{
"epoch": 0.79,
"learning_rate": 3.686881502156813e-05,
"loss": 0.816,
"step": 6210
},
{
"epoch": 0.79,
"learning_rate": 3.684766979616003e-05,
"loss": 0.9887,
"step": 6220
},
{
"epoch": 0.79,
"learning_rate": 3.6826524570751924e-05,
"loss": 1.0372,
"step": 6230
},
{
"epoch": 0.79,
"learning_rate": 3.6805379345343824e-05,
"loss": 1.1094,
"step": 6240
},
{
"epoch": 0.79,
"learning_rate": 3.678423411993572e-05,
"loss": 1.1861,
"step": 6250
},
{
"epoch": 0.79,
"learning_rate": 3.676308889452762e-05,
"loss": 1.0627,
"step": 6260
},
{
"epoch": 0.8,
"learning_rate": 3.674194366911952e-05,
"loss": 1.1862,
"step": 6270
},
{
"epoch": 0.8,
"learning_rate": 3.672079844371141e-05,
"loss": 0.9385,
"step": 6280
},
{
"epoch": 0.8,
"learning_rate": 3.669965321830331e-05,
"loss": 1.0943,
"step": 6290
},
{
"epoch": 0.8,
"learning_rate": 3.66785079928952e-05,
"loss": 1.2006,
"step": 6300
},
{
"epoch": 0.8,
"learning_rate": 3.66573627674871e-05,
"loss": 1.2325,
"step": 6310
},
{
"epoch": 0.8,
"learning_rate": 3.6636217542078995e-05,
"loss": 1.0163,
"step": 6320
},
{
"epoch": 0.8,
"learning_rate": 3.6615072316670895e-05,
"loss": 1.0171,
"step": 6330
},
{
"epoch": 0.8,
"learning_rate": 3.6593927091262795e-05,
"loss": 0.8783,
"step": 6340
},
{
"epoch": 0.81,
"learning_rate": 3.6572781865854695e-05,
"loss": 0.9803,
"step": 6350
},
{
"epoch": 0.81,
"learning_rate": 3.655163664044659e-05,
"loss": 1.4375,
"step": 6360
},
{
"epoch": 0.81,
"learning_rate": 3.653049141503849e-05,
"loss": 1.0984,
"step": 6370
},
{
"epoch": 0.81,
"learning_rate": 3.650934618963038e-05,
"loss": 0.7408,
"step": 6380
},
{
"epoch": 0.81,
"learning_rate": 3.648820096422228e-05,
"loss": 0.9033,
"step": 6390
},
{
"epoch": 0.81,
"learning_rate": 3.6467055738814174e-05,
"loss": 1.2054,
"step": 6400
},
{
"epoch": 0.81,
"learning_rate": 3.644591051340607e-05,
"loss": 0.9976,
"step": 6410
},
{
"epoch": 0.81,
"learning_rate": 3.6424765287997966e-05,
"loss": 1.198,
"step": 6420
},
{
"epoch": 0.82,
"learning_rate": 3.640362006258987e-05,
"loss": 1.2638,
"step": 6430
},
{
"epoch": 0.82,
"learning_rate": 3.6382474837181766e-05,
"loss": 1.1577,
"step": 6440
},
{
"epoch": 0.82,
"learning_rate": 3.6361329611773666e-05,
"loss": 1.0408,
"step": 6450
},
{
"epoch": 0.82,
"learning_rate": 3.634018438636556e-05,
"loss": 0.9932,
"step": 6460
},
{
"epoch": 0.82,
"learning_rate": 3.631903916095746e-05,
"loss": 0.9057,
"step": 6470
},
{
"epoch": 0.82,
"learning_rate": 3.629789393554935e-05,
"loss": 1.0643,
"step": 6480
},
{
"epoch": 0.82,
"learning_rate": 3.627674871014125e-05,
"loss": 0.9496,
"step": 6490
},
{
"epoch": 0.82,
"learning_rate": 3.6255603484733144e-05,
"loss": 1.1414,
"step": 6500
},
{
"epoch": 0.83,
"learning_rate": 3.6234458259325044e-05,
"loss": 1.2274,
"step": 6510
},
{
"epoch": 0.83,
"learning_rate": 3.6213313033916944e-05,
"loss": 1.1571,
"step": 6520
},
{
"epoch": 0.83,
"learning_rate": 3.6192167808508844e-05,
"loss": 1.0121,
"step": 6530
},
{
"epoch": 0.83,
"learning_rate": 3.617102258310074e-05,
"loss": 0.7772,
"step": 6540
},
{
"epoch": 0.83,
"learning_rate": 3.614987735769264e-05,
"loss": 1.0713,
"step": 6550
},
{
"epoch": 0.83,
"learning_rate": 3.612873213228453e-05,
"loss": 1.0171,
"step": 6560
},
{
"epoch": 0.83,
"learning_rate": 3.610758690687643e-05,
"loss": 0.9217,
"step": 6570
},
{
"epoch": 0.83,
"learning_rate": 3.608644168146832e-05,
"loss": 1.0288,
"step": 6580
},
{
"epoch": 0.84,
"learning_rate": 3.606529645606022e-05,
"loss": 1.2451,
"step": 6590
},
{
"epoch": 0.84,
"learning_rate": 3.6044151230652115e-05,
"loss": 1.0372,
"step": 6600
},
{
"epoch": 0.84,
"learning_rate": 3.602300600524402e-05,
"loss": 1.2195,
"step": 6610
},
{
"epoch": 0.84,
"learning_rate": 3.6001860779835915e-05,
"loss": 1.3035,
"step": 6620
},
{
"epoch": 0.84,
"learning_rate": 3.5980715554427815e-05,
"loss": 0.8825,
"step": 6630
},
{
"epoch": 0.84,
"learning_rate": 3.595957032901971e-05,
"loss": 1.0959,
"step": 6640
},
{
"epoch": 0.84,
"learning_rate": 3.593842510361161e-05,
"loss": 1.14,
"step": 6650
},
{
"epoch": 0.84,
"learning_rate": 3.59172798782035e-05,
"loss": 0.8082,
"step": 6660
},
{
"epoch": 0.85,
"learning_rate": 3.58961346527954e-05,
"loss": 1.2626,
"step": 6670
},
{
"epoch": 0.85,
"learning_rate": 3.5874989427387293e-05,
"loss": 1.0167,
"step": 6680
},
{
"epoch": 0.85,
"learning_rate": 3.58538442019792e-05,
"loss": 0.9778,
"step": 6690
},
{
"epoch": 0.85,
"learning_rate": 3.583269897657109e-05,
"loss": 1.0101,
"step": 6700
},
{
"epoch": 0.85,
"learning_rate": 3.581155375116299e-05,
"loss": 0.9337,
"step": 6710
},
{
"epoch": 0.85,
"learning_rate": 3.5790408525754886e-05,
"loss": 1.0006,
"step": 6720
},
{
"epoch": 0.85,
"learning_rate": 3.5769263300346786e-05,
"loss": 1.0035,
"step": 6730
},
{
"epoch": 0.86,
"learning_rate": 3.574811807493868e-05,
"loss": 1.0181,
"step": 6740
},
{
"epoch": 0.86,
"learning_rate": 3.572697284953058e-05,
"loss": 1.1426,
"step": 6750
},
{
"epoch": 0.86,
"learning_rate": 3.570582762412247e-05,
"loss": 0.9948,
"step": 6760
},
{
"epoch": 0.86,
"learning_rate": 3.568468239871437e-05,
"loss": 0.8106,
"step": 6770
},
{
"epoch": 0.86,
"learning_rate": 3.566353717330627e-05,
"loss": 1.1669,
"step": 6780
},
{
"epoch": 0.86,
"learning_rate": 3.564239194789817e-05,
"loss": 1.0257,
"step": 6790
},
{
"epoch": 0.86,
"learning_rate": 3.5621246722490064e-05,
"loss": 0.9017,
"step": 6800
},
{
"epoch": 0.86,
"learning_rate": 3.5600101497081964e-05,
"loss": 1.0271,
"step": 6810
},
{
"epoch": 0.87,
"learning_rate": 3.557895627167386e-05,
"loss": 1.1189,
"step": 6820
},
{
"epoch": 0.87,
"learning_rate": 3.5557811046265757e-05,
"loss": 1.2997,
"step": 6830
},
{
"epoch": 0.87,
"learning_rate": 3.553666582085765e-05,
"loss": 0.9353,
"step": 6840
},
{
"epoch": 0.87,
"learning_rate": 3.551552059544955e-05,
"loss": 1.0409,
"step": 6850
},
{
"epoch": 0.87,
"learning_rate": 3.549437537004144e-05,
"loss": 0.9513,
"step": 6860
},
{
"epoch": 0.87,
"learning_rate": 3.547323014463335e-05,
"loss": 0.9113,
"step": 6870
},
{
"epoch": 0.87,
"learning_rate": 3.545208491922524e-05,
"loss": 0.9661,
"step": 6880
},
{
"epoch": 0.87,
"learning_rate": 3.543093969381714e-05,
"loss": 1.1896,
"step": 6890
},
{
"epoch": 0.88,
"learning_rate": 3.5409794468409035e-05,
"loss": 1.322,
"step": 6900
},
{
"epoch": 0.88,
"learning_rate": 3.5388649243000935e-05,
"loss": 1.0227,
"step": 6910
},
{
"epoch": 0.88,
"learning_rate": 3.536750401759283e-05,
"loss": 1.2631,
"step": 6920
},
{
"epoch": 0.88,
"learning_rate": 3.534635879218473e-05,
"loss": 1.0922,
"step": 6930
},
{
"epoch": 0.88,
"learning_rate": 3.532521356677662e-05,
"loss": 1.0135,
"step": 6940
},
{
"epoch": 0.88,
"learning_rate": 3.530406834136852e-05,
"loss": 1.2211,
"step": 6950
},
{
"epoch": 0.88,
"learning_rate": 3.528292311596042e-05,
"loss": 1.0378,
"step": 6960
},
{
"epoch": 0.88,
"learning_rate": 3.526177789055232e-05,
"loss": 1.2492,
"step": 6970
},
{
"epoch": 0.89,
"learning_rate": 3.524063266514421e-05,
"loss": 1.1058,
"step": 6980
},
{
"epoch": 0.89,
"learning_rate": 3.521948743973611e-05,
"loss": 1.0254,
"step": 6990
},
{
"epoch": 0.89,
"learning_rate": 3.5198342214328006e-05,
"loss": 1.3153,
"step": 7000
},
{
"epoch": 0.89,
"eval_loss": 1.023769497871399,
"eval_rouge1": 0.7203427116183512,
"eval_rouge2": 0.5832242674645857,
"eval_rougeL": 0.6666913344996883,
"eval_rougeLsum": 0.6663478294050371,
"eval_runtime": 364.0536,
"eval_samples_per_second": 9.422,
"eval_steps_per_second": 2.357,
"step": 7000
},
{
"epoch": 0.89,
"learning_rate": 3.5177196988919905e-05,
"loss": 1.1939,
"step": 7010
},
{
"epoch": 0.89,
"learning_rate": 3.51560517635118e-05,
"loss": 0.8688,
"step": 7020
},
{
"epoch": 0.89,
"learning_rate": 3.51349065381037e-05,
"loss": 1.2643,
"step": 7030
},
{
"epoch": 0.89,
"learning_rate": 3.51137613126956e-05,
"loss": 0.9855,
"step": 7040
},
{
"epoch": 0.89,
"learning_rate": 3.509261608728749e-05,
"loss": 0.8776,
"step": 7050
},
{
"epoch": 0.9,
"learning_rate": 3.507147086187939e-05,
"loss": 1.0196,
"step": 7060
},
{
"epoch": 0.9,
"learning_rate": 3.5050325636471284e-05,
"loss": 0.9841,
"step": 7070
},
{
"epoch": 0.9,
"learning_rate": 3.5029180411063184e-05,
"loss": 1.2096,
"step": 7080
},
{
"epoch": 0.9,
"learning_rate": 3.5008035185655084e-05,
"loss": 0.9965,
"step": 7090
},
{
"epoch": 0.9,
"learning_rate": 3.4986889960246977e-05,
"loss": 1.3095,
"step": 7100
},
{
"epoch": 0.9,
"learning_rate": 3.4965744734838876e-05,
"loss": 1.0811,
"step": 7110
},
{
"epoch": 0.9,
"learning_rate": 3.494459950943077e-05,
"loss": 0.9324,
"step": 7120
},
{
"epoch": 0.9,
"learning_rate": 3.492345428402267e-05,
"loss": 1.2518,
"step": 7130
},
{
"epoch": 0.91,
"learning_rate": 3.490230905861457e-05,
"loss": 1.5894,
"step": 7140
},
{
"epoch": 0.91,
"learning_rate": 3.488116383320646e-05,
"loss": 0.9089,
"step": 7150
},
{
"epoch": 0.91,
"learning_rate": 3.486001860779836e-05,
"loss": 0.7328,
"step": 7160
},
{
"epoch": 0.91,
"learning_rate": 3.4838873382390255e-05,
"loss": 0.7632,
"step": 7170
},
{
"epoch": 0.91,
"learning_rate": 3.4817728156982155e-05,
"loss": 1.0252,
"step": 7180
},
{
"epoch": 0.91,
"learning_rate": 3.479658293157405e-05,
"loss": 0.8877,
"step": 7190
},
{
"epoch": 0.91,
"learning_rate": 3.477543770616595e-05,
"loss": 1.1118,
"step": 7200
},
{
"epoch": 0.91,
"learning_rate": 3.475429248075785e-05,
"loss": 1.2551,
"step": 7210
},
{
"epoch": 0.92,
"learning_rate": 3.473314725534975e-05,
"loss": 0.9686,
"step": 7220
},
{
"epoch": 0.92,
"learning_rate": 3.471200202994164e-05,
"loss": 1.005,
"step": 7230
},
{
"epoch": 0.92,
"learning_rate": 3.469085680453354e-05,
"loss": 0.8196,
"step": 7240
},
{
"epoch": 0.92,
"learning_rate": 3.466971157912543e-05,
"loss": 1.1002,
"step": 7250
},
{
"epoch": 0.92,
"learning_rate": 3.464856635371733e-05,
"loss": 0.9108,
"step": 7260
},
{
"epoch": 0.92,
"learning_rate": 3.4627421128309226e-05,
"loss": 1.2738,
"step": 7270
},
{
"epoch": 0.92,
"learning_rate": 3.4606275902901126e-05,
"loss": 1.1258,
"step": 7280
},
{
"epoch": 0.92,
"learning_rate": 3.458513067749302e-05,
"loss": 1.0987,
"step": 7290
},
{
"epoch": 0.93,
"learning_rate": 3.456398545208492e-05,
"loss": 0.7407,
"step": 7300
},
{
"epoch": 0.93,
"learning_rate": 3.454284022667682e-05,
"loss": 0.9334,
"step": 7310
},
{
"epoch": 0.93,
"learning_rate": 3.452169500126872e-05,
"loss": 1.0585,
"step": 7320
},
{
"epoch": 0.93,
"learning_rate": 3.450054977586061e-05,
"loss": 1.1465,
"step": 7330
},
{
"epoch": 0.93,
"learning_rate": 3.447940455045251e-05,
"loss": 0.6161,
"step": 7340
},
{
"epoch": 0.93,
"learning_rate": 3.4458259325044404e-05,
"loss": 0.8161,
"step": 7350
},
{
"epoch": 0.93,
"learning_rate": 3.4437114099636304e-05,
"loss": 1.0717,
"step": 7360
},
{
"epoch": 0.94,
"learning_rate": 3.44159688742282e-05,
"loss": 0.8769,
"step": 7370
},
{
"epoch": 0.94,
"learning_rate": 3.4394823648820096e-05,
"loss": 1.2077,
"step": 7380
},
{
"epoch": 0.94,
"learning_rate": 3.4373678423411996e-05,
"loss": 1.0268,
"step": 7390
},
{
"epoch": 0.94,
"learning_rate": 3.4352533198003896e-05,
"loss": 0.8368,
"step": 7400
},
{
"epoch": 0.94,
"learning_rate": 3.433138797259579e-05,
"loss": 1.0471,
"step": 7410
},
{
"epoch": 0.94,
"learning_rate": 3.431024274718769e-05,
"loss": 0.9955,
"step": 7420
},
{
"epoch": 0.94,
"learning_rate": 3.428909752177958e-05,
"loss": 1.0865,
"step": 7430
},
{
"epoch": 0.94,
"learning_rate": 3.426795229637148e-05,
"loss": 1.0317,
"step": 7440
},
{
"epoch": 0.95,
"learning_rate": 3.4246807070963375e-05,
"loss": 1.1116,
"step": 7450
},
{
"epoch": 0.95,
"learning_rate": 3.4225661845555275e-05,
"loss": 1.0226,
"step": 7460
},
{
"epoch": 0.95,
"learning_rate": 3.420451662014717e-05,
"loss": 0.9561,
"step": 7470
},
{
"epoch": 0.95,
"learning_rate": 3.4183371394739074e-05,
"loss": 1.2086,
"step": 7480
},
{
"epoch": 0.95,
"learning_rate": 3.416222616933097e-05,
"loss": 1.3664,
"step": 7490
},
{
"epoch": 0.95,
"learning_rate": 3.414108094392287e-05,
"loss": 1.1278,
"step": 7500
},
{
"epoch": 0.95,
"learning_rate": 3.411993571851476e-05,
"loss": 1.026,
"step": 7510
},
{
"epoch": 0.95,
"learning_rate": 3.409879049310666e-05,
"loss": 0.8413,
"step": 7520
},
{
"epoch": 0.96,
"learning_rate": 3.407764526769855e-05,
"loss": 0.9132,
"step": 7530
},
{
"epoch": 0.96,
"learning_rate": 3.405650004229045e-05,
"loss": 1.1291,
"step": 7540
},
{
"epoch": 0.96,
"learning_rate": 3.4035354816882346e-05,
"loss": 1.0274,
"step": 7550
},
{
"epoch": 0.96,
"learning_rate": 3.4014209591474245e-05,
"loss": 1.0399,
"step": 7560
},
{
"epoch": 0.96,
"learning_rate": 3.3993064366066145e-05,
"loss": 1.1936,
"step": 7570
},
{
"epoch": 0.96,
"learning_rate": 3.3971919140658045e-05,
"loss": 1.246,
"step": 7580
},
{
"epoch": 0.96,
"learning_rate": 3.395077391524994e-05,
"loss": 1.3023,
"step": 7590
},
{
"epoch": 0.96,
"learning_rate": 3.392962868984184e-05,
"loss": 1.2733,
"step": 7600
},
{
"epoch": 0.97,
"learning_rate": 3.390848346443373e-05,
"loss": 1.0049,
"step": 7610
},
{
"epoch": 0.97,
"learning_rate": 3.388733823902563e-05,
"loss": 1.1676,
"step": 7620
},
{
"epoch": 0.97,
"learning_rate": 3.3866193013617524e-05,
"loss": 1.113,
"step": 7630
},
{
"epoch": 0.97,
"learning_rate": 3.3845047788209423e-05,
"loss": 0.9613,
"step": 7640
},
{
"epoch": 0.97,
"learning_rate": 3.3823902562801316e-05,
"loss": 0.8953,
"step": 7650
},
{
"epoch": 0.97,
"learning_rate": 3.380275733739322e-05,
"loss": 0.9144,
"step": 7660
},
{
"epoch": 0.97,
"learning_rate": 3.3781612111985116e-05,
"loss": 1.041,
"step": 7670
},
{
"epoch": 0.97,
"learning_rate": 3.3760466886577016e-05,
"loss": 1.1457,
"step": 7680
},
{
"epoch": 0.98,
"learning_rate": 3.373932166116891e-05,
"loss": 0.6395,
"step": 7690
},
{
"epoch": 0.98,
"learning_rate": 3.371817643576081e-05,
"loss": 0.78,
"step": 7700
},
{
"epoch": 0.98,
"learning_rate": 3.36970312103527e-05,
"loss": 0.9073,
"step": 7710
},
{
"epoch": 0.98,
"learning_rate": 3.36758859849446e-05,
"loss": 1.2017,
"step": 7720
},
{
"epoch": 0.98,
"learning_rate": 3.3654740759536495e-05,
"loss": 0.8813,
"step": 7730
},
{
"epoch": 0.98,
"learning_rate": 3.36335955341284e-05,
"loss": 1.0339,
"step": 7740
},
{
"epoch": 0.98,
"learning_rate": 3.3612450308720294e-05,
"loss": 1.0385,
"step": 7750
},
{
"epoch": 0.98,
"learning_rate": 3.3591305083312194e-05,
"loss": 0.9066,
"step": 7760
},
{
"epoch": 0.99,
"learning_rate": 3.357015985790409e-05,
"loss": 0.9785,
"step": 7770
},
{
"epoch": 0.99,
"learning_rate": 3.354901463249599e-05,
"loss": 0.9517,
"step": 7780
},
{
"epoch": 0.99,
"learning_rate": 3.352786940708788e-05,
"loss": 1.2143,
"step": 7790
},
{
"epoch": 0.99,
"learning_rate": 3.350672418167978e-05,
"loss": 0.8782,
"step": 7800
},
{
"epoch": 0.99,
"learning_rate": 3.348557895627167e-05,
"loss": 1.0278,
"step": 7810
},
{
"epoch": 0.99,
"learning_rate": 3.346443373086357e-05,
"loss": 0.8431,
"step": 7820
},
{
"epoch": 0.99,
"learning_rate": 3.344328850545547e-05,
"loss": 0.9073,
"step": 7830
},
{
"epoch": 0.99,
"learning_rate": 3.342214328004737e-05,
"loss": 0.9408,
"step": 7840
},
{
"epoch": 1.0,
"learning_rate": 3.3400998054639265e-05,
"loss": 1.0336,
"step": 7850
},
{
"epoch": 1.0,
"learning_rate": 3.3379852829231165e-05,
"loss": 1.0826,
"step": 7860
},
{
"epoch": 1.0,
"learning_rate": 3.335870760382306e-05,
"loss": 1.1778,
"step": 7870
},
{
"epoch": 1.0,
"learning_rate": 3.333756237841496e-05,
"loss": 0.8249,
"step": 7880
},
{
"epoch": 1.0,
"learning_rate": 3.331641715300685e-05,
"loss": 0.8889,
"step": 7890
},
{
"epoch": 1.0,
"learning_rate": 3.329527192759875e-05,
"loss": 0.704,
"step": 7900
},
{
"epoch": 1.0,
"learning_rate": 3.3274126702190644e-05,
"loss": 0.8796,
"step": 7910
},
{
"epoch": 1.0,
"learning_rate": 3.325298147678254e-05,
"loss": 0.9676,
"step": 7920
},
{
"epoch": 1.01,
"learning_rate": 3.323183625137444e-05,
"loss": 0.9408,
"step": 7930
},
{
"epoch": 1.01,
"learning_rate": 3.3210691025966336e-05,
"loss": 0.8577,
"step": 7940
},
{
"epoch": 1.01,
"learning_rate": 3.3189545800558236e-05,
"loss": 0.7594,
"step": 7950
},
{
"epoch": 1.01,
"learning_rate": 3.3168400575150136e-05,
"loss": 0.7654,
"step": 7960
},
{
"epoch": 1.01,
"learning_rate": 3.314725534974203e-05,
"loss": 0.8417,
"step": 7970
},
{
"epoch": 1.01,
"learning_rate": 3.312611012433393e-05,
"loss": 1.0401,
"step": 7980
},
{
"epoch": 1.01,
"learning_rate": 3.310496489892582e-05,
"loss": 0.8122,
"step": 7990
},
{
"epoch": 1.01,
"learning_rate": 3.308381967351772e-05,
"loss": 0.849,
"step": 8000
},
{
"epoch": 1.01,
"eval_loss": 1.062867283821106,
"eval_rouge1": 0.7333394898999004,
"eval_rouge2": 0.6040552154047258,
"eval_rougeL": 0.6821425462151816,
"eval_rougeLsum": 0.6820007292215543,
"eval_runtime": 366.1827,
"eval_samples_per_second": 9.367,
"eval_steps_per_second": 2.343,
"step": 8000
},
{
"epoch": 1.02,
"learning_rate": 3.306267444810962e-05,
"loss": 0.8715,
"step": 8010
},
{
"epoch": 1.02,
"learning_rate": 3.3041529222701514e-05,
"loss": 0.8833,
"step": 8020
},
{
"epoch": 1.02,
"learning_rate": 3.3020383997293414e-05,
"loss": 0.6727,
"step": 8030
},
{
"epoch": 1.02,
"learning_rate": 3.299923877188531e-05,
"loss": 0.8676,
"step": 8040
},
{
"epoch": 1.02,
"learning_rate": 3.297809354647721e-05,
"loss": 0.8652,
"step": 8050
},
{
"epoch": 1.02,
"learning_rate": 3.29569483210691e-05,
"loss": 0.7491,
"step": 8060
},
{
"epoch": 1.02,
"learning_rate": 3.2935803095661e-05,
"loss": 0.8906,
"step": 8070
},
{
"epoch": 1.03,
"learning_rate": 3.29146578702529e-05,
"loss": 0.776,
"step": 8080
},
{
"epoch": 1.03,
"learning_rate": 3.28935126448448e-05,
"loss": 0.8261,
"step": 8090
},
{
"epoch": 1.03,
"learning_rate": 3.287236741943669e-05,
"loss": 0.8681,
"step": 8100
},
{
"epoch": 1.03,
"learning_rate": 3.285122219402859e-05,
"loss": 0.8034,
"step": 8110
},
{
"epoch": 1.03,
"learning_rate": 3.2830076968620485e-05,
"loss": 0.9061,
"step": 8120
},
{
"epoch": 1.03,
"learning_rate": 3.2808931743212385e-05,
"loss": 0.6463,
"step": 8130
},
{
"epoch": 1.03,
"learning_rate": 3.278778651780428e-05,
"loss": 1.0844,
"step": 8140
},
{
"epoch": 1.03,
"learning_rate": 3.276664129239618e-05,
"loss": 0.7775,
"step": 8150
},
{
"epoch": 1.04,
"learning_rate": 3.274549606698807e-05,
"loss": 0.7389,
"step": 8160
},
{
"epoch": 1.04,
"learning_rate": 3.272435084157997e-05,
"loss": 1.099,
"step": 8170
},
{
"epoch": 1.04,
"learning_rate": 3.270320561617187e-05,
"loss": 0.6539,
"step": 8180
},
{
"epoch": 1.04,
"learning_rate": 3.268206039076377e-05,
"loss": 0.6092,
"step": 8190
},
{
"epoch": 1.04,
"learning_rate": 3.266091516535566e-05,
"loss": 0.5186,
"step": 8200
},
{
"epoch": 1.04,
"learning_rate": 3.263976993994756e-05,
"loss": 0.7842,
"step": 8210
},
{
"epoch": 1.04,
"learning_rate": 3.2618624714539456e-05,
"loss": 0.5038,
"step": 8220
},
{
"epoch": 1.04,
"learning_rate": 3.2597479489131356e-05,
"loss": 0.8065,
"step": 8230
},
{
"epoch": 1.05,
"learning_rate": 3.257633426372325e-05,
"loss": 0.9201,
"step": 8240
},
{
"epoch": 1.05,
"learning_rate": 3.255518903831515e-05,
"loss": 0.5929,
"step": 8250
},
{
"epoch": 1.05,
"learning_rate": 3.253404381290704e-05,
"loss": 0.7071,
"step": 8260
},
{
"epoch": 1.05,
"learning_rate": 3.251289858749895e-05,
"loss": 0.6853,
"step": 8270
},
{
"epoch": 1.05,
"learning_rate": 3.249175336209084e-05,
"loss": 0.862,
"step": 8280
},
{
"epoch": 1.05,
"learning_rate": 3.247060813668274e-05,
"loss": 0.9971,
"step": 8290
},
{
"epoch": 1.05,
"learning_rate": 3.2449462911274634e-05,
"loss": 0.8883,
"step": 8300
},
{
"epoch": 1.05,
"learning_rate": 3.2428317685866534e-05,
"loss": 0.7143,
"step": 8310
},
{
"epoch": 1.06,
"learning_rate": 3.240717246045843e-05,
"loss": 0.7519,
"step": 8320
},
{
"epoch": 1.06,
"learning_rate": 3.238602723505033e-05,
"loss": 0.6689,
"step": 8330
},
{
"epoch": 1.06,
"learning_rate": 3.236488200964222e-05,
"loss": 0.8966,
"step": 8340
},
{
"epoch": 1.06,
"learning_rate": 3.234373678423412e-05,
"loss": 0.8244,
"step": 8350
},
{
"epoch": 1.06,
"learning_rate": 3.232259155882602e-05,
"loss": 0.9893,
"step": 8360
},
{
"epoch": 1.06,
"learning_rate": 3.230144633341792e-05,
"loss": 0.7112,
"step": 8370
},
{
"epoch": 1.06,
"learning_rate": 3.228030110800981e-05,
"loss": 0.7914,
"step": 8380
},
{
"epoch": 1.06,
"learning_rate": 3.225915588260171e-05,
"loss": 0.6896,
"step": 8390
},
{
"epoch": 1.07,
"learning_rate": 3.2238010657193605e-05,
"loss": 0.9948,
"step": 8400
},
{
"epoch": 1.07,
"learning_rate": 3.2216865431785505e-05,
"loss": 0.6249,
"step": 8410
},
{
"epoch": 1.07,
"learning_rate": 3.21957202063774e-05,
"loss": 1.0178,
"step": 8420
},
{
"epoch": 1.07,
"learning_rate": 3.21745749809693e-05,
"loss": 0.9013,
"step": 8430
},
{
"epoch": 1.07,
"learning_rate": 3.21534297555612e-05,
"loss": 0.9126,
"step": 8440
},
{
"epoch": 1.07,
"learning_rate": 3.21322845301531e-05,
"loss": 0.7775,
"step": 8450
},
{
"epoch": 1.07,
"learning_rate": 3.211113930474499e-05,
"loss": 0.683,
"step": 8460
},
{
"epoch": 1.07,
"learning_rate": 3.208999407933689e-05,
"loss": 0.5967,
"step": 8470
},
{
"epoch": 1.08,
"learning_rate": 3.206884885392878e-05,
"loss": 0.6415,
"step": 8480
},
{
"epoch": 1.08,
"learning_rate": 3.204770362852068e-05,
"loss": 0.8251,
"step": 8490
},
{
"epoch": 1.08,
"learning_rate": 3.2026558403112576e-05,
"loss": 0.813,
"step": 8500
},
{
"epoch": 1.08,
"learning_rate": 3.2005413177704476e-05,
"loss": 0.8713,
"step": 8510
},
{
"epoch": 1.08,
"learning_rate": 3.198426795229637e-05,
"loss": 0.8182,
"step": 8520
},
{
"epoch": 1.08,
"learning_rate": 3.1963122726888275e-05,
"loss": 0.7671,
"step": 8530
},
{
"epoch": 1.08,
"learning_rate": 3.194197750148017e-05,
"loss": 0.7793,
"step": 8540
},
{
"epoch": 1.08,
"learning_rate": 3.192083227607207e-05,
"loss": 0.7322,
"step": 8550
},
{
"epoch": 1.09,
"learning_rate": 3.189968705066396e-05,
"loss": 0.6728,
"step": 8560
},
{
"epoch": 1.09,
"learning_rate": 3.187854182525586e-05,
"loss": 0.8608,
"step": 8570
},
{
"epoch": 1.09,
"learning_rate": 3.1857396599847754e-05,
"loss": 0.8026,
"step": 8580
},
{
"epoch": 1.09,
"learning_rate": 3.1836251374439654e-05,
"loss": 0.6852,
"step": 8590
},
{
"epoch": 1.09,
"learning_rate": 3.181510614903155e-05,
"loss": 0.7871,
"step": 8600
},
{
"epoch": 1.09,
"learning_rate": 3.1793960923623447e-05,
"loss": 0.7305,
"step": 8610
},
{
"epoch": 1.09,
"learning_rate": 3.1772815698215346e-05,
"loss": 1.0173,
"step": 8620
},
{
"epoch": 1.09,
"learning_rate": 3.1751670472807246e-05,
"loss": 0.7881,
"step": 8630
},
{
"epoch": 1.1,
"learning_rate": 3.173052524739914e-05,
"loss": 0.739,
"step": 8640
},
{
"epoch": 1.1,
"learning_rate": 3.170938002199104e-05,
"loss": 0.8105,
"step": 8650
},
{
"epoch": 1.1,
"learning_rate": 3.168823479658293e-05,
"loss": 0.7205,
"step": 8660
},
{
"epoch": 1.1,
"learning_rate": 3.166708957117483e-05,
"loss": 0.9918,
"step": 8670
},
{
"epoch": 1.1,
"learning_rate": 3.1645944345766725e-05,
"loss": 0.791,
"step": 8680
},
{
"epoch": 1.1,
"learning_rate": 3.1624799120358625e-05,
"loss": 0.9505,
"step": 8690
},
{
"epoch": 1.1,
"learning_rate": 3.160365389495052e-05,
"loss": 0.8394,
"step": 8700
},
{
"epoch": 1.11,
"learning_rate": 3.1582508669542424e-05,
"loss": 0.7609,
"step": 8710
},
{
"epoch": 1.11,
"learning_rate": 3.156136344413432e-05,
"loss": 0.9069,
"step": 8720
},
{
"epoch": 1.11,
"learning_rate": 3.154021821872622e-05,
"loss": 0.9528,
"step": 8730
},
{
"epoch": 1.11,
"learning_rate": 3.151907299331811e-05,
"loss": 0.9294,
"step": 8740
},
{
"epoch": 1.11,
"learning_rate": 3.149792776791001e-05,
"loss": 0.8322,
"step": 8750
},
{
"epoch": 1.11,
"learning_rate": 3.14767825425019e-05,
"loss": 0.799,
"step": 8760
},
{
"epoch": 1.11,
"learning_rate": 3.14556373170938e-05,
"loss": 0.6785,
"step": 8770
},
{
"epoch": 1.11,
"learning_rate": 3.1434492091685696e-05,
"loss": 0.94,
"step": 8780
},
{
"epoch": 1.12,
"learning_rate": 3.1413346866277596e-05,
"loss": 0.864,
"step": 8790
},
{
"epoch": 1.12,
"learning_rate": 3.1392201640869495e-05,
"loss": 0.6228,
"step": 8800
},
{
"epoch": 1.12,
"learning_rate": 3.137105641546139e-05,
"loss": 0.5613,
"step": 8810
},
{
"epoch": 1.12,
"learning_rate": 3.134991119005329e-05,
"loss": 0.8236,
"step": 8820
},
{
"epoch": 1.12,
"learning_rate": 3.132876596464519e-05,
"loss": 0.8864,
"step": 8830
},
{
"epoch": 1.12,
"learning_rate": 3.130762073923708e-05,
"loss": 0.5407,
"step": 8840
},
{
"epoch": 1.12,
"learning_rate": 3.128647551382898e-05,
"loss": 0.9236,
"step": 8850
},
{
"epoch": 1.12,
"learning_rate": 3.1265330288420874e-05,
"loss": 0.7576,
"step": 8860
},
{
"epoch": 1.13,
"learning_rate": 3.1244185063012774e-05,
"loss": 1.0343,
"step": 8870
},
{
"epoch": 1.13,
"learning_rate": 3.1223039837604673e-05,
"loss": 0.7048,
"step": 8880
},
{
"epoch": 1.13,
"learning_rate": 3.1201894612196566e-05,
"loss": 0.8502,
"step": 8890
},
{
"epoch": 1.13,
"learning_rate": 3.1180749386788466e-05,
"loss": 0.7094,
"step": 8900
},
{
"epoch": 1.13,
"learning_rate": 3.115960416138036e-05,
"loss": 0.525,
"step": 8910
},
{
"epoch": 1.13,
"learning_rate": 3.113845893597226e-05,
"loss": 0.7978,
"step": 8920
},
{
"epoch": 1.13,
"learning_rate": 3.111731371056415e-05,
"loss": 0.7002,
"step": 8930
},
{
"epoch": 1.13,
"learning_rate": 3.109616848515605e-05,
"loss": 0.8357,
"step": 8940
},
{
"epoch": 1.14,
"learning_rate": 3.107502325974795e-05,
"loss": 0.8647,
"step": 8950
},
{
"epoch": 1.14,
"learning_rate": 3.1053878034339845e-05,
"loss": 0.8646,
"step": 8960
},
{
"epoch": 1.14,
"learning_rate": 3.1032732808931744e-05,
"loss": 0.647,
"step": 8970
},
{
"epoch": 1.14,
"learning_rate": 3.1011587583523644e-05,
"loss": 1.0719,
"step": 8980
},
{
"epoch": 1.14,
"learning_rate": 3.099044235811554e-05,
"loss": 0.8311,
"step": 8990
},
{
"epoch": 1.14,
"learning_rate": 3.096929713270744e-05,
"loss": 0.7486,
"step": 9000
},
{
"epoch": 1.14,
"eval_loss": 1.0198091268539429,
"eval_rouge1": 0.7246906604157454,
"eval_rouge2": 0.5971172937796357,
"eval_rougeL": 0.6748792486790958,
"eval_rougeLsum": 0.674855350648949,
"eval_runtime": 365.4234,
"eval_samples_per_second": 9.386,
"eval_steps_per_second": 2.348,
"step": 9000
},
{
"epoch": 1.14,
"learning_rate": 3.094815190729933e-05,
"loss": 0.7394,
"step": 9010
},
{
"epoch": 1.14,
"learning_rate": 3.092700668189123e-05,
"loss": 0.8173,
"step": 9020
},
{
"epoch": 1.15,
"learning_rate": 3.090586145648312e-05,
"loss": 0.5526,
"step": 9030
},
{
"epoch": 1.15,
"learning_rate": 3.088471623107502e-05,
"loss": 0.8865,
"step": 9040
},
{
"epoch": 1.15,
"learning_rate": 3.0863571005666916e-05,
"loss": 0.6841,
"step": 9050
},
{
"epoch": 1.15,
"learning_rate": 3.084242578025882e-05,
"loss": 0.7907,
"step": 9060
},
{
"epoch": 1.15,
"learning_rate": 3.0821280554850715e-05,
"loss": 0.772,
"step": 9070
},
{
"epoch": 1.15,
"learning_rate": 3.0800135329442615e-05,
"loss": 0.9852,
"step": 9080
},
{
"epoch": 1.15,
"learning_rate": 3.077899010403451e-05,
"loss": 0.9018,
"step": 9090
},
{
"epoch": 1.15,
"learning_rate": 3.075784487862641e-05,
"loss": 0.8043,
"step": 9100
},
{
"epoch": 1.16,
"learning_rate": 3.07366996532183e-05,
"loss": 0.9834,
"step": 9110
},
{
"epoch": 1.16,
"learning_rate": 3.07155544278102e-05,
"loss": 0.6715,
"step": 9120
},
{
"epoch": 1.16,
"learning_rate": 3.0694409202402094e-05,
"loss": 0.714,
"step": 9130
},
{
"epoch": 1.16,
"learning_rate": 3.0673263976994e-05,
"loss": 0.6918,
"step": 9140
},
{
"epoch": 1.16,
"learning_rate": 3.0652118751585893e-05,
"loss": 0.9178,
"step": 9150
},
{
"epoch": 1.16,
"learning_rate": 3.063097352617779e-05,
"loss": 0.597,
"step": 9160
},
{
"epoch": 1.16,
"learning_rate": 3.0609828300769686e-05,
"loss": 0.5675,
"step": 9170
},
{
"epoch": 1.16,
"learning_rate": 3.0588683075361586e-05,
"loss": 0.6968,
"step": 9180
},
{
"epoch": 1.17,
"learning_rate": 3.056753784995348e-05,
"loss": 0.6922,
"step": 9190
},
{
"epoch": 1.17,
"learning_rate": 3.054639262454538e-05,
"loss": 0.7895,
"step": 9200
},
{
"epoch": 1.17,
"learning_rate": 3.052524739913727e-05,
"loss": 0.8627,
"step": 9210
},
{
"epoch": 1.17,
"learning_rate": 3.0504102173729172e-05,
"loss": 0.6498,
"step": 9220
},
{
"epoch": 1.17,
"learning_rate": 3.048295694832107e-05,
"loss": 0.8706,
"step": 9230
},
{
"epoch": 1.17,
"learning_rate": 3.0461811722912968e-05,
"loss": 0.601,
"step": 9240
},
{
"epoch": 1.17,
"learning_rate": 3.0440666497504868e-05,
"loss": 1.0145,
"step": 9250
},
{
"epoch": 1.17,
"learning_rate": 3.0419521272096764e-05,
"loss": 0.6547,
"step": 9260
},
{
"epoch": 1.18,
"learning_rate": 3.039837604668866e-05,
"loss": 0.7728,
"step": 9270
},
{
"epoch": 1.18,
"learning_rate": 3.0377230821280557e-05,
"loss": 0.7126,
"step": 9280
},
{
"epoch": 1.18,
"learning_rate": 3.0356085595872453e-05,
"loss": 0.7631,
"step": 9290
},
{
"epoch": 1.18,
"learning_rate": 3.033494037046435e-05,
"loss": 0.6819,
"step": 9300
},
{
"epoch": 1.18,
"learning_rate": 3.0313795145056246e-05,
"loss": 0.7614,
"step": 9310
},
{
"epoch": 1.18,
"learning_rate": 3.0292649919648146e-05,
"loss": 0.7835,
"step": 9320
},
{
"epoch": 1.18,
"learning_rate": 3.0271504694240042e-05,
"loss": 0.7103,
"step": 9330
},
{
"epoch": 1.18,
"learning_rate": 3.025035946883194e-05,
"loss": 0.9019,
"step": 9340
},
{
"epoch": 1.19,
"learning_rate": 3.0229214243423835e-05,
"loss": 1.0049,
"step": 9350
},
{
"epoch": 1.19,
"learning_rate": 3.020806901801573e-05,
"loss": 0.9035,
"step": 9360
},
{
"epoch": 1.19,
"learning_rate": 3.018692379260763e-05,
"loss": 0.7708,
"step": 9370
},
{
"epoch": 1.19,
"learning_rate": 3.0165778567199528e-05,
"loss": 0.8799,
"step": 9380
},
{
"epoch": 1.19,
"learning_rate": 3.0144633341791424e-05,
"loss": 0.6394,
"step": 9390
},
{
"epoch": 1.19,
"learning_rate": 3.012348811638332e-05,
"loss": 0.8509,
"step": 9400
},
{
"epoch": 1.19,
"learning_rate": 3.010234289097522e-05,
"loss": 0.8221,
"step": 9410
},
{
"epoch": 1.2,
"learning_rate": 3.0081197665567117e-05,
"loss": 0.7036,
"step": 9420
},
{
"epoch": 1.2,
"learning_rate": 3.0060052440159013e-05,
"loss": 0.8459,
"step": 9430
},
{
"epoch": 1.2,
"learning_rate": 3.003890721475091e-05,
"loss": 0.7608,
"step": 9440
},
{
"epoch": 1.2,
"learning_rate": 3.0017761989342806e-05,
"loss": 0.7512,
"step": 9450
},
{
"epoch": 1.2,
"learning_rate": 2.9996616763934703e-05,
"loss": 0.8203,
"step": 9460
},
{
"epoch": 1.2,
"learning_rate": 2.99754715385266e-05,
"loss": 0.7698,
"step": 9470
},
{
"epoch": 1.2,
"learning_rate": 2.9954326313118495e-05,
"loss": 0.6992,
"step": 9480
},
{
"epoch": 1.2,
"learning_rate": 2.99331810877104e-05,
"loss": 0.7954,
"step": 9490
},
{
"epoch": 1.21,
"learning_rate": 2.9912035862302295e-05,
"loss": 0.8899,
"step": 9500
},
{
"epoch": 1.21,
"learning_rate": 2.989089063689419e-05,
"loss": 0.822,
"step": 9510
},
{
"epoch": 1.21,
"learning_rate": 2.9869745411486088e-05,
"loss": 0.7832,
"step": 9520
},
{
"epoch": 1.21,
"learning_rate": 2.9848600186077984e-05,
"loss": 0.8201,
"step": 9530
},
{
"epoch": 1.21,
"learning_rate": 2.982745496066988e-05,
"loss": 1.0696,
"step": 9540
},
{
"epoch": 1.21,
"learning_rate": 2.9806309735261777e-05,
"loss": 0.9827,
"step": 9550
},
{
"epoch": 1.21,
"learning_rate": 2.9785164509853673e-05,
"loss": 0.8015,
"step": 9560
},
{
"epoch": 1.21,
"learning_rate": 2.976401928444557e-05,
"loss": 0.7345,
"step": 9570
},
{
"epoch": 1.22,
"learning_rate": 2.9742874059037473e-05,
"loss": 0.6928,
"step": 9580
},
{
"epoch": 1.22,
"learning_rate": 2.972172883362937e-05,
"loss": 0.5544,
"step": 9590
},
{
"epoch": 1.22,
"learning_rate": 2.9700583608221266e-05,
"loss": 0.8077,
"step": 9600
},
{
"epoch": 1.22,
"learning_rate": 2.9679438382813162e-05,
"loss": 0.9272,
"step": 9610
},
{
"epoch": 1.22,
"learning_rate": 2.965829315740506e-05,
"loss": 0.7831,
"step": 9620
},
{
"epoch": 1.22,
"learning_rate": 2.9637147931996955e-05,
"loss": 0.8763,
"step": 9630
},
{
"epoch": 1.22,
"learning_rate": 2.961600270658885e-05,
"loss": 0.7745,
"step": 9640
},
{
"epoch": 1.22,
"learning_rate": 2.9594857481180748e-05,
"loss": 0.7158,
"step": 9650
},
{
"epoch": 1.23,
"learning_rate": 2.9573712255772644e-05,
"loss": 0.786,
"step": 9660
},
{
"epoch": 1.23,
"learning_rate": 2.9552567030364548e-05,
"loss": 0.6519,
"step": 9670
},
{
"epoch": 1.23,
"learning_rate": 2.9531421804956444e-05,
"loss": 0.7942,
"step": 9680
},
{
"epoch": 1.23,
"learning_rate": 2.951027657954834e-05,
"loss": 0.7173,
"step": 9690
},
{
"epoch": 1.23,
"learning_rate": 2.9489131354140237e-05,
"loss": 0.675,
"step": 9700
},
{
"epoch": 1.23,
"learning_rate": 2.9467986128732133e-05,
"loss": 1.0228,
"step": 9710
},
{
"epoch": 1.23,
"learning_rate": 2.944684090332403e-05,
"loss": 0.9029,
"step": 9720
},
{
"epoch": 1.23,
"learning_rate": 2.9425695677915926e-05,
"loss": 0.9633,
"step": 9730
},
{
"epoch": 1.24,
"learning_rate": 2.9404550452507822e-05,
"loss": 0.6863,
"step": 9740
},
{
"epoch": 1.24,
"learning_rate": 2.938340522709972e-05,
"loss": 0.7393,
"step": 9750
},
{
"epoch": 1.24,
"learning_rate": 2.9362260001691622e-05,
"loss": 0.6962,
"step": 9760
},
{
"epoch": 1.24,
"learning_rate": 2.934111477628352e-05,
"loss": 0.8679,
"step": 9770
},
{
"epoch": 1.24,
"learning_rate": 2.9319969550875415e-05,
"loss": 0.6598,
"step": 9780
},
{
"epoch": 1.24,
"learning_rate": 2.929882432546731e-05,
"loss": 0.754,
"step": 9790
},
{
"epoch": 1.24,
"learning_rate": 2.9277679100059208e-05,
"loss": 0.8384,
"step": 9800
},
{
"epoch": 1.24,
"learning_rate": 2.9256533874651104e-05,
"loss": 0.8584,
"step": 9810
},
{
"epoch": 1.25,
"learning_rate": 2.9235388649243e-05,
"loss": 0.6651,
"step": 9820
},
{
"epoch": 1.25,
"learning_rate": 2.9214243423834897e-05,
"loss": 0.8985,
"step": 9830
},
{
"epoch": 1.25,
"learning_rate": 2.91930981984268e-05,
"loss": 0.7408,
"step": 9840
},
{
"epoch": 1.25,
"learning_rate": 2.9171952973018697e-05,
"loss": 1.022,
"step": 9850
},
{
"epoch": 1.25,
"learning_rate": 2.9150807747610593e-05,
"loss": 0.7627,
"step": 9860
},
{
"epoch": 1.25,
"learning_rate": 2.912966252220249e-05,
"loss": 0.9292,
"step": 9870
},
{
"epoch": 1.25,
"learning_rate": 2.9108517296794386e-05,
"loss": 0.705,
"step": 9880
},
{
"epoch": 1.25,
"learning_rate": 2.9087372071386282e-05,
"loss": 0.8046,
"step": 9890
},
{
"epoch": 1.26,
"learning_rate": 2.906622684597818e-05,
"loss": 0.6993,
"step": 9900
},
{
"epoch": 1.26,
"learning_rate": 2.9045081620570075e-05,
"loss": 0.8105,
"step": 9910
},
{
"epoch": 1.26,
"learning_rate": 2.902393639516197e-05,
"loss": 0.9693,
"step": 9920
},
{
"epoch": 1.26,
"learning_rate": 2.9002791169753875e-05,
"loss": 0.7996,
"step": 9930
},
{
"epoch": 1.26,
"learning_rate": 2.898164594434577e-05,
"loss": 0.9609,
"step": 9940
},
{
"epoch": 1.26,
"learning_rate": 2.8960500718937667e-05,
"loss": 0.724,
"step": 9950
},
{
"epoch": 1.26,
"learning_rate": 2.8939355493529564e-05,
"loss": 0.6073,
"step": 9960
},
{
"epoch": 1.26,
"learning_rate": 2.891821026812146e-05,
"loss": 0.7385,
"step": 9970
},
{
"epoch": 1.27,
"learning_rate": 2.8897065042713357e-05,
"loss": 0.5562,
"step": 9980
},
{
"epoch": 1.27,
"learning_rate": 2.8875919817305253e-05,
"loss": 0.5467,
"step": 9990
},
{
"epoch": 1.27,
"learning_rate": 2.885477459189715e-05,
"loss": 0.7467,
"step": 10000
},
{
"epoch": 1.27,
"eval_loss": 1.0794813632965088,
"eval_rouge1": 0.7288053145180498,
"eval_rouge2": 0.59702225177682,
"eval_rougeL": 0.6756712992467506,
"eval_rougeLsum": 0.6755305445633231,
"eval_runtime": 363.8849,
"eval_samples_per_second": 9.426,
"eval_steps_per_second": 2.358,
"step": 10000
},
{
"epoch": 1.27,
"learning_rate": 2.8833629366489046e-05,
"loss": 0.8497,
"step": 10010
},
{
"epoch": 1.27,
"learning_rate": 2.881248414108095e-05,
"loss": 0.8742,
"step": 10020
},
{
"epoch": 1.27,
"learning_rate": 2.8791338915672845e-05,
"loss": 0.877,
"step": 10030
},
{
"epoch": 1.27,
"learning_rate": 2.8770193690264742e-05,
"loss": 0.8508,
"step": 10040
},
{
"epoch": 1.28,
"learning_rate": 2.8749048464856638e-05,
"loss": 0.6094,
"step": 10050
},
{
"epoch": 1.28,
"learning_rate": 2.8727903239448535e-05,
"loss": 0.823,
"step": 10060
},
{
"epoch": 1.28,
"learning_rate": 2.870675801404043e-05,
"loss": 0.9055,
"step": 10070
},
{
"epoch": 1.28,
"learning_rate": 2.8685612788632328e-05,
"loss": 0.867,
"step": 10080
},
{
"epoch": 1.28,
"learning_rate": 2.8664467563224224e-05,
"loss": 1.0347,
"step": 10090
},
{
"epoch": 1.28,
"learning_rate": 2.864332233781612e-05,
"loss": 0.8413,
"step": 10100
},
{
"epoch": 1.28,
"learning_rate": 2.862217711240802e-05,
"loss": 0.7709,
"step": 10110
},
{
"epoch": 1.28,
"learning_rate": 2.860103188699992e-05,
"loss": 0.8152,
"step": 10120
},
{
"epoch": 1.29,
"learning_rate": 2.8579886661591816e-05,
"loss": 0.9416,
"step": 10130
},
{
"epoch": 1.29,
"learning_rate": 2.8558741436183713e-05,
"loss": 0.8394,
"step": 10140
},
{
"epoch": 1.29,
"learning_rate": 2.853759621077561e-05,
"loss": 0.805,
"step": 10150
},
{
"epoch": 1.29,
"learning_rate": 2.8516450985367506e-05,
"loss": 0.7935,
"step": 10160
},
{
"epoch": 1.29,
"learning_rate": 2.8495305759959402e-05,
"loss": 0.8465,
"step": 10170
},
{
"epoch": 1.29,
"learning_rate": 2.84741605345513e-05,
"loss": 0.8089,
"step": 10180
},
{
"epoch": 1.29,
"learning_rate": 2.8453015309143198e-05,
"loss": 0.7922,
"step": 10190
},
{
"epoch": 1.29,
"learning_rate": 2.8431870083735095e-05,
"loss": 0.7804,
"step": 10200
},
{
"epoch": 1.3,
"learning_rate": 2.841072485832699e-05,
"loss": 0.6966,
"step": 10210
},
{
"epoch": 1.3,
"learning_rate": 2.8389579632918887e-05,
"loss": 0.7149,
"step": 10220
},
{
"epoch": 1.3,
"learning_rate": 2.8368434407510784e-05,
"loss": 0.7072,
"step": 10230
},
{
"epoch": 1.3,
"learning_rate": 2.8347289182102684e-05,
"loss": 0.7227,
"step": 10240
},
{
"epoch": 1.3,
"learning_rate": 2.832614395669458e-05,
"loss": 0.8338,
"step": 10250
},
{
"epoch": 1.3,
"learning_rate": 2.8304998731286476e-05,
"loss": 0.742,
"step": 10260
},
{
"epoch": 1.3,
"learning_rate": 2.8283853505878373e-05,
"loss": 0.8338,
"step": 10270
},
{
"epoch": 1.3,
"learning_rate": 2.8262708280470273e-05,
"loss": 0.8547,
"step": 10280
},
{
"epoch": 1.31,
"learning_rate": 2.824156305506217e-05,
"loss": 0.6836,
"step": 10290
},
{
"epoch": 1.31,
"learning_rate": 2.8220417829654066e-05,
"loss": 0.6133,
"step": 10300
},
{
"epoch": 1.31,
"learning_rate": 2.8199272604245962e-05,
"loss": 0.7096,
"step": 10310
},
{
"epoch": 1.31,
"learning_rate": 2.817812737883786e-05,
"loss": 0.8601,
"step": 10320
},
{
"epoch": 1.31,
"learning_rate": 2.8156982153429755e-05,
"loss": 0.7412,
"step": 10330
},
{
"epoch": 1.31,
"learning_rate": 2.813583692802165e-05,
"loss": 0.999,
"step": 10340
},
{
"epoch": 1.31,
"learning_rate": 2.8114691702613548e-05,
"loss": 0.7643,
"step": 10350
},
{
"epoch": 1.31,
"learning_rate": 2.8093546477205447e-05,
"loss": 0.8381,
"step": 10360
},
{
"epoch": 1.32,
"learning_rate": 2.8072401251797347e-05,
"loss": 0.682,
"step": 10370
},
{
"epoch": 1.32,
"learning_rate": 2.8051256026389244e-05,
"loss": 1.1534,
"step": 10380
},
{
"epoch": 1.32,
"learning_rate": 2.803011080098114e-05,
"loss": 0.8262,
"step": 10390
},
{
"epoch": 1.32,
"learning_rate": 2.8008965575573036e-05,
"loss": 0.7585,
"step": 10400
},
{
"epoch": 1.32,
"learning_rate": 2.7987820350164933e-05,
"loss": 0.8076,
"step": 10410
},
{
"epoch": 1.32,
"learning_rate": 2.796667512475683e-05,
"loss": 0.5735,
"step": 10420
},
{
"epoch": 1.32,
"learning_rate": 2.7945529899348726e-05,
"loss": 0.8402,
"step": 10430
},
{
"epoch": 1.32,
"learning_rate": 2.7924384673940622e-05,
"loss": 0.644,
"step": 10440
},
{
"epoch": 1.33,
"learning_rate": 2.790323944853252e-05,
"loss": 0.7684,
"step": 10450
},
{
"epoch": 1.33,
"learning_rate": 2.788209422312442e-05,
"loss": 0.8434,
"step": 10460
},
{
"epoch": 1.33,
"learning_rate": 2.7860948997716318e-05,
"loss": 0.7619,
"step": 10470
},
{
"epoch": 1.33,
"learning_rate": 2.7839803772308214e-05,
"loss": 0.6935,
"step": 10480
},
{
"epoch": 1.33,
"learning_rate": 2.781865854690011e-05,
"loss": 0.7346,
"step": 10490
},
{
"epoch": 1.33,
"learning_rate": 2.7797513321492007e-05,
"loss": 0.6745,
"step": 10500
},
{
"epoch": 1.33,
"learning_rate": 2.7776368096083904e-05,
"loss": 0.8581,
"step": 10510
},
{
"epoch": 1.33,
"learning_rate": 2.77552228706758e-05,
"loss": 0.8517,
"step": 10520
},
{
"epoch": 1.34,
"learning_rate": 2.7734077645267697e-05,
"loss": 0.7695,
"step": 10530
},
{
"epoch": 1.34,
"learning_rate": 2.77129324198596e-05,
"loss": 0.5943,
"step": 10540
},
{
"epoch": 1.34,
"learning_rate": 2.7691787194451496e-05,
"loss": 0.8823,
"step": 10550
},
{
"epoch": 1.34,
"learning_rate": 2.7670641969043393e-05,
"loss": 0.8037,
"step": 10560
},
{
"epoch": 1.34,
"learning_rate": 2.764949674363529e-05,
"loss": 0.6155,
"step": 10570
},
{
"epoch": 1.34,
"learning_rate": 2.7628351518227185e-05,
"loss": 0.8208,
"step": 10580
},
{
"epoch": 1.34,
"learning_rate": 2.7607206292819082e-05,
"loss": 1.019,
"step": 10590
},
{
"epoch": 1.34,
"learning_rate": 2.7586061067410978e-05,
"loss": 0.6645,
"step": 10600
},
{
"epoch": 1.35,
"learning_rate": 2.7564915842002875e-05,
"loss": 0.8208,
"step": 10610
},
{
"epoch": 1.35,
"learning_rate": 2.754377061659477e-05,
"loss": 0.823,
"step": 10620
},
{
"epoch": 1.35,
"learning_rate": 2.7522625391186674e-05,
"loss": 1.0845,
"step": 10630
},
{
"epoch": 1.35,
"learning_rate": 2.750148016577857e-05,
"loss": 0.6644,
"step": 10640
},
{
"epoch": 1.35,
"learning_rate": 2.7480334940370467e-05,
"loss": 0.7437,
"step": 10650
},
{
"epoch": 1.35,
"learning_rate": 2.7459189714962363e-05,
"loss": 0.7097,
"step": 10660
},
{
"epoch": 1.35,
"learning_rate": 2.743804448955426e-05,
"loss": 0.7118,
"step": 10670
},
{
"epoch": 1.35,
"learning_rate": 2.7416899264146156e-05,
"loss": 0.8112,
"step": 10680
},
{
"epoch": 1.36,
"learning_rate": 2.7395754038738053e-05,
"loss": 0.8533,
"step": 10690
},
{
"epoch": 1.36,
"learning_rate": 2.737460881332995e-05,
"loss": 0.7482,
"step": 10700
},
{
"epoch": 1.36,
"learning_rate": 2.7353463587921846e-05,
"loss": 0.9384,
"step": 10710
},
{
"epoch": 1.36,
"learning_rate": 2.733231836251375e-05,
"loss": 0.7615,
"step": 10720
},
{
"epoch": 1.36,
"learning_rate": 2.7311173137105645e-05,
"loss": 0.6612,
"step": 10730
},
{
"epoch": 1.36,
"learning_rate": 2.729002791169754e-05,
"loss": 0.8246,
"step": 10740
},
{
"epoch": 1.36,
"learning_rate": 2.7268882686289438e-05,
"loss": 0.7894,
"step": 10750
},
{
"epoch": 1.37,
"learning_rate": 2.7247737460881334e-05,
"loss": 0.8635,
"step": 10760
},
{
"epoch": 1.37,
"learning_rate": 2.722659223547323e-05,
"loss": 0.8648,
"step": 10770
},
{
"epoch": 1.37,
"learning_rate": 2.7205447010065127e-05,
"loss": 0.6398,
"step": 10780
},
{
"epoch": 1.37,
"learning_rate": 2.7184301784657024e-05,
"loss": 0.9898,
"step": 10790
},
{
"epoch": 1.37,
"learning_rate": 2.7163156559248927e-05,
"loss": 0.8935,
"step": 10800
},
{
"epoch": 1.37,
"learning_rate": 2.7142011333840823e-05,
"loss": 0.6855,
"step": 10810
},
{
"epoch": 1.37,
"learning_rate": 2.712086610843272e-05,
"loss": 0.8315,
"step": 10820
},
{
"epoch": 1.37,
"learning_rate": 2.7099720883024616e-05,
"loss": 0.9544,
"step": 10830
},
{
"epoch": 1.38,
"learning_rate": 2.7078575657616512e-05,
"loss": 0.8272,
"step": 10840
},
{
"epoch": 1.38,
"learning_rate": 2.705743043220841e-05,
"loss": 0.8026,
"step": 10850
},
{
"epoch": 1.38,
"learning_rate": 2.7036285206800305e-05,
"loss": 0.9848,
"step": 10860
},
{
"epoch": 1.38,
"learning_rate": 2.70151399813922e-05,
"loss": 0.6433,
"step": 10870
},
{
"epoch": 1.38,
"learning_rate": 2.6993994755984098e-05,
"loss": 0.8458,
"step": 10880
},
{
"epoch": 1.38,
"learning_rate": 2.6972849530576e-05,
"loss": 0.8695,
"step": 10890
},
{
"epoch": 1.38,
"learning_rate": 2.6951704305167898e-05,
"loss": 0.6768,
"step": 10900
},
{
"epoch": 1.38,
"learning_rate": 2.6930559079759794e-05,
"loss": 0.7751,
"step": 10910
},
{
"epoch": 1.39,
"learning_rate": 2.690941385435169e-05,
"loss": 0.7812,
"step": 10920
},
{
"epoch": 1.39,
"learning_rate": 2.6888268628943587e-05,
"loss": 0.7727,
"step": 10930
},
{
"epoch": 1.39,
"learning_rate": 2.6867123403535483e-05,
"loss": 0.6107,
"step": 10940
},
{
"epoch": 1.39,
"learning_rate": 2.684597817812738e-05,
"loss": 0.57,
"step": 10950
},
{
"epoch": 1.39,
"learning_rate": 2.6824832952719276e-05,
"loss": 0.9912,
"step": 10960
},
{
"epoch": 1.39,
"learning_rate": 2.6803687727311173e-05,
"loss": 0.7194,
"step": 10970
},
{
"epoch": 1.39,
"learning_rate": 2.6782542501903072e-05,
"loss": 0.9826,
"step": 10980
},
{
"epoch": 1.39,
"learning_rate": 2.6761397276494972e-05,
"loss": 0.8282,
"step": 10990
},
{
"epoch": 1.4,
"learning_rate": 2.674025205108687e-05,
"loss": 0.6639,
"step": 11000
},
{
"epoch": 1.4,
"eval_loss": 1.0122628211975098,
"eval_rouge1": 0.728931516737085,
"eval_rouge2": 0.596669961520042,
"eval_rougeL": 0.6751281111001489,
"eval_rougeLsum": 0.6748566260310818,
"eval_runtime": 367.2924,
"eval_samples_per_second": 9.339,
"eval_steps_per_second": 2.336,
"step": 11000
},
{
"epoch": 1.4,
"learning_rate": 2.6719106825678765e-05,
"loss": 0.7458,
"step": 11010
},
{
"epoch": 1.4,
"learning_rate": 2.669796160027066e-05,
"loss": 1.0602,
"step": 11020
},
{
"epoch": 1.4,
"learning_rate": 2.6676816374862558e-05,
"loss": 0.7341,
"step": 11030
},
{
"epoch": 1.4,
"learning_rate": 2.6655671149454454e-05,
"loss": 0.7162,
"step": 11040
},
{
"epoch": 1.4,
"learning_rate": 2.663452592404635e-05,
"loss": 0.907,
"step": 11050
},
{
"epoch": 1.4,
"learning_rate": 2.6613380698638247e-05,
"loss": 0.6627,
"step": 11060
},
{
"epoch": 1.4,
"learning_rate": 2.6592235473230147e-05,
"loss": 0.7077,
"step": 11070
},
{
"epoch": 1.41,
"learning_rate": 2.6571090247822043e-05,
"loss": 0.7702,
"step": 11080
},
{
"epoch": 1.41,
"learning_rate": 2.654994502241394e-05,
"loss": 0.7501,
"step": 11090
},
{
"epoch": 1.41,
"learning_rate": 2.6528799797005836e-05,
"loss": 0.7739,
"step": 11100
},
{
"epoch": 1.41,
"learning_rate": 2.6507654571597736e-05,
"loss": 0.7285,
"step": 11110
},
{
"epoch": 1.41,
"learning_rate": 2.6486509346189632e-05,
"loss": 0.964,
"step": 11120
},
{
"epoch": 1.41,
"learning_rate": 2.646536412078153e-05,
"loss": 0.6703,
"step": 11130
},
{
"epoch": 1.41,
"learning_rate": 2.6444218895373425e-05,
"loss": 0.8126,
"step": 11140
},
{
"epoch": 1.41,
"learning_rate": 2.6423073669965325e-05,
"loss": 0.806,
"step": 11150
},
{
"epoch": 1.42,
"learning_rate": 2.640192844455722e-05,
"loss": 0.9477,
"step": 11160
},
{
"epoch": 1.42,
"learning_rate": 2.6380783219149118e-05,
"loss": 0.7944,
"step": 11170
},
{
"epoch": 1.42,
"learning_rate": 2.6359637993741014e-05,
"loss": 0.8311,
"step": 11180
},
{
"epoch": 1.42,
"learning_rate": 2.633849276833291e-05,
"loss": 0.8428,
"step": 11190
},
{
"epoch": 1.42,
"learning_rate": 2.6317347542924807e-05,
"loss": 0.8609,
"step": 11200
},
{
"epoch": 1.42,
"learning_rate": 2.6296202317516703e-05,
"loss": 0.8116,
"step": 11210
},
{
"epoch": 1.42,
"learning_rate": 2.62750570921086e-05,
"loss": 0.6022,
"step": 11220
},
{
"epoch": 1.42,
"learning_rate": 2.62539118667005e-05,
"loss": 0.5631,
"step": 11230
},
{
"epoch": 1.43,
"learning_rate": 2.62327666412924e-05,
"loss": 0.6502,
"step": 11240
},
{
"epoch": 1.43,
"learning_rate": 2.6211621415884296e-05,
"loss": 0.8514,
"step": 11250
},
{
"epoch": 1.43,
"learning_rate": 2.6190476190476192e-05,
"loss": 0.8166,
"step": 11260
},
{
"epoch": 1.43,
"learning_rate": 2.616933096506809e-05,
"loss": 0.9014,
"step": 11270
},
{
"epoch": 1.43,
"learning_rate": 2.6148185739659985e-05,
"loss": 0.7577,
"step": 11280
},
{
"epoch": 1.43,
"learning_rate": 2.612704051425188e-05,
"loss": 0.9828,
"step": 11290
},
{
"epoch": 1.43,
"learning_rate": 2.6105895288843778e-05,
"loss": 0.6564,
"step": 11300
},
{
"epoch": 1.43,
"learning_rate": 2.6084750063435674e-05,
"loss": 0.8623,
"step": 11310
},
{
"epoch": 1.44,
"learning_rate": 2.606360483802757e-05,
"loss": 0.7643,
"step": 11320
},
{
"epoch": 1.44,
"learning_rate": 2.6042459612619474e-05,
"loss": 0.7076,
"step": 11330
},
{
"epoch": 1.44,
"learning_rate": 2.602131438721137e-05,
"loss": 0.7002,
"step": 11340
},
{
"epoch": 1.44,
"learning_rate": 2.6000169161803267e-05,
"loss": 0.7856,
"step": 11350
},
{
"epoch": 1.44,
"learning_rate": 2.5979023936395163e-05,
"loss": 0.5246,
"step": 11360
},
{
"epoch": 1.44,
"learning_rate": 2.595787871098706e-05,
"loss": 0.753,
"step": 11370
},
{
"epoch": 1.44,
"learning_rate": 2.5936733485578956e-05,
"loss": 0.5785,
"step": 11380
},
{
"epoch": 1.45,
"learning_rate": 2.5915588260170852e-05,
"loss": 0.9187,
"step": 11390
},
{
"epoch": 1.45,
"learning_rate": 2.589444303476275e-05,
"loss": 0.5809,
"step": 11400
},
{
"epoch": 1.45,
"learning_rate": 2.5873297809354645e-05,
"loss": 0.8943,
"step": 11410
},
{
"epoch": 1.45,
"learning_rate": 2.585215258394655e-05,
"loss": 0.7887,
"step": 11420
},
{
"epoch": 1.45,
"learning_rate": 2.5831007358538445e-05,
"loss": 0.9797,
"step": 11430
},
{
"epoch": 1.45,
"learning_rate": 2.580986213313034e-05,
"loss": 0.7069,
"step": 11440
},
{
"epoch": 1.45,
"learning_rate": 2.5788716907722238e-05,
"loss": 0.8838,
"step": 11450
},
{
"epoch": 1.45,
"learning_rate": 2.5767571682314134e-05,
"loss": 0.6704,
"step": 11460
},
{
"epoch": 1.46,
"learning_rate": 2.574642645690603e-05,
"loss": 0.7148,
"step": 11470
},
{
"epoch": 1.46,
"learning_rate": 2.5725281231497927e-05,
"loss": 0.8098,
"step": 11480
},
{
"epoch": 1.46,
"learning_rate": 2.5704136006089823e-05,
"loss": 1.0083,
"step": 11490
},
{
"epoch": 1.46,
"learning_rate": 2.5682990780681726e-05,
"loss": 0.9377,
"step": 11500
},
{
"epoch": 1.46,
"learning_rate": 2.5661845555273623e-05,
"loss": 0.9135,
"step": 11510
},
{
"epoch": 1.46,
"learning_rate": 2.564070032986552e-05,
"loss": 0.7503,
"step": 11520
},
{
"epoch": 1.46,
"learning_rate": 2.5619555104457416e-05,
"loss": 0.8043,
"step": 11530
},
{
"epoch": 1.46,
"learning_rate": 2.5598409879049312e-05,
"loss": 0.5784,
"step": 11540
},
{
"epoch": 1.47,
"learning_rate": 2.557726465364121e-05,
"loss": 0.5905,
"step": 11550
},
{
"epoch": 1.47,
"learning_rate": 2.5556119428233105e-05,
"loss": 0.7875,
"step": 11560
},
{
"epoch": 1.47,
"learning_rate": 2.5534974202825e-05,
"loss": 0.9613,
"step": 11570
},
{
"epoch": 1.47,
"learning_rate": 2.5513828977416898e-05,
"loss": 0.6845,
"step": 11580
},
{
"epoch": 1.47,
"learning_rate": 2.54926837520088e-05,
"loss": 0.7011,
"step": 11590
},
{
"epoch": 1.47,
"learning_rate": 2.5471538526600697e-05,
"loss": 0.7837,
"step": 11600
},
{
"epoch": 1.47,
"learning_rate": 2.5450393301192594e-05,
"loss": 0.7275,
"step": 11610
},
{
"epoch": 1.47,
"learning_rate": 2.542924807578449e-05,
"loss": 0.6325,
"step": 11620
},
{
"epoch": 1.48,
"learning_rate": 2.5408102850376387e-05,
"loss": 0.7691,
"step": 11630
},
{
"epoch": 1.48,
"learning_rate": 2.5386957624968283e-05,
"loss": 0.6752,
"step": 11640
},
{
"epoch": 1.48,
"learning_rate": 2.536581239956018e-05,
"loss": 0.8254,
"step": 11650
},
{
"epoch": 1.48,
"learning_rate": 2.5344667174152076e-05,
"loss": 0.8092,
"step": 11660
},
{
"epoch": 1.48,
"learning_rate": 2.5323521948743972e-05,
"loss": 0.6477,
"step": 11670
},
{
"epoch": 1.48,
"learning_rate": 2.5302376723335875e-05,
"loss": 0.566,
"step": 11680
},
{
"epoch": 1.48,
"learning_rate": 2.5281231497927772e-05,
"loss": 0.6423,
"step": 11690
},
{
"epoch": 1.48,
"learning_rate": 2.5260086272519668e-05,
"loss": 0.7226,
"step": 11700
},
{
"epoch": 1.49,
"learning_rate": 2.5238941047111565e-05,
"loss": 0.8907,
"step": 11710
},
{
"epoch": 1.49,
"learning_rate": 2.521779582170346e-05,
"loss": 0.8896,
"step": 11720
},
{
"epoch": 1.49,
"learning_rate": 2.5196650596295357e-05,
"loss": 0.7911,
"step": 11730
},
{
"epoch": 1.49,
"learning_rate": 2.5175505370887254e-05,
"loss": 0.781,
"step": 11740
},
{
"epoch": 1.49,
"learning_rate": 2.515436014547915e-05,
"loss": 0.6124,
"step": 11750
},
{
"epoch": 1.49,
"learning_rate": 2.5133214920071047e-05,
"loss": 0.7398,
"step": 11760
},
{
"epoch": 1.49,
"learning_rate": 2.511206969466295e-05,
"loss": 0.8877,
"step": 11770
},
{
"epoch": 1.49,
"learning_rate": 2.5090924469254846e-05,
"loss": 0.8031,
"step": 11780
},
{
"epoch": 1.5,
"learning_rate": 2.5069779243846743e-05,
"loss": 0.6238,
"step": 11790
},
{
"epoch": 1.5,
"learning_rate": 2.504863401843864e-05,
"loss": 0.7096,
"step": 11800
},
{
"epoch": 1.5,
"learning_rate": 2.5027488793030536e-05,
"loss": 0.6641,
"step": 11810
},
{
"epoch": 1.5,
"learning_rate": 2.5006343567622432e-05,
"loss": 0.7458,
"step": 11820
},
{
"epoch": 1.5,
"learning_rate": 2.4985198342214332e-05,
"loss": 0.7145,
"step": 11830
},
{
"epoch": 1.5,
"learning_rate": 2.4964053116806228e-05,
"loss": 0.6667,
"step": 11840
},
{
"epoch": 1.5,
"learning_rate": 2.4942907891398125e-05,
"loss": 0.8383,
"step": 11850
},
{
"epoch": 1.5,
"learning_rate": 2.492176266599002e-05,
"loss": 0.8938,
"step": 11860
},
{
"epoch": 1.51,
"learning_rate": 2.4900617440581917e-05,
"loss": 0.6908,
"step": 11870
},
{
"epoch": 1.51,
"learning_rate": 2.4879472215173817e-05,
"loss": 0.7755,
"step": 11880
},
{
"epoch": 1.51,
"learning_rate": 2.4858326989765714e-05,
"loss": 0.743,
"step": 11890
},
{
"epoch": 1.51,
"learning_rate": 2.483718176435761e-05,
"loss": 0.84,
"step": 11900
},
{
"epoch": 1.51,
"learning_rate": 2.4816036538949506e-05,
"loss": 0.7788,
"step": 11910
},
{
"epoch": 1.51,
"learning_rate": 2.4794891313541406e-05,
"loss": 0.7378,
"step": 11920
},
{
"epoch": 1.51,
"learning_rate": 2.4773746088133303e-05,
"loss": 0.605,
"step": 11930
},
{
"epoch": 1.51,
"learning_rate": 2.47526008627252e-05,
"loss": 0.8596,
"step": 11940
},
{
"epoch": 1.52,
"learning_rate": 2.4731455637317095e-05,
"loss": 0.6083,
"step": 11950
},
{
"epoch": 1.52,
"learning_rate": 2.4710310411908992e-05,
"loss": 0.8275,
"step": 11960
},
{
"epoch": 1.52,
"learning_rate": 2.4689165186500888e-05,
"loss": 0.8073,
"step": 11970
},
{
"epoch": 1.52,
"learning_rate": 2.4668019961092788e-05,
"loss": 0.8415,
"step": 11980
},
{
"epoch": 1.52,
"learning_rate": 2.4646874735684684e-05,
"loss": 0.7018,
"step": 11990
},
{
"epoch": 1.52,
"learning_rate": 2.462572951027658e-05,
"loss": 0.6523,
"step": 12000
},
{
"epoch": 1.52,
"eval_loss": 1.0081225633621216,
"eval_rouge1": 0.7304821261480414,
"eval_rouge2": 0.5982025580758599,
"eval_rougeL": 0.6766505339586741,
"eval_rougeLsum": 0.6764001184529582,
"eval_runtime": 367.2853,
"eval_samples_per_second": 9.339,
"eval_steps_per_second": 2.336,
"step": 12000
},
{
"epoch": 1.52,
"learning_rate": 2.4604584284868477e-05,
"loss": 0.9606,
"step": 12010
},
{
"epoch": 1.52,
"learning_rate": 2.4583439059460374e-05,
"loss": 0.7747,
"step": 12020
},
{
"epoch": 1.53,
"learning_rate": 2.456229383405227e-05,
"loss": 0.5675,
"step": 12030
},
{
"epoch": 1.53,
"learning_rate": 2.454114860864417e-05,
"loss": 0.8177,
"step": 12040
},
{
"epoch": 1.53,
"learning_rate": 2.4520003383236066e-05,
"loss": 0.6079,
"step": 12050
},
{
"epoch": 1.53,
"learning_rate": 2.4498858157827963e-05,
"loss": 0.668,
"step": 12060
},
{
"epoch": 1.53,
"learning_rate": 2.447771293241986e-05,
"loss": 0.7332,
"step": 12070
},
{
"epoch": 1.53,
"learning_rate": 2.4456567707011756e-05,
"loss": 0.6782,
"step": 12080
},
{
"epoch": 1.53,
"learning_rate": 2.4435422481603655e-05,
"loss": 0.7229,
"step": 12090
},
{
"epoch": 1.54,
"learning_rate": 2.4414277256195552e-05,
"loss": 0.6442,
"step": 12100
},
{
"epoch": 1.54,
"learning_rate": 2.4393132030787448e-05,
"loss": 0.7871,
"step": 12110
},
{
"epoch": 1.54,
"learning_rate": 2.4371986805379345e-05,
"loss": 0.8963,
"step": 12120
},
{
"epoch": 1.54,
"learning_rate": 2.435084157997124e-05,
"loss": 0.9301,
"step": 12130
},
{
"epoch": 1.54,
"learning_rate": 2.432969635456314e-05,
"loss": 0.6472,
"step": 12140
},
{
"epoch": 1.54,
"learning_rate": 2.4308551129155037e-05,
"loss": 0.7145,
"step": 12150
},
{
"epoch": 1.54,
"learning_rate": 2.4287405903746934e-05,
"loss": 0.7907,
"step": 12160
},
{
"epoch": 1.54,
"learning_rate": 2.426626067833883e-05,
"loss": 0.5936,
"step": 12170
},
{
"epoch": 1.55,
"learning_rate": 2.424511545293073e-05,
"loss": 0.7855,
"step": 12180
},
{
"epoch": 1.55,
"learning_rate": 2.4223970227522626e-05,
"loss": 0.7044,
"step": 12190
},
{
"epoch": 1.55,
"learning_rate": 2.4202825002114523e-05,
"loss": 0.7496,
"step": 12200
},
{
"epoch": 1.55,
"learning_rate": 2.418167977670642e-05,
"loss": 0.8869,
"step": 12210
},
{
"epoch": 1.55,
"learning_rate": 2.4160534551298316e-05,
"loss": 0.8166,
"step": 12220
},
{
"epoch": 1.55,
"learning_rate": 2.4139389325890215e-05,
"loss": 0.6428,
"step": 12230
},
{
"epoch": 1.55,
"learning_rate": 2.4118244100482112e-05,
"loss": 0.8139,
"step": 12240
},
{
"epoch": 1.55,
"learning_rate": 2.4097098875074008e-05,
"loss": 0.7159,
"step": 12250
},
{
"epoch": 1.56,
"learning_rate": 2.4075953649665905e-05,
"loss": 0.6293,
"step": 12260
},
{
"epoch": 1.56,
"learning_rate": 2.4054808424257804e-05,
"loss": 0.5287,
"step": 12270
},
{
"epoch": 1.56,
"learning_rate": 2.40336631988497e-05,
"loss": 0.7412,
"step": 12280
},
{
"epoch": 1.56,
"learning_rate": 2.4012517973441597e-05,
"loss": 0.791,
"step": 12290
},
{
"epoch": 1.56,
"learning_rate": 2.3991372748033494e-05,
"loss": 0.7971,
"step": 12300
},
{
"epoch": 1.56,
"learning_rate": 2.3970227522625393e-05,
"loss": 0.6122,
"step": 12310
},
{
"epoch": 1.56,
"learning_rate": 2.394908229721729e-05,
"loss": 0.8092,
"step": 12320
},
{
"epoch": 1.56,
"learning_rate": 2.3927937071809186e-05,
"loss": 0.6984,
"step": 12330
},
{
"epoch": 1.57,
"learning_rate": 2.3906791846401083e-05,
"loss": 0.7862,
"step": 12340
},
{
"epoch": 1.57,
"learning_rate": 2.388564662099298e-05,
"loss": 0.5145,
"step": 12350
},
{
"epoch": 1.57,
"learning_rate": 2.386450139558488e-05,
"loss": 0.6887,
"step": 12360
},
{
"epoch": 1.57,
"learning_rate": 2.3843356170176775e-05,
"loss": 0.6677,
"step": 12370
},
{
"epoch": 1.57,
"learning_rate": 2.382221094476867e-05,
"loss": 0.6793,
"step": 12380
},
{
"epoch": 1.57,
"learning_rate": 2.3801065719360568e-05,
"loss": 0.7769,
"step": 12390
},
{
"epoch": 1.57,
"learning_rate": 2.3779920493952468e-05,
"loss": 0.6929,
"step": 12400
},
{
"epoch": 1.57,
"learning_rate": 2.3758775268544364e-05,
"loss": 0.7032,
"step": 12410
},
{
"epoch": 1.58,
"learning_rate": 2.373763004313626e-05,
"loss": 0.9568,
"step": 12420
},
{
"epoch": 1.58,
"learning_rate": 2.3716484817728157e-05,
"loss": 0.6132,
"step": 12430
},
{
"epoch": 1.58,
"learning_rate": 2.3695339592320057e-05,
"loss": 0.648,
"step": 12440
},
{
"epoch": 1.58,
"learning_rate": 2.3674194366911953e-05,
"loss": 0.8188,
"step": 12450
},
{
"epoch": 1.58,
"learning_rate": 2.365304914150385e-05,
"loss": 0.7784,
"step": 12460
},
{
"epoch": 1.58,
"learning_rate": 2.3631903916095746e-05,
"loss": 0.7876,
"step": 12470
},
{
"epoch": 1.58,
"learning_rate": 2.3610758690687643e-05,
"loss": 0.9562,
"step": 12480
},
{
"epoch": 1.58,
"learning_rate": 2.3589613465279542e-05,
"loss": 0.7118,
"step": 12490
},
{
"epoch": 1.59,
"learning_rate": 2.356846823987144e-05,
"loss": 0.7515,
"step": 12500
},
{
"epoch": 1.59,
"learning_rate": 2.3547323014463335e-05,
"loss": 0.7184,
"step": 12510
},
{
"epoch": 1.59,
"learning_rate": 2.352617778905523e-05,
"loss": 0.6821,
"step": 12520
},
{
"epoch": 1.59,
"learning_rate": 2.350503256364713e-05,
"loss": 0.9479,
"step": 12530
},
{
"epoch": 1.59,
"learning_rate": 2.3483887338239028e-05,
"loss": 0.6542,
"step": 12540
},
{
"epoch": 1.59,
"learning_rate": 2.3462742112830924e-05,
"loss": 0.7335,
"step": 12550
},
{
"epoch": 1.59,
"learning_rate": 2.344159688742282e-05,
"loss": 0.7511,
"step": 12560
},
{
"epoch": 1.59,
"learning_rate": 2.3420451662014717e-05,
"loss": 0.7741,
"step": 12570
},
{
"epoch": 1.6,
"learning_rate": 2.3399306436606617e-05,
"loss": 0.5558,
"step": 12580
},
{
"epoch": 1.6,
"learning_rate": 2.3378161211198513e-05,
"loss": 0.7343,
"step": 12590
},
{
"epoch": 1.6,
"learning_rate": 2.335701598579041e-05,
"loss": 0.8176,
"step": 12600
},
{
"epoch": 1.6,
"learning_rate": 2.3335870760382306e-05,
"loss": 0.7215,
"step": 12610
},
{
"epoch": 1.6,
"learning_rate": 2.3314725534974206e-05,
"loss": 0.6348,
"step": 12620
},
{
"epoch": 1.6,
"learning_rate": 2.3293580309566102e-05,
"loss": 0.8814,
"step": 12630
},
{
"epoch": 1.6,
"learning_rate": 2.3272435084158e-05,
"loss": 0.728,
"step": 12640
},
{
"epoch": 1.6,
"learning_rate": 2.3251289858749895e-05,
"loss": 0.6675,
"step": 12650
},
{
"epoch": 1.61,
"learning_rate": 2.3230144633341795e-05,
"loss": 0.729,
"step": 12660
},
{
"epoch": 1.61,
"learning_rate": 2.320899940793369e-05,
"loss": 0.7548,
"step": 12670
},
{
"epoch": 1.61,
"learning_rate": 2.3187854182525588e-05,
"loss": 0.6303,
"step": 12680
},
{
"epoch": 1.61,
"learning_rate": 2.3166708957117484e-05,
"loss": 0.6075,
"step": 12690
},
{
"epoch": 1.61,
"learning_rate": 2.314556373170938e-05,
"loss": 0.7667,
"step": 12700
},
{
"epoch": 1.61,
"learning_rate": 2.312441850630128e-05,
"loss": 1.0017,
"step": 12710
},
{
"epoch": 1.61,
"learning_rate": 2.3103273280893177e-05,
"loss": 0.6947,
"step": 12720
},
{
"epoch": 1.62,
"learning_rate": 2.3082128055485073e-05,
"loss": 0.968,
"step": 12730
},
{
"epoch": 1.62,
"learning_rate": 2.306098283007697e-05,
"loss": 0.9345,
"step": 12740
},
{
"epoch": 1.62,
"learning_rate": 2.303983760466887e-05,
"loss": 0.6884,
"step": 12750
},
{
"epoch": 1.62,
"learning_rate": 2.3018692379260766e-05,
"loss": 0.8185,
"step": 12760
},
{
"epoch": 1.62,
"learning_rate": 2.2997547153852662e-05,
"loss": 0.8517,
"step": 12770
},
{
"epoch": 1.62,
"learning_rate": 2.297640192844456e-05,
"loss": 0.7371,
"step": 12780
},
{
"epoch": 1.62,
"learning_rate": 2.295525670303646e-05,
"loss": 0.8269,
"step": 12790
},
{
"epoch": 1.62,
"learning_rate": 2.2934111477628355e-05,
"loss": 0.7799,
"step": 12800
},
{
"epoch": 1.63,
"learning_rate": 2.291296625222025e-05,
"loss": 0.8944,
"step": 12810
},
{
"epoch": 1.63,
"learning_rate": 2.2891821026812148e-05,
"loss": 0.8484,
"step": 12820
},
{
"epoch": 1.63,
"learning_rate": 2.2870675801404044e-05,
"loss": 0.8333,
"step": 12830
},
{
"epoch": 1.63,
"learning_rate": 2.284953057599594e-05,
"loss": 0.6712,
"step": 12840
},
{
"epoch": 1.63,
"learning_rate": 2.282838535058784e-05,
"loss": 0.6721,
"step": 12850
},
{
"epoch": 1.63,
"learning_rate": 2.2807240125179737e-05,
"loss": 0.539,
"step": 12860
},
{
"epoch": 1.63,
"learning_rate": 2.2786094899771633e-05,
"loss": 0.7256,
"step": 12870
},
{
"epoch": 1.63,
"learning_rate": 2.276494967436353e-05,
"loss": 0.6626,
"step": 12880
},
{
"epoch": 1.64,
"learning_rate": 2.2743804448955426e-05,
"loss": 0.5664,
"step": 12890
},
{
"epoch": 1.64,
"learning_rate": 2.2722659223547322e-05,
"loss": 0.7664,
"step": 12900
},
{
"epoch": 1.64,
"learning_rate": 2.2701513998139222e-05,
"loss": 0.7511,
"step": 12910
},
{
"epoch": 1.64,
"learning_rate": 2.268036877273112e-05,
"loss": 0.6643,
"step": 12920
},
{
"epoch": 1.64,
"learning_rate": 2.2659223547323015e-05,
"loss": 0.6614,
"step": 12930
},
{
"epoch": 1.64,
"learning_rate": 2.263807832191491e-05,
"loss": 0.6293,
"step": 12940
},
{
"epoch": 1.64,
"learning_rate": 2.2616933096506808e-05,
"loss": 0.7931,
"step": 12950
},
{
"epoch": 1.64,
"learning_rate": 2.2595787871098704e-05,
"loss": 0.7513,
"step": 12960
},
{
"epoch": 1.65,
"learning_rate": 2.2574642645690604e-05,
"loss": 0.8645,
"step": 12970
},
{
"epoch": 1.65,
"learning_rate": 2.25534974202825e-05,
"loss": 0.8105,
"step": 12980
},
{
"epoch": 1.65,
"learning_rate": 2.2532352194874397e-05,
"loss": 0.734,
"step": 12990
},
{
"epoch": 1.65,
"learning_rate": 2.2511206969466293e-05,
"loss": 0.7138,
"step": 13000
},
{
"epoch": 1.65,
"eval_loss": 0.9949945211410522,
"eval_rouge1": 0.7282422145404577,
"eval_rouge2": 0.596215009023991,
"eval_rougeL": 0.6755397761751523,
"eval_rougeLsum": 0.6753506760218706,
"eval_runtime": 367.8993,
"eval_samples_per_second": 9.323,
"eval_steps_per_second": 2.332,
"step": 13000
},
{
"epoch": 1.65,
"learning_rate": 2.2490061744058193e-05,
"loss": 0.7488,
"step": 13010
},
{
"epoch": 1.65,
"learning_rate": 2.246891651865009e-05,
"loss": 0.7069,
"step": 13020
},
{
"epoch": 1.65,
"learning_rate": 2.2447771293241986e-05,
"loss": 0.7667,
"step": 13030
},
{
"epoch": 1.65,
"learning_rate": 2.2426626067833882e-05,
"loss": 0.7195,
"step": 13040
},
{
"epoch": 1.66,
"learning_rate": 2.240548084242578e-05,
"loss": 0.6465,
"step": 13050
},
{
"epoch": 1.66,
"learning_rate": 2.238433561701768e-05,
"loss": 0.6939,
"step": 13060
},
{
"epoch": 1.66,
"learning_rate": 2.2363190391609575e-05,
"loss": 1.0187,
"step": 13070
},
{
"epoch": 1.66,
"learning_rate": 2.234204516620147e-05,
"loss": 0.7494,
"step": 13080
},
{
"epoch": 1.66,
"learning_rate": 2.2320899940793368e-05,
"loss": 0.7155,
"step": 13090
},
{
"epoch": 1.66,
"learning_rate": 2.2299754715385268e-05,
"loss": 0.9857,
"step": 13100
},
{
"epoch": 1.66,
"learning_rate": 2.2278609489977164e-05,
"loss": 0.8777,
"step": 13110
},
{
"epoch": 1.66,
"learning_rate": 2.225746426456906e-05,
"loss": 0.5415,
"step": 13120
},
{
"epoch": 1.67,
"learning_rate": 2.2236319039160957e-05,
"loss": 0.7782,
"step": 13130
},
{
"epoch": 1.67,
"learning_rate": 2.2215173813752857e-05,
"loss": 0.8442,
"step": 13140
},
{
"epoch": 1.67,
"learning_rate": 2.2194028588344753e-05,
"loss": 1.0212,
"step": 13150
},
{
"epoch": 1.67,
"learning_rate": 2.217288336293665e-05,
"loss": 0.6345,
"step": 13160
},
{
"epoch": 1.67,
"learning_rate": 2.2151738137528546e-05,
"loss": 0.7978,
"step": 13170
},
{
"epoch": 1.67,
"learning_rate": 2.2130592912120442e-05,
"loss": 0.6258,
"step": 13180
},
{
"epoch": 1.67,
"learning_rate": 2.2109447686712342e-05,
"loss": 0.5791,
"step": 13190
},
{
"epoch": 1.67,
"learning_rate": 2.208830246130424e-05,
"loss": 0.7919,
"step": 13200
},
{
"epoch": 1.68,
"learning_rate": 2.2067157235896135e-05,
"loss": 0.7164,
"step": 13210
},
{
"epoch": 1.68,
"learning_rate": 2.204601201048803e-05,
"loss": 0.7061,
"step": 13220
},
{
"epoch": 1.68,
"learning_rate": 2.202486678507993e-05,
"loss": 0.5851,
"step": 13230
},
{
"epoch": 1.68,
"learning_rate": 2.2003721559671827e-05,
"loss": 0.7641,
"step": 13240
},
{
"epoch": 1.68,
"learning_rate": 2.1982576334263724e-05,
"loss": 0.6567,
"step": 13250
},
{
"epoch": 1.68,
"learning_rate": 2.196143110885562e-05,
"loss": 0.5816,
"step": 13260
},
{
"epoch": 1.68,
"learning_rate": 2.1940285883447517e-05,
"loss": 0.6956,
"step": 13270
},
{
"epoch": 1.68,
"learning_rate": 2.1919140658039416e-05,
"loss": 0.7512,
"step": 13280
},
{
"epoch": 1.69,
"learning_rate": 2.1897995432631313e-05,
"loss": 0.6492,
"step": 13290
},
{
"epoch": 1.69,
"learning_rate": 2.187685020722321e-05,
"loss": 0.7788,
"step": 13300
},
{
"epoch": 1.69,
"learning_rate": 2.1855704981815106e-05,
"loss": 0.6221,
"step": 13310
},
{
"epoch": 1.69,
"learning_rate": 2.1834559756407006e-05,
"loss": 0.7901,
"step": 13320
},
{
"epoch": 1.69,
"learning_rate": 2.1813414530998902e-05,
"loss": 0.6186,
"step": 13330
},
{
"epoch": 1.69,
"learning_rate": 2.17922693055908e-05,
"loss": 0.6939,
"step": 13340
},
{
"epoch": 1.69,
"learning_rate": 2.1771124080182695e-05,
"loss": 0.6297,
"step": 13350
},
{
"epoch": 1.7,
"learning_rate": 2.1749978854774595e-05,
"loss": 0.7177,
"step": 13360
},
{
"epoch": 1.7,
"learning_rate": 2.172883362936649e-05,
"loss": 1.0119,
"step": 13370
},
{
"epoch": 1.7,
"learning_rate": 2.1707688403958387e-05,
"loss": 0.7259,
"step": 13380
},
{
"epoch": 1.7,
"learning_rate": 2.1686543178550284e-05,
"loss": 0.6493,
"step": 13390
},
{
"epoch": 1.7,
"learning_rate": 2.166539795314218e-05,
"loss": 0.7523,
"step": 13400
},
{
"epoch": 1.7,
"learning_rate": 2.164425272773408e-05,
"loss": 0.9458,
"step": 13410
},
{
"epoch": 1.7,
"learning_rate": 2.1623107502325976e-05,
"loss": 0.6441,
"step": 13420
},
{
"epoch": 1.7,
"learning_rate": 2.1601962276917873e-05,
"loss": 0.9359,
"step": 13430
},
{
"epoch": 1.71,
"learning_rate": 2.158081705150977e-05,
"loss": 0.5873,
"step": 13440
},
{
"epoch": 1.71,
"learning_rate": 2.155967182610167e-05,
"loss": 0.9961,
"step": 13450
},
{
"epoch": 1.71,
"learning_rate": 2.1538526600693565e-05,
"loss": 0.7437,
"step": 13460
},
{
"epoch": 1.71,
"learning_rate": 2.1517381375285462e-05,
"loss": 0.6914,
"step": 13470
},
{
"epoch": 1.71,
"learning_rate": 2.1496236149877358e-05,
"loss": 0.8729,
"step": 13480
},
{
"epoch": 1.71,
"learning_rate": 2.1475090924469258e-05,
"loss": 0.6984,
"step": 13490
},
{
"epoch": 1.71,
"learning_rate": 2.1453945699061154e-05,
"loss": 0.8213,
"step": 13500
},
{
"epoch": 1.71,
"learning_rate": 2.143280047365305e-05,
"loss": 0.5718,
"step": 13510
},
{
"epoch": 1.72,
"learning_rate": 2.1411655248244947e-05,
"loss": 0.9477,
"step": 13520
},
{
"epoch": 1.72,
"learning_rate": 2.1390510022836844e-05,
"loss": 0.5585,
"step": 13530
},
{
"epoch": 1.72,
"learning_rate": 2.1369364797428744e-05,
"loss": 0.8701,
"step": 13540
},
{
"epoch": 1.72,
"learning_rate": 2.134821957202064e-05,
"loss": 0.8297,
"step": 13550
},
{
"epoch": 1.72,
"learning_rate": 2.1327074346612536e-05,
"loss": 0.7963,
"step": 13560
},
{
"epoch": 1.72,
"learning_rate": 2.1305929121204433e-05,
"loss": 0.6083,
"step": 13570
},
{
"epoch": 1.72,
"learning_rate": 2.1284783895796333e-05,
"loss": 0.7382,
"step": 13580
},
{
"epoch": 1.72,
"learning_rate": 2.126363867038823e-05,
"loss": 0.8055,
"step": 13590
},
{
"epoch": 1.73,
"learning_rate": 2.1242493444980125e-05,
"loss": 0.7993,
"step": 13600
},
{
"epoch": 1.73,
"learning_rate": 2.1221348219572022e-05,
"loss": 0.7505,
"step": 13610
},
{
"epoch": 1.73,
"learning_rate": 2.1200202994163918e-05,
"loss": 0.7468,
"step": 13620
},
{
"epoch": 1.73,
"learning_rate": 2.1179057768755818e-05,
"loss": 0.5177,
"step": 13630
},
{
"epoch": 1.73,
"learning_rate": 2.1157912543347714e-05,
"loss": 0.6448,
"step": 13640
},
{
"epoch": 1.73,
"learning_rate": 2.113676731793961e-05,
"loss": 0.6958,
"step": 13650
},
{
"epoch": 1.73,
"learning_rate": 2.1115622092531507e-05,
"loss": 0.6063,
"step": 13660
},
{
"epoch": 1.73,
"learning_rate": 2.1094476867123407e-05,
"loss": 0.6442,
"step": 13670
},
{
"epoch": 1.74,
"learning_rate": 2.1073331641715303e-05,
"loss": 0.6584,
"step": 13680
},
{
"epoch": 1.74,
"learning_rate": 2.10521864163072e-05,
"loss": 0.8007,
"step": 13690
},
{
"epoch": 1.74,
"learning_rate": 2.1031041190899096e-05,
"loss": 0.4909,
"step": 13700
},
{
"epoch": 1.74,
"learning_rate": 2.1009895965490993e-05,
"loss": 0.7574,
"step": 13710
},
{
"epoch": 1.74,
"learning_rate": 2.0988750740082892e-05,
"loss": 0.6582,
"step": 13720
},
{
"epoch": 1.74,
"learning_rate": 2.096760551467479e-05,
"loss": 0.9148,
"step": 13730
},
{
"epoch": 1.74,
"learning_rate": 2.0946460289266685e-05,
"loss": 0.6785,
"step": 13740
},
{
"epoch": 1.74,
"learning_rate": 2.0925315063858582e-05,
"loss": 0.8007,
"step": 13750
},
{
"epoch": 1.75,
"learning_rate": 2.0904169838450478e-05,
"loss": 0.7748,
"step": 13760
},
{
"epoch": 1.75,
"learning_rate": 2.0883024613042375e-05,
"loss": 0.6437,
"step": 13770
},
{
"epoch": 1.75,
"learning_rate": 2.0861879387634274e-05,
"loss": 0.6601,
"step": 13780
},
{
"epoch": 1.75,
"learning_rate": 2.084073416222617e-05,
"loss": 0.9939,
"step": 13790
},
{
"epoch": 1.75,
"learning_rate": 2.0819588936818067e-05,
"loss": 0.7635,
"step": 13800
},
{
"epoch": 1.75,
"learning_rate": 2.0798443711409964e-05,
"loss": 0.7824,
"step": 13810
},
{
"epoch": 1.75,
"learning_rate": 2.077729848600186e-05,
"loss": 0.7152,
"step": 13820
},
{
"epoch": 1.75,
"learning_rate": 2.0756153260593756e-05,
"loss": 0.7003,
"step": 13830
},
{
"epoch": 1.76,
"learning_rate": 2.0735008035185656e-05,
"loss": 0.8023,
"step": 13840
},
{
"epoch": 1.76,
"learning_rate": 2.0713862809777553e-05,
"loss": 0.6666,
"step": 13850
},
{
"epoch": 1.76,
"learning_rate": 2.069271758436945e-05,
"loss": 0.5699,
"step": 13860
},
{
"epoch": 1.76,
"learning_rate": 2.0671572358961345e-05,
"loss": 0.7144,
"step": 13870
},
{
"epoch": 1.76,
"learning_rate": 2.0650427133553242e-05,
"loss": 0.6077,
"step": 13880
},
{
"epoch": 1.76,
"learning_rate": 2.062928190814514e-05,
"loss": 0.5962,
"step": 13890
},
{
"epoch": 1.76,
"learning_rate": 2.0608136682737038e-05,
"loss": 0.5912,
"step": 13900
},
{
"epoch": 1.76,
"learning_rate": 2.0586991457328934e-05,
"loss": 0.754,
"step": 13910
},
{
"epoch": 1.77,
"learning_rate": 2.056584623192083e-05,
"loss": 0.8693,
"step": 13920
},
{
"epoch": 1.77,
"learning_rate": 2.054470100651273e-05,
"loss": 0.8215,
"step": 13930
},
{
"epoch": 1.77,
"learning_rate": 2.0523555781104627e-05,
"loss": 0.7585,
"step": 13940
},
{
"epoch": 1.77,
"learning_rate": 2.0502410555696524e-05,
"loss": 0.5805,
"step": 13950
},
{
"epoch": 1.77,
"learning_rate": 2.048126533028842e-05,
"loss": 0.8019,
"step": 13960
},
{
"epoch": 1.77,
"learning_rate": 2.046012010488032e-05,
"loss": 0.5747,
"step": 13970
},
{
"epoch": 1.77,
"learning_rate": 2.0438974879472216e-05,
"loss": 0.6621,
"step": 13980
},
{
"epoch": 1.77,
"learning_rate": 2.0417829654064113e-05,
"loss": 0.5669,
"step": 13990
},
{
"epoch": 1.78,
"learning_rate": 2.039668442865601e-05,
"loss": 0.5504,
"step": 14000
},
{
"epoch": 1.78,
"eval_loss": 0.9902545213699341,
"eval_rouge1": 0.735777627916854,
"eval_rouge2": 0.6117996681743169,
"eval_rougeL": 0.6872489585433417,
"eval_rougeLsum": 0.6873374812875375,
"eval_runtime": 366.1081,
"eval_samples_per_second": 9.369,
"eval_steps_per_second": 2.344,
"step": 14000
},
{
"epoch": 1.78,
"learning_rate": 2.0375539203247905e-05,
"loss": 0.8317,
"step": 14010
},
{
"epoch": 1.78,
"learning_rate": 2.0354393977839805e-05,
"loss": 0.5913,
"step": 14020
},
{
"epoch": 1.78,
"learning_rate": 2.03332487524317e-05,
"loss": 0.6852,
"step": 14030
},
{
"epoch": 1.78,
"learning_rate": 2.0312103527023598e-05,
"loss": 0.6829,
"step": 14040
},
{
"epoch": 1.78,
"learning_rate": 2.0290958301615494e-05,
"loss": 0.7268,
"step": 14050
},
{
"epoch": 1.78,
"learning_rate": 2.0269813076207394e-05,
"loss": 0.8703,
"step": 14060
},
{
"epoch": 1.79,
"learning_rate": 2.024866785079929e-05,
"loss": 0.6795,
"step": 14070
},
{
"epoch": 1.79,
"learning_rate": 2.0227522625391187e-05,
"loss": 0.8408,
"step": 14080
},
{
"epoch": 1.79,
"learning_rate": 2.0206377399983083e-05,
"loss": 0.8681,
"step": 14090
},
{
"epoch": 1.79,
"learning_rate": 2.018523217457498e-05,
"loss": 0.6943,
"step": 14100
},
{
"epoch": 1.79,
"learning_rate": 2.016408694916688e-05,
"loss": 0.8387,
"step": 14110
},
{
"epoch": 1.79,
"learning_rate": 2.0142941723758776e-05,
"loss": 0.9716,
"step": 14120
},
{
"epoch": 1.79,
"learning_rate": 2.0121796498350672e-05,
"loss": 0.7651,
"step": 14130
},
{
"epoch": 1.79,
"learning_rate": 2.010065127294257e-05,
"loss": 0.5571,
"step": 14140
},
{
"epoch": 1.8,
"learning_rate": 2.007950604753447e-05,
"loss": 0.8035,
"step": 14150
},
{
"epoch": 1.8,
"learning_rate": 2.0058360822126365e-05,
"loss": 0.536,
"step": 14160
},
{
"epoch": 1.8,
"learning_rate": 2.003721559671826e-05,
"loss": 0.8181,
"step": 14170
},
{
"epoch": 1.8,
"learning_rate": 2.0016070371310158e-05,
"loss": 0.7806,
"step": 14180
},
{
"epoch": 1.8,
"learning_rate": 1.9994925145902058e-05,
"loss": 0.9146,
"step": 14190
},
{
"epoch": 1.8,
"learning_rate": 1.9973779920493954e-05,
"loss": 0.7645,
"step": 14200
},
{
"epoch": 1.8,
"learning_rate": 1.995263469508585e-05,
"loss": 0.7764,
"step": 14210
},
{
"epoch": 1.8,
"learning_rate": 1.9931489469677747e-05,
"loss": 0.5995,
"step": 14220
},
{
"epoch": 1.81,
"learning_rate": 1.9910344244269643e-05,
"loss": 0.8119,
"step": 14230
},
{
"epoch": 1.81,
"learning_rate": 1.9889199018861543e-05,
"loss": 0.8425,
"step": 14240
},
{
"epoch": 1.81,
"learning_rate": 1.986805379345344e-05,
"loss": 0.7919,
"step": 14250
},
{
"epoch": 1.81,
"learning_rate": 1.9846908568045336e-05,
"loss": 0.8202,
"step": 14260
},
{
"epoch": 1.81,
"learning_rate": 1.9825763342637232e-05,
"loss": 0.834,
"step": 14270
},
{
"epoch": 1.81,
"learning_rate": 1.9804618117229132e-05,
"loss": 0.6966,
"step": 14280
},
{
"epoch": 1.81,
"learning_rate": 1.978347289182103e-05,
"loss": 0.9434,
"step": 14290
},
{
"epoch": 1.81,
"learning_rate": 1.9762327666412925e-05,
"loss": 0.6992,
"step": 14300
},
{
"epoch": 1.82,
"learning_rate": 1.974118244100482e-05,
"loss": 0.7471,
"step": 14310
},
{
"epoch": 1.82,
"learning_rate": 1.972003721559672e-05,
"loss": 0.6712,
"step": 14320
},
{
"epoch": 1.82,
"learning_rate": 1.9698891990188618e-05,
"loss": 0.8092,
"step": 14330
},
{
"epoch": 1.82,
"learning_rate": 1.9677746764780514e-05,
"loss": 0.7347,
"step": 14340
},
{
"epoch": 1.82,
"learning_rate": 1.965660153937241e-05,
"loss": 0.7326,
"step": 14350
},
{
"epoch": 1.82,
"learning_rate": 1.9635456313964307e-05,
"loss": 0.7077,
"step": 14360
},
{
"epoch": 1.82,
"learning_rate": 1.9614311088556207e-05,
"loss": 0.7358,
"step": 14370
},
{
"epoch": 1.82,
"learning_rate": 1.9593165863148103e-05,
"loss": 0.7725,
"step": 14380
},
{
"epoch": 1.83,
"learning_rate": 1.957202063774e-05,
"loss": 0.701,
"step": 14390
},
{
"epoch": 1.83,
"learning_rate": 1.9550875412331896e-05,
"loss": 0.6524,
"step": 14400
},
{
"epoch": 1.83,
"learning_rate": 1.9529730186923796e-05,
"loss": 0.7813,
"step": 14410
},
{
"epoch": 1.83,
"learning_rate": 1.9508584961515692e-05,
"loss": 0.7874,
"step": 14420
},
{
"epoch": 1.83,
"learning_rate": 1.948743973610759e-05,
"loss": 0.8428,
"step": 14430
},
{
"epoch": 1.83,
"learning_rate": 1.9466294510699485e-05,
"loss": 0.5625,
"step": 14440
},
{
"epoch": 1.83,
"learning_rate": 1.944514928529138e-05,
"loss": 0.7378,
"step": 14450
},
{
"epoch": 1.83,
"learning_rate": 1.942400405988328e-05,
"loss": 0.7172,
"step": 14460
},
{
"epoch": 1.84,
"learning_rate": 1.9402858834475178e-05,
"loss": 0.6975,
"step": 14470
},
{
"epoch": 1.84,
"learning_rate": 1.9381713609067074e-05,
"loss": 0.5092,
"step": 14480
},
{
"epoch": 1.84,
"learning_rate": 1.936056838365897e-05,
"loss": 0.8234,
"step": 14490
},
{
"epoch": 1.84,
"learning_rate": 1.933942315825087e-05,
"loss": 0.6879,
"step": 14500
},
{
"epoch": 1.84,
"learning_rate": 1.9318277932842767e-05,
"loss": 0.7364,
"step": 14510
},
{
"epoch": 1.84,
"learning_rate": 1.9297132707434663e-05,
"loss": 0.7312,
"step": 14520
},
{
"epoch": 1.84,
"learning_rate": 1.927598748202656e-05,
"loss": 0.7522,
"step": 14530
},
{
"epoch": 1.84,
"learning_rate": 1.925484225661846e-05,
"loss": 0.7245,
"step": 14540
},
{
"epoch": 1.85,
"learning_rate": 1.9233697031210356e-05,
"loss": 0.5807,
"step": 14550
},
{
"epoch": 1.85,
"learning_rate": 1.9212551805802252e-05,
"loss": 0.6888,
"step": 14560
},
{
"epoch": 1.85,
"learning_rate": 1.919140658039415e-05,
"loss": 0.5955,
"step": 14570
},
{
"epoch": 1.85,
"learning_rate": 1.9170261354986045e-05,
"loss": 0.9903,
"step": 14580
},
{
"epoch": 1.85,
"learning_rate": 1.9149116129577945e-05,
"loss": 0.5526,
"step": 14590
},
{
"epoch": 1.85,
"learning_rate": 1.912797090416984e-05,
"loss": 0.7858,
"step": 14600
},
{
"epoch": 1.85,
"learning_rate": 1.9106825678761738e-05,
"loss": 0.9016,
"step": 14610
},
{
"epoch": 1.85,
"learning_rate": 1.9085680453353634e-05,
"loss": 0.6509,
"step": 14620
},
{
"epoch": 1.86,
"learning_rate": 1.906453522794553e-05,
"loss": 0.6477,
"step": 14630
},
{
"epoch": 1.86,
"learning_rate": 1.9043390002537427e-05,
"loss": 0.9305,
"step": 14640
},
{
"epoch": 1.86,
"learning_rate": 1.9022244777129327e-05,
"loss": 0.627,
"step": 14650
},
{
"epoch": 1.86,
"learning_rate": 1.9001099551721223e-05,
"loss": 0.6982,
"step": 14660
},
{
"epoch": 1.86,
"learning_rate": 1.897995432631312e-05,
"loss": 0.9114,
"step": 14670
},
{
"epoch": 1.86,
"learning_rate": 1.8958809100905016e-05,
"loss": 0.6183,
"step": 14680
},
{
"epoch": 1.86,
"learning_rate": 1.8937663875496912e-05,
"loss": 0.699,
"step": 14690
},
{
"epoch": 1.87,
"learning_rate": 1.891651865008881e-05,
"loss": 0.8178,
"step": 14700
},
{
"epoch": 1.87,
"learning_rate": 1.889537342468071e-05,
"loss": 1.058,
"step": 14710
},
{
"epoch": 1.87,
"learning_rate": 1.8874228199272605e-05,
"loss": 0.9091,
"step": 14720
},
{
"epoch": 1.87,
"learning_rate": 1.88530829738645e-05,
"loss": 0.8907,
"step": 14730
},
{
"epoch": 1.87,
"learning_rate": 1.8831937748456398e-05,
"loss": 0.6077,
"step": 14740
},
{
"epoch": 1.87,
"learning_rate": 1.8810792523048294e-05,
"loss": 0.697,
"step": 14750
},
{
"epoch": 1.87,
"learning_rate": 1.8789647297640194e-05,
"loss": 0.6752,
"step": 14760
},
{
"epoch": 1.87,
"learning_rate": 1.876850207223209e-05,
"loss": 0.7911,
"step": 14770
},
{
"epoch": 1.88,
"learning_rate": 1.8747356846823987e-05,
"loss": 0.6497,
"step": 14780
},
{
"epoch": 1.88,
"learning_rate": 1.8726211621415883e-05,
"loss": 0.7373,
"step": 14790
},
{
"epoch": 1.88,
"learning_rate": 1.870506639600778e-05,
"loss": 0.8619,
"step": 14800
},
{
"epoch": 1.88,
"learning_rate": 1.868392117059968e-05,
"loss": 0.7862,
"step": 14810
},
{
"epoch": 1.88,
"learning_rate": 1.8662775945191576e-05,
"loss": 0.7313,
"step": 14820
},
{
"epoch": 1.88,
"learning_rate": 1.8641630719783472e-05,
"loss": 0.666,
"step": 14830
},
{
"epoch": 1.88,
"learning_rate": 1.862048549437537e-05,
"loss": 0.8983,
"step": 14840
},
{
"epoch": 1.88,
"learning_rate": 1.859934026896727e-05,
"loss": 0.5863,
"step": 14850
},
{
"epoch": 1.89,
"learning_rate": 1.8578195043559165e-05,
"loss": 0.8136,
"step": 14860
},
{
"epoch": 1.89,
"learning_rate": 1.855704981815106e-05,
"loss": 0.8282,
"step": 14870
},
{
"epoch": 1.89,
"learning_rate": 1.8535904592742958e-05,
"loss": 0.8416,
"step": 14880
},
{
"epoch": 1.89,
"learning_rate": 1.8514759367334857e-05,
"loss": 0.6106,
"step": 14890
},
{
"epoch": 1.89,
"learning_rate": 1.8493614141926754e-05,
"loss": 0.6113,
"step": 14900
},
{
"epoch": 1.89,
"learning_rate": 1.847246891651865e-05,
"loss": 0.858,
"step": 14910
},
{
"epoch": 1.89,
"learning_rate": 1.8451323691110547e-05,
"loss": 0.6936,
"step": 14920
},
{
"epoch": 1.89,
"learning_rate": 1.8430178465702443e-05,
"loss": 0.6427,
"step": 14930
},
{
"epoch": 1.9,
"learning_rate": 1.8409033240294343e-05,
"loss": 0.8382,
"step": 14940
},
{
"epoch": 1.9,
"learning_rate": 1.838788801488624e-05,
"loss": 0.6101,
"step": 14950
},
{
"epoch": 1.9,
"learning_rate": 1.8366742789478136e-05,
"loss": 0.8126,
"step": 14960
},
{
"epoch": 1.9,
"learning_rate": 1.8345597564070032e-05,
"loss": 0.5735,
"step": 14970
},
{
"epoch": 1.9,
"learning_rate": 1.8324452338661932e-05,
"loss": 0.9253,
"step": 14980
},
{
"epoch": 1.9,
"learning_rate": 1.8303307113253828e-05,
"loss": 0.7719,
"step": 14990
},
{
"epoch": 1.9,
"learning_rate": 1.8282161887845725e-05,
"loss": 0.6708,
"step": 15000
},
{
"epoch": 1.9,
"eval_loss": 0.9625405669212341,
"eval_rouge1": 0.7305004260005835,
"eval_rouge2": 0.5993225634245649,
"eval_rougeL": 0.6768841504930387,
"eval_rougeLsum": 0.6768677676849031,
"eval_runtime": 368.6638,
"eval_samples_per_second": 9.304,
"eval_steps_per_second": 2.327,
"step": 15000
},
{
"epoch": 1.9,
"learning_rate": 1.826101666243762e-05,
"loss": 0.6104,
"step": 15010
},
{
"epoch": 1.91,
"learning_rate": 1.823987143702952e-05,
"loss": 0.6588,
"step": 15020
},
{
"epoch": 1.91,
"learning_rate": 1.8218726211621417e-05,
"loss": 0.6177,
"step": 15030
},
{
"epoch": 1.91,
"learning_rate": 1.8197580986213314e-05,
"loss": 0.5728,
"step": 15040
},
{
"epoch": 1.91,
"learning_rate": 1.817643576080521e-05,
"loss": 0.7742,
"step": 15050
},
{
"epoch": 1.91,
"learning_rate": 1.8155290535397107e-05,
"loss": 0.8013,
"step": 15060
},
{
"epoch": 1.91,
"learning_rate": 1.8134145309989006e-05,
"loss": 0.6319,
"step": 15070
},
{
"epoch": 1.91,
"learning_rate": 1.8113000084580903e-05,
"loss": 0.9761,
"step": 15080
},
{
"epoch": 1.91,
"learning_rate": 1.80918548591728e-05,
"loss": 0.6932,
"step": 15090
},
{
"epoch": 1.92,
"learning_rate": 1.8070709633764696e-05,
"loss": 0.615,
"step": 15100
},
{
"epoch": 1.92,
"learning_rate": 1.8049564408356595e-05,
"loss": 0.823,
"step": 15110
},
{
"epoch": 1.92,
"learning_rate": 1.8028419182948492e-05,
"loss": 0.5172,
"step": 15120
},
{
"epoch": 1.92,
"learning_rate": 1.8007273957540388e-05,
"loss": 0.6205,
"step": 15130
},
{
"epoch": 1.92,
"learning_rate": 1.7986128732132285e-05,
"loss": 0.7465,
"step": 15140
},
{
"epoch": 1.92,
"learning_rate": 1.796498350672418e-05,
"loss": 0.7382,
"step": 15150
},
{
"epoch": 1.92,
"learning_rate": 1.794383828131608e-05,
"loss": 0.6123,
"step": 15160
},
{
"epoch": 1.92,
"learning_rate": 1.7922693055907977e-05,
"loss": 0.7894,
"step": 15170
},
{
"epoch": 1.93,
"learning_rate": 1.7901547830499874e-05,
"loss": 0.5678,
"step": 15180
},
{
"epoch": 1.93,
"learning_rate": 1.788040260509177e-05,
"loss": 0.6042,
"step": 15190
},
{
"epoch": 1.93,
"learning_rate": 1.785925737968367e-05,
"loss": 0.7521,
"step": 15200
},
{
"epoch": 1.93,
"learning_rate": 1.7838112154275566e-05,
"loss": 0.7196,
"step": 15210
},
{
"epoch": 1.93,
"learning_rate": 1.7816966928867463e-05,
"loss": 0.9316,
"step": 15220
},
{
"epoch": 1.93,
"learning_rate": 1.779582170345936e-05,
"loss": 0.7189,
"step": 15230
},
{
"epoch": 1.93,
"learning_rate": 1.777467647805126e-05,
"loss": 0.7687,
"step": 15240
},
{
"epoch": 1.93,
"learning_rate": 1.7753531252643155e-05,
"loss": 0.7337,
"step": 15250
},
{
"epoch": 1.94,
"learning_rate": 1.7732386027235052e-05,
"loss": 0.7247,
"step": 15260
},
{
"epoch": 1.94,
"learning_rate": 1.7711240801826948e-05,
"loss": 0.7582,
"step": 15270
},
{
"epoch": 1.94,
"learning_rate": 1.7690095576418845e-05,
"loss": 0.6237,
"step": 15280
},
{
"epoch": 1.94,
"learning_rate": 1.7668950351010744e-05,
"loss": 0.4938,
"step": 15290
},
{
"epoch": 1.94,
"learning_rate": 1.764780512560264e-05,
"loss": 0.7099,
"step": 15300
},
{
"epoch": 1.94,
"learning_rate": 1.7626659900194537e-05,
"loss": 0.803,
"step": 15310
},
{
"epoch": 1.94,
"learning_rate": 1.7605514674786434e-05,
"loss": 0.69,
"step": 15320
},
{
"epoch": 1.94,
"learning_rate": 1.7584369449378333e-05,
"loss": 0.7246,
"step": 15330
},
{
"epoch": 1.95,
"learning_rate": 1.756322422397023e-05,
"loss": 0.7419,
"step": 15340
},
{
"epoch": 1.95,
"learning_rate": 1.7542078998562126e-05,
"loss": 0.6963,
"step": 15350
},
{
"epoch": 1.95,
"learning_rate": 1.7520933773154023e-05,
"loss": 0.709,
"step": 15360
},
{
"epoch": 1.95,
"learning_rate": 1.7499788547745922e-05,
"loss": 0.9114,
"step": 15370
},
{
"epoch": 1.95,
"learning_rate": 1.747864332233782e-05,
"loss": 0.8218,
"step": 15380
},
{
"epoch": 1.95,
"learning_rate": 1.7457498096929715e-05,
"loss": 0.7821,
"step": 15390
},
{
"epoch": 1.95,
"learning_rate": 1.743635287152161e-05,
"loss": 0.8542,
"step": 15400
},
{
"epoch": 1.96,
"learning_rate": 1.7415207646113508e-05,
"loss": 0.6597,
"step": 15410
},
{
"epoch": 1.96,
"learning_rate": 1.7394062420705408e-05,
"loss": 0.6302,
"step": 15420
},
{
"epoch": 1.96,
"learning_rate": 1.7372917195297304e-05,
"loss": 0.7364,
"step": 15430
},
{
"epoch": 1.96,
"learning_rate": 1.73517719698892e-05,
"loss": 0.6639,
"step": 15440
},
{
"epoch": 1.96,
"learning_rate": 1.7330626744481097e-05,
"loss": 0.6704,
"step": 15450
},
{
"epoch": 1.96,
"learning_rate": 1.7309481519072997e-05,
"loss": 0.7728,
"step": 15460
},
{
"epoch": 1.96,
"learning_rate": 1.7288336293664893e-05,
"loss": 0.8546,
"step": 15470
},
{
"epoch": 1.96,
"learning_rate": 1.726719106825679e-05,
"loss": 0.9402,
"step": 15480
},
{
"epoch": 1.97,
"learning_rate": 1.7246045842848686e-05,
"loss": 0.6257,
"step": 15490
},
{
"epoch": 1.97,
"learning_rate": 1.7224900617440583e-05,
"loss": 0.6707,
"step": 15500
},
{
"epoch": 1.97,
"learning_rate": 1.7203755392032482e-05,
"loss": 0.7253,
"step": 15510
},
{
"epoch": 1.97,
"learning_rate": 1.718261016662438e-05,
"loss": 0.7514,
"step": 15520
},
{
"epoch": 1.97,
"learning_rate": 1.7161464941216275e-05,
"loss": 0.6743,
"step": 15530
},
{
"epoch": 1.97,
"learning_rate": 1.714031971580817e-05,
"loss": 0.4856,
"step": 15540
},
{
"epoch": 1.97,
"learning_rate": 1.7119174490400068e-05,
"loss": 0.732,
"step": 15550
},
{
"epoch": 1.97,
"learning_rate": 1.7098029264991964e-05,
"loss": 0.6034,
"step": 15560
},
{
"epoch": 1.98,
"learning_rate": 1.7076884039583864e-05,
"loss": 0.6967,
"step": 15570
},
{
"epoch": 1.98,
"learning_rate": 1.705573881417576e-05,
"loss": 0.747,
"step": 15580
},
{
"epoch": 1.98,
"learning_rate": 1.7034593588767657e-05,
"loss": 0.7506,
"step": 15590
},
{
"epoch": 1.98,
"learning_rate": 1.7013448363359553e-05,
"loss": 0.5789,
"step": 15600
},
{
"epoch": 1.98,
"learning_rate": 1.699230313795145e-05,
"loss": 0.6548,
"step": 15610
},
{
"epoch": 1.98,
"learning_rate": 1.6971157912543346e-05,
"loss": 0.873,
"step": 15620
},
{
"epoch": 1.98,
"learning_rate": 1.6950012687135246e-05,
"loss": 0.7502,
"step": 15630
},
{
"epoch": 1.98,
"learning_rate": 1.6928867461727142e-05,
"loss": 0.8749,
"step": 15640
},
{
"epoch": 1.99,
"learning_rate": 1.690772223631904e-05,
"loss": 0.9358,
"step": 15650
},
{
"epoch": 1.99,
"learning_rate": 1.6886577010910935e-05,
"loss": 0.5585,
"step": 15660
},
{
"epoch": 1.99,
"learning_rate": 1.6865431785502832e-05,
"loss": 0.7049,
"step": 15670
},
{
"epoch": 1.99,
"learning_rate": 1.684428656009473e-05,
"loss": 0.5735,
"step": 15680
},
{
"epoch": 1.99,
"learning_rate": 1.6823141334686628e-05,
"loss": 0.8819,
"step": 15690
},
{
"epoch": 1.99,
"learning_rate": 1.6801996109278524e-05,
"loss": 0.6498,
"step": 15700
},
{
"epoch": 1.99,
"learning_rate": 1.678085088387042e-05,
"loss": 0.7547,
"step": 15710
},
{
"epoch": 1.99,
"learning_rate": 1.675970565846232e-05,
"loss": 0.6465,
"step": 15720
},
{
"epoch": 2.0,
"learning_rate": 1.6738560433054217e-05,
"loss": 0.661,
"step": 15730
},
{
"epoch": 2.0,
"learning_rate": 1.6717415207646113e-05,
"loss": 0.5594,
"step": 15740
},
{
"epoch": 2.0,
"learning_rate": 1.669626998223801e-05,
"loss": 0.7443,
"step": 15750
},
{
"epoch": 2.0,
"learning_rate": 1.6675124756829906e-05,
"loss": 0.647,
"step": 15760
},
{
"epoch": 2.0,
"learning_rate": 1.6653979531421806e-05,
"loss": 0.6345,
"step": 15770
},
{
"epoch": 2.0,
"learning_rate": 1.6632834306013702e-05,
"loss": 0.4894,
"step": 15780
},
{
"epoch": 2.0,
"learning_rate": 1.66116890806056e-05,
"loss": 0.4588,
"step": 15790
},
{
"epoch": 2.0,
"learning_rate": 1.6590543855197495e-05,
"loss": 0.3699,
"step": 15800
},
{
"epoch": 2.01,
"learning_rate": 1.6569398629789395e-05,
"loss": 0.5344,
"step": 15810
},
{
"epoch": 2.01,
"learning_rate": 1.654825340438129e-05,
"loss": 0.5497,
"step": 15820
},
{
"epoch": 2.01,
"learning_rate": 1.6527108178973188e-05,
"loss": 0.6316,
"step": 15830
},
{
"epoch": 2.01,
"learning_rate": 1.6505962953565084e-05,
"loss": 0.5043,
"step": 15840
},
{
"epoch": 2.01,
"learning_rate": 1.6484817728156984e-05,
"loss": 0.604,
"step": 15850
},
{
"epoch": 2.01,
"learning_rate": 1.646367250274888e-05,
"loss": 0.3851,
"step": 15860
},
{
"epoch": 2.01,
"learning_rate": 1.6442527277340777e-05,
"loss": 0.4892,
"step": 15870
},
{
"epoch": 2.01,
"learning_rate": 1.6421382051932673e-05,
"loss": 0.5684,
"step": 15880
},
{
"epoch": 2.02,
"learning_rate": 1.640023682652457e-05,
"loss": 0.5456,
"step": 15890
},
{
"epoch": 2.02,
"learning_rate": 1.637909160111647e-05,
"loss": 0.6092,
"step": 15900
},
{
"epoch": 2.02,
"learning_rate": 1.6357946375708366e-05,
"loss": 0.3216,
"step": 15910
},
{
"epoch": 2.02,
"learning_rate": 1.6336801150300262e-05,
"loss": 0.5148,
"step": 15920
},
{
"epoch": 2.02,
"learning_rate": 1.631565592489216e-05,
"loss": 0.4432,
"step": 15930
},
{
"epoch": 2.02,
"learning_rate": 1.629451069948406e-05,
"loss": 0.4731,
"step": 15940
},
{
"epoch": 2.02,
"learning_rate": 1.6273365474075955e-05,
"loss": 0.5103,
"step": 15950
},
{
"epoch": 2.02,
"learning_rate": 1.625222024866785e-05,
"loss": 0.4937,
"step": 15960
},
{
"epoch": 2.03,
"learning_rate": 1.6231075023259748e-05,
"loss": 0.6771,
"step": 15970
},
{
"epoch": 2.03,
"learning_rate": 1.6209929797851644e-05,
"loss": 0.528,
"step": 15980
},
{
"epoch": 2.03,
"learning_rate": 1.6188784572443544e-05,
"loss": 0.4522,
"step": 15990
},
{
"epoch": 2.03,
"learning_rate": 1.616763934703544e-05,
"loss": 0.5044,
"step": 16000
},
{
"epoch": 2.03,
"eval_loss": 1.0004602670669556,
"eval_rouge1": 0.7284788537204008,
"eval_rouge2": 0.5960733921892193,
"eval_rougeL": 0.6734586206298199,
"eval_rougeLsum": 0.6732468644824681,
"eval_runtime": 369.4253,
"eval_samples_per_second": 9.285,
"eval_steps_per_second": 2.323,
"step": 16000
},
{
"epoch": 2.03,
"learning_rate": 1.6146494121627337e-05,
"loss": 0.613,
"step": 16010
},
{
"epoch": 2.03,
"learning_rate": 1.6125348896219233e-05,
"loss": 0.4601,
"step": 16020
},
{
"epoch": 2.03,
"learning_rate": 1.6104203670811133e-05,
"loss": 0.52,
"step": 16030
},
{
"epoch": 2.04,
"learning_rate": 1.608305844540303e-05,
"loss": 0.5779,
"step": 16040
},
{
"epoch": 2.04,
"learning_rate": 1.6061913219994926e-05,
"loss": 0.6133,
"step": 16050
},
{
"epoch": 2.04,
"learning_rate": 1.6040767994586822e-05,
"loss": 0.52,
"step": 16060
},
{
"epoch": 2.04,
"learning_rate": 1.6019622769178722e-05,
"loss": 0.5125,
"step": 16070
},
{
"epoch": 2.04,
"learning_rate": 1.599847754377062e-05,
"loss": 0.5423,
"step": 16080
},
{
"epoch": 2.04,
"learning_rate": 1.5977332318362515e-05,
"loss": 0.4696,
"step": 16090
},
{
"epoch": 2.04,
"learning_rate": 1.595618709295441e-05,
"loss": 0.3436,
"step": 16100
},
{
"epoch": 2.04,
"learning_rate": 1.5935041867546308e-05,
"loss": 0.4866,
"step": 16110
},
{
"epoch": 2.05,
"learning_rate": 1.5913896642138208e-05,
"loss": 0.5355,
"step": 16120
},
{
"epoch": 2.05,
"learning_rate": 1.5892751416730104e-05,
"loss": 0.4076,
"step": 16130
},
{
"epoch": 2.05,
"learning_rate": 1.5871606191322e-05,
"loss": 0.6213,
"step": 16140
},
{
"epoch": 2.05,
"learning_rate": 1.5850460965913897e-05,
"loss": 0.4977,
"step": 16150
},
{
"epoch": 2.05,
"learning_rate": 1.5829315740505797e-05,
"loss": 0.4932,
"step": 16160
},
{
"epoch": 2.05,
"learning_rate": 1.5808170515097693e-05,
"loss": 0.3592,
"step": 16170
},
{
"epoch": 2.05,
"learning_rate": 1.578702528968959e-05,
"loss": 0.4649,
"step": 16180
},
{
"epoch": 2.05,
"learning_rate": 1.5765880064281486e-05,
"loss": 0.4026,
"step": 16190
},
{
"epoch": 2.06,
"learning_rate": 1.5744734838873386e-05,
"loss": 0.5233,
"step": 16200
},
{
"epoch": 2.06,
"learning_rate": 1.5723589613465282e-05,
"loss": 0.5298,
"step": 16210
},
{
"epoch": 2.06,
"learning_rate": 1.570244438805718e-05,
"loss": 0.5768,
"step": 16220
},
{
"epoch": 2.06,
"learning_rate": 1.5681299162649075e-05,
"loss": 0.5902,
"step": 16230
},
{
"epoch": 2.06,
"learning_rate": 1.566015393724097e-05,
"loss": 0.3098,
"step": 16240
},
{
"epoch": 2.06,
"learning_rate": 1.563900871183287e-05,
"loss": 0.4276,
"step": 16250
},
{
"epoch": 2.06,
"learning_rate": 1.5617863486424767e-05,
"loss": 0.5209,
"step": 16260
},
{
"epoch": 2.06,
"learning_rate": 1.5596718261016664e-05,
"loss": 0.4803,
"step": 16270
},
{
"epoch": 2.07,
"learning_rate": 1.557557303560856e-05,
"loss": 0.4389,
"step": 16280
},
{
"epoch": 2.07,
"learning_rate": 1.555442781020046e-05,
"loss": 0.5205,
"step": 16290
},
{
"epoch": 2.07,
"learning_rate": 1.5533282584792356e-05,
"loss": 0.5407,
"step": 16300
},
{
"epoch": 2.07,
"learning_rate": 1.5512137359384253e-05,
"loss": 0.4779,
"step": 16310
},
{
"epoch": 2.07,
"learning_rate": 1.549099213397615e-05,
"loss": 0.3647,
"step": 16320
},
{
"epoch": 2.07,
"learning_rate": 1.5469846908568046e-05,
"loss": 0.6327,
"step": 16330
},
{
"epoch": 2.07,
"learning_rate": 1.5448701683159946e-05,
"loss": 0.5578,
"step": 16340
},
{
"epoch": 2.07,
"learning_rate": 1.5427556457751842e-05,
"loss": 0.5383,
"step": 16350
},
{
"epoch": 2.08,
"learning_rate": 1.540641123234374e-05,
"loss": 0.4099,
"step": 16360
},
{
"epoch": 2.08,
"learning_rate": 1.5385266006935635e-05,
"loss": 0.3619,
"step": 16370
},
{
"epoch": 2.08,
"learning_rate": 1.5364120781527535e-05,
"loss": 0.4939,
"step": 16380
},
{
"epoch": 2.08,
"learning_rate": 1.534297555611943e-05,
"loss": 0.5041,
"step": 16390
},
{
"epoch": 2.08,
"learning_rate": 1.5321830330711327e-05,
"loss": 0.742,
"step": 16400
},
{
"epoch": 2.08,
"learning_rate": 1.5300685105303224e-05,
"loss": 0.5059,
"step": 16410
},
{
"epoch": 2.08,
"learning_rate": 1.527953987989512e-05,
"loss": 0.5088,
"step": 16420
},
{
"epoch": 2.08,
"learning_rate": 1.5258394654487018e-05,
"loss": 0.2912,
"step": 16430
},
{
"epoch": 2.09,
"learning_rate": 1.5237249429078915e-05,
"loss": 0.5572,
"step": 16440
},
{
"epoch": 2.09,
"learning_rate": 1.5216104203670811e-05,
"loss": 0.3842,
"step": 16450
},
{
"epoch": 2.09,
"learning_rate": 1.5194958978262708e-05,
"loss": 0.5422,
"step": 16460
},
{
"epoch": 2.09,
"learning_rate": 1.5173813752854607e-05,
"loss": 0.6506,
"step": 16470
},
{
"epoch": 2.09,
"learning_rate": 1.5152668527446504e-05,
"loss": 0.3357,
"step": 16480
},
{
"epoch": 2.09,
"learning_rate": 1.51315233020384e-05,
"loss": 0.4826,
"step": 16490
},
{
"epoch": 2.09,
"learning_rate": 1.5110378076630297e-05,
"loss": 0.4391,
"step": 16500
},
{
"epoch": 2.09,
"learning_rate": 1.5089232851222196e-05,
"loss": 0.4957,
"step": 16510
},
{
"epoch": 2.1,
"learning_rate": 1.5068087625814093e-05,
"loss": 0.449,
"step": 16520
},
{
"epoch": 2.1,
"learning_rate": 1.504694240040599e-05,
"loss": 0.4061,
"step": 16530
},
{
"epoch": 2.1,
"learning_rate": 1.5025797174997886e-05,
"loss": 0.4779,
"step": 16540
},
{
"epoch": 2.1,
"learning_rate": 1.5004651949589785e-05,
"loss": 0.5563,
"step": 16550
},
{
"epoch": 2.1,
"learning_rate": 1.4983506724181682e-05,
"loss": 0.5534,
"step": 16560
},
{
"epoch": 2.1,
"learning_rate": 1.4962361498773578e-05,
"loss": 0.4191,
"step": 16570
},
{
"epoch": 2.1,
"learning_rate": 1.4941216273365475e-05,
"loss": 0.4769,
"step": 16580
},
{
"epoch": 2.1,
"learning_rate": 1.4920071047957371e-05,
"loss": 0.5209,
"step": 16590
},
{
"epoch": 2.11,
"learning_rate": 1.4898925822549271e-05,
"loss": 0.503,
"step": 16600
},
{
"epoch": 2.11,
"learning_rate": 1.4877780597141167e-05,
"loss": 0.521,
"step": 16610
},
{
"epoch": 2.11,
"learning_rate": 1.4856635371733064e-05,
"loss": 0.4729,
"step": 16620
},
{
"epoch": 2.11,
"learning_rate": 1.483549014632496e-05,
"loss": 0.5847,
"step": 16630
},
{
"epoch": 2.11,
"learning_rate": 1.4814344920916858e-05,
"loss": 0.5267,
"step": 16640
},
{
"epoch": 2.11,
"learning_rate": 1.4793199695508755e-05,
"loss": 0.4691,
"step": 16650
},
{
"epoch": 2.11,
"learning_rate": 1.4772054470100653e-05,
"loss": 0.3556,
"step": 16660
},
{
"epoch": 2.11,
"learning_rate": 1.4750909244692549e-05,
"loss": 0.8047,
"step": 16670
},
{
"epoch": 2.12,
"learning_rate": 1.4729764019284446e-05,
"loss": 0.4874,
"step": 16680
},
{
"epoch": 2.12,
"learning_rate": 1.4708618793876344e-05,
"loss": 0.42,
"step": 16690
},
{
"epoch": 2.12,
"learning_rate": 1.468747356846824e-05,
"loss": 0.6537,
"step": 16700
},
{
"epoch": 2.12,
"learning_rate": 1.4666328343060136e-05,
"loss": 0.3945,
"step": 16710
},
{
"epoch": 2.12,
"learning_rate": 1.4645183117652035e-05,
"loss": 0.4093,
"step": 16720
},
{
"epoch": 2.12,
"learning_rate": 1.4624037892243933e-05,
"loss": 0.4623,
"step": 16730
},
{
"epoch": 2.12,
"learning_rate": 1.4602892666835829e-05,
"loss": 0.5056,
"step": 16740
},
{
"epoch": 2.13,
"learning_rate": 1.4581747441427726e-05,
"loss": 0.5691,
"step": 16750
},
{
"epoch": 2.13,
"learning_rate": 1.4560602216019622e-05,
"loss": 0.407,
"step": 16760
},
{
"epoch": 2.13,
"learning_rate": 1.4539456990611522e-05,
"loss": 0.5715,
"step": 16770
},
{
"epoch": 2.13,
"learning_rate": 1.4518311765203418e-05,
"loss": 0.5043,
"step": 16780
},
{
"epoch": 2.13,
"learning_rate": 1.4497166539795315e-05,
"loss": 0.4006,
"step": 16790
},
{
"epoch": 2.13,
"learning_rate": 1.4476021314387211e-05,
"loss": 0.4787,
"step": 16800
},
{
"epoch": 2.13,
"learning_rate": 1.4454876088979107e-05,
"loss": 0.4221,
"step": 16810
},
{
"epoch": 2.13,
"learning_rate": 1.4433730863571007e-05,
"loss": 0.5721,
"step": 16820
},
{
"epoch": 2.14,
"learning_rate": 1.4412585638162904e-05,
"loss": 0.4968,
"step": 16830
},
{
"epoch": 2.14,
"learning_rate": 1.43914404127548e-05,
"loss": 0.4454,
"step": 16840
},
{
"epoch": 2.14,
"learning_rate": 1.4370295187346696e-05,
"loss": 0.4171,
"step": 16850
},
{
"epoch": 2.14,
"learning_rate": 1.4349149961938596e-05,
"loss": 0.5466,
"step": 16860
},
{
"epoch": 2.14,
"learning_rate": 1.4328004736530493e-05,
"loss": 0.5528,
"step": 16870
},
{
"epoch": 2.14,
"learning_rate": 1.4306859511122389e-05,
"loss": 0.575,
"step": 16880
},
{
"epoch": 2.14,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.5636,
"step": 16890
},
{
"epoch": 2.14,
"learning_rate": 1.4264569060306185e-05,
"loss": 0.5503,
"step": 16900
},
{
"epoch": 2.15,
"learning_rate": 1.4243423834898082e-05,
"loss": 0.5493,
"step": 16910
},
{
"epoch": 2.15,
"learning_rate": 1.4222278609489978e-05,
"loss": 0.6498,
"step": 16920
},
{
"epoch": 2.15,
"learning_rate": 1.4201133384081874e-05,
"loss": 0.4465,
"step": 16930
},
{
"epoch": 2.15,
"learning_rate": 1.4179988158673771e-05,
"loss": 0.5167,
"step": 16940
},
{
"epoch": 2.15,
"learning_rate": 1.415884293326567e-05,
"loss": 0.5112,
"step": 16950
},
{
"epoch": 2.15,
"learning_rate": 1.4137697707857567e-05,
"loss": 0.5296,
"step": 16960
},
{
"epoch": 2.15,
"learning_rate": 1.4116552482449464e-05,
"loss": 0.4518,
"step": 16970
},
{
"epoch": 2.15,
"learning_rate": 1.409540725704136e-05,
"loss": 0.5424,
"step": 16980
},
{
"epoch": 2.16,
"learning_rate": 1.407426203163326e-05,
"loss": 0.3649,
"step": 16990
},
{
"epoch": 2.16,
"learning_rate": 1.4053116806225156e-05,
"loss": 0.372,
"step": 17000
},
{
"epoch": 2.16,
"eval_loss": 1.0192131996154785,
"eval_rouge1": 0.7291480265243359,
"eval_rouge2": 0.598509342987833,
"eval_rougeL": 0.6764652132501234,
"eval_rougeLsum": 0.6760566980686993,
"eval_runtime": 370.1761,
"eval_samples_per_second": 9.266,
"eval_steps_per_second": 2.318,
"step": 17000
},
{
"epoch": 2.16,
"learning_rate": 1.4031971580817053e-05,
"loss": 0.4693,
"step": 17010
},
{
"epoch": 2.16,
"learning_rate": 1.4010826355408949e-05,
"loss": 0.4037,
"step": 17020
},
{
"epoch": 2.16,
"learning_rate": 1.3989681130000845e-05,
"loss": 0.5961,
"step": 17030
},
{
"epoch": 2.16,
"learning_rate": 1.3968535904592745e-05,
"loss": 0.5295,
"step": 17040
},
{
"epoch": 2.16,
"learning_rate": 1.3947390679184642e-05,
"loss": 0.6799,
"step": 17050
},
{
"epoch": 2.16,
"learning_rate": 1.3926245453776538e-05,
"loss": 0.5574,
"step": 17060
},
{
"epoch": 2.17,
"learning_rate": 1.3905100228368434e-05,
"loss": 0.6018,
"step": 17070
},
{
"epoch": 2.17,
"learning_rate": 1.3883955002960333e-05,
"loss": 0.5479,
"step": 17080
},
{
"epoch": 2.17,
"learning_rate": 1.3862809777552229e-05,
"loss": 0.3455,
"step": 17090
},
{
"epoch": 2.17,
"learning_rate": 1.3841664552144127e-05,
"loss": 0.5256,
"step": 17100
},
{
"epoch": 2.17,
"learning_rate": 1.3820519326736023e-05,
"loss": 0.5124,
"step": 17110
},
{
"epoch": 2.17,
"learning_rate": 1.3799374101327922e-05,
"loss": 0.4558,
"step": 17120
},
{
"epoch": 2.17,
"learning_rate": 1.3778228875919818e-05,
"loss": 0.4299,
"step": 17130
},
{
"epoch": 2.17,
"learning_rate": 1.3757083650511714e-05,
"loss": 0.5747,
"step": 17140
},
{
"epoch": 2.18,
"learning_rate": 1.373593842510361e-05,
"loss": 0.4386,
"step": 17150
},
{
"epoch": 2.18,
"learning_rate": 1.3714793199695509e-05,
"loss": 0.5158,
"step": 17160
},
{
"epoch": 2.18,
"learning_rate": 1.3693647974287407e-05,
"loss": 0.3937,
"step": 17170
},
{
"epoch": 2.18,
"learning_rate": 1.3672502748879303e-05,
"loss": 0.34,
"step": 17180
},
{
"epoch": 2.18,
"learning_rate": 1.36513575234712e-05,
"loss": 0.4551,
"step": 17190
},
{
"epoch": 2.18,
"learning_rate": 1.3630212298063096e-05,
"loss": 0.47,
"step": 17200
},
{
"epoch": 2.18,
"learning_rate": 1.3609067072654996e-05,
"loss": 0.4422,
"step": 17210
},
{
"epoch": 2.18,
"learning_rate": 1.3587921847246892e-05,
"loss": 0.4962,
"step": 17220
},
{
"epoch": 2.19,
"learning_rate": 1.3566776621838789e-05,
"loss": 0.4904,
"step": 17230
},
{
"epoch": 2.19,
"learning_rate": 1.3545631396430685e-05,
"loss": 0.5912,
"step": 17240
},
{
"epoch": 2.19,
"learning_rate": 1.3524486171022585e-05,
"loss": 0.4565,
"step": 17250
},
{
"epoch": 2.19,
"learning_rate": 1.3503340945614481e-05,
"loss": 0.4721,
"step": 17260
},
{
"epoch": 2.19,
"learning_rate": 1.3482195720206378e-05,
"loss": 0.5221,
"step": 17270
},
{
"epoch": 2.19,
"learning_rate": 1.3461050494798274e-05,
"loss": 0.4416,
"step": 17280
},
{
"epoch": 2.19,
"learning_rate": 1.343990526939017e-05,
"loss": 0.4517,
"step": 17290
},
{
"epoch": 2.19,
"learning_rate": 1.341876004398207e-05,
"loss": 0.5161,
"step": 17300
},
{
"epoch": 2.2,
"learning_rate": 1.3397614818573967e-05,
"loss": 0.4242,
"step": 17310
},
{
"epoch": 2.2,
"learning_rate": 1.3376469593165863e-05,
"loss": 0.3725,
"step": 17320
},
{
"epoch": 2.2,
"learning_rate": 1.335532436775776e-05,
"loss": 0.501,
"step": 17330
},
{
"epoch": 2.2,
"learning_rate": 1.333417914234966e-05,
"loss": 0.5392,
"step": 17340
},
{
"epoch": 2.2,
"learning_rate": 1.3313033916941556e-05,
"loss": 0.4849,
"step": 17350
},
{
"epoch": 2.2,
"learning_rate": 1.3291888691533452e-05,
"loss": 0.5468,
"step": 17360
},
{
"epoch": 2.2,
"learning_rate": 1.3270743466125349e-05,
"loss": 0.583,
"step": 17370
},
{
"epoch": 2.21,
"learning_rate": 1.3249598240717245e-05,
"loss": 0.568,
"step": 17380
},
{
"epoch": 2.21,
"learning_rate": 1.3228453015309145e-05,
"loss": 0.4809,
"step": 17390
},
{
"epoch": 2.21,
"learning_rate": 1.3207307789901041e-05,
"loss": 0.5212,
"step": 17400
},
{
"epoch": 2.21,
"learning_rate": 1.3186162564492938e-05,
"loss": 0.43,
"step": 17410
},
{
"epoch": 2.21,
"learning_rate": 1.3165017339084834e-05,
"loss": 0.3785,
"step": 17420
},
{
"epoch": 2.21,
"learning_rate": 1.3143872113676734e-05,
"loss": 0.504,
"step": 17430
},
{
"epoch": 2.21,
"learning_rate": 1.312272688826863e-05,
"loss": 0.5854,
"step": 17440
},
{
"epoch": 2.21,
"learning_rate": 1.3101581662860527e-05,
"loss": 0.4499,
"step": 17450
},
{
"epoch": 2.22,
"learning_rate": 1.3080436437452423e-05,
"loss": 0.5381,
"step": 17460
},
{
"epoch": 2.22,
"learning_rate": 1.3059291212044323e-05,
"loss": 0.6122,
"step": 17470
},
{
"epoch": 2.22,
"learning_rate": 1.303814598663622e-05,
"loss": 0.5245,
"step": 17480
},
{
"epoch": 2.22,
"learning_rate": 1.3017000761228116e-05,
"loss": 0.4767,
"step": 17490
},
{
"epoch": 2.22,
"learning_rate": 1.2995855535820012e-05,
"loss": 0.5936,
"step": 17500
},
{
"epoch": 2.22,
"learning_rate": 1.2974710310411909e-05,
"loss": 0.4379,
"step": 17510
},
{
"epoch": 2.22,
"learning_rate": 1.2953565085003807e-05,
"loss": 0.5546,
"step": 17520
},
{
"epoch": 2.22,
"learning_rate": 1.2932419859595705e-05,
"loss": 0.3859,
"step": 17530
},
{
"epoch": 2.23,
"learning_rate": 1.2911274634187601e-05,
"loss": 0.4247,
"step": 17540
},
{
"epoch": 2.23,
"learning_rate": 1.2890129408779498e-05,
"loss": 0.5065,
"step": 17550
},
{
"epoch": 2.23,
"learning_rate": 1.2868984183371396e-05,
"loss": 0.5002,
"step": 17560
},
{
"epoch": 2.23,
"learning_rate": 1.2847838957963292e-05,
"loss": 0.4927,
"step": 17570
},
{
"epoch": 2.23,
"learning_rate": 1.2826693732555189e-05,
"loss": 0.5069,
"step": 17580
},
{
"epoch": 2.23,
"learning_rate": 1.2805548507147087e-05,
"loss": 0.666,
"step": 17590
},
{
"epoch": 2.23,
"learning_rate": 1.2784403281738985e-05,
"loss": 0.4137,
"step": 17600
},
{
"epoch": 2.23,
"learning_rate": 1.2763258056330881e-05,
"loss": 0.6006,
"step": 17610
},
{
"epoch": 2.24,
"learning_rate": 1.2742112830922778e-05,
"loss": 0.4229,
"step": 17620
},
{
"epoch": 2.24,
"learning_rate": 1.2720967605514674e-05,
"loss": 0.5373,
"step": 17630
},
{
"epoch": 2.24,
"learning_rate": 1.269982238010657e-05,
"loss": 0.4984,
"step": 17640
},
{
"epoch": 2.24,
"learning_rate": 1.267867715469847e-05,
"loss": 0.4852,
"step": 17650
},
{
"epoch": 2.24,
"learning_rate": 1.2657531929290367e-05,
"loss": 0.633,
"step": 17660
},
{
"epoch": 2.24,
"learning_rate": 1.2636386703882263e-05,
"loss": 0.4999,
"step": 17670
},
{
"epoch": 2.24,
"learning_rate": 1.261524147847416e-05,
"loss": 0.5427,
"step": 17680
},
{
"epoch": 2.24,
"learning_rate": 1.259409625306606e-05,
"loss": 0.4432,
"step": 17690
},
{
"epoch": 2.25,
"learning_rate": 1.2572951027657956e-05,
"loss": 0.5501,
"step": 17700
},
{
"epoch": 2.25,
"learning_rate": 1.2551805802249852e-05,
"loss": 0.5239,
"step": 17710
},
{
"epoch": 2.25,
"learning_rate": 1.2530660576841749e-05,
"loss": 0.3843,
"step": 17720
},
{
"epoch": 2.25,
"learning_rate": 1.2509515351433645e-05,
"loss": 0.598,
"step": 17730
},
{
"epoch": 2.25,
"learning_rate": 1.2488370126025543e-05,
"loss": 0.4732,
"step": 17740
},
{
"epoch": 2.25,
"learning_rate": 1.2467224900617441e-05,
"loss": 0.397,
"step": 17750
},
{
"epoch": 2.25,
"learning_rate": 1.2446079675209338e-05,
"loss": 0.4564,
"step": 17760
},
{
"epoch": 2.25,
"learning_rate": 1.2424934449801236e-05,
"loss": 0.4627,
"step": 17770
},
{
"epoch": 2.26,
"learning_rate": 1.2403789224393132e-05,
"loss": 0.6373,
"step": 17780
},
{
"epoch": 2.26,
"learning_rate": 1.238264399898503e-05,
"loss": 0.3913,
"step": 17790
},
{
"epoch": 2.26,
"learning_rate": 1.2361498773576927e-05,
"loss": 0.6373,
"step": 17800
},
{
"epoch": 2.26,
"learning_rate": 1.2340353548168825e-05,
"loss": 0.4026,
"step": 17810
},
{
"epoch": 2.26,
"learning_rate": 1.2319208322760721e-05,
"loss": 0.4719,
"step": 17820
},
{
"epoch": 2.26,
"learning_rate": 1.229806309735262e-05,
"loss": 0.4594,
"step": 17830
},
{
"epoch": 2.26,
"learning_rate": 1.2276917871944516e-05,
"loss": 0.5069,
"step": 17840
},
{
"epoch": 2.26,
"learning_rate": 1.2255772646536412e-05,
"loss": 0.4372,
"step": 17850
},
{
"epoch": 2.27,
"learning_rate": 1.223462742112831e-05,
"loss": 0.5336,
"step": 17860
},
{
"epoch": 2.27,
"learning_rate": 1.2213482195720207e-05,
"loss": 0.3861,
"step": 17870
},
{
"epoch": 2.27,
"learning_rate": 1.2192336970312105e-05,
"loss": 0.5287,
"step": 17880
},
{
"epoch": 2.27,
"learning_rate": 1.2171191744904001e-05,
"loss": 0.4488,
"step": 17890
},
{
"epoch": 2.27,
"learning_rate": 1.21500465194959e-05,
"loss": 0.493,
"step": 17900
},
{
"epoch": 2.27,
"learning_rate": 1.2128901294087796e-05,
"loss": 0.4262,
"step": 17910
},
{
"epoch": 2.27,
"learning_rate": 1.2107756068679694e-05,
"loss": 0.4206,
"step": 17920
},
{
"epoch": 2.27,
"learning_rate": 1.208661084327159e-05,
"loss": 0.4599,
"step": 17930
},
{
"epoch": 2.28,
"learning_rate": 1.2065465617863488e-05,
"loss": 0.5817,
"step": 17940
},
{
"epoch": 2.28,
"learning_rate": 1.2044320392455385e-05,
"loss": 0.421,
"step": 17950
},
{
"epoch": 2.28,
"learning_rate": 1.2023175167047283e-05,
"loss": 0.3902,
"step": 17960
},
{
"epoch": 2.28,
"learning_rate": 1.200202994163918e-05,
"loss": 0.5948,
"step": 17970
},
{
"epoch": 2.28,
"learning_rate": 1.1980884716231076e-05,
"loss": 0.5171,
"step": 17980
},
{
"epoch": 2.28,
"learning_rate": 1.1959739490822974e-05,
"loss": 0.4016,
"step": 17990
},
{
"epoch": 2.28,
"learning_rate": 1.193859426541487e-05,
"loss": 0.4282,
"step": 18000
},
{
"epoch": 2.28,
"eval_loss": 1.0121049880981445,
"eval_rouge1": 0.7351889582527759,
"eval_rouge2": 0.6069914631932839,
"eval_rougeL": 0.6835570525542682,
"eval_rougeLsum": 0.6834839152388781,
"eval_runtime": 376.1764,
"eval_samples_per_second": 9.118,
"eval_steps_per_second": 2.281,
"step": 18000
},
{
"epoch": 2.28,
"learning_rate": 1.1917449040006767e-05,
"loss": 0.6512,
"step": 18010
},
{
"epoch": 2.29,
"learning_rate": 1.1896303814598665e-05,
"loss": 0.4837,
"step": 18020
},
{
"epoch": 2.29,
"learning_rate": 1.1875158589190561e-05,
"loss": 0.3195,
"step": 18030
},
{
"epoch": 2.29,
"learning_rate": 1.1854013363782457e-05,
"loss": 0.4274,
"step": 18040
},
{
"epoch": 2.29,
"learning_rate": 1.1832868138374356e-05,
"loss": 0.4594,
"step": 18050
},
{
"epoch": 2.29,
"learning_rate": 1.1811722912966252e-05,
"loss": 0.5726,
"step": 18060
},
{
"epoch": 2.29,
"learning_rate": 1.179057768755815e-05,
"loss": 0.5338,
"step": 18070
},
{
"epoch": 2.29,
"learning_rate": 1.1769432462150047e-05,
"loss": 0.4579,
"step": 18080
},
{
"epoch": 2.3,
"learning_rate": 1.1748287236741943e-05,
"loss": 0.6511,
"step": 18090
},
{
"epoch": 2.3,
"learning_rate": 1.1727142011333841e-05,
"loss": 0.5581,
"step": 18100
},
{
"epoch": 2.3,
"learning_rate": 1.1705996785925737e-05,
"loss": 0.6973,
"step": 18110
},
{
"epoch": 2.3,
"learning_rate": 1.1684851560517636e-05,
"loss": 0.5521,
"step": 18120
},
{
"epoch": 2.3,
"learning_rate": 1.1663706335109532e-05,
"loss": 0.414,
"step": 18130
},
{
"epoch": 2.3,
"learning_rate": 1.164256110970143e-05,
"loss": 0.5659,
"step": 18140
},
{
"epoch": 2.3,
"learning_rate": 1.1621415884293326e-05,
"loss": 0.5276,
"step": 18150
},
{
"epoch": 2.3,
"learning_rate": 1.1600270658885225e-05,
"loss": 0.4907,
"step": 18160
},
{
"epoch": 2.31,
"learning_rate": 1.1579125433477121e-05,
"loss": 0.5071,
"step": 18170
},
{
"epoch": 2.31,
"learning_rate": 1.1557980208069019e-05,
"loss": 0.6638,
"step": 18180
},
{
"epoch": 2.31,
"learning_rate": 1.1536834982660916e-05,
"loss": 0.4273,
"step": 18190
},
{
"epoch": 2.31,
"learning_rate": 1.1515689757252812e-05,
"loss": 0.4562,
"step": 18200
},
{
"epoch": 2.31,
"learning_rate": 1.149454453184471e-05,
"loss": 0.5357,
"step": 18210
},
{
"epoch": 2.31,
"learning_rate": 1.1473399306436606e-05,
"loss": 0.4343,
"step": 18220
},
{
"epoch": 2.31,
"learning_rate": 1.1452254081028505e-05,
"loss": 0.6214,
"step": 18230
},
{
"epoch": 2.31,
"learning_rate": 1.1431108855620401e-05,
"loss": 0.4571,
"step": 18240
},
{
"epoch": 2.32,
"learning_rate": 1.1409963630212299e-05,
"loss": 0.6076,
"step": 18250
},
{
"epoch": 2.32,
"learning_rate": 1.1388818404804195e-05,
"loss": 0.5182,
"step": 18260
},
{
"epoch": 2.32,
"learning_rate": 1.1367673179396094e-05,
"loss": 0.5681,
"step": 18270
},
{
"epoch": 2.32,
"learning_rate": 1.134652795398799e-05,
"loss": 0.5106,
"step": 18280
},
{
"epoch": 2.32,
"learning_rate": 1.1325382728579888e-05,
"loss": 0.6166,
"step": 18290
},
{
"epoch": 2.32,
"learning_rate": 1.1304237503171785e-05,
"loss": 0.4268,
"step": 18300
},
{
"epoch": 2.32,
"learning_rate": 1.1283092277763683e-05,
"loss": 0.5774,
"step": 18310
},
{
"epoch": 2.32,
"learning_rate": 1.1261947052355579e-05,
"loss": 0.5193,
"step": 18320
},
{
"epoch": 2.33,
"learning_rate": 1.1240801826947475e-05,
"loss": 0.4086,
"step": 18330
},
{
"epoch": 2.33,
"learning_rate": 1.1219656601539374e-05,
"loss": 0.5339,
"step": 18340
},
{
"epoch": 2.33,
"learning_rate": 1.119851137613127e-05,
"loss": 0.6255,
"step": 18350
},
{
"epoch": 2.33,
"learning_rate": 1.1177366150723168e-05,
"loss": 0.5525,
"step": 18360
},
{
"epoch": 2.33,
"learning_rate": 1.1156220925315064e-05,
"loss": 0.4951,
"step": 18370
},
{
"epoch": 2.33,
"learning_rate": 1.1135075699906963e-05,
"loss": 0.4168,
"step": 18380
},
{
"epoch": 2.33,
"learning_rate": 1.1113930474498859e-05,
"loss": 0.4496,
"step": 18390
},
{
"epoch": 2.33,
"learning_rate": 1.1092785249090757e-05,
"loss": 0.3589,
"step": 18400
},
{
"epoch": 2.34,
"learning_rate": 1.1071640023682654e-05,
"loss": 0.3696,
"step": 18410
},
{
"epoch": 2.34,
"learning_rate": 1.105049479827455e-05,
"loss": 0.4934,
"step": 18420
},
{
"epoch": 2.34,
"learning_rate": 1.1029349572866448e-05,
"loss": 0.5056,
"step": 18430
},
{
"epoch": 2.34,
"learning_rate": 1.1008204347458344e-05,
"loss": 0.4571,
"step": 18440
},
{
"epoch": 2.34,
"learning_rate": 1.0987059122050241e-05,
"loss": 0.4958,
"step": 18450
},
{
"epoch": 2.34,
"learning_rate": 1.0965913896642139e-05,
"loss": 0.4721,
"step": 18460
},
{
"epoch": 2.34,
"learning_rate": 1.0944768671234035e-05,
"loss": 0.4593,
"step": 18470
},
{
"epoch": 2.34,
"learning_rate": 1.0923623445825932e-05,
"loss": 0.5238,
"step": 18480
},
{
"epoch": 2.35,
"learning_rate": 1.090247822041783e-05,
"loss": 0.568,
"step": 18490
},
{
"epoch": 2.35,
"learning_rate": 1.0881332995009726e-05,
"loss": 0.5426,
"step": 18500
},
{
"epoch": 2.35,
"learning_rate": 1.0860187769601624e-05,
"loss": 0.5424,
"step": 18510
},
{
"epoch": 2.35,
"learning_rate": 1.083904254419352e-05,
"loss": 0.5096,
"step": 18520
},
{
"epoch": 2.35,
"learning_rate": 1.0817897318785419e-05,
"loss": 0.4409,
"step": 18530
},
{
"epoch": 2.35,
"learning_rate": 1.0796752093377315e-05,
"loss": 0.5421,
"step": 18540
},
{
"epoch": 2.35,
"learning_rate": 1.0775606867969213e-05,
"loss": 0.5459,
"step": 18550
},
{
"epoch": 2.35,
"learning_rate": 1.075446164256111e-05,
"loss": 0.6103,
"step": 18560
},
{
"epoch": 2.36,
"learning_rate": 1.0733316417153006e-05,
"loss": 0.4316,
"step": 18570
},
{
"epoch": 2.36,
"learning_rate": 1.0712171191744904e-05,
"loss": 0.4721,
"step": 18580
},
{
"epoch": 2.36,
"learning_rate": 1.06910259663368e-05,
"loss": 0.3692,
"step": 18590
},
{
"epoch": 2.36,
"learning_rate": 1.0669880740928699e-05,
"loss": 0.4413,
"step": 18600
},
{
"epoch": 2.36,
"learning_rate": 1.0648735515520595e-05,
"loss": 0.4248,
"step": 18610
},
{
"epoch": 2.36,
"learning_rate": 1.0627590290112493e-05,
"loss": 0.5076,
"step": 18620
},
{
"epoch": 2.36,
"learning_rate": 1.060644506470439e-05,
"loss": 0.6368,
"step": 18630
},
{
"epoch": 2.36,
"learning_rate": 1.0585299839296288e-05,
"loss": 0.4968,
"step": 18640
},
{
"epoch": 2.37,
"learning_rate": 1.0564154613888184e-05,
"loss": 0.5878,
"step": 18650
},
{
"epoch": 2.37,
"learning_rate": 1.0543009388480082e-05,
"loss": 0.41,
"step": 18660
},
{
"epoch": 2.37,
"learning_rate": 1.0521864163071979e-05,
"loss": 0.4225,
"step": 18670
},
{
"epoch": 2.37,
"learning_rate": 1.0500718937663875e-05,
"loss": 0.4449,
"step": 18680
},
{
"epoch": 2.37,
"learning_rate": 1.0479573712255773e-05,
"loss": 0.5047,
"step": 18690
},
{
"epoch": 2.37,
"learning_rate": 1.045842848684767e-05,
"loss": 0.5026,
"step": 18700
},
{
"epoch": 2.37,
"learning_rate": 1.0437283261439568e-05,
"loss": 0.4846,
"step": 18710
},
{
"epoch": 2.38,
"learning_rate": 1.0416138036031464e-05,
"loss": 0.4825,
"step": 18720
},
{
"epoch": 2.38,
"learning_rate": 1.0394992810623362e-05,
"loss": 0.6326,
"step": 18730
},
{
"epoch": 2.38,
"learning_rate": 1.0373847585215259e-05,
"loss": 0.4286,
"step": 18740
},
{
"epoch": 2.38,
"learning_rate": 1.0352702359807157e-05,
"loss": 0.4876,
"step": 18750
},
{
"epoch": 2.38,
"learning_rate": 1.0331557134399053e-05,
"loss": 0.6074,
"step": 18760
},
{
"epoch": 2.38,
"learning_rate": 1.0310411908990951e-05,
"loss": 0.6184,
"step": 18770
},
{
"epoch": 2.38,
"learning_rate": 1.0289266683582848e-05,
"loss": 0.5565,
"step": 18780
},
{
"epoch": 2.38,
"learning_rate": 1.0268121458174744e-05,
"loss": 0.3163,
"step": 18790
},
{
"epoch": 2.39,
"learning_rate": 1.0246976232766642e-05,
"loss": 0.4641,
"step": 18800
},
{
"epoch": 2.39,
"learning_rate": 1.0225831007358539e-05,
"loss": 0.4337,
"step": 18810
},
{
"epoch": 2.39,
"learning_rate": 1.0204685781950437e-05,
"loss": 0.5684,
"step": 18820
},
{
"epoch": 2.39,
"learning_rate": 1.0183540556542333e-05,
"loss": 0.4303,
"step": 18830
},
{
"epoch": 2.39,
"learning_rate": 1.0162395331134231e-05,
"loss": 0.6727,
"step": 18840
},
{
"epoch": 2.39,
"learning_rate": 1.0141250105726128e-05,
"loss": 0.4185,
"step": 18850
},
{
"epoch": 2.39,
"learning_rate": 1.0120104880318026e-05,
"loss": 0.4889,
"step": 18860
},
{
"epoch": 2.39,
"learning_rate": 1.0098959654909922e-05,
"loss": 0.4126,
"step": 18870
},
{
"epoch": 2.4,
"learning_rate": 1.0077814429501819e-05,
"loss": 0.4139,
"step": 18880
},
{
"epoch": 2.4,
"learning_rate": 1.0056669204093717e-05,
"loss": 0.4589,
"step": 18890
},
{
"epoch": 2.4,
"learning_rate": 1.0035523978685613e-05,
"loss": 0.4261,
"step": 18900
},
{
"epoch": 2.4,
"learning_rate": 1.001437875327751e-05,
"loss": 0.5179,
"step": 18910
},
{
"epoch": 2.4,
"learning_rate": 9.993233527869408e-06,
"loss": 0.5277,
"step": 18920
},
{
"epoch": 2.4,
"learning_rate": 9.972088302461304e-06,
"loss": 0.4994,
"step": 18930
},
{
"epoch": 2.4,
"learning_rate": 9.9509430770532e-06,
"loss": 0.3655,
"step": 18940
},
{
"epoch": 2.4,
"learning_rate": 9.929797851645099e-06,
"loss": 0.7167,
"step": 18950
},
{
"epoch": 2.41,
"learning_rate": 9.908652626236995e-06,
"loss": 0.4232,
"step": 18960
},
{
"epoch": 2.41,
"learning_rate": 9.887507400828893e-06,
"loss": 0.6849,
"step": 18970
},
{
"epoch": 2.41,
"learning_rate": 9.86636217542079e-06,
"loss": 0.4965,
"step": 18980
},
{
"epoch": 2.41,
"learning_rate": 9.845216950012688e-06,
"loss": 0.508,
"step": 18990
},
{
"epoch": 2.41,
"learning_rate": 9.824071724604584e-06,
"loss": 0.295,
"step": 19000
},
{
"epoch": 2.41,
"eval_loss": 1.0050384998321533,
"eval_rouge1": 0.7397789075242276,
"eval_rouge2": 0.6149723309916985,
"eval_rougeL": 0.6905032604059703,
"eval_rougeLsum": 0.6903674916477082,
"eval_runtime": 368.3216,
"eval_samples_per_second": 9.313,
"eval_steps_per_second": 2.329,
"step": 19000
},
{
"epoch": 2.41,
"learning_rate": 9.802926499196482e-06,
"loss": 0.4764,
"step": 19010
},
{
"epoch": 2.41,
"learning_rate": 9.781781273788379e-06,
"loss": 0.4395,
"step": 19020
},
{
"epoch": 2.41,
"learning_rate": 9.760636048380275e-06,
"loss": 0.6073,
"step": 19030
},
{
"epoch": 2.42,
"learning_rate": 9.739490822972173e-06,
"loss": 0.427,
"step": 19040
},
{
"epoch": 2.42,
"learning_rate": 9.71834559756407e-06,
"loss": 0.48,
"step": 19050
},
{
"epoch": 2.42,
"learning_rate": 9.697200372155968e-06,
"loss": 0.613,
"step": 19060
},
{
"epoch": 2.42,
"learning_rate": 9.676055146747864e-06,
"loss": 0.6169,
"step": 19070
},
{
"epoch": 2.42,
"learning_rate": 9.654909921339762e-06,
"loss": 0.3921,
"step": 19080
},
{
"epoch": 2.42,
"learning_rate": 9.633764695931659e-06,
"loss": 0.5199,
"step": 19090
},
{
"epoch": 2.42,
"learning_rate": 9.612619470523557e-06,
"loss": 0.5727,
"step": 19100
},
{
"epoch": 2.42,
"learning_rate": 9.591474245115453e-06,
"loss": 0.4943,
"step": 19110
},
{
"epoch": 2.43,
"learning_rate": 9.570329019707351e-06,
"loss": 0.4869,
"step": 19120
},
{
"epoch": 2.43,
"learning_rate": 9.549183794299248e-06,
"loss": 0.5307,
"step": 19130
},
{
"epoch": 2.43,
"learning_rate": 9.528038568891144e-06,
"loss": 0.5109,
"step": 19140
},
{
"epoch": 2.43,
"learning_rate": 9.506893343483042e-06,
"loss": 0.398,
"step": 19150
},
{
"epoch": 2.43,
"learning_rate": 9.485748118074939e-06,
"loss": 0.4272,
"step": 19160
},
{
"epoch": 2.43,
"learning_rate": 9.464602892666837e-06,
"loss": 0.5897,
"step": 19170
},
{
"epoch": 2.43,
"learning_rate": 9.443457667258733e-06,
"loss": 0.499,
"step": 19180
},
{
"epoch": 2.43,
"learning_rate": 9.422312441850631e-06,
"loss": 0.4236,
"step": 19190
},
{
"epoch": 2.44,
"learning_rate": 9.401167216442528e-06,
"loss": 0.4113,
"step": 19200
},
{
"epoch": 2.44,
"learning_rate": 9.380021991034426e-06,
"loss": 0.4152,
"step": 19210
},
{
"epoch": 2.44,
"learning_rate": 9.358876765626322e-06,
"loss": 0.4724,
"step": 19220
},
{
"epoch": 2.44,
"learning_rate": 9.33773154021822e-06,
"loss": 0.4179,
"step": 19230
},
{
"epoch": 2.44,
"learning_rate": 9.316586314810117e-06,
"loss": 0.5261,
"step": 19240
},
{
"epoch": 2.44,
"learning_rate": 9.295441089402015e-06,
"loss": 0.4883,
"step": 19250
},
{
"epoch": 2.44,
"learning_rate": 9.274295863993911e-06,
"loss": 0.4972,
"step": 19260
},
{
"epoch": 2.44,
"learning_rate": 9.253150638585808e-06,
"loss": 0.5781,
"step": 19270
},
{
"epoch": 2.45,
"learning_rate": 9.232005413177706e-06,
"loss": 0.4977,
"step": 19280
},
{
"epoch": 2.45,
"learning_rate": 9.210860187769602e-06,
"loss": 0.4982,
"step": 19290
},
{
"epoch": 2.45,
"learning_rate": 9.1897149623615e-06,
"loss": 0.4412,
"step": 19300
},
{
"epoch": 2.45,
"learning_rate": 9.168569736953397e-06,
"loss": 0.4969,
"step": 19310
},
{
"epoch": 2.45,
"learning_rate": 9.147424511545293e-06,
"loss": 0.5407,
"step": 19320
},
{
"epoch": 2.45,
"learning_rate": 9.126279286137191e-06,
"loss": 0.5027,
"step": 19330
},
{
"epoch": 2.45,
"learning_rate": 9.105134060729088e-06,
"loss": 0.5515,
"step": 19340
},
{
"epoch": 2.45,
"learning_rate": 9.083988835320984e-06,
"loss": 0.3334,
"step": 19350
},
{
"epoch": 2.46,
"learning_rate": 9.062843609912882e-06,
"loss": 0.4192,
"step": 19360
},
{
"epoch": 2.46,
"learning_rate": 9.041698384504779e-06,
"loss": 0.5027,
"step": 19370
},
{
"epoch": 2.46,
"learning_rate": 9.020553159096675e-06,
"loss": 0.4843,
"step": 19380
},
{
"epoch": 2.46,
"learning_rate": 8.999407933688573e-06,
"loss": 0.3903,
"step": 19390
},
{
"epoch": 2.46,
"learning_rate": 8.97826270828047e-06,
"loss": 0.7256,
"step": 19400
},
{
"epoch": 2.46,
"learning_rate": 8.957117482872368e-06,
"loss": 0.562,
"step": 19410
},
{
"epoch": 2.46,
"learning_rate": 8.935972257464264e-06,
"loss": 0.385,
"step": 19420
},
{
"epoch": 2.47,
"learning_rate": 8.914827032056162e-06,
"loss": 0.4013,
"step": 19430
},
{
"epoch": 2.47,
"learning_rate": 8.893681806648058e-06,
"loss": 0.4956,
"step": 19440
},
{
"epoch": 2.47,
"learning_rate": 8.872536581239957e-06,
"loss": 0.5103,
"step": 19450
},
{
"epoch": 2.47,
"learning_rate": 8.851391355831853e-06,
"loss": 0.4643,
"step": 19460
},
{
"epoch": 2.47,
"learning_rate": 8.830246130423751e-06,
"loss": 0.5032,
"step": 19470
},
{
"epoch": 2.47,
"learning_rate": 8.809100905015648e-06,
"loss": 0.4982,
"step": 19480
},
{
"epoch": 2.47,
"learning_rate": 8.787955679607544e-06,
"loss": 0.5429,
"step": 19490
},
{
"epoch": 2.47,
"learning_rate": 8.766810454199442e-06,
"loss": 0.4528,
"step": 19500
},
{
"epoch": 2.48,
"learning_rate": 8.745665228791338e-06,
"loss": 0.4183,
"step": 19510
},
{
"epoch": 2.48,
"learning_rate": 8.724520003383237e-06,
"loss": 0.472,
"step": 19520
},
{
"epoch": 2.48,
"learning_rate": 8.703374777975133e-06,
"loss": 0.4452,
"step": 19530
},
{
"epoch": 2.48,
"learning_rate": 8.682229552567031e-06,
"loss": 0.4564,
"step": 19540
},
{
"epoch": 2.48,
"learning_rate": 8.661084327158927e-06,
"loss": 0.4702,
"step": 19550
},
{
"epoch": 2.48,
"learning_rate": 8.639939101750826e-06,
"loss": 0.535,
"step": 19560
},
{
"epoch": 2.48,
"learning_rate": 8.618793876342722e-06,
"loss": 0.4198,
"step": 19570
},
{
"epoch": 2.48,
"learning_rate": 8.59764865093462e-06,
"loss": 0.3911,
"step": 19580
},
{
"epoch": 2.49,
"learning_rate": 8.576503425526517e-06,
"loss": 0.4073,
"step": 19590
},
{
"epoch": 2.49,
"learning_rate": 8.555358200118415e-06,
"loss": 0.4631,
"step": 19600
},
{
"epoch": 2.49,
"learning_rate": 8.534212974710311e-06,
"loss": 0.4169,
"step": 19610
},
{
"epoch": 2.49,
"learning_rate": 8.513067749302207e-06,
"loss": 0.5047,
"step": 19620
},
{
"epoch": 2.49,
"learning_rate": 8.491922523894106e-06,
"loss": 0.5301,
"step": 19630
},
{
"epoch": 2.49,
"learning_rate": 8.470777298486002e-06,
"loss": 0.4544,
"step": 19640
},
{
"epoch": 2.49,
"learning_rate": 8.4496320730779e-06,
"loss": 0.512,
"step": 19650
},
{
"epoch": 2.49,
"learning_rate": 8.428486847669796e-06,
"loss": 0.6239,
"step": 19660
},
{
"epoch": 2.5,
"learning_rate": 8.407341622261695e-06,
"loss": 0.4767,
"step": 19670
},
{
"epoch": 2.5,
"learning_rate": 8.386196396853591e-06,
"loss": 0.5554,
"step": 19680
},
{
"epoch": 2.5,
"learning_rate": 8.365051171445489e-06,
"loss": 0.4669,
"step": 19690
},
{
"epoch": 2.5,
"learning_rate": 8.343905946037386e-06,
"loss": 0.6003,
"step": 19700
},
{
"epoch": 2.5,
"learning_rate": 8.322760720629284e-06,
"loss": 0.4976,
"step": 19710
},
{
"epoch": 2.5,
"learning_rate": 8.30161549522118e-06,
"loss": 0.4087,
"step": 19720
},
{
"epoch": 2.5,
"learning_rate": 8.280470269813076e-06,
"loss": 0.6493,
"step": 19730
},
{
"epoch": 2.5,
"learning_rate": 8.259325044404975e-06,
"loss": 0.624,
"step": 19740
},
{
"epoch": 2.51,
"learning_rate": 8.238179818996871e-06,
"loss": 0.3607,
"step": 19750
},
{
"epoch": 2.51,
"learning_rate": 8.217034593588769e-06,
"loss": 0.5355,
"step": 19760
},
{
"epoch": 2.51,
"learning_rate": 8.195889368180665e-06,
"loss": 0.474,
"step": 19770
},
{
"epoch": 2.51,
"learning_rate": 8.174744142772562e-06,
"loss": 0.5891,
"step": 19780
},
{
"epoch": 2.51,
"learning_rate": 8.15359891736446e-06,
"loss": 0.3573,
"step": 19790
},
{
"epoch": 2.51,
"learning_rate": 8.132453691956356e-06,
"loss": 0.5149,
"step": 19800
},
{
"epoch": 2.51,
"learning_rate": 8.111308466548253e-06,
"loss": 0.4149,
"step": 19810
},
{
"epoch": 2.51,
"learning_rate": 8.090163241140151e-06,
"loss": 0.4456,
"step": 19820
},
{
"epoch": 2.52,
"learning_rate": 8.069018015732047e-06,
"loss": 0.4991,
"step": 19830
},
{
"epoch": 2.52,
"learning_rate": 8.047872790323945e-06,
"loss": 0.455,
"step": 19840
},
{
"epoch": 2.52,
"learning_rate": 8.026727564915842e-06,
"loss": 0.5257,
"step": 19850
},
{
"epoch": 2.52,
"learning_rate": 8.005582339507738e-06,
"loss": 0.543,
"step": 19860
},
{
"epoch": 2.52,
"learning_rate": 7.984437114099636e-06,
"loss": 0.4272,
"step": 19870
},
{
"epoch": 2.52,
"learning_rate": 7.963291888691533e-06,
"loss": 0.5201,
"step": 19880
},
{
"epoch": 2.52,
"learning_rate": 7.942146663283431e-06,
"loss": 0.4046,
"step": 19890
},
{
"epoch": 2.52,
"learning_rate": 7.921001437875327e-06,
"loss": 0.4569,
"step": 19900
},
{
"epoch": 2.53,
"learning_rate": 7.899856212467225e-06,
"loss": 0.4313,
"step": 19910
},
{
"epoch": 2.53,
"learning_rate": 7.878710987059122e-06,
"loss": 0.3629,
"step": 19920
},
{
"epoch": 2.53,
"learning_rate": 7.85756576165102e-06,
"loss": 0.5107,
"step": 19930
},
{
"epoch": 2.53,
"learning_rate": 7.836420536242916e-06,
"loss": 0.4869,
"step": 19940
},
{
"epoch": 2.53,
"learning_rate": 7.815275310834814e-06,
"loss": 0.4291,
"step": 19950
},
{
"epoch": 2.53,
"learning_rate": 7.794130085426711e-06,
"loss": 0.4094,
"step": 19960
},
{
"epoch": 2.53,
"learning_rate": 7.772984860018607e-06,
"loss": 0.353,
"step": 19970
},
{
"epoch": 2.53,
"learning_rate": 7.751839634610505e-06,
"loss": 0.5075,
"step": 19980
},
{
"epoch": 2.54,
"learning_rate": 7.730694409202402e-06,
"loss": 0.5566,
"step": 19990
},
{
"epoch": 2.54,
"learning_rate": 7.7095491837943e-06,
"loss": 0.522,
"step": 20000
},
{
"epoch": 2.54,
"eval_loss": 0.9954192042350769,
"eval_rouge1": 0.7365916673138146,
"eval_rouge2": 0.6112395013332061,
"eval_rougeL": 0.6875565915040678,
"eval_rougeLsum": 0.6872503126372578,
"eval_runtime": 368.7211,
"eval_samples_per_second": 9.302,
"eval_steps_per_second": 2.327,
"step": 20000
},
{
"epoch": 2.54,
"learning_rate": 7.688403958386196e-06,
"loss": 0.4642,
"step": 20010
},
{
"epoch": 2.54,
"learning_rate": 7.667258732978094e-06,
"loss": 0.4858,
"step": 20020
},
{
"epoch": 2.54,
"learning_rate": 7.64611350756999e-06,
"loss": 0.6072,
"step": 20030
},
{
"epoch": 2.54,
"learning_rate": 7.624968282161889e-06,
"loss": 0.4382,
"step": 20040
},
{
"epoch": 2.54,
"learning_rate": 7.603823056753785e-06,
"loss": 0.6593,
"step": 20050
},
{
"epoch": 2.55,
"learning_rate": 7.5826778313456835e-06,
"loss": 0.6513,
"step": 20060
},
{
"epoch": 2.55,
"learning_rate": 7.56153260593758e-06,
"loss": 0.414,
"step": 20070
},
{
"epoch": 2.55,
"learning_rate": 7.540387380529476e-06,
"loss": 0.457,
"step": 20080
},
{
"epoch": 2.55,
"learning_rate": 7.519242155121374e-06,
"loss": 0.3535,
"step": 20090
},
{
"epoch": 2.55,
"learning_rate": 7.498096929713271e-06,
"loss": 0.456,
"step": 20100
},
{
"epoch": 2.55,
"learning_rate": 7.476951704305168e-06,
"loss": 0.5817,
"step": 20110
},
{
"epoch": 2.55,
"learning_rate": 7.455806478897065e-06,
"loss": 0.3913,
"step": 20120
},
{
"epoch": 2.55,
"learning_rate": 7.4346612534889626e-06,
"loss": 0.6022,
"step": 20130
},
{
"epoch": 2.56,
"learning_rate": 7.413516028080859e-06,
"loss": 0.3442,
"step": 20140
},
{
"epoch": 2.56,
"learning_rate": 7.392370802672757e-06,
"loss": 0.4602,
"step": 20150
},
{
"epoch": 2.56,
"learning_rate": 7.3712255772646535e-06,
"loss": 0.7046,
"step": 20160
},
{
"epoch": 2.56,
"learning_rate": 7.350080351856552e-06,
"loss": 0.6219,
"step": 20170
},
{
"epoch": 2.56,
"learning_rate": 7.328935126448448e-06,
"loss": 0.6017,
"step": 20180
},
{
"epoch": 2.56,
"learning_rate": 7.307789901040346e-06,
"loss": 0.591,
"step": 20190
},
{
"epoch": 2.56,
"learning_rate": 7.2866446756322425e-06,
"loss": 0.5266,
"step": 20200
},
{
"epoch": 2.56,
"learning_rate": 7.265499450224139e-06,
"loss": 0.5317,
"step": 20210
},
{
"epoch": 2.57,
"learning_rate": 7.244354224816037e-06,
"loss": 0.5732,
"step": 20220
},
{
"epoch": 2.57,
"learning_rate": 7.2232089994079335e-06,
"loss": 0.3673,
"step": 20230
},
{
"epoch": 2.57,
"learning_rate": 7.2020637739998316e-06,
"loss": 0.4437,
"step": 20240
},
{
"epoch": 2.57,
"learning_rate": 7.180918548591728e-06,
"loss": 0.3552,
"step": 20250
},
{
"epoch": 2.57,
"learning_rate": 7.159773323183626e-06,
"loss": 0.5452,
"step": 20260
},
{
"epoch": 2.57,
"learning_rate": 7.1386280977755225e-06,
"loss": 0.5679,
"step": 20270
},
{
"epoch": 2.57,
"learning_rate": 7.117482872367421e-06,
"loss": 0.3773,
"step": 20280
},
{
"epoch": 2.57,
"learning_rate": 7.096337646959317e-06,
"loss": 0.4429,
"step": 20290
},
{
"epoch": 2.58,
"learning_rate": 7.075192421551214e-06,
"loss": 0.5496,
"step": 20300
},
{
"epoch": 2.58,
"learning_rate": 7.0540471961431115e-06,
"loss": 0.5942,
"step": 20310
},
{
"epoch": 2.58,
"learning_rate": 7.032901970735008e-06,
"loss": 0.4472,
"step": 20320
},
{
"epoch": 2.58,
"learning_rate": 7.011756745326905e-06,
"loss": 0.3562,
"step": 20330
},
{
"epoch": 2.58,
"learning_rate": 6.9906115199188025e-06,
"loss": 0.4663,
"step": 20340
},
{
"epoch": 2.58,
"learning_rate": 6.9694662945107e-06,
"loss": 0.4205,
"step": 20350
},
{
"epoch": 2.58,
"learning_rate": 6.948321069102596e-06,
"loss": 0.6307,
"step": 20360
},
{
"epoch": 2.58,
"learning_rate": 6.927175843694494e-06,
"loss": 0.6136,
"step": 20370
},
{
"epoch": 2.59,
"learning_rate": 6.906030618286391e-06,
"loss": 0.3435,
"step": 20380
},
{
"epoch": 2.59,
"learning_rate": 6.884885392878289e-06,
"loss": 0.4172,
"step": 20390
},
{
"epoch": 2.59,
"learning_rate": 6.863740167470185e-06,
"loss": 0.5655,
"step": 20400
},
{
"epoch": 2.59,
"learning_rate": 6.842594942062083e-06,
"loss": 0.4646,
"step": 20410
},
{
"epoch": 2.59,
"learning_rate": 6.82144971665398e-06,
"loss": 0.394,
"step": 20420
},
{
"epoch": 2.59,
"learning_rate": 6.800304491245876e-06,
"loss": 0.5188,
"step": 20430
},
{
"epoch": 2.59,
"learning_rate": 6.779159265837774e-06,
"loss": 0.5636,
"step": 20440
},
{
"epoch": 2.59,
"learning_rate": 6.758014040429671e-06,
"loss": 0.4973,
"step": 20450
},
{
"epoch": 2.6,
"learning_rate": 6.736868815021569e-06,
"loss": 0.4989,
"step": 20460
},
{
"epoch": 2.6,
"learning_rate": 6.715723589613465e-06,
"loss": 0.6132,
"step": 20470
},
{
"epoch": 2.6,
"learning_rate": 6.694578364205363e-06,
"loss": 0.3653,
"step": 20480
},
{
"epoch": 2.6,
"learning_rate": 6.67343313879726e-06,
"loss": 0.4908,
"step": 20490
},
{
"epoch": 2.6,
"learning_rate": 6.652287913389158e-06,
"loss": 0.3532,
"step": 20500
},
{
"epoch": 2.6,
"learning_rate": 6.631142687981054e-06,
"loss": 0.512,
"step": 20510
},
{
"epoch": 2.6,
"learning_rate": 6.609997462572952e-06,
"loss": 0.5704,
"step": 20520
},
{
"epoch": 2.6,
"learning_rate": 6.588852237164849e-06,
"loss": 0.4418,
"step": 20530
},
{
"epoch": 2.61,
"learning_rate": 6.567707011756746e-06,
"loss": 0.5538,
"step": 20540
},
{
"epoch": 2.61,
"learning_rate": 6.546561786348643e-06,
"loss": 0.4689,
"step": 20550
},
{
"epoch": 2.61,
"learning_rate": 6.52541656094054e-06,
"loss": 0.3252,
"step": 20560
},
{
"epoch": 2.61,
"learning_rate": 6.504271335532437e-06,
"loss": 0.4944,
"step": 20570
},
{
"epoch": 2.61,
"learning_rate": 6.483126110124334e-06,
"loss": 0.7336,
"step": 20580
},
{
"epoch": 2.61,
"learning_rate": 6.461980884716231e-06,
"loss": 0.5153,
"step": 20590
},
{
"epoch": 2.61,
"learning_rate": 6.440835659308128e-06,
"loss": 0.4322,
"step": 20600
},
{
"epoch": 2.61,
"learning_rate": 6.419690433900026e-06,
"loss": 0.4704,
"step": 20610
},
{
"epoch": 2.62,
"learning_rate": 6.398545208491922e-06,
"loss": 0.5649,
"step": 20620
},
{
"epoch": 2.62,
"learning_rate": 6.3773999830838204e-06,
"loss": 0.6029,
"step": 20630
},
{
"epoch": 2.62,
"learning_rate": 6.356254757675717e-06,
"loss": 0.5191,
"step": 20640
},
{
"epoch": 2.62,
"learning_rate": 6.335109532267615e-06,
"loss": 0.5468,
"step": 20650
},
{
"epoch": 2.62,
"learning_rate": 6.313964306859511e-06,
"loss": 0.4635,
"step": 20660
},
{
"epoch": 2.62,
"learning_rate": 6.292819081451408e-06,
"loss": 0.4439,
"step": 20670
},
{
"epoch": 2.62,
"learning_rate": 6.271673856043306e-06,
"loss": 0.3613,
"step": 20680
},
{
"epoch": 2.62,
"learning_rate": 6.250528630635202e-06,
"loss": 0.3864,
"step": 20690
},
{
"epoch": 2.63,
"learning_rate": 6.2293834052271e-06,
"loss": 0.4128,
"step": 20700
},
{
"epoch": 2.63,
"learning_rate": 6.208238179818998e-06,
"loss": 0.5097,
"step": 20710
},
{
"epoch": 2.63,
"learning_rate": 6.187092954410895e-06,
"loss": 0.5738,
"step": 20720
},
{
"epoch": 2.63,
"learning_rate": 6.165947729002791e-06,
"loss": 0.3576,
"step": 20730
},
{
"epoch": 2.63,
"learning_rate": 6.144802503594689e-06,
"loss": 0.5411,
"step": 20740
},
{
"epoch": 2.63,
"learning_rate": 6.123657278186586e-06,
"loss": 0.5502,
"step": 20750
},
{
"epoch": 2.63,
"learning_rate": 6.102512052778483e-06,
"loss": 0.346,
"step": 20760
},
{
"epoch": 2.64,
"learning_rate": 6.08136682737038e-06,
"loss": 0.4663,
"step": 20770
},
{
"epoch": 2.64,
"learning_rate": 6.060221601962277e-06,
"loss": 0.5874,
"step": 20780
},
{
"epoch": 2.64,
"learning_rate": 6.039076376554174e-06,
"loss": 0.452,
"step": 20790
},
{
"epoch": 2.64,
"learning_rate": 6.017931151146071e-06,
"loss": 0.2486,
"step": 20800
},
{
"epoch": 2.64,
"learning_rate": 5.9967859257379686e-06,
"loss": 0.3609,
"step": 20810
},
{
"epoch": 2.64,
"learning_rate": 5.975640700329866e-06,
"loss": 0.3711,
"step": 20820
},
{
"epoch": 2.64,
"learning_rate": 5.954495474921763e-06,
"loss": 0.5113,
"step": 20830
},
{
"epoch": 2.64,
"learning_rate": 5.9333502495136595e-06,
"loss": 0.387,
"step": 20840
},
{
"epoch": 2.65,
"learning_rate": 5.912205024105557e-06,
"loss": 0.4204,
"step": 20850
},
{
"epoch": 2.65,
"learning_rate": 5.891059798697454e-06,
"loss": 0.4952,
"step": 20860
},
{
"epoch": 2.65,
"learning_rate": 5.869914573289351e-06,
"loss": 0.4798,
"step": 20870
},
{
"epoch": 2.65,
"learning_rate": 5.8487693478812485e-06,
"loss": 0.4874,
"step": 20880
},
{
"epoch": 2.65,
"learning_rate": 5.827624122473146e-06,
"loss": 0.5781,
"step": 20890
},
{
"epoch": 2.65,
"learning_rate": 5.806478897065043e-06,
"loss": 0.6932,
"step": 20900
},
{
"epoch": 2.65,
"learning_rate": 5.78533367165694e-06,
"loss": 0.3419,
"step": 20910
},
{
"epoch": 2.65,
"learning_rate": 5.7641884462488376e-06,
"loss": 0.3999,
"step": 20920
},
{
"epoch": 2.66,
"learning_rate": 5.743043220840735e-06,
"loss": 0.4476,
"step": 20930
},
{
"epoch": 2.66,
"learning_rate": 5.721897995432632e-06,
"loss": 0.5326,
"step": 20940
},
{
"epoch": 2.66,
"learning_rate": 5.700752770024529e-06,
"loss": 0.4616,
"step": 20950
},
{
"epoch": 2.66,
"learning_rate": 5.679607544616426e-06,
"loss": 0.4929,
"step": 20960
},
{
"epoch": 2.66,
"learning_rate": 5.658462319208323e-06,
"loss": 0.6214,
"step": 20970
},
{
"epoch": 2.66,
"learning_rate": 5.63731709380022e-06,
"loss": 0.5991,
"step": 20980
},
{
"epoch": 2.66,
"learning_rate": 5.6161718683921175e-06,
"loss": 0.4567,
"step": 20990
},
{
"epoch": 2.66,
"learning_rate": 5.595026642984015e-06,
"loss": 0.6464,
"step": 21000
},
{
"epoch": 2.66,
"eval_loss": 0.9962205290794373,
"eval_rouge1": 0.741439310015436,
"eval_rouge2": 0.6164352333996164,
"eval_rougeL": 0.6926049619928674,
"eval_rougeLsum": 0.6924766227131494,
"eval_runtime": 369.2945,
"eval_samples_per_second": 9.288,
"eval_steps_per_second": 2.323,
"step": 21000
},
{
"epoch": 2.67,
"learning_rate": 5.573881417575911e-06,
"loss": 0.3697,
"step": 21010
},
{
"epoch": 2.67,
"learning_rate": 5.5527361921678085e-06,
"loss": 0.45,
"step": 21020
},
{
"epoch": 2.67,
"learning_rate": 5.531590966759706e-06,
"loss": 0.3727,
"step": 21030
},
{
"epoch": 2.67,
"learning_rate": 5.510445741351603e-06,
"loss": 0.3412,
"step": 21040
},
{
"epoch": 2.67,
"learning_rate": 5.4893005159435e-06,
"loss": 0.4117,
"step": 21050
},
{
"epoch": 2.67,
"learning_rate": 5.4681552905353975e-06,
"loss": 0.4178,
"step": 21060
},
{
"epoch": 2.67,
"learning_rate": 5.447010065127295e-06,
"loss": 0.4419,
"step": 21070
},
{
"epoch": 2.67,
"learning_rate": 5.425864839719191e-06,
"loss": 0.6782,
"step": 21080
},
{
"epoch": 2.68,
"learning_rate": 5.404719614311088e-06,
"loss": 0.3942,
"step": 21090
},
{
"epoch": 2.68,
"learning_rate": 5.383574388902986e-06,
"loss": 0.4622,
"step": 21100
},
{
"epoch": 2.68,
"learning_rate": 5.362429163494883e-06,
"loss": 0.5591,
"step": 21110
},
{
"epoch": 2.68,
"learning_rate": 5.34128393808678e-06,
"loss": 0.4245,
"step": 21120
},
{
"epoch": 2.68,
"learning_rate": 5.3201387126786775e-06,
"loss": 0.4589,
"step": 21130
},
{
"epoch": 2.68,
"learning_rate": 5.298993487270575e-06,
"loss": 0.5699,
"step": 21140
},
{
"epoch": 2.68,
"learning_rate": 5.277848261862472e-06,
"loss": 0.5912,
"step": 21150
},
{
"epoch": 2.68,
"learning_rate": 5.256703036454369e-06,
"loss": 0.4672,
"step": 21160
},
{
"epoch": 2.69,
"learning_rate": 5.2355578110462665e-06,
"loss": 0.4732,
"step": 21170
},
{
"epoch": 2.69,
"learning_rate": 5.214412585638164e-06,
"loss": 0.4615,
"step": 21180
},
{
"epoch": 2.69,
"learning_rate": 5.193267360230061e-06,
"loss": 0.4562,
"step": 21190
},
{
"epoch": 2.69,
"learning_rate": 5.172122134821957e-06,
"loss": 0.4172,
"step": 21200
},
{
"epoch": 2.69,
"learning_rate": 5.150976909413855e-06,
"loss": 0.3666,
"step": 21210
},
{
"epoch": 2.69,
"learning_rate": 5.129831684005752e-06,
"loss": 0.5198,
"step": 21220
},
{
"epoch": 2.69,
"learning_rate": 5.108686458597649e-06,
"loss": 0.4645,
"step": 21230
},
{
"epoch": 2.69,
"learning_rate": 5.087541233189546e-06,
"loss": 0.4282,
"step": 21240
},
{
"epoch": 2.7,
"learning_rate": 5.066396007781443e-06,
"loss": 0.6272,
"step": 21250
},
{
"epoch": 2.7,
"learning_rate": 5.04525078237334e-06,
"loss": 0.5303,
"step": 21260
},
{
"epoch": 2.7,
"learning_rate": 5.024105556965237e-06,
"loss": 0.5319,
"step": 21270
},
{
"epoch": 2.7,
"learning_rate": 5.002960331557135e-06,
"loss": 0.6712,
"step": 21280
},
{
"epoch": 2.7,
"learning_rate": 4.981815106149032e-06,
"loss": 0.4607,
"step": 21290
},
{
"epoch": 2.7,
"learning_rate": 4.960669880740929e-06,
"loss": 0.5461,
"step": 21300
},
{
"epoch": 2.7,
"learning_rate": 4.9395246553328256e-06,
"loss": 0.3756,
"step": 21310
},
{
"epoch": 2.7,
"learning_rate": 4.918379429924723e-06,
"loss": 0.4443,
"step": 21320
},
{
"epoch": 2.71,
"learning_rate": 4.89723420451662e-06,
"loss": 0.5608,
"step": 21330
},
{
"epoch": 2.71,
"learning_rate": 4.876088979108517e-06,
"loss": 0.4689,
"step": 21340
},
{
"epoch": 2.71,
"learning_rate": 4.854943753700415e-06,
"loss": 0.4179,
"step": 21350
},
{
"epoch": 2.71,
"learning_rate": 4.833798528292312e-06,
"loss": 0.472,
"step": 21360
},
{
"epoch": 2.71,
"learning_rate": 4.812653302884209e-06,
"loss": 0.5084,
"step": 21370
},
{
"epoch": 2.71,
"learning_rate": 4.791508077476106e-06,
"loss": 0.4727,
"step": 21380
},
{
"epoch": 2.71,
"learning_rate": 4.770362852068004e-06,
"loss": 0.3656,
"step": 21390
},
{
"epoch": 2.72,
"learning_rate": 4.749217626659901e-06,
"loss": 0.4946,
"step": 21400
},
{
"epoch": 2.72,
"learning_rate": 4.728072401251798e-06,
"loss": 0.5387,
"step": 21410
},
{
"epoch": 2.72,
"learning_rate": 4.706927175843695e-06,
"loss": 0.3715,
"step": 21420
},
{
"epoch": 2.72,
"learning_rate": 4.685781950435592e-06,
"loss": 0.5384,
"step": 21430
},
{
"epoch": 2.72,
"learning_rate": 4.664636725027489e-06,
"loss": 0.3601,
"step": 21440
},
{
"epoch": 2.72,
"learning_rate": 4.643491499619386e-06,
"loss": 0.5665,
"step": 21450
},
{
"epoch": 2.72,
"learning_rate": 4.622346274211283e-06,
"loss": 0.5288,
"step": 21460
},
{
"epoch": 2.72,
"learning_rate": 4.60120104880318e-06,
"loss": 0.5086,
"step": 21470
},
{
"epoch": 2.73,
"learning_rate": 4.580055823395077e-06,
"loss": 0.4795,
"step": 21480
},
{
"epoch": 2.73,
"learning_rate": 4.5589105979869745e-06,
"loss": 0.3736,
"step": 21490
},
{
"epoch": 2.73,
"learning_rate": 4.537765372578872e-06,
"loss": 0.3579,
"step": 21500
},
{
"epoch": 2.73,
"learning_rate": 4.516620147170769e-06,
"loss": 0.4727,
"step": 21510
},
{
"epoch": 2.73,
"learning_rate": 4.495474921762666e-06,
"loss": 0.5056,
"step": 21520
},
{
"epoch": 2.73,
"learning_rate": 4.4743296963545636e-06,
"loss": 0.4547,
"step": 21530
},
{
"epoch": 2.73,
"learning_rate": 4.453184470946461e-06,
"loss": 0.3088,
"step": 21540
},
{
"epoch": 2.73,
"learning_rate": 4.432039245538357e-06,
"loss": 0.5873,
"step": 21550
},
{
"epoch": 2.74,
"learning_rate": 4.4108940201302545e-06,
"loss": 0.4088,
"step": 21560
},
{
"epoch": 2.74,
"learning_rate": 4.389748794722152e-06,
"loss": 0.5303,
"step": 21570
},
{
"epoch": 2.74,
"learning_rate": 4.368603569314049e-06,
"loss": 0.4645,
"step": 21580
},
{
"epoch": 2.74,
"learning_rate": 4.347458343905946e-06,
"loss": 0.5382,
"step": 21590
},
{
"epoch": 2.74,
"learning_rate": 4.3263131184978435e-06,
"loss": 0.6384,
"step": 21600
},
{
"epoch": 2.74,
"learning_rate": 4.305167893089741e-06,
"loss": 0.5667,
"step": 21610
},
{
"epoch": 2.74,
"learning_rate": 4.284022667681638e-06,
"loss": 0.4626,
"step": 21620
},
{
"epoch": 2.74,
"learning_rate": 4.262877442273535e-06,
"loss": 0.478,
"step": 21630
},
{
"epoch": 2.75,
"learning_rate": 4.2417322168654326e-06,
"loss": 0.523,
"step": 21640
},
{
"epoch": 2.75,
"learning_rate": 4.22058699145733e-06,
"loss": 0.3625,
"step": 21650
},
{
"epoch": 2.75,
"learning_rate": 4.199441766049226e-06,
"loss": 0.5328,
"step": 21660
},
{
"epoch": 2.75,
"learning_rate": 4.1782965406411235e-06,
"loss": 0.6964,
"step": 21670
},
{
"epoch": 2.75,
"learning_rate": 4.157151315233021e-06,
"loss": 0.4284,
"step": 21680
},
{
"epoch": 2.75,
"learning_rate": 4.136006089824917e-06,
"loss": 0.5959,
"step": 21690
},
{
"epoch": 2.75,
"learning_rate": 4.1148608644168144e-06,
"loss": 0.3996,
"step": 21700
},
{
"epoch": 2.75,
"learning_rate": 4.093715639008712e-06,
"loss": 0.4347,
"step": 21710
},
{
"epoch": 2.76,
"learning_rate": 4.072570413600609e-06,
"loss": 0.4792,
"step": 21720
},
{
"epoch": 2.76,
"learning_rate": 4.051425188192506e-06,
"loss": 0.4156,
"step": 21730
},
{
"epoch": 2.76,
"learning_rate": 4.0302799627844035e-06,
"loss": 0.4937,
"step": 21740
},
{
"epoch": 2.76,
"learning_rate": 4.009134737376301e-06,
"loss": 0.4666,
"step": 21750
},
{
"epoch": 2.76,
"learning_rate": 3.987989511968198e-06,
"loss": 0.3805,
"step": 21760
},
{
"epoch": 2.76,
"learning_rate": 3.966844286560095e-06,
"loss": 0.4638,
"step": 21770
},
{
"epoch": 2.76,
"learning_rate": 3.945699061151992e-06,
"loss": 0.5764,
"step": 21780
},
{
"epoch": 2.76,
"learning_rate": 3.924553835743889e-06,
"loss": 0.5338,
"step": 21790
},
{
"epoch": 2.77,
"learning_rate": 3.903408610335786e-06,
"loss": 0.5998,
"step": 21800
},
{
"epoch": 2.77,
"learning_rate": 3.8822633849276834e-06,
"loss": 0.4682,
"step": 21810
},
{
"epoch": 2.77,
"learning_rate": 3.861118159519581e-06,
"loss": 0.4512,
"step": 21820
},
{
"epoch": 2.77,
"learning_rate": 3.839972934111478e-06,
"loss": 0.4968,
"step": 21830
},
{
"epoch": 2.77,
"learning_rate": 3.818827708703375e-06,
"loss": 0.3572,
"step": 21840
},
{
"epoch": 2.77,
"learning_rate": 3.797682483295272e-06,
"loss": 0.7262,
"step": 21850
},
{
"epoch": 2.77,
"learning_rate": 3.7765372578871693e-06,
"loss": 0.3381,
"step": 21860
},
{
"epoch": 2.77,
"learning_rate": 3.7553920324790666e-06,
"loss": 0.5167,
"step": 21870
},
{
"epoch": 2.78,
"learning_rate": 3.734246807070964e-06,
"loss": 0.4583,
"step": 21880
},
{
"epoch": 2.78,
"learning_rate": 3.713101581662861e-06,
"loss": 0.4677,
"step": 21890
},
{
"epoch": 2.78,
"learning_rate": 3.6919563562547575e-06,
"loss": 0.4689,
"step": 21900
},
{
"epoch": 2.78,
"learning_rate": 3.6708111308466547e-06,
"loss": 0.4191,
"step": 21910
},
{
"epoch": 2.78,
"learning_rate": 3.649665905438552e-06,
"loss": 0.5312,
"step": 21920
},
{
"epoch": 2.78,
"learning_rate": 3.6285206800304493e-06,
"loss": 0.6154,
"step": 21930
},
{
"epoch": 2.78,
"learning_rate": 3.6073754546223465e-06,
"loss": 0.478,
"step": 21940
},
{
"epoch": 2.78,
"learning_rate": 3.5862302292142434e-06,
"loss": 0.4396,
"step": 21950
},
{
"epoch": 2.79,
"learning_rate": 3.5650850038061406e-06,
"loss": 0.3887,
"step": 21960
},
{
"epoch": 2.79,
"learning_rate": 3.543939778398038e-06,
"loss": 0.3277,
"step": 21970
},
{
"epoch": 2.79,
"learning_rate": 3.522794552989935e-06,
"loss": 0.4962,
"step": 21980
},
{
"epoch": 2.79,
"learning_rate": 3.5016493275818324e-06,
"loss": 0.4257,
"step": 21990
},
{
"epoch": 2.79,
"learning_rate": 3.4805041021737297e-06,
"loss": 0.3686,
"step": 22000
},
{
"epoch": 2.79,
"eval_loss": 0.9851571321487427,
"eval_rouge1": 0.735793414724685,
"eval_rouge2": 0.6073396411072542,
"eval_rougeL": 0.6847326990723057,
"eval_rougeLsum": 0.6846013539004916,
"eval_runtime": 368.9454,
"eval_samples_per_second": 9.297,
"eval_steps_per_second": 2.326,
"step": 22000
},
{
"epoch": 2.79,
"learning_rate": 3.459358876765627e-06,
"loss": 0.463,
"step": 22010
},
{
"epoch": 2.79,
"learning_rate": 3.4382136513575233e-06,
"loss": 0.6165,
"step": 22020
},
{
"epoch": 2.79,
"learning_rate": 3.4170684259494206e-06,
"loss": 0.4181,
"step": 22030
},
{
"epoch": 2.8,
"learning_rate": 3.395923200541318e-06,
"loss": 0.6783,
"step": 22040
},
{
"epoch": 2.8,
"learning_rate": 3.374777975133215e-06,
"loss": 0.5672,
"step": 22050
},
{
"epoch": 2.8,
"learning_rate": 3.3536327497251124e-06,
"loss": 0.5229,
"step": 22060
},
{
"epoch": 2.8,
"learning_rate": 3.332487524317009e-06,
"loss": 0.5021,
"step": 22070
},
{
"epoch": 2.8,
"learning_rate": 3.3113422989089065e-06,
"loss": 0.3753,
"step": 22080
},
{
"epoch": 2.8,
"learning_rate": 3.2901970735008037e-06,
"loss": 0.3297,
"step": 22090
},
{
"epoch": 2.8,
"learning_rate": 3.269051848092701e-06,
"loss": 0.5002,
"step": 22100
},
{
"epoch": 2.81,
"learning_rate": 3.2479066226845982e-06,
"loss": 0.5817,
"step": 22110
},
{
"epoch": 2.81,
"learning_rate": 3.2267613972764955e-06,
"loss": 0.5804,
"step": 22120
},
{
"epoch": 2.81,
"learning_rate": 3.2056161718683928e-06,
"loss": 0.7145,
"step": 22130
},
{
"epoch": 2.81,
"learning_rate": 3.184470946460289e-06,
"loss": 0.4462,
"step": 22140
},
{
"epoch": 2.81,
"learning_rate": 3.1633257210521864e-06,
"loss": 0.4839,
"step": 22150
},
{
"epoch": 2.81,
"learning_rate": 3.1421804956440837e-06,
"loss": 0.5427,
"step": 22160
},
{
"epoch": 2.81,
"learning_rate": 3.121035270235981e-06,
"loss": 0.5418,
"step": 22170
},
{
"epoch": 2.81,
"learning_rate": 3.0998900448278778e-06,
"loss": 0.3647,
"step": 22180
},
{
"epoch": 2.82,
"learning_rate": 3.078744819419775e-06,
"loss": 0.5941,
"step": 22190
},
{
"epoch": 2.82,
"learning_rate": 3.0575995940116723e-06,
"loss": 0.4613,
"step": 22200
},
{
"epoch": 2.82,
"learning_rate": 3.0364543686035696e-06,
"loss": 0.6058,
"step": 22210
},
{
"epoch": 2.82,
"learning_rate": 3.0153091431954664e-06,
"loss": 0.4819,
"step": 22220
},
{
"epoch": 2.82,
"learning_rate": 2.9941639177873636e-06,
"loss": 0.6973,
"step": 22230
},
{
"epoch": 2.82,
"learning_rate": 2.973018692379261e-06,
"loss": 0.4759,
"step": 22240
},
{
"epoch": 2.82,
"learning_rate": 2.951873466971158e-06,
"loss": 0.5051,
"step": 22250
},
{
"epoch": 2.82,
"learning_rate": 2.9307282415630554e-06,
"loss": 0.4779,
"step": 22260
},
{
"epoch": 2.83,
"learning_rate": 2.9095830161549527e-06,
"loss": 0.456,
"step": 22270
},
{
"epoch": 2.83,
"learning_rate": 2.8884377907468495e-06,
"loss": 0.432,
"step": 22280
},
{
"epoch": 2.83,
"learning_rate": 2.8672925653387464e-06,
"loss": 0.7129,
"step": 22290
},
{
"epoch": 2.83,
"learning_rate": 2.8461473399306436e-06,
"loss": 0.541,
"step": 22300
},
{
"epoch": 2.83,
"learning_rate": 2.825002114522541e-06,
"loss": 0.5639,
"step": 22310
},
{
"epoch": 2.83,
"learning_rate": 2.803856889114438e-06,
"loss": 0.411,
"step": 22320
},
{
"epoch": 2.83,
"learning_rate": 2.7827116637063354e-06,
"loss": 0.5593,
"step": 22330
},
{
"epoch": 2.83,
"learning_rate": 2.7615664382982322e-06,
"loss": 0.6096,
"step": 22340
},
{
"epoch": 2.84,
"learning_rate": 2.7404212128901295e-06,
"loss": 0.3716,
"step": 22350
},
{
"epoch": 2.84,
"learning_rate": 2.7192759874820267e-06,
"loss": 0.5806,
"step": 22360
},
{
"epoch": 2.84,
"learning_rate": 2.698130762073924e-06,
"loss": 0.4832,
"step": 22370
},
{
"epoch": 2.84,
"learning_rate": 2.6769855366658213e-06,
"loss": 0.6147,
"step": 22380
},
{
"epoch": 2.84,
"learning_rate": 2.655840311257718e-06,
"loss": 0.4555,
"step": 22390
},
{
"epoch": 2.84,
"learning_rate": 2.634695085849615e-06,
"loss": 0.4489,
"step": 22400
},
{
"epoch": 2.84,
"learning_rate": 2.613549860441512e-06,
"loss": 0.3009,
"step": 22410
},
{
"epoch": 2.84,
"learning_rate": 2.5924046350334094e-06,
"loss": 0.4032,
"step": 22420
},
{
"epoch": 2.85,
"learning_rate": 2.5712594096253067e-06,
"loss": 0.4113,
"step": 22430
},
{
"epoch": 2.85,
"learning_rate": 2.550114184217204e-06,
"loss": 0.442,
"step": 22440
},
{
"epoch": 2.85,
"learning_rate": 2.5289689588091012e-06,
"loss": 0.4277,
"step": 22450
},
{
"epoch": 2.85,
"learning_rate": 2.507823733400998e-06,
"loss": 0.3684,
"step": 22460
},
{
"epoch": 2.85,
"learning_rate": 2.4866785079928953e-06,
"loss": 0.4701,
"step": 22470
},
{
"epoch": 2.85,
"learning_rate": 2.4655332825847926e-06,
"loss": 0.3602,
"step": 22480
},
{
"epoch": 2.85,
"learning_rate": 2.44438805717669e-06,
"loss": 0.6264,
"step": 22490
},
{
"epoch": 2.85,
"learning_rate": 2.4232428317685867e-06,
"loss": 0.621,
"step": 22500
},
{
"epoch": 2.86,
"learning_rate": 2.402097606360484e-06,
"loss": 0.5177,
"step": 22510
},
{
"epoch": 2.86,
"learning_rate": 2.3809523809523808e-06,
"loss": 0.578,
"step": 22520
},
{
"epoch": 2.86,
"learning_rate": 2.359807155544278e-06,
"loss": 0.6237,
"step": 22530
},
{
"epoch": 2.86,
"learning_rate": 2.3386619301361753e-06,
"loss": 0.6771,
"step": 22540
},
{
"epoch": 2.86,
"learning_rate": 2.3175167047280725e-06,
"loss": 0.5638,
"step": 22550
},
{
"epoch": 2.86,
"learning_rate": 2.29637147931997e-06,
"loss": 0.3666,
"step": 22560
},
{
"epoch": 2.86,
"learning_rate": 2.275226253911867e-06,
"loss": 0.5146,
"step": 22570
},
{
"epoch": 2.86,
"learning_rate": 2.254081028503764e-06,
"loss": 0.482,
"step": 22580
},
{
"epoch": 2.87,
"learning_rate": 2.232935803095661e-06,
"loss": 0.3881,
"step": 22590
},
{
"epoch": 2.87,
"learning_rate": 2.2117905776875584e-06,
"loss": 0.445,
"step": 22600
},
{
"epoch": 2.87,
"learning_rate": 2.1906453522794552e-06,
"loss": 0.4116,
"step": 22610
},
{
"epoch": 2.87,
"learning_rate": 2.1695001268713525e-06,
"loss": 0.4647,
"step": 22620
},
{
"epoch": 2.87,
"learning_rate": 2.1483549014632498e-06,
"loss": 0.479,
"step": 22630
},
{
"epoch": 2.87,
"learning_rate": 2.1272096760551466e-06,
"loss": 0.5227,
"step": 22640
},
{
"epoch": 2.87,
"learning_rate": 2.106064450647044e-06,
"loss": 0.5385,
"step": 22650
},
{
"epoch": 2.87,
"learning_rate": 2.084919225238941e-06,
"loss": 0.3218,
"step": 22660
},
{
"epoch": 2.88,
"learning_rate": 2.0637739998308384e-06,
"loss": 0.5982,
"step": 22670
},
{
"epoch": 2.88,
"learning_rate": 2.0426287744227356e-06,
"loss": 0.3835,
"step": 22680
},
{
"epoch": 2.88,
"learning_rate": 2.0214835490146325e-06,
"loss": 0.4207,
"step": 22690
},
{
"epoch": 2.88,
"learning_rate": 2.0003383236065297e-06,
"loss": 0.5506,
"step": 22700
},
{
"epoch": 2.88,
"learning_rate": 1.979193098198427e-06,
"loss": 0.5565,
"step": 22710
},
{
"epoch": 2.88,
"learning_rate": 1.9580478727903242e-06,
"loss": 0.4352,
"step": 22720
},
{
"epoch": 2.88,
"learning_rate": 1.936902647382221e-06,
"loss": 0.3719,
"step": 22730
},
{
"epoch": 2.89,
"learning_rate": 1.9157574219741183e-06,
"loss": 0.496,
"step": 22740
},
{
"epoch": 2.89,
"learning_rate": 1.8946121965660154e-06,
"loss": 0.4908,
"step": 22750
},
{
"epoch": 2.89,
"learning_rate": 1.8734669711579124e-06,
"loss": 0.5186,
"step": 22760
},
{
"epoch": 2.89,
"learning_rate": 1.8523217457498097e-06,
"loss": 0.5483,
"step": 22770
},
{
"epoch": 2.89,
"learning_rate": 1.831176520341707e-06,
"loss": 0.3343,
"step": 22780
},
{
"epoch": 2.89,
"learning_rate": 1.8100312949336042e-06,
"loss": 0.5978,
"step": 22790
},
{
"epoch": 2.89,
"learning_rate": 1.7888860695255013e-06,
"loss": 0.4617,
"step": 22800
},
{
"epoch": 2.89,
"learning_rate": 1.7677408441173983e-06,
"loss": 0.4982,
"step": 22810
},
{
"epoch": 2.9,
"learning_rate": 1.7465956187092954e-06,
"loss": 0.4358,
"step": 22820
},
{
"epoch": 2.9,
"learning_rate": 1.7254503933011926e-06,
"loss": 0.4034,
"step": 22830
},
{
"epoch": 2.9,
"learning_rate": 1.7043051678930899e-06,
"loss": 0.4236,
"step": 22840
},
{
"epoch": 2.9,
"learning_rate": 1.6831599424849871e-06,
"loss": 0.4428,
"step": 22850
},
{
"epoch": 2.9,
"learning_rate": 1.6620147170768842e-06,
"loss": 0.4327,
"step": 22860
},
{
"epoch": 2.9,
"learning_rate": 1.6408694916687812e-06,
"loss": 0.3967,
"step": 22870
},
{
"epoch": 2.9,
"learning_rate": 1.6197242662606783e-06,
"loss": 0.4391,
"step": 22880
},
{
"epoch": 2.9,
"learning_rate": 1.5985790408525755e-06,
"loss": 0.3967,
"step": 22890
},
{
"epoch": 2.91,
"learning_rate": 1.5774338154444728e-06,
"loss": 0.4322,
"step": 22900
},
{
"epoch": 2.91,
"learning_rate": 1.5562885900363698e-06,
"loss": 0.4156,
"step": 22910
},
{
"epoch": 2.91,
"learning_rate": 1.5351433646282669e-06,
"loss": 0.4748,
"step": 22920
},
{
"epoch": 2.91,
"learning_rate": 1.5139981392201641e-06,
"loss": 0.3539,
"step": 22930
},
{
"epoch": 2.91,
"learning_rate": 1.4928529138120612e-06,
"loss": 0.3934,
"step": 22940
},
{
"epoch": 2.91,
"learning_rate": 1.4717076884039584e-06,
"loss": 0.469,
"step": 22950
},
{
"epoch": 2.91,
"learning_rate": 1.4505624629958557e-06,
"loss": 0.4797,
"step": 22960
},
{
"epoch": 2.91,
"learning_rate": 1.4294172375877528e-06,
"loss": 0.4803,
"step": 22970
},
{
"epoch": 2.92,
"learning_rate": 1.4082720121796498e-06,
"loss": 0.5004,
"step": 22980
},
{
"epoch": 2.92,
"learning_rate": 1.387126786771547e-06,
"loss": 0.4915,
"step": 22990
},
{
"epoch": 2.92,
"learning_rate": 1.3659815613634441e-06,
"loss": 0.3737,
"step": 23000
},
{
"epoch": 2.92,
"eval_loss": 0.9804850220680237,
"eval_rouge1": 0.7389355580258474,
"eval_rouge2": 0.6119786433724955,
"eval_rougeL": 0.6882161499350804,
"eval_rougeLsum": 0.6879571606404977,
"eval_runtime": 371.0141,
"eval_samples_per_second": 9.245,
"eval_steps_per_second": 2.313,
"step": 23000
}
],
"max_steps": 23646,
"num_train_epochs": 3,
"total_flos": 4.984181753276006e+16,
"trial_name": null,
"trial_params": null
}