{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 2394, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012531328320802004, "grad_norm": 11.967602793791775, "learning_rate": 6.666666666666667e-07, "loss": 0.9827, "loss_nan_ranks": 0, "loss_rank_avg": 1.0594820976257324, "step": 5, "valid_targets_mean": 1399.4, "valid_targets_min": 664 }, { "epoch": 0.02506265664160401, "grad_norm": 8.98761722777954, "learning_rate": 1.5e-06, "loss": 0.943, "loss_nan_ranks": 0, "loss_rank_avg": 0.920002818107605, "step": 10, "valid_targets_mean": 1760.4, "valid_targets_min": 580 }, { "epoch": 0.03759398496240601, "grad_norm": 7.940426239884903, "learning_rate": 2.3333333333333336e-06, "loss": 0.9234, "loss_nan_ranks": 0, "loss_rank_avg": 0.9283522963523865, "step": 15, "valid_targets_mean": 1458.5, "valid_targets_min": 677 }, { "epoch": 0.05012531328320802, "grad_norm": 5.224005967961386, "learning_rate": 3.1666666666666667e-06, "loss": 0.884, "loss_nan_ranks": 0, "loss_rank_avg": 0.8793013095855713, "step": 20, "valid_targets_mean": 1278.1, "valid_targets_min": 680 }, { "epoch": 0.06265664160401002, "grad_norm": 3.7348565637539166, "learning_rate": 4.000000000000001e-06, "loss": 0.8446, "loss_nan_ranks": 0, "loss_rank_avg": 0.8814775943756104, "step": 25, "valid_targets_mean": 1153.8, "valid_targets_min": 728 }, { "epoch": 0.07518796992481203, "grad_norm": 2.075582535169858, "learning_rate": 4.833333333333333e-06, "loss": 0.7618, "loss_nan_ranks": 0, "loss_rank_avg": 0.6769101619720459, "step": 30, "valid_targets_mean": 1775.1, "valid_targets_min": 597 }, { "epoch": 0.08771929824561403, "grad_norm": 1.651201460536687, "learning_rate": 5.666666666666667e-06, "loss": 0.7189, "loss_nan_ranks": 0, "loss_rank_avg": 0.7634528279304504, "step": 35, "valid_targets_mean": 1523.2, "valid_targets_min": 699 }, { "epoch": 0.10025062656641603, "grad_norm": 1.5275695524844886, "learning_rate": 6.5000000000000004e-06, "loss": 0.7277, "loss_nan_ranks": 0, "loss_rank_avg": 0.750813364982605, "step": 40, "valid_targets_mean": 1344.6, "valid_targets_min": 679 }, { "epoch": 0.11278195488721804, "grad_norm": 1.361371709568494, "learning_rate": 7.333333333333333e-06, "loss": 0.703, "loss_nan_ranks": 0, "loss_rank_avg": 0.6691080927848816, "step": 45, "valid_targets_mean": 1303.0, "valid_targets_min": 734 }, { "epoch": 0.12531328320802004, "grad_norm": 1.152708781810184, "learning_rate": 8.166666666666668e-06, "loss": 0.6218, "loss_nan_ranks": 0, "loss_rank_avg": 0.6578604578971863, "step": 50, "valid_targets_mean": 1385.8, "valid_targets_min": 715 }, { "epoch": 0.13784461152882205, "grad_norm": 0.9820138424429671, "learning_rate": 9e-06, "loss": 0.6597, "loss_nan_ranks": 0, "loss_rank_avg": 0.6535561084747314, "step": 55, "valid_targets_mean": 1496.4, "valid_targets_min": 717 }, { "epoch": 0.15037593984962405, "grad_norm": 1.158994935896899, "learning_rate": 9.833333333333333e-06, "loss": 0.6282, "loss_nan_ranks": 0, "loss_rank_avg": 0.7068292498588562, "step": 60, "valid_targets_mean": 1289.7, "valid_targets_min": 666 }, { "epoch": 0.16290726817042606, "grad_norm": 0.9426226170878005, "learning_rate": 1.0666666666666667e-05, "loss": 0.5982, "loss_nan_ranks": 0, "loss_rank_avg": 0.585883378982544, "step": 65, "valid_targets_mean": 1464.9, "valid_targets_min": 601 }, { "epoch": 0.17543859649122806, "grad_norm": 0.9405312188936708, "learning_rate": 1.15e-05, "loss": 0.5808, "loss_nan_ranks": 0, "loss_rank_avg": 0.6200866103172302, "step": 70, "valid_targets_mean": 1546.4, "valid_targets_min": 667 }, { "epoch": 0.18796992481203006, "grad_norm": 1.0123323450686448, "learning_rate": 1.2333333333333334e-05, "loss": 0.5854, "loss_nan_ranks": 0, "loss_rank_avg": 0.6166951060295105, "step": 75, "valid_targets_mean": 1234.2, "valid_targets_min": 603 }, { "epoch": 0.20050125313283207, "grad_norm": 0.994487225478387, "learning_rate": 1.3166666666666667e-05, "loss": 0.5918, "loss_nan_ranks": 0, "loss_rank_avg": 0.5867680311203003, "step": 80, "valid_targets_mean": 1479.2, "valid_targets_min": 654 }, { "epoch": 0.21303258145363407, "grad_norm": 0.8431318418016086, "learning_rate": 1.4e-05, "loss": 0.5669, "loss_nan_ranks": 0, "loss_rank_avg": 0.5588182210922241, "step": 85, "valid_targets_mean": 1593.0, "valid_targets_min": 641 }, { "epoch": 0.22556390977443608, "grad_norm": 0.9663714100983305, "learning_rate": 1.4833333333333336e-05, "loss": 0.5683, "loss_nan_ranks": 0, "loss_rank_avg": 0.601900041103363, "step": 90, "valid_targets_mean": 1452.9, "valid_targets_min": 647 }, { "epoch": 0.23809523809523808, "grad_norm": 0.9262179876650385, "learning_rate": 1.5666666666666667e-05, "loss": 0.5344, "loss_nan_ranks": 0, "loss_rank_avg": 0.5362147092819214, "step": 95, "valid_targets_mean": 1426.3, "valid_targets_min": 705 }, { "epoch": 0.2506265664160401, "grad_norm": 0.8714735709336389, "learning_rate": 1.65e-05, "loss": 0.5704, "loss_nan_ranks": 0, "loss_rank_avg": 0.5262184739112854, "step": 100, "valid_targets_mean": 1516.5, "valid_targets_min": 683 }, { "epoch": 0.2631578947368421, "grad_norm": 0.8918423929105987, "learning_rate": 1.7333333333333336e-05, "loss": 0.5614, "loss_nan_ranks": 0, "loss_rank_avg": 0.5865960717201233, "step": 105, "valid_targets_mean": 1535.5, "valid_targets_min": 671 }, { "epoch": 0.2756892230576441, "grad_norm": 0.8935023571966156, "learning_rate": 1.8166666666666667e-05, "loss": 0.538, "loss_nan_ranks": 0, "loss_rank_avg": 0.5343993306159973, "step": 110, "valid_targets_mean": 1453.6, "valid_targets_min": 705 }, { "epoch": 0.2882205513784461, "grad_norm": 0.9703027895486921, "learning_rate": 1.9e-05, "loss": 0.5571, "loss_nan_ranks": 0, "loss_rank_avg": 0.59378981590271, "step": 115, "valid_targets_mean": 1433.0, "valid_targets_min": 641 }, { "epoch": 0.3007518796992481, "grad_norm": 1.0870045224468745, "learning_rate": 1.9833333333333335e-05, "loss": 0.5618, "loss_nan_ranks": 0, "loss_rank_avg": 0.5384276509284973, "step": 120, "valid_targets_mean": 1152.8, "valid_targets_min": 707 }, { "epoch": 0.3132832080200501, "grad_norm": 0.7700331714467691, "learning_rate": 2.066666666666667e-05, "loss": 0.5313, "loss_nan_ranks": 0, "loss_rank_avg": 0.4870373010635376, "step": 125, "valid_targets_mean": 1642.6, "valid_targets_min": 668 }, { "epoch": 0.3258145363408521, "grad_norm": 0.8789807975640997, "learning_rate": 2.15e-05, "loss": 0.55, "loss_nan_ranks": 0, "loss_rank_avg": 0.5609132051467896, "step": 130, "valid_targets_mean": 1823.2, "valid_targets_min": 723 }, { "epoch": 0.3383458646616541, "grad_norm": 0.8680952778590917, "learning_rate": 2.2333333333333335e-05, "loss": 0.5295, "loss_nan_ranks": 0, "loss_rank_avg": 0.5506467223167419, "step": 135, "valid_targets_mean": 1608.2, "valid_targets_min": 695 }, { "epoch": 0.3508771929824561, "grad_norm": 0.8541140586653626, "learning_rate": 2.316666666666667e-05, "loss": 0.5193, "loss_nan_ranks": 0, "loss_rank_avg": 0.5101579427719116, "step": 140, "valid_targets_mean": 1443.7, "valid_targets_min": 700 }, { "epoch": 0.3634085213032581, "grad_norm": 0.8260671930156039, "learning_rate": 2.4e-05, "loss": 0.5157, "loss_nan_ranks": 0, "loss_rank_avg": 0.4936548173427582, "step": 145, "valid_targets_mean": 1672.0, "valid_targets_min": 652 }, { "epoch": 0.37593984962406013, "grad_norm": 0.8136337955135831, "learning_rate": 2.4833333333333335e-05, "loss": 0.4955, "loss_nan_ranks": 0, "loss_rank_avg": 0.48357129096984863, "step": 150, "valid_targets_mean": 1661.6, "valid_targets_min": 512 }, { "epoch": 0.38847117794486213, "grad_norm": 0.8534689528421746, "learning_rate": 2.566666666666667e-05, "loss": 0.5144, "loss_nan_ranks": 0, "loss_rank_avg": 0.49319130182266235, "step": 155, "valid_targets_mean": 1579.5, "valid_targets_min": 759 }, { "epoch": 0.40100250626566414, "grad_norm": 0.883526035160319, "learning_rate": 2.65e-05, "loss": 0.5072, "loss_nan_ranks": 0, "loss_rank_avg": 0.5153460502624512, "step": 160, "valid_targets_mean": 1388.1, "valid_targets_min": 620 }, { "epoch": 0.41353383458646614, "grad_norm": 0.8934224689157233, "learning_rate": 2.7333333333333335e-05, "loss": 0.5193, "loss_nan_ranks": 0, "loss_rank_avg": 0.5074542760848999, "step": 165, "valid_targets_mean": 1612.6, "valid_targets_min": 852 }, { "epoch": 0.42606516290726815, "grad_norm": 0.9616691914227905, "learning_rate": 2.8166666666666673e-05, "loss": 0.5136, "loss_nan_ranks": 0, "loss_rank_avg": 0.5238911509513855, "step": 170, "valid_targets_mean": 1431.7, "valid_targets_min": 700 }, { "epoch": 0.43859649122807015, "grad_norm": 0.9858740124742356, "learning_rate": 2.9e-05, "loss": 0.4963, "loss_nan_ranks": 0, "loss_rank_avg": 0.5187824368476868, "step": 175, "valid_targets_mean": 1465.6, "valid_targets_min": 741 }, { "epoch": 0.45112781954887216, "grad_norm": 0.735291369280417, "learning_rate": 2.9833333333333338e-05, "loss": 0.4942, "loss_nan_ranks": 0, "loss_rank_avg": 0.45208507776260376, "step": 180, "valid_targets_mean": 2039.2, "valid_targets_min": 791 }, { "epoch": 0.46365914786967416, "grad_norm": 1.0047344686962825, "learning_rate": 3.066666666666667e-05, "loss": 0.5027, "loss_nan_ranks": 0, "loss_rank_avg": 0.5117394924163818, "step": 185, "valid_targets_mean": 1217.4, "valid_targets_min": 661 }, { "epoch": 0.47619047619047616, "grad_norm": 0.9220228098812638, "learning_rate": 3.15e-05, "loss": 0.5381, "loss_nan_ranks": 0, "loss_rank_avg": 0.5018247365951538, "step": 190, "valid_targets_mean": 1535.4, "valid_targets_min": 719 }, { "epoch": 0.48872180451127817, "grad_norm": 0.876119425855413, "learning_rate": 3.233333333333334e-05, "loss": 0.5283, "loss_nan_ranks": 0, "loss_rank_avg": 0.47728070616722107, "step": 195, "valid_targets_mean": 1649.7, "valid_targets_min": 757 }, { "epoch": 0.5012531328320802, "grad_norm": 0.9997111593508858, "learning_rate": 3.316666666666667e-05, "loss": 0.5187, "loss_nan_ranks": 0, "loss_rank_avg": 0.5628472566604614, "step": 200, "valid_targets_mean": 1353.6, "valid_targets_min": 763 }, { "epoch": 0.5137844611528822, "grad_norm": 0.8132225371914904, "learning_rate": 3.4e-05, "loss": 0.5012, "loss_nan_ranks": 0, "loss_rank_avg": 0.5150524377822876, "step": 205, "valid_targets_mean": 1791.1, "valid_targets_min": 846 }, { "epoch": 0.5263157894736842, "grad_norm": 0.9327330282905936, "learning_rate": 3.483333333333334e-05, "loss": 0.4954, "loss_nan_ranks": 0, "loss_rank_avg": 0.532800018787384, "step": 210, "valid_targets_mean": 1568.7, "valid_targets_min": 618 }, { "epoch": 0.5388471177944862, "grad_norm": 0.8846848263288521, "learning_rate": 3.566666666666667e-05, "loss": 0.4663, "loss_nan_ranks": 0, "loss_rank_avg": 0.4891921877861023, "step": 215, "valid_targets_mean": 1425.6, "valid_targets_min": 710 }, { "epoch": 0.5513784461152882, "grad_norm": 0.9695082520487255, "learning_rate": 3.65e-05, "loss": 0.4982, "loss_nan_ranks": 0, "loss_rank_avg": 0.5132033824920654, "step": 220, "valid_targets_mean": 1346.6, "valid_targets_min": 864 }, { "epoch": 0.5639097744360902, "grad_norm": 0.8581508906819361, "learning_rate": 3.733333333333334e-05, "loss": 0.4923, "loss_nan_ranks": 0, "loss_rank_avg": 0.4817158877849579, "step": 225, "valid_targets_mean": 1754.4, "valid_targets_min": 779 }, { "epoch": 0.5764411027568922, "grad_norm": 0.9214561305546174, "learning_rate": 3.8166666666666675e-05, "loss": 0.502, "loss_nan_ranks": 0, "loss_rank_avg": 0.44954267144203186, "step": 230, "valid_targets_mean": 1653.5, "valid_targets_min": 704 }, { "epoch": 0.5889724310776943, "grad_norm": 0.9242189734635586, "learning_rate": 3.9e-05, "loss": 0.5113, "loss_nan_ranks": 0, "loss_rank_avg": 0.5104361772537231, "step": 235, "valid_targets_mean": 1506.8, "valid_targets_min": 637 }, { "epoch": 0.6015037593984962, "grad_norm": 0.8447888551101427, "learning_rate": 3.983333333333334e-05, "loss": 0.4779, "loss_nan_ranks": 0, "loss_rank_avg": 0.4449302554130554, "step": 240, "valid_targets_mean": 1498.4, "valid_targets_min": 641 }, { "epoch": 0.6140350877192983, "grad_norm": 0.7911386578048909, "learning_rate": 3.9999659648947195e-05, "loss": 0.4974, "loss_nan_ranks": 0, "loss_rank_avg": 0.46942049264907837, "step": 245, "valid_targets_mean": 1783.9, "valid_targets_min": 696 }, { "epoch": 0.6265664160401002, "grad_norm": 0.8948760865225036, "learning_rate": 3.999827699264838e-05, "loss": 0.4816, "loss_nan_ranks": 0, "loss_rank_avg": 0.45563480257987976, "step": 250, "valid_targets_mean": 1652.1, "valid_targets_min": 619 }, { "epoch": 0.6390977443609023, "grad_norm": 1.0798792286240466, "learning_rate": 3.999583083263554e-05, "loss": 0.5077, "loss_nan_ranks": 0, "loss_rank_avg": 0.5357744693756104, "step": 255, "valid_targets_mean": 1125.9, "valid_targets_min": 604 }, { "epoch": 0.6516290726817042, "grad_norm": 0.8575070696270298, "learning_rate": 3.999232129899488e-05, "loss": 0.485, "loss_nan_ranks": 0, "loss_rank_avg": 0.4918670952320099, "step": 260, "valid_targets_mean": 1625.6, "valid_targets_min": 680 }, { "epoch": 0.6641604010025063, "grad_norm": 0.8850776630304149, "learning_rate": 3.99877485783626e-05, "loss": 0.5075, "loss_nan_ranks": 0, "loss_rank_avg": 0.4874800145626068, "step": 265, "valid_targets_mean": 1550.4, "valid_targets_min": 706 }, { "epoch": 0.6766917293233082, "grad_norm": 0.8166417216262739, "learning_rate": 3.998211291391491e-05, "loss": 0.4939, "loss_nan_ranks": 0, "loss_rank_avg": 0.466120183467865, "step": 270, "valid_targets_mean": 1398.1, "valid_targets_min": 715 }, { "epoch": 0.6892230576441103, "grad_norm": 0.7154428431730908, "learning_rate": 3.997541460535513e-05, "loss": 0.4851, "loss_nan_ranks": 0, "loss_rank_avg": 0.423753023147583, "step": 275, "valid_targets_mean": 2016.4, "valid_targets_min": 750 }, { "epoch": 0.7017543859649122, "grad_norm": 0.9213430720357552, "learning_rate": 3.996765400889775e-05, "loss": 0.4881, "loss_nan_ranks": 0, "loss_rank_avg": 0.5173033475875854, "step": 280, "valid_targets_mean": 1301.8, "valid_targets_min": 515 }, { "epoch": 0.7142857142857143, "grad_norm": 0.8023640529446733, "learning_rate": 3.9958831537249484e-05, "loss": 0.4717, "loss_nan_ranks": 0, "loss_rank_avg": 0.4825734496116638, "step": 285, "valid_targets_mean": 1722.1, "valid_targets_min": 726 }, { "epoch": 0.7268170426065163, "grad_norm": 0.8706455117416396, "learning_rate": 3.9948947659587336e-05, "loss": 0.4857, "loss_nan_ranks": 0, "loss_rank_avg": 0.48450374603271484, "step": 290, "valid_targets_mean": 1486.9, "valid_targets_min": 766 }, { "epoch": 0.7393483709273183, "grad_norm": 0.853787916696495, "learning_rate": 3.993800290153359e-05, "loss": 0.4717, "loss_nan_ranks": 0, "loss_rank_avg": 0.46074461936950684, "step": 295, "valid_targets_mean": 1472.6, "valid_targets_min": 735 }, { "epoch": 0.7518796992481203, "grad_norm": 0.7835226381075628, "learning_rate": 3.992599784512795e-05, "loss": 0.4826, "loss_nan_ranks": 0, "loss_rank_avg": 0.4899185001850128, "step": 300, "valid_targets_mean": 1684.3, "valid_targets_min": 734 }, { "epoch": 0.7644110275689223, "grad_norm": 0.9897427541429156, "learning_rate": 3.991293312879652e-05, "loss": 0.4896, "loss_nan_ranks": 0, "loss_rank_avg": 0.5004957914352417, "step": 305, "valid_targets_mean": 1242.6, "valid_targets_min": 606 }, { "epoch": 0.7769423558897243, "grad_norm": 0.8938793841595182, "learning_rate": 3.989880944731786e-05, "loss": 0.4727, "loss_nan_ranks": 0, "loss_rank_avg": 0.49720150232315063, "step": 310, "valid_targets_mean": 1363.1, "valid_targets_min": 624 }, { "epoch": 0.7894736842105263, "grad_norm": 0.8382612026880438, "learning_rate": 3.9883627551786074e-05, "loss": 0.4978, "loss_nan_ranks": 0, "loss_rank_avg": 0.48912370204925537, "step": 315, "valid_targets_mean": 1447.2, "valid_targets_min": 783 }, { "epoch": 0.8020050125313283, "grad_norm": 0.761439572529583, "learning_rate": 3.9867388249570836e-05, "loss": 0.4772, "loss_nan_ranks": 0, "loss_rank_avg": 0.4344802498817444, "step": 320, "valid_targets_mean": 1695.3, "valid_targets_min": 632 }, { "epoch": 0.8145363408521303, "grad_norm": 0.8785902546234519, "learning_rate": 3.985009240427443e-05, "loss": 0.4843, "loss_nan_ranks": 0, "loss_rank_avg": 0.4630719721317291, "step": 325, "valid_targets_mean": 1378.1, "valid_targets_min": 683 }, { "epoch": 0.8270676691729323, "grad_norm": 0.8228485065620595, "learning_rate": 3.983174093568591e-05, "loss": 0.4878, "loss_nan_ranks": 0, "loss_rank_avg": 0.47758162021636963, "step": 330, "valid_targets_mean": 1509.3, "valid_targets_min": 689 }, { "epoch": 0.8395989974937343, "grad_norm": 0.9717758816642885, "learning_rate": 3.98123348197321e-05, "loss": 0.4698, "loss_nan_ranks": 0, "loss_rank_avg": 0.4755571484565735, "step": 335, "valid_targets_mean": 1212.8, "valid_targets_min": 613 }, { "epoch": 0.8521303258145363, "grad_norm": 0.8614023270458415, "learning_rate": 3.979187508842571e-05, "loss": 0.4786, "loss_nan_ranks": 0, "loss_rank_avg": 0.5126999020576477, "step": 340, "valid_targets_mean": 1392.4, "valid_targets_min": 682 }, { "epoch": 0.8646616541353384, "grad_norm": 0.7611158699746358, "learning_rate": 3.977036282981051e-05, "loss": 0.471, "loss_nan_ranks": 0, "loss_rank_avg": 0.4539802372455597, "step": 345, "valid_targets_mean": 1647.4, "valid_targets_min": 580 }, { "epoch": 0.8771929824561403, "grad_norm": 0.8616664739071654, "learning_rate": 3.974779918790338e-05, "loss": 0.4884, "loss_nan_ranks": 0, "loss_rank_avg": 0.47764351963996887, "step": 350, "valid_targets_mean": 1359.4, "valid_targets_min": 692 }, { "epoch": 0.8897243107769424, "grad_norm": 0.8389852758904627, "learning_rate": 3.972418536263355e-05, "loss": 0.4866, "loss_nan_ranks": 0, "loss_rank_avg": 0.5188552141189575, "step": 355, "valid_targets_mean": 1837.6, "valid_targets_min": 776 }, { "epoch": 0.9022556390977443, "grad_norm": 0.8061384574451791, "learning_rate": 3.969952260977877e-05, "loss": 0.4801, "loss_nan_ranks": 0, "loss_rank_avg": 0.48911938071250916, "step": 360, "valid_targets_mean": 1631.5, "valid_targets_min": 600 }, { "epoch": 0.9147869674185464, "grad_norm": 0.6985984050928506, "learning_rate": 3.9673812240898466e-05, "loss": 0.4723, "loss_nan_ranks": 0, "loss_rank_avg": 0.44051438570022583, "step": 365, "valid_targets_mean": 1888.6, "valid_targets_min": 661 }, { "epoch": 0.9273182957393483, "grad_norm": 0.8231367554229954, "learning_rate": 3.964705562326408e-05, "loss": 0.472, "loss_nan_ranks": 0, "loss_rank_avg": 0.4927082359790802, "step": 370, "valid_targets_mean": 1530.6, "valid_targets_min": 648 }, { "epoch": 0.9398496240601504, "grad_norm": 0.7775064944719399, "learning_rate": 3.961925417978632e-05, "loss": 0.4734, "loss_nan_ranks": 0, "loss_rank_avg": 0.4817471206188202, "step": 375, "valid_targets_mean": 1528.3, "valid_targets_min": 806 }, { "epoch": 0.9523809523809523, "grad_norm": 0.7068701184233374, "learning_rate": 3.959040938893946e-05, "loss": 0.4798, "loss_nan_ranks": 0, "loss_rank_avg": 0.4508395791053772, "step": 380, "valid_targets_mean": 1779.4, "valid_targets_min": 722 }, { "epoch": 0.9649122807017544, "grad_norm": 0.7292299060821251, "learning_rate": 3.9560522784682766e-05, "loss": 0.4798, "loss_nan_ranks": 0, "loss_rank_avg": 0.45591917634010315, "step": 385, "valid_targets_mean": 1567.5, "valid_targets_min": 769 }, { "epoch": 0.9774436090225563, "grad_norm": 0.8273251382462157, "learning_rate": 3.952959595637889e-05, "loss": 0.4629, "loss_nan_ranks": 0, "loss_rank_avg": 0.46951186656951904, "step": 390, "valid_targets_mean": 1494.0, "valid_targets_min": 732 }, { "epoch": 0.9899749373433584, "grad_norm": 0.8808510863928986, "learning_rate": 3.9497630548709375e-05, "loss": 0.4756, "loss_nan_ranks": 0, "loss_rank_avg": 0.5034153461456299, "step": 395, "valid_targets_mean": 1323.8, "valid_targets_min": 646 }, { "epoch": 1.0025062656641603, "grad_norm": 1.3955002841045798, "learning_rate": 3.946462826158714e-05, "loss": 0.4594, "loss_nan_ranks": 0, "loss_rank_avg": 0.45724648237228394, "step": 400, "valid_targets_mean": 1202.4, "valid_targets_min": 772 }, { "epoch": 1.0150375939849625, "grad_norm": 0.8269517494031545, "learning_rate": 3.943059085006613e-05, "loss": 0.4671, "loss_nan_ranks": 0, "loss_rank_avg": 0.45007988810539246, "step": 405, "valid_targets_mean": 1828.4, "valid_targets_min": 547 }, { "epoch": 1.0275689223057645, "grad_norm": 0.7493476492079604, "learning_rate": 3.9395520124247984e-05, "loss": 0.4473, "loss_nan_ranks": 0, "loss_rank_avg": 0.4048207104206085, "step": 410, "valid_targets_mean": 1577.7, "valid_targets_min": 512 }, { "epoch": 1.0401002506265664, "grad_norm": 0.750270123972134, "learning_rate": 3.935941794918572e-05, "loss": 0.4573, "loss_nan_ranks": 0, "loss_rank_avg": 0.4433751702308655, "step": 415, "valid_targets_mean": 1736.9, "valid_targets_min": 813 }, { "epoch": 1.0526315789473684, "grad_norm": 0.834001389405996, "learning_rate": 3.9322286244784597e-05, "loss": 0.4473, "loss_nan_ranks": 0, "loss_rank_avg": 0.4450203776359558, "step": 420, "valid_targets_mean": 1364.6, "valid_targets_min": 617 }, { "epoch": 1.0651629072681703, "grad_norm": 0.794005813742166, "learning_rate": 3.9284126985700016e-05, "loss": 0.4351, "loss_nan_ranks": 0, "loss_rank_avg": 0.41918760538101196, "step": 425, "valid_targets_mean": 1459.9, "valid_targets_min": 712 }, { "epoch": 1.0776942355889725, "grad_norm": 0.68082444015136, "learning_rate": 3.9244942201232507e-05, "loss": 0.4397, "loss_nan_ranks": 0, "loss_rank_avg": 0.43921422958374023, "step": 430, "valid_targets_mean": 1819.3, "valid_targets_min": 691 }, { "epoch": 1.0902255639097744, "grad_norm": 0.7208521260635171, "learning_rate": 3.9204733975219754e-05, "loss": 0.4409, "loss_nan_ranks": 0, "loss_rank_avg": 0.43605563044548035, "step": 435, "valid_targets_mean": 1719.1, "valid_targets_min": 770 }, { "epoch": 1.1027568922305764, "grad_norm": 0.7489888034624764, "learning_rate": 3.9163504445925865e-05, "loss": 0.44, "loss_nan_ranks": 0, "loss_rank_avg": 0.42712393403053284, "step": 440, "valid_targets_mean": 1795.5, "valid_targets_min": 740 }, { "epoch": 1.1152882205513786, "grad_norm": 0.8335448902554116, "learning_rate": 3.9121255805927615e-05, "loss": 0.4535, "loss_nan_ranks": 0, "loss_rank_avg": 0.4607224464416504, "step": 445, "valid_targets_mean": 1583.9, "valid_targets_min": 582 }, { "epoch": 1.1278195488721805, "grad_norm": 0.7654806876894515, "learning_rate": 3.907799030199784e-05, "loss": 0.4491, "loss_nan_ranks": 0, "loss_rank_avg": 0.45577600598335266, "step": 450, "valid_targets_mean": 1676.8, "valid_targets_min": 497 }, { "epoch": 1.1403508771929824, "grad_norm": 0.929732543345461, "learning_rate": 3.903371023498596e-05, "loss": 0.4441, "loss_nan_ranks": 0, "loss_rank_avg": 0.4645495116710663, "step": 455, "valid_targets_mean": 1188.7, "valid_targets_min": 427 }, { "epoch": 1.1528822055137844, "grad_norm": 0.7852422253164664, "learning_rate": 3.898841795969563e-05, "loss": 0.4581, "loss_nan_ranks": 0, "loss_rank_avg": 0.4440934658050537, "step": 460, "valid_targets_mean": 1563.7, "valid_targets_min": 649 }, { "epoch": 1.1654135338345863, "grad_norm": 0.8757302453873744, "learning_rate": 3.8942115884759505e-05, "loss": 0.4516, "loss_nan_ranks": 0, "loss_rank_avg": 0.4700010418891907, "step": 465, "valid_targets_mean": 1330.1, "valid_targets_min": 607 }, { "epoch": 1.1779448621553885, "grad_norm": 0.8499909272201124, "learning_rate": 3.889480647251115e-05, "loss": 0.4419, "loss_nan_ranks": 0, "loss_rank_avg": 0.441257119178772, "step": 470, "valid_targets_mean": 1289.9, "valid_targets_min": 711 }, { "epoch": 1.1904761904761905, "grad_norm": 0.8713204781666626, "learning_rate": 3.884649223885409e-05, "loss": 0.4443, "loss_nan_ranks": 0, "loss_rank_avg": 0.4358166456222534, "step": 475, "valid_targets_mean": 1215.9, "valid_targets_min": 638 }, { "epoch": 1.2030075187969924, "grad_norm": 0.876755138815846, "learning_rate": 3.879717575312802e-05, "loss": 0.4547, "loss_nan_ranks": 0, "loss_rank_avg": 0.4684372842311859, "step": 480, "valid_targets_mean": 1500.8, "valid_targets_min": 691 }, { "epoch": 1.2155388471177946, "grad_norm": 0.8095343697428509, "learning_rate": 3.874685963797218e-05, "loss": 0.4396, "loss_nan_ranks": 0, "loss_rank_avg": 0.4964834749698639, "step": 485, "valid_targets_mean": 1463.1, "valid_targets_min": 616 }, { "epoch": 1.2280701754385965, "grad_norm": 0.7915449220424059, "learning_rate": 3.869554656918584e-05, "loss": 0.4585, "loss_nan_ranks": 0, "loss_rank_avg": 0.46632125973701477, "step": 490, "valid_targets_mean": 1562.4, "valid_targets_min": 618 }, { "epoch": 1.2406015037593985, "grad_norm": 0.8865984056691865, "learning_rate": 3.864323927558606e-05, "loss": 0.4422, "loss_nan_ranks": 0, "loss_rank_avg": 0.4493522047996521, "step": 495, "valid_targets_mean": 1433.1, "valid_targets_min": 736 }, { "epoch": 1.2531328320802004, "grad_norm": 0.8529626057872378, "learning_rate": 3.858994053886254e-05, "loss": 0.4557, "loss_nan_ranks": 0, "loss_rank_avg": 0.4794151782989502, "step": 500, "valid_targets_mean": 1461.8, "valid_targets_min": 747 }, { "epoch": 1.2656641604010024, "grad_norm": 0.73988664696522, "learning_rate": 3.853565319342968e-05, "loss": 0.4464, "loss_nan_ranks": 0, "loss_rank_avg": 0.4382275938987732, "step": 505, "valid_targets_mean": 1613.5, "valid_targets_min": 740 }, { "epoch": 1.2781954887218046, "grad_norm": 0.8499742272557709, "learning_rate": 3.848038012627587e-05, "loss": 0.4403, "loss_nan_ranks": 0, "loss_rank_avg": 0.45188936591148376, "step": 510, "valid_targets_mean": 1327.7, "valid_targets_min": 646 }, { "epoch": 1.2907268170426065, "grad_norm": 0.837806554951509, "learning_rate": 3.8424124276809956e-05, "loss": 0.4637, "loss_nan_ranks": 0, "loss_rank_avg": 0.5095597505569458, "step": 515, "valid_targets_mean": 1469.3, "valid_targets_min": 898 }, { "epoch": 1.3032581453634084, "grad_norm": 0.7019492892336352, "learning_rate": 3.8366888636704916e-05, "loss": 0.4481, "loss_nan_ranks": 0, "loss_rank_avg": 0.4414467215538025, "step": 520, "valid_targets_mean": 1896.4, "valid_targets_min": 809 }, { "epoch": 1.3157894736842106, "grad_norm": 0.8718791272496377, "learning_rate": 3.830867624973875e-05, "loss": 0.4268, "loss_nan_ranks": 0, "loss_rank_avg": 0.43715381622314453, "step": 525, "valid_targets_mean": 1746.9, "valid_targets_min": 756 }, { "epoch": 1.3283208020050126, "grad_norm": 0.8909245916987222, "learning_rate": 3.824949021163265e-05, "loss": 0.4586, "loss_nan_ranks": 0, "loss_rank_avg": 0.4303131699562073, "step": 530, "valid_targets_mean": 1252.4, "valid_targets_min": 735 }, { "epoch": 1.3408521303258145, "grad_norm": 0.740400513063558, "learning_rate": 3.8189333669886354e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.4084957242012024, "step": 535, "valid_targets_mean": 1536.8, "valid_targets_min": 612 }, { "epoch": 1.3533834586466165, "grad_norm": 0.8115498137086447, "learning_rate": 3.8128209823610735e-05, "loss": 0.4547, "loss_nan_ranks": 0, "loss_rank_avg": 0.45533132553100586, "step": 540, "valid_targets_mean": 1381.9, "valid_targets_min": 675 }, { "epoch": 1.3659147869674184, "grad_norm": 0.6549907366136642, "learning_rate": 3.806612192335769e-05, "loss": 0.4426, "loss_nan_ranks": 0, "loss_rank_avg": 0.434446781873703, "step": 545, "valid_targets_mean": 1967.2, "valid_targets_min": 689 }, { "epoch": 1.3784461152882206, "grad_norm": 0.7386898782090013, "learning_rate": 3.800307327094733e-05, "loss": 0.433, "loss_nan_ranks": 0, "loss_rank_avg": 0.41058215498924255, "step": 550, "valid_targets_mean": 1503.0, "valid_targets_min": 626 }, { "epoch": 1.3909774436090225, "grad_norm": 0.6970870873869073, "learning_rate": 3.7939067219292284e-05, "loss": 0.4199, "loss_nan_ranks": 0, "loss_rank_avg": 0.42085281014442444, "step": 555, "valid_targets_mean": 1788.9, "valid_targets_min": 641 }, { "epoch": 1.4035087719298245, "grad_norm": 0.7828068884431777, "learning_rate": 3.787410717221948e-05, "loss": 0.4458, "loss_nan_ranks": 0, "loss_rank_avg": 0.44116562604904175, "step": 560, "valid_targets_mean": 1510.1, "valid_targets_min": 591 }, { "epoch": 1.4160401002506267, "grad_norm": 0.657058991321496, "learning_rate": 3.780819658428911e-05, "loss": 0.4276, "loss_nan_ranks": 0, "loss_rank_avg": 0.3789742588996887, "step": 565, "valid_targets_mean": 1835.7, "valid_targets_min": 890 }, { "epoch": 1.4285714285714286, "grad_norm": 0.6395861985446157, "learning_rate": 3.7741338960610885e-05, "loss": 0.426, "loss_nan_ranks": 0, "loss_rank_avg": 0.3952099680900574, "step": 570, "valid_targets_mean": 2071.3, "valid_targets_min": 612 }, { "epoch": 1.4411027568922306, "grad_norm": 0.8347795153118129, "learning_rate": 3.767353785665765e-05, "loss": 0.4436, "loss_nan_ranks": 0, "loss_rank_avg": 0.4351791441440582, "step": 575, "valid_targets_mean": 1441.6, "valid_targets_min": 678 }, { "epoch": 1.4536340852130325, "grad_norm": 0.8033860871708556, "learning_rate": 3.760479687807635e-05, "loss": 0.4476, "loss_nan_ranks": 0, "loss_rank_avg": 0.46652624011039734, "step": 580, "valid_targets_mean": 1368.7, "valid_targets_min": 643 }, { "epoch": 1.4661654135338344, "grad_norm": 0.6504013496758371, "learning_rate": 3.753511968049622e-05, "loss": 0.4294, "loss_nan_ranks": 0, "loss_rank_avg": 0.4036969542503357, "step": 585, "valid_targets_mean": 1799.1, "valid_targets_min": 431 }, { "epoch": 1.4786967418546366, "grad_norm": 0.7381764886100423, "learning_rate": 3.746450996933438e-05, "loss": 0.4513, "loss_nan_ranks": 0, "loss_rank_avg": 0.4756660461425781, "step": 590, "valid_targets_mean": 1778.2, "valid_targets_min": 696 }, { "epoch": 1.4912280701754386, "grad_norm": 0.6936827960015854, "learning_rate": 3.739297149959884e-05, "loss": 0.4447, "loss_nan_ranks": 0, "loss_rank_avg": 0.42643094062805176, "step": 595, "valid_targets_mean": 1805.2, "valid_targets_min": 776 }, { "epoch": 1.5037593984962405, "grad_norm": 0.697264472740107, "learning_rate": 3.732050807568878e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.44037938117980957, "step": 600, "valid_targets_mean": 1873.8, "valid_targets_min": 780 }, { "epoch": 1.5162907268170427, "grad_norm": 0.7975402798314732, "learning_rate": 3.724712355119218e-05, "loss": 0.4468, "loss_nan_ranks": 0, "loss_rank_avg": 0.45365750789642334, "step": 605, "valid_targets_mean": 1470.1, "valid_targets_min": 710 }, { "epoch": 1.5288220551378446, "grad_norm": 0.6786451129500303, "learning_rate": 3.7172821828681e-05, "loss": 0.4255, "loss_nan_ranks": 0, "loss_rank_avg": 0.41390031576156616, "step": 610, "valid_targets_mean": 1679.1, "valid_targets_min": 684 }, { "epoch": 1.5413533834586466, "grad_norm": 0.7498724478327179, "learning_rate": 3.7097606859503514e-05, "loss": 0.4409, "loss_nan_ranks": 0, "loss_rank_avg": 0.4342663884162903, "step": 615, "valid_targets_mean": 1787.7, "valid_targets_min": 549 }, { "epoch": 1.5538847117794488, "grad_norm": 0.7293841788979415, "learning_rate": 3.702148264357428e-05, "loss": 0.4501, "loss_nan_ranks": 0, "loss_rank_avg": 0.43075722455978394, "step": 620, "valid_targets_mean": 1639.2, "valid_targets_min": 662 }, { "epoch": 1.5664160401002505, "grad_norm": 0.7467516913324442, "learning_rate": 3.694445322916138e-05, "loss": 0.4488, "loss_nan_ranks": 0, "loss_rank_avg": 0.4477841556072235, "step": 625, "valid_targets_mean": 1638.9, "valid_targets_min": 763 }, { "epoch": 1.5789473684210527, "grad_norm": 0.719672038793378, "learning_rate": 3.686652271267114e-05, "loss": 0.4334, "loss_nan_ranks": 0, "loss_rank_avg": 0.432794988155365, "step": 630, "valid_targets_mean": 1743.4, "valid_targets_min": 722 }, { "epoch": 1.5914786967418546, "grad_norm": 0.8220616262947018, "learning_rate": 3.678769523843026e-05, "loss": 0.4415, "loss_nan_ranks": 0, "loss_rank_avg": 0.48474329710006714, "step": 635, "valid_targets_mean": 1533.6, "valid_targets_min": 673 }, { "epoch": 1.6040100250626566, "grad_norm": 0.7771307016733878, "learning_rate": 3.6707974998465495e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.4561769962310791, "step": 640, "valid_targets_mean": 1554.4, "valid_targets_min": 745 }, { "epoch": 1.6165413533834587, "grad_norm": 0.7761963492870784, "learning_rate": 3.662736623228062e-05, "loss": 0.4391, "loss_nan_ranks": 0, "loss_rank_avg": 0.4495946764945984, "step": 645, "valid_targets_mean": 1472.9, "valid_targets_min": 682 }, { "epoch": 1.6290726817042607, "grad_norm": 0.9978606274160214, "learning_rate": 3.654587322663106e-05, "loss": 0.4431, "loss_nan_ranks": 0, "loss_rank_avg": 0.4660862684249878, "step": 650, "valid_targets_mean": 1242.4, "valid_targets_min": 668 }, { "epoch": 1.6416040100250626, "grad_norm": 0.8231848952496575, "learning_rate": 3.64635003152959e-05, "loss": 0.4522, "loss_nan_ranks": 0, "loss_rank_avg": 0.45088180899620056, "step": 655, "valid_targets_mean": 1370.2, "valid_targets_min": 623 }, { "epoch": 1.6541353383458648, "grad_norm": 0.7471399307147799, "learning_rate": 3.6380251878847356e-05, "loss": 0.4316, "loss_nan_ranks": 0, "loss_rank_avg": 0.4242357611656189, "step": 660, "valid_targets_mean": 1712.2, "valid_targets_min": 708 }, { "epoch": 1.6666666666666665, "grad_norm": 0.6863054821104536, "learning_rate": 3.62961323444179e-05, "loss": 0.4625, "loss_nan_ranks": 0, "loss_rank_avg": 0.4189673066139221, "step": 665, "valid_targets_mean": 1814.9, "valid_targets_min": 751 }, { "epoch": 1.6791979949874687, "grad_norm": 0.786683608287003, "learning_rate": 3.62111461854648e-05, "loss": 0.4414, "loss_nan_ranks": 0, "loss_rank_avg": 0.41071224212646484, "step": 670, "valid_targets_mean": 1481.6, "valid_targets_min": 792 }, { "epoch": 1.6917293233082706, "grad_norm": 0.7691848685754727, "learning_rate": 3.6125297921532195e-05, "loss": 0.4441, "loss_nan_ranks": 0, "loss_rank_avg": 0.4370245337486267, "step": 675, "valid_targets_mean": 1699.9, "valid_targets_min": 725 }, { "epoch": 1.7042606516290726, "grad_norm": 0.9076573617201255, "learning_rate": 3.603859211801076e-05, "loss": 0.4384, "loss_nan_ranks": 0, "loss_rank_avg": 0.4542667269706726, "step": 680, "valid_targets_mean": 1348.2, "valid_targets_min": 759 }, { "epoch": 1.7167919799498748, "grad_norm": 0.7736607294399541, "learning_rate": 3.5951033385894955e-05, "loss": 0.4534, "loss_nan_ranks": 0, "loss_rank_avg": 0.41055262088775635, "step": 685, "valid_targets_mean": 1575.7, "valid_targets_min": 739 }, { "epoch": 1.7293233082706767, "grad_norm": 0.7320170764193196, "learning_rate": 3.5862626381537753e-05, "loss": 0.4496, "loss_nan_ranks": 0, "loss_rank_avg": 0.45462262630462646, "step": 690, "valid_targets_mean": 1573.2, "valid_targets_min": 759 }, { "epoch": 1.7418546365914787, "grad_norm": 0.6977308735657218, "learning_rate": 3.577337580640307e-05, "loss": 0.4405, "loss_nan_ranks": 0, "loss_rank_avg": 0.43793463706970215, "step": 695, "valid_targets_mean": 1780.3, "valid_targets_min": 710 }, { "epoch": 1.7543859649122808, "grad_norm": 0.7573922216037817, "learning_rate": 3.568328640681571e-05, "loss": 0.4495, "loss_nan_ranks": 0, "loss_rank_avg": 0.4407915771007538, "step": 700, "valid_targets_mean": 1433.2, "valid_targets_min": 668 }, { "epoch": 1.7669172932330826, "grad_norm": 0.6368027104315123, "learning_rate": 3.559236297370896e-05, "loss": 0.4414, "loss_nan_ranks": 0, "loss_rank_avg": 0.40819916129112244, "step": 705, "valid_targets_mean": 1925.6, "valid_targets_min": 697 }, { "epoch": 1.7794486215538847, "grad_norm": 0.8715670762508443, "learning_rate": 3.550061034236982e-05, "loss": 0.4544, "loss_nan_ranks": 0, "loss_rank_avg": 0.4552498459815979, "step": 710, "valid_targets_mean": 1135.5, "valid_targets_min": 764 }, { "epoch": 1.7919799498746867, "grad_norm": 0.6732436597197207, "learning_rate": 3.540803339218187e-05, "loss": 0.4169, "loss_nan_ranks": 0, "loss_rank_avg": 0.42018571496009827, "step": 715, "valid_targets_mean": 1718.8, "valid_targets_min": 833 }, { "epoch": 1.8045112781954886, "grad_norm": 0.6263088871441661, "learning_rate": 3.531463704636577e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.4200010895729065, "step": 720, "valid_targets_mean": 2012.9, "valid_targets_min": 767 }, { "epoch": 1.8170426065162908, "grad_norm": 0.8384719503511425, "learning_rate": 3.5220426271717426e-05, "loss": 0.4509, "loss_nan_ranks": 0, "loss_rank_avg": 0.4631063938140869, "step": 725, "valid_targets_mean": 1250.4, "valid_targets_min": 679 }, { "epoch": 1.8295739348370927, "grad_norm": 0.6843860284326129, "learning_rate": 3.512540607834391e-05, "loss": 0.4309, "loss_nan_ranks": 0, "loss_rank_avg": 0.4209910035133362, "step": 730, "valid_targets_mean": 1574.2, "valid_targets_min": 487 }, { "epoch": 1.8421052631578947, "grad_norm": 0.7167917125977807, "learning_rate": 3.5029581519396973e-05, "loss": 0.4364, "loss_nan_ranks": 0, "loss_rank_avg": 0.45028942823410034, "step": 735, "valid_targets_mean": 1659.6, "valid_targets_min": 847 }, { "epoch": 1.8546365914786969, "grad_norm": 0.7314433156494289, "learning_rate": 3.4932957690804356e-05, "loss": 0.437, "loss_nan_ranks": 0, "loss_rank_avg": 0.4612088203430176, "step": 740, "valid_targets_mean": 1709.0, "valid_targets_min": 657 }, { "epoch": 1.8671679197994986, "grad_norm": 0.6529714800972096, "learning_rate": 3.483553973099876e-05, "loss": 0.4396, "loss_nan_ranks": 0, "loss_rank_avg": 0.42435121536254883, "step": 745, "valid_targets_mean": 1877.4, "valid_targets_min": 907 }, { "epoch": 1.8796992481203008, "grad_norm": 0.8085177111679603, "learning_rate": 3.473733282064461e-05, "loss": 0.4494, "loss_nan_ranks": 0, "loss_rank_avg": 0.41868555545806885, "step": 750, "valid_targets_mean": 1269.4, "valid_targets_min": 695 }, { "epoch": 1.8922305764411027, "grad_norm": 0.6608685301684564, "learning_rate": 3.463834218236253e-05, "loss": 0.4278, "loss_nan_ranks": 0, "loss_rank_avg": 0.41869646310806274, "step": 755, "valid_targets_mean": 1862.9, "valid_targets_min": 556 }, { "epoch": 1.9047619047619047, "grad_norm": 0.7928065426796191, "learning_rate": 3.453857308045161e-05, "loss": 0.4357, "loss_nan_ranks": 0, "loss_rank_avg": 0.45367372035980225, "step": 760, "valid_targets_mean": 1294.9, "valid_targets_min": 637 }, { "epoch": 1.9172932330827068, "grad_norm": 0.6788143401778731, "learning_rate": 3.443803082060946e-05, "loss": 0.4262, "loss_nan_ranks": 0, "loss_rank_avg": 0.3976025879383087, "step": 765, "valid_targets_mean": 1658.4, "valid_targets_min": 820 }, { "epoch": 1.9298245614035088, "grad_norm": 0.6538240412408628, "learning_rate": 3.433672074965006e-05, "loss": 0.4287, "loss_nan_ranks": 0, "loss_rank_avg": 0.39592060446739197, "step": 770, "valid_targets_mean": 1820.4, "valid_targets_min": 779 }, { "epoch": 1.9423558897243107, "grad_norm": 0.8563678164989981, "learning_rate": 3.423464825521937e-05, "loss": 0.4369, "loss_nan_ranks": 0, "loss_rank_avg": 0.45248812437057495, "step": 775, "valid_targets_mean": 1235.8, "valid_targets_min": 635 }, { "epoch": 1.954887218045113, "grad_norm": 0.7053812027905907, "learning_rate": 3.41318187655089e-05, "loss": 0.4271, "loss_nan_ranks": 0, "loss_rank_avg": 0.41953131556510925, "step": 780, "valid_targets_mean": 1648.8, "valid_targets_min": 641 }, { "epoch": 1.9674185463659146, "grad_norm": 0.6560343734177334, "learning_rate": 3.4028237748966964e-05, "loss": 0.4302, "loss_nan_ranks": 0, "loss_rank_avg": 0.4066466689109802, "step": 785, "valid_targets_mean": 1783.5, "valid_targets_min": 723 }, { "epoch": 1.9799498746867168, "grad_norm": 0.71779268417739, "learning_rate": 3.3923910714007896e-05, "loss": 0.4229, "loss_nan_ranks": 0, "loss_rank_avg": 0.41889142990112305, "step": 790, "valid_targets_mean": 1806.1, "valid_targets_min": 618 }, { "epoch": 1.9924812030075187, "grad_norm": 0.7925201428753242, "learning_rate": 3.381884320871912e-05, "loss": 0.446, "loss_nan_ranks": 0, "loss_rank_avg": 0.44433343410491943, "step": 795, "valid_targets_mean": 1272.4, "valid_targets_min": 660 }, { "epoch": 2.0050125313283207, "grad_norm": 0.6808799015569391, "learning_rate": 3.3713040820566126e-05, "loss": 0.4209, "loss_nan_ranks": 0, "loss_rank_avg": 0.3874009847640991, "step": 800, "valid_targets_mean": 1936.3, "valid_targets_min": 614 }, { "epoch": 2.017543859649123, "grad_norm": 0.7283657760207235, "learning_rate": 3.360650917609526e-05, "loss": 0.4173, "loss_nan_ranks": 0, "loss_rank_avg": 0.4246293008327484, "step": 805, "valid_targets_mean": 1771.4, "valid_targets_min": 675 }, { "epoch": 2.030075187969925, "grad_norm": 0.7242169851088842, "learning_rate": 3.349925394063458e-05, "loss": 0.3962, "loss_nan_ranks": 0, "loss_rank_avg": 0.3861202597618103, "step": 810, "valid_targets_mean": 1551.2, "valid_targets_min": 759 }, { "epoch": 2.0426065162907268, "grad_norm": 0.7303228725795594, "learning_rate": 3.339128081799253e-05, "loss": 0.3989, "loss_nan_ranks": 0, "loss_rank_avg": 0.3861742913722992, "step": 815, "valid_targets_mean": 1691.4, "valid_targets_min": 606 }, { "epoch": 2.055137844611529, "grad_norm": 0.6855353676041959, "learning_rate": 3.3282595550154626e-05, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.37186822295188904, "step": 820, "valid_targets_mean": 1639.1, "valid_targets_min": 653 }, { "epoch": 2.0676691729323307, "grad_norm": 0.692410129531128, "learning_rate": 3.317320391697811e-05, "loss": 0.4083, "loss_nan_ranks": 0, "loss_rank_avg": 0.38654983043670654, "step": 825, "valid_targets_mean": 1639.6, "valid_targets_min": 649 }, { "epoch": 2.080200501253133, "grad_norm": 0.7399489397821283, "learning_rate": 3.306311173588457e-05, "loss": 0.414, "loss_nan_ranks": 0, "loss_rank_avg": 0.3871074318885803, "step": 830, "valid_targets_mean": 1683.2, "valid_targets_min": 732 }, { "epoch": 2.092731829573935, "grad_norm": 0.6913493221032149, "learning_rate": 3.2952324861550555e-05, "loss": 0.4165, "loss_nan_ranks": 0, "loss_rank_avg": 0.3757636249065399, "step": 835, "valid_targets_mean": 1750.5, "valid_targets_min": 761 }, { "epoch": 2.1052631578947367, "grad_norm": 0.7411241018642346, "learning_rate": 3.284084918559625e-05, "loss": 0.3957, "loss_nan_ranks": 0, "loss_rank_avg": 0.3886348605155945, "step": 840, "valid_targets_mean": 1481.9, "valid_targets_min": 626 }, { "epoch": 2.117794486215539, "grad_norm": 0.7262456607700576, "learning_rate": 3.2728690636272146e-05, "loss": 0.4199, "loss_nan_ranks": 0, "loss_rank_avg": 0.39896824955940247, "step": 845, "valid_targets_mean": 1553.8, "valid_targets_min": 597 }, { "epoch": 2.1303258145363406, "grad_norm": 0.7934064754386024, "learning_rate": 3.261585517814378e-05, "loss": 0.4075, "loss_nan_ranks": 0, "loss_rank_avg": 0.37877142429351807, "step": 850, "valid_targets_mean": 1613.4, "valid_targets_min": 634 }, { "epoch": 2.142857142857143, "grad_norm": 0.8938513100589157, "learning_rate": 3.250234881177453e-05, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.41370391845703125, "step": 855, "valid_targets_mean": 1570.8, "valid_targets_min": 787 }, { "epoch": 2.155388471177945, "grad_norm": 0.8636183813830631, "learning_rate": 3.238817757340655e-05, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.4297131896018982, "step": 860, "valid_targets_mean": 1242.0, "valid_targets_min": 647 }, { "epoch": 2.1679197994987467, "grad_norm": 0.8040657962250063, "learning_rate": 3.2273347534639705e-05, "loss": 0.4093, "loss_nan_ranks": 0, "loss_rank_avg": 0.3994751572608948, "step": 865, "valid_targets_mean": 1337.7, "valid_targets_min": 604 }, { "epoch": 2.180451127819549, "grad_norm": 0.8949819852190363, "learning_rate": 3.215786480210872e-05, "loss": 0.4109, "loss_nan_ranks": 0, "loss_rank_avg": 0.44259727001190186, "step": 870, "valid_targets_mean": 1190.2, "valid_targets_min": 612 }, { "epoch": 2.192982456140351, "grad_norm": 0.7843761916464653, "learning_rate": 3.204173551715841e-05, "loss": 0.4103, "loss_nan_ranks": 0, "loss_rank_avg": 0.40686869621276855, "step": 875, "valid_targets_mean": 1634.0, "valid_targets_min": 736 }, { "epoch": 2.2055137844611528, "grad_norm": 0.9181123085943382, "learning_rate": 3.1924965855517135e-05, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.4497694969177246, "step": 880, "valid_targets_mean": 1156.1, "valid_targets_min": 647 }, { "epoch": 2.218045112781955, "grad_norm": 0.9016745290827513, "learning_rate": 3.180756202696829e-05, "loss": 0.4237, "loss_nan_ranks": 0, "loss_rank_avg": 0.41279345750808716, "step": 885, "valid_targets_mean": 1226.4, "valid_targets_min": 641 }, { "epoch": 2.230576441102757, "grad_norm": 0.8164305088369335, "learning_rate": 3.1689530275020144e-05, "loss": 0.423, "loss_nan_ranks": 0, "loss_rank_avg": 0.4257521331310272, "step": 890, "valid_targets_mean": 1375.3, "valid_targets_min": 759 }, { "epoch": 2.243107769423559, "grad_norm": 0.7873273298467938, "learning_rate": 3.1570876876573784e-05, "loss": 0.4244, "loss_nan_ranks": 0, "loss_rank_avg": 0.4395800232887268, "step": 895, "valid_targets_mean": 1549.6, "valid_targets_min": 751 }, { "epoch": 2.255639097744361, "grad_norm": 0.7340510236447725, "learning_rate": 3.145160814158932e-05, "loss": 0.406, "loss_nan_ranks": 0, "loss_rank_avg": 0.4071933925151825, "step": 900, "valid_targets_mean": 1613.9, "valid_targets_min": 631 }, { "epoch": 2.2681704260651627, "grad_norm": 0.7520496021818598, "learning_rate": 3.13317304127503e-05, "loss": 0.4019, "loss_nan_ranks": 0, "loss_rank_avg": 0.3831161558628082, "step": 905, "valid_targets_mean": 1568.0, "valid_targets_min": 744 }, { "epoch": 2.280701754385965, "grad_norm": 0.7830050957892405, "learning_rate": 3.121125006512644e-05, "loss": 0.4211, "loss_nan_ranks": 0, "loss_rank_avg": 0.40112242102622986, "step": 910, "valid_targets_mean": 1376.7, "valid_targets_min": 599 }, { "epoch": 2.293233082706767, "grad_norm": 0.8046622817084864, "learning_rate": 3.109017350583455e-05, "loss": 0.4095, "loss_nan_ranks": 0, "loss_rank_avg": 0.4121268093585968, "step": 915, "valid_targets_mean": 1299.8, "valid_targets_min": 662 }, { "epoch": 2.305764411027569, "grad_norm": 0.7944019436211966, "learning_rate": 3.0968507173697895e-05, "loss": 0.398, "loss_nan_ranks": 0, "loss_rank_avg": 0.4244152009487152, "step": 920, "valid_targets_mean": 1441.0, "valid_targets_min": 549 }, { "epoch": 2.318295739348371, "grad_norm": 0.7834254798584847, "learning_rate": 3.0846257538903664e-05, "loss": 0.4027, "loss_nan_ranks": 0, "loss_rank_avg": 0.41384357213974, "step": 925, "valid_targets_mean": 1396.5, "valid_targets_min": 736 }, { "epoch": 2.3308270676691727, "grad_norm": 0.8554976494228926, "learning_rate": 3.072343110265898e-05, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.4069422483444214, "step": 930, "valid_targets_mean": 1266.4, "valid_targets_min": 751 }, { "epoch": 2.343358395989975, "grad_norm": 0.7346165996567373, "learning_rate": 3.060003439684512e-05, "loss": 0.3969, "loss_nan_ranks": 0, "loss_rank_avg": 0.41480642557144165, "step": 935, "valid_targets_mean": 1756.4, "valid_targets_min": 780 }, { "epoch": 2.355889724310777, "grad_norm": 0.7421530852305365, "learning_rate": 3.047607398367017e-05, "loss": 0.4009, "loss_nan_ranks": 0, "loss_rank_avg": 0.383364737033844, "step": 940, "valid_targets_mean": 1633.2, "valid_targets_min": 658 }, { "epoch": 2.3684210526315788, "grad_norm": 0.6801690596484246, "learning_rate": 3.0351556455320035e-05, "loss": 0.4018, "loss_nan_ranks": 0, "loss_rank_avg": 0.4127517640590668, "step": 945, "valid_targets_mean": 1938.9, "valid_targets_min": 582 }, { "epoch": 2.380952380952381, "grad_norm": 0.7725948108990202, "learning_rate": 3.0226488433607895e-05, "loss": 0.4051, "loss_nan_ranks": 0, "loss_rank_avg": 0.4012241065502167, "step": 950, "valid_targets_mean": 1393.2, "valid_targets_min": 580 }, { "epoch": 2.393483709273183, "grad_norm": 0.6915866301278406, "learning_rate": 3.0100876569622003e-05, "loss": 0.4127, "loss_nan_ranks": 0, "loss_rank_avg": 0.3731994032859802, "step": 955, "valid_targets_mean": 1624.6, "valid_targets_min": 616 }, { "epoch": 2.406015037593985, "grad_norm": 0.8576673007047129, "learning_rate": 2.9974727543372046e-05, "loss": 0.3954, "loss_nan_ranks": 0, "loss_rank_avg": 0.4130919575691223, "step": 960, "valid_targets_mean": 1161.0, "valid_targets_min": 615 }, { "epoch": 2.418546365914787, "grad_norm": 0.8074845008674979, "learning_rate": 2.984804806343387e-05, "loss": 0.4344, "loss_nan_ranks": 0, "loss_rank_avg": 0.4470018446445465, "step": 965, "valid_targets_mean": 1505.1, "valid_targets_min": 750 }, { "epoch": 2.431077694235589, "grad_norm": 0.8067633327978935, "learning_rate": 2.9720844866592726e-05, "loss": 0.4337, "loss_nan_ranks": 0, "loss_rank_avg": 0.41012510657310486, "step": 970, "valid_targets_mean": 1421.9, "valid_targets_min": 712 }, { "epoch": 2.443609022556391, "grad_norm": 0.6741517814058764, "learning_rate": 2.959312471748499e-05, "loss": 0.4035, "loss_nan_ranks": 0, "loss_rank_avg": 0.4045283794403076, "step": 975, "valid_targets_mean": 1926.6, "valid_targets_min": 792 }, { "epoch": 2.456140350877193, "grad_norm": 0.7515096216376789, "learning_rate": 2.946489440823846e-05, "loss": 0.4097, "loss_nan_ranks": 0, "loss_rank_avg": 0.4114806652069092, "step": 980, "valid_targets_mean": 1532.7, "valid_targets_min": 627 }, { "epoch": 2.468671679197995, "grad_norm": 0.9200860671355993, "learning_rate": 2.9336160758111126e-05, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.3898894488811493, "step": 985, "valid_targets_mean": 1654.8, "valid_targets_min": 803 }, { "epoch": 2.481203007518797, "grad_norm": 0.7828468543626956, "learning_rate": 2.9206930613128528e-05, "loss": 0.3933, "loss_nan_ranks": 0, "loss_rank_avg": 0.4030555486679077, "step": 990, "valid_targets_mean": 1597.6, "valid_targets_min": 614 }, { "epoch": 2.493734335839599, "grad_norm": 0.7537348447247004, "learning_rate": 2.9077210845719684e-05, "loss": 0.4043, "loss_nan_ranks": 0, "loss_rank_avg": 0.3929010331630707, "step": 995, "valid_targets_mean": 1445.5, "valid_targets_min": 740 }, { "epoch": 2.506265664160401, "grad_norm": 0.8806951653745704, "learning_rate": 2.894700835435162e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.4137513041496277, "step": 1000, "valid_targets_mean": 1148.5, "valid_targets_min": 594 }, { "epoch": 2.518796992481203, "grad_norm": 0.7768143066148521, "learning_rate": 2.8816330063162508e-05, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.4147866666316986, "step": 1005, "valid_targets_mean": 1484.4, "valid_targets_min": 569 }, { "epoch": 2.5313283208020048, "grad_norm": 0.7017072219607325, "learning_rate": 2.8685182921593458e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.3714948296546936, "step": 1010, "valid_targets_mean": 1581.1, "valid_targets_min": 739 }, { "epoch": 2.543859649122807, "grad_norm": 0.843310856599826, "learning_rate": 2.85535739040189e-05, "loss": 0.3918, "loss_nan_ranks": 0, "loss_rank_avg": 0.4192495346069336, "step": 1015, "valid_targets_mean": 1274.7, "valid_targets_min": 712 }, { "epoch": 2.556390977443609, "grad_norm": 0.6861456371527429, "learning_rate": 2.8421510009375767e-05, "loss": 0.4024, "loss_nan_ranks": 0, "loss_rank_avg": 0.3807116746902466, "step": 1020, "valid_targets_mean": 1922.1, "valid_targets_min": 606 }, { "epoch": 2.568922305764411, "grad_norm": 0.7797110243365486, "learning_rate": 2.8288998260791217e-05, "loss": 0.411, "loss_nan_ranks": 0, "loss_rank_avg": 0.42045170068740845, "step": 1025, "valid_targets_mean": 1480.0, "valid_targets_min": 738 }, { "epoch": 2.581453634085213, "grad_norm": 0.7493284725461535, "learning_rate": 2.8156045705209182e-05, "loss": 0.4207, "loss_nan_ranks": 0, "loss_rank_avg": 0.4190444350242615, "step": 1030, "valid_targets_mean": 1863.0, "valid_targets_min": 686 }, { "epoch": 2.593984962406015, "grad_norm": 0.9715778306128012, "learning_rate": 2.8022659413015616e-05, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.3925316035747528, "step": 1035, "valid_targets_mean": 1929.9, "valid_targets_min": 803 }, { "epoch": 2.606516290726817, "grad_norm": 0.8202300357445593, "learning_rate": 2.7888846477662474e-05, "loss": 0.4054, "loss_nan_ranks": 0, "loss_rank_avg": 0.435418039560318, "step": 1040, "valid_targets_mean": 1275.8, "valid_targets_min": 627 }, { "epoch": 2.619047619047619, "grad_norm": 0.8065052554671248, "learning_rate": 2.7754614015290512e-05, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.39296644926071167, "step": 1045, "valid_targets_mean": 1280.7, "valid_targets_min": 625 }, { "epoch": 2.6315789473684212, "grad_norm": 0.8191455525882082, "learning_rate": 2.7619969164350814e-05, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.42794761061668396, "step": 1050, "valid_targets_mean": 1448.4, "valid_targets_min": 633 }, { "epoch": 2.644110275689223, "grad_norm": 0.7770712433544891, "learning_rate": 2.7484919085225203e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.4223942756652832, "step": 1055, "valid_targets_mean": 1644.5, "valid_targets_min": 625 }, { "epoch": 2.656641604010025, "grad_norm": 0.6581892363783577, "learning_rate": 2.734947095984544e-05, "loss": 0.4125, "loss_nan_ranks": 0, "loss_rank_avg": 0.36657726764678955, "step": 1060, "valid_targets_mean": 1759.4, "valid_targets_min": 739 }, { "epoch": 2.6691729323308273, "grad_norm": 0.8646402657014103, "learning_rate": 2.7213631991311303e-05, "loss": 0.4111, "loss_nan_ranks": 0, "loss_rank_avg": 0.4207683801651001, "step": 1065, "valid_targets_mean": 1353.9, "valid_targets_min": 734 }, { "epoch": 2.681704260651629, "grad_norm": 0.7319122083607305, "learning_rate": 2.7077409403507503e-05, "loss": 0.414, "loss_nan_ranks": 0, "loss_rank_avg": 0.43628567457199097, "step": 1070, "valid_targets_mean": 1707.5, "valid_targets_min": 697 }, { "epoch": 2.694235588972431, "grad_norm": 0.8342526774577462, "learning_rate": 2.6940810440719545e-05, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.41509440541267395, "step": 1075, "valid_targets_mean": 1300.9, "valid_targets_min": 700 }, { "epoch": 2.706766917293233, "grad_norm": 0.7902962051936262, "learning_rate": 2.680384236724846e-05, "loss": 0.4186, "loss_nan_ranks": 0, "loss_rank_avg": 0.40989580750465393, "step": 1080, "valid_targets_mean": 1366.7, "valid_targets_min": 773 }, { "epoch": 2.719298245614035, "grad_norm": 0.8189188800470997, "learning_rate": 2.6666512467024506e-05, "loss": 0.4085, "loss_nan_ranks": 0, "loss_rank_avg": 0.40571969747543335, "step": 1085, "valid_targets_mean": 1528.2, "valid_targets_min": 678 }, { "epoch": 2.731829573934837, "grad_norm": 0.6678143387997657, "learning_rate": 2.6528828043219798e-05, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.35410448908805847, "step": 1090, "valid_targets_mean": 1824.2, "valid_targets_min": 755 }, { "epoch": 2.744360902255639, "grad_norm": 0.8132896008113664, "learning_rate": 2.6390796417859937e-05, "loss": 0.4163, "loss_nan_ranks": 0, "loss_rank_avg": 0.4377764165401459, "step": 1095, "valid_targets_mean": 1452.2, "valid_targets_min": 593 }, { "epoch": 2.756892230576441, "grad_norm": 0.8586131814893146, "learning_rate": 2.625242493143462e-05, "loss": 0.3996, "loss_nan_ranks": 0, "loss_rank_avg": 0.42037975788116455, "step": 1100, "valid_targets_mean": 1271.2, "valid_targets_min": 735 }, { "epoch": 2.769423558897243, "grad_norm": 0.8058156340524054, "learning_rate": 2.6113720942507277e-05, "loss": 0.4146, "loss_nan_ranks": 0, "loss_rank_avg": 0.38332855701446533, "step": 1105, "valid_targets_mean": 1270.2, "valid_targets_min": 771 }, { "epoch": 2.781954887218045, "grad_norm": 0.8219535969366415, "learning_rate": 2.5974691827323747e-05, "loss": 0.429, "loss_nan_ranks": 0, "loss_rank_avg": 0.44158488512039185, "step": 1110, "valid_targets_mean": 1437.2, "valid_targets_min": 730 }, { "epoch": 2.7944862155388472, "grad_norm": 0.7213100908994415, "learning_rate": 2.5835344979419992e-05, "loss": 0.3945, "loss_nan_ranks": 0, "loss_rank_avg": 0.4036768078804016, "step": 1115, "valid_targets_mean": 1581.8, "valid_targets_min": 723 }, { "epoch": 2.807017543859649, "grad_norm": 0.7160583991847018, "learning_rate": 2.569568780922895e-05, "loss": 0.3934, "loss_nan_ranks": 0, "loss_rank_avg": 0.3890629708766937, "step": 1120, "valid_targets_mean": 1857.3, "valid_targets_min": 736 }, { "epoch": 2.819548872180451, "grad_norm": 0.6030656035509802, "learning_rate": 2.5555727743686404e-05, "loss": 0.4076, "loss_nan_ranks": 0, "loss_rank_avg": 0.38233476877212524, "step": 1125, "valid_targets_mean": 2011.9, "valid_targets_min": 805 }, { "epoch": 2.8320802005012533, "grad_norm": 0.7989881611675778, "learning_rate": 2.541547222583606e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.4262428283691406, "step": 1130, "valid_targets_mean": 1484.9, "valid_targets_min": 637 }, { "epoch": 2.844611528822055, "grad_norm": 0.7670693348326154, "learning_rate": 2.5274928714433694e-05, "loss": 0.3995, "loss_nan_ranks": 0, "loss_rank_avg": 0.39012211561203003, "step": 1135, "valid_targets_mean": 1433.4, "valid_targets_min": 727 }, { "epoch": 2.857142857142857, "grad_norm": 0.7329434894141582, "learning_rate": 2.5134104683550514e-05, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.39894163608551025, "step": 1140, "valid_targets_mean": 1380.9, "valid_targets_min": 740 }, { "epoch": 2.8696741854636594, "grad_norm": 0.8338871518868882, "learning_rate": 2.4993007622175704e-05, "loss": 0.4149, "loss_nan_ranks": 0, "loss_rank_avg": 0.4379524290561676, "step": 1145, "valid_targets_mean": 1150.1, "valid_targets_min": 742 }, { "epoch": 2.882205513784461, "grad_norm": 0.8375405196939691, "learning_rate": 2.485164503381813e-05, "loss": 0.4183, "loss_nan_ranks": 0, "loss_rank_avg": 0.41192495822906494, "step": 1150, "valid_targets_mean": 1206.6, "valid_targets_min": 741 }, { "epoch": 2.8947368421052633, "grad_norm": 0.8512040546281613, "learning_rate": 2.471002443610732e-05, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.4261419475078583, "step": 1155, "valid_targets_mean": 1371.6, "valid_targets_min": 753 }, { "epoch": 2.907268170426065, "grad_norm": 0.6905558161225763, "learning_rate": 2.4568153360393692e-05, "loss": 0.4029, "loss_nan_ranks": 0, "loss_rank_avg": 0.399014413356781, "step": 1160, "valid_targets_mean": 1654.8, "valid_targets_min": 690 }, { "epoch": 2.919799498746867, "grad_norm": 0.7795729327578536, "learning_rate": 2.4426039351348016e-05, "loss": 0.4074, "loss_nan_ranks": 0, "loss_rank_avg": 0.43625450134277344, "step": 1165, "valid_targets_mean": 1567.2, "valid_targets_min": 847 }, { "epoch": 2.932330827067669, "grad_norm": 0.696371052775749, "learning_rate": 2.4283689966560205e-05, "loss": 0.3931, "loss_nan_ranks": 0, "loss_rank_avg": 0.37107014656066895, "step": 1170, "valid_targets_mean": 1794.1, "valid_targets_min": 497 }, { "epoch": 2.944862155388471, "grad_norm": 0.7090326982911932, "learning_rate": 2.414111277613739e-05, "loss": 0.4125, "loss_nan_ranks": 0, "loss_rank_avg": 0.4178668260574341, "step": 1175, "valid_targets_mean": 1603.3, "valid_targets_min": 580 }, { "epoch": 2.9573934837092732, "grad_norm": 0.7411555880347811, "learning_rate": 2.3998315362301367e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.39853695034980774, "step": 1180, "valid_targets_mean": 1641.4, "valid_targets_min": 728 }, { "epoch": 2.969924812030075, "grad_norm": 0.7157497676079337, "learning_rate": 2.3855305318985353e-05, "loss": 0.3983, "loss_nan_ranks": 0, "loss_rank_avg": 0.36533060669898987, "step": 1185, "valid_targets_mean": 1622.5, "valid_targets_min": 698 }, { "epoch": 2.982456140350877, "grad_norm": 0.8725579962324914, "learning_rate": 2.371209025143016e-05, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.43822264671325684, "step": 1190, "valid_targets_mean": 1269.9, "valid_targets_min": 735 }, { "epoch": 2.9949874686716793, "grad_norm": 0.7833600243534485, "learning_rate": 2.3568677775779716e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.42297977209091187, "step": 1195, "valid_targets_mean": 1463.1, "valid_targets_min": 726 }, { "epoch": 3.007518796992481, "grad_norm": 0.7524818799186773, "learning_rate": 2.34250755186761e-05, "loss": 0.3949, "loss_nan_ranks": 0, "loss_rank_avg": 0.38546401262283325, "step": 1200, "valid_targets_mean": 1481.1, "valid_targets_min": 427 }, { "epoch": 3.020050125313283, "grad_norm": 0.7269674684497573, "learning_rate": 2.3281291116853903e-05, "loss": 0.3829, "loss_nan_ranks": 0, "loss_rank_avg": 0.3603457808494568, "step": 1205, "valid_targets_mean": 1522.2, "valid_targets_min": 739 }, { "epoch": 3.0325814536340854, "grad_norm": 0.8551480190784867, "learning_rate": 2.3137332216734132e-05, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.39454448223114014, "step": 1210, "valid_targets_mean": 1317.9, "valid_targets_min": 770 }, { "epoch": 3.045112781954887, "grad_norm": 0.7469871028238138, "learning_rate": 2.2993206474017576e-05, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.37015241384506226, "step": 1215, "valid_targets_mean": 1690.9, "valid_targets_min": 641 }, { "epoch": 3.0576441102756893, "grad_norm": 0.7826410951147598, "learning_rate": 2.2848921553277674e-05, "loss": 0.3927, "loss_nan_ranks": 0, "loss_rank_avg": 0.3815900385379791, "step": 1220, "valid_targets_mean": 1497.1, "valid_targets_min": 559 }, { "epoch": 3.0701754385964914, "grad_norm": 0.7933176189991209, "learning_rate": 2.270448512755292e-05, "loss": 0.3832, "loss_nan_ranks": 0, "loss_rank_avg": 0.38196152448654175, "step": 1225, "valid_targets_mean": 1607.9, "valid_targets_min": 736 }, { "epoch": 3.082706766917293, "grad_norm": 0.790107574228091, "learning_rate": 2.2559904877938803e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.3487051725387573, "step": 1230, "valid_targets_mean": 1450.9, "valid_targets_min": 790 }, { "epoch": 3.0952380952380953, "grad_norm": 0.8142328651019091, "learning_rate": 2.241518849317933e-05, "loss": 0.3952, "loss_nan_ranks": 0, "loss_rank_avg": 0.39621999859809875, "step": 1235, "valid_targets_mean": 1305.6, "valid_targets_min": 644 }, { "epoch": 3.107769423558897, "grad_norm": 0.8379046959783162, "learning_rate": 2.227034366925814e-05, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.4048861265182495, "step": 1240, "valid_targets_mean": 1384.4, "valid_targets_min": 671 }, { "epoch": 3.1203007518796992, "grad_norm": 0.6842829184061702, "learning_rate": 2.2125378108989257e-05, "loss": 0.3814, "loss_nan_ranks": 0, "loss_rank_avg": 0.3356945514678955, "step": 1245, "valid_targets_mean": 1627.7, "valid_targets_min": 674 }, { "epoch": 3.1328320802005014, "grad_norm": 0.8150081084171071, "learning_rate": 2.198029952160742e-05, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.37989962100982666, "step": 1250, "valid_targets_mean": 1446.6, "valid_targets_min": 730 }, { "epoch": 3.145363408521303, "grad_norm": 0.9065745245822664, "learning_rate": 2.1835115622358113e-05, "loss": 0.378, "loss_nan_ranks": 0, "loss_rank_avg": 0.4172065258026123, "step": 1255, "valid_targets_mean": 1195.8, "valid_targets_min": 753 }, { "epoch": 3.1578947368421053, "grad_norm": 0.797667764442672, "learning_rate": 2.16898341320873e-05, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.40342891216278076, "step": 1260, "valid_targets_mean": 1506.2, "valid_targets_min": 731 }, { "epoch": 3.170426065162907, "grad_norm": 0.82920979257728, "learning_rate": 2.154446277683081e-05, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.3811074495315552, "step": 1265, "valid_targets_mean": 1842.1, "valid_targets_min": 785 }, { "epoch": 3.182957393483709, "grad_norm": 0.8609643118461479, "learning_rate": 2.1399009287403464e-05, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.39563995599746704, "step": 1270, "valid_targets_mean": 1280.9, "valid_targets_min": 648 }, { "epoch": 3.1954887218045114, "grad_norm": 0.8575392033562702, "learning_rate": 2.1253481398987965e-05, "loss": 0.4002, "loss_nan_ranks": 0, "loss_rank_avg": 0.3950216770172119, "step": 1275, "valid_targets_mean": 1358.1, "valid_targets_min": 647 }, { "epoch": 3.208020050125313, "grad_norm": 0.9108739991923912, "learning_rate": 2.1107886850723527e-05, "loss": 0.3846, "loss_nan_ranks": 0, "loss_rank_avg": 0.4000109136104584, "step": 1280, "valid_targets_mean": 1106.9, "valid_targets_min": 641 }, { "epoch": 3.2205513784461153, "grad_norm": 0.7468400623645985, "learning_rate": 2.096223338529433e-05, "loss": 0.3853, "loss_nan_ranks": 0, "loss_rank_avg": 0.3904936909675598, "step": 1285, "valid_targets_mean": 1848.0, "valid_targets_min": 591 }, { "epoch": 3.2330827067669174, "grad_norm": 0.8186011370453243, "learning_rate": 2.0816528748517753e-05, "loss": 0.361, "loss_nan_ranks": 0, "loss_rank_avg": 0.36554673314094543, "step": 1290, "valid_targets_mean": 1278.4, "valid_targets_min": 668 }, { "epoch": 3.245614035087719, "grad_norm": 0.8151988405085241, "learning_rate": 2.067078068893245e-05, "loss": 0.4028, "loss_nan_ranks": 0, "loss_rank_avg": 0.39774322509765625, "step": 1295, "valid_targets_mean": 1529.9, "valid_targets_min": 658 }, { "epoch": 3.2581453634085213, "grad_norm": 0.8281756678211144, "learning_rate": 2.0524996957386297e-05, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.3898346722126007, "step": 1300, "valid_targets_mean": 1593.6, "valid_targets_min": 805 }, { "epoch": 3.2706766917293235, "grad_norm": 0.9193624245811687, "learning_rate": 2.037918530662419e-05, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.3889794945716858, "step": 1305, "valid_targets_mean": 1275.1, "valid_targets_min": 727 }, { "epoch": 3.2832080200501252, "grad_norm": 0.7060451326734182, "learning_rate": 2.0233353490875773e-05, "loss": 0.3758, "loss_nan_ranks": 0, "loss_rank_avg": 0.36215609312057495, "step": 1310, "valid_targets_mean": 1662.6, "valid_targets_min": 906 }, { "epoch": 3.2957393483709274, "grad_norm": 0.855976457831653, "learning_rate": 2.0087509265443038e-05, "loss": 0.3621, "loss_nan_ranks": 0, "loss_rank_avg": 0.3756676912307739, "step": 1315, "valid_targets_mean": 1367.2, "valid_targets_min": 610 }, { "epoch": 3.308270676691729, "grad_norm": 0.6704601020026827, "learning_rate": 1.9941660386287946e-05, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.35007140040397644, "step": 1320, "valid_targets_mean": 1839.5, "valid_targets_min": 863 }, { "epoch": 3.3208020050125313, "grad_norm": 0.8563097392456597, "learning_rate": 1.9795814609619925e-05, "loss": 0.3717, "loss_nan_ranks": 0, "loss_rank_avg": 0.3476110100746155, "step": 1325, "valid_targets_mean": 1556.3, "valid_targets_min": 638 }, { "epoch": 3.3333333333333335, "grad_norm": 0.7150924024477262, "learning_rate": 1.9649979691483426e-05, "loss": 0.391, "loss_nan_ranks": 0, "loss_rank_avg": 0.35187599062919617, "step": 1330, "valid_targets_mean": 1596.5, "valid_targets_min": 684 }, { "epoch": 3.345864661654135, "grad_norm": 0.874681446014307, "learning_rate": 1.950416338734543e-05, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.38704127073287964, "step": 1335, "valid_targets_mean": 1349.2, "valid_targets_min": 634 }, { "epoch": 3.3583959899749374, "grad_norm": 0.7127005137441507, "learning_rate": 1.9358373451683047e-05, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.3557561933994293, "step": 1340, "valid_targets_mean": 1700.8, "valid_targets_min": 772 }, { "epoch": 3.370927318295739, "grad_norm": 0.7447684168527092, "learning_rate": 1.9212617637571104e-05, "loss": 0.3857, "loss_nan_ranks": 0, "loss_rank_avg": 0.3649003505706787, "step": 1345, "valid_targets_mean": 1484.9, "valid_targets_min": 720 }, { "epoch": 3.3834586466165413, "grad_norm": 0.7347851437358622, "learning_rate": 1.906690369626986e-05, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.3531128764152527, "step": 1350, "valid_targets_mean": 2112.2, "valid_targets_min": 760 }, { "epoch": 3.3959899749373434, "grad_norm": 0.8064613290760294, "learning_rate": 1.8921239376812782e-05, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.39800935983657837, "step": 1355, "valid_targets_mean": 1409.1, "valid_targets_min": 547 }, { "epoch": 3.408521303258145, "grad_norm": 0.8320563236978611, "learning_rate": 1.8775632425594467e-05, "loss": 0.3764, "loss_nan_ranks": 0, "loss_rank_avg": 0.3904891610145569, "step": 1360, "valid_targets_mean": 1439.0, "valid_targets_min": 582 }, { "epoch": 3.4210526315789473, "grad_norm": 0.7726671817431195, "learning_rate": 1.8630090585958676e-05, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.35375556349754333, "step": 1365, "valid_targets_mean": 1437.6, "valid_targets_min": 603 }, { "epoch": 3.4335839598997495, "grad_norm": 0.8558402291328209, "learning_rate": 1.8484621597786547e-05, "loss": 0.3817, "loss_nan_ranks": 0, "loss_rank_avg": 0.38245689868927, "step": 1370, "valid_targets_mean": 1224.4, "valid_targets_min": 649 }, { "epoch": 3.4461152882205512, "grad_norm": 0.5757371027540783, "learning_rate": 1.8339233197085006e-05, "loss": 0.3628, "loss_nan_ranks": 0, "loss_rank_avg": 0.33190372586250305, "step": 1375, "valid_targets_mean": 2415.6, "valid_targets_min": 710 }, { "epoch": 3.4586466165413534, "grad_norm": 0.825713016108861, "learning_rate": 1.8193933115575347e-05, "loss": 0.378, "loss_nan_ranks": 0, "loss_rank_avg": 0.3584468364715576, "step": 1380, "valid_targets_mean": 1350.2, "valid_targets_min": 633 }, { "epoch": 3.4711779448621556, "grad_norm": 0.8287859805915134, "learning_rate": 1.8048729080282076e-05, "loss": 0.3875, "loss_nan_ranks": 0, "loss_rank_avg": 0.3999900221824646, "step": 1385, "valid_targets_mean": 1409.1, "valid_targets_min": 614 }, { "epoch": 3.4837092731829573, "grad_norm": 0.8158738188623406, "learning_rate": 1.7903628813121976e-05, "loss": 0.3814, "loss_nan_ranks": 0, "loss_rank_avg": 0.3847970962524414, "step": 1390, "valid_targets_mean": 1375.8, "valid_targets_min": 637 }, { "epoch": 3.4962406015037595, "grad_norm": 0.9806141114108778, "learning_rate": 1.775864003049347e-05, "loss": 0.3935, "loss_nan_ranks": 0, "loss_rank_avg": 0.43756890296936035, "step": 1395, "valid_targets_mean": 1060.5, "valid_targets_min": 662 }, { "epoch": 3.5087719298245617, "grad_norm": 0.9388820887180466, "learning_rate": 1.761377044286626e-05, "loss": 0.3796, "loss_nan_ranks": 0, "loss_rank_avg": 0.4040834307670593, "step": 1400, "valid_targets_mean": 1213.0, "valid_targets_min": 647 }, { "epoch": 3.5213032581453634, "grad_norm": 0.7535144988237941, "learning_rate": 1.746902775437129e-05, "loss": 0.3795, "loss_nan_ranks": 0, "loss_rank_avg": 0.3977915048599243, "step": 1405, "valid_targets_mean": 1685.8, "valid_targets_min": 750 }, { "epoch": 3.5338345864661656, "grad_norm": 0.8469541672098552, "learning_rate": 1.7324419662391022e-05, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.41281846165657043, "step": 1410, "valid_targets_mean": 1293.9, "valid_targets_min": 808 }, { "epoch": 3.5463659147869673, "grad_norm": 0.8324398561287292, "learning_rate": 1.717995385715012e-05, "loss": 0.3903, "loss_nan_ranks": 0, "loss_rank_avg": 0.4070686101913452, "step": 1415, "valid_targets_mean": 1355.4, "valid_targets_min": 678 }, { "epoch": 3.5588972431077694, "grad_norm": 0.7587541092559444, "learning_rate": 1.7035638021306467e-05, "loss": 0.3842, "loss_nan_ranks": 0, "loss_rank_avg": 0.3555733561515808, "step": 1420, "valid_targets_mean": 1363.9, "valid_targets_min": 732 }, { "epoch": 3.571428571428571, "grad_norm": 0.7000504384554054, "learning_rate": 1.6891479829542613e-05, "loss": 0.3612, "loss_nan_ranks": 0, "loss_rank_avg": 0.3316074013710022, "step": 1425, "valid_targets_mean": 1686.2, "valid_targets_min": 618 }, { "epoch": 3.5839598997493733, "grad_norm": 0.75752037224634, "learning_rate": 1.674748694815763e-05, "loss": 0.3795, "loss_nan_ranks": 0, "loss_rank_avg": 0.3835802674293518, "step": 1430, "valid_targets_mean": 1537.1, "valid_targets_min": 642 }, { "epoch": 3.5964912280701755, "grad_norm": 0.8010015007994762, "learning_rate": 1.6603667034659427e-05, "loss": 0.3928, "loss_nan_ranks": 0, "loss_rank_avg": 0.411435067653656, "step": 1435, "valid_targets_mean": 1514.9, "valid_targets_min": 728 }, { "epoch": 3.6090225563909772, "grad_norm": 0.8427953151697828, "learning_rate": 1.6460027737357507e-05, "loss": 0.3716, "loss_nan_ranks": 0, "loss_rank_avg": 0.40870070457458496, "step": 1440, "valid_targets_mean": 1463.4, "valid_targets_min": 606 }, { "epoch": 3.6215538847117794, "grad_norm": 0.7787782369841209, "learning_rate": 1.631657669495626e-05, "loss": 0.3733, "loss_nan_ranks": 0, "loss_rank_avg": 0.39166802167892456, "step": 1445, "valid_targets_mean": 1467.4, "valid_targets_min": 612 }, { "epoch": 3.6340852130325816, "grad_norm": 0.6852579078522438, "learning_rate": 1.617332153614872e-05, "loss": 0.3887, "loss_nan_ranks": 0, "loss_rank_avg": 0.3678228557109833, "step": 1450, "valid_targets_mean": 1836.8, "valid_targets_min": 696 }, { "epoch": 3.6466165413533833, "grad_norm": 0.7780223246726713, "learning_rate": 1.6030269879210882e-05, "loss": 0.386, "loss_nan_ranks": 0, "loss_rank_avg": 0.40032103657722473, "step": 1455, "valid_targets_mean": 1652.6, "valid_targets_min": 659 }, { "epoch": 3.6591478696741855, "grad_norm": 0.774087518297068, "learning_rate": 1.588742933159654e-05, "loss": 0.383, "loss_nan_ranks": 0, "loss_rank_avg": 0.40035444498062134, "step": 1460, "valid_targets_mean": 1652.8, "valid_targets_min": 812 }, { "epoch": 3.6716791979949877, "grad_norm": 0.8202131392244767, "learning_rate": 1.5744807489532766e-05, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.3781554400920868, "step": 1465, "valid_targets_mean": 1362.3, "valid_targets_min": 731 }, { "epoch": 3.6842105263157894, "grad_norm": 0.6274517417710915, "learning_rate": 1.5602411937615913e-05, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.3454907238483429, "step": 1470, "valid_targets_mean": 2221.0, "valid_targets_min": 735 }, { "epoch": 3.6967418546365916, "grad_norm": 0.7412975036909865, "learning_rate": 1.5460250248408273e-05, "loss": 0.3668, "loss_nan_ranks": 0, "loss_rank_avg": 0.3802959620952606, "step": 1475, "valid_targets_mean": 1712.1, "valid_targets_min": 612 }, { "epoch": 3.7092731829573937, "grad_norm": 0.7855111216631802, "learning_rate": 1.5318329982035377e-05, "loss": 0.3818, "loss_nan_ranks": 0, "loss_rank_avg": 0.3868759870529175, "step": 1480, "valid_targets_mean": 1714.4, "valid_targets_min": 700 }, { "epoch": 3.7218045112781954, "grad_norm": 0.736710536856686, "learning_rate": 1.5176658685783947e-05, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.3668166399002075, "step": 1485, "valid_targets_mean": 1660.8, "valid_targets_min": 446 }, { "epoch": 3.7343358395989976, "grad_norm": 0.8206766311128828, "learning_rate": 1.5035243893700535e-05, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.37208032608032227, "step": 1490, "valid_targets_mean": 1317.5, "valid_targets_min": 803 }, { "epoch": 3.7468671679197993, "grad_norm": 0.7536710064360856, "learning_rate": 1.489409312619085e-05, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.4111523926258087, "step": 1495, "valid_targets_mean": 1782.4, "valid_targets_min": 743 }, { "epoch": 3.7593984962406015, "grad_norm": 0.8364316599116858, "learning_rate": 1.4753213889619841e-05, "loss": 0.3964, "loss_nan_ranks": 0, "loss_rank_avg": 0.37741121649742126, "step": 1500, "valid_targets_mean": 1405.8, "valid_targets_min": 689 }, { "epoch": 3.7719298245614032, "grad_norm": 0.7382628078568813, "learning_rate": 1.4612613675912512e-05, "loss": 0.3883, "loss_nan_ranks": 0, "loss_rank_avg": 0.3787643015384674, "step": 1505, "valid_targets_mean": 1712.3, "valid_targets_min": 823 }, { "epoch": 3.7844611528822054, "grad_norm": 0.6910932736068953, "learning_rate": 1.4472299962155492e-05, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.3468532860279083, "step": 1510, "valid_targets_mean": 1946.0, "valid_targets_min": 828 }, { "epoch": 3.7969924812030076, "grad_norm": 0.8339264830378494, "learning_rate": 1.4332280210199398e-05, "loss": 0.3867, "loss_nan_ranks": 0, "loss_rank_avg": 0.3885452151298523, "step": 1515, "valid_targets_mean": 1297.6, "valid_targets_min": 668 }, { "epoch": 3.8095238095238093, "grad_norm": 0.872717499618267, "learning_rate": 1.4192561866262044e-05, "loss": 0.3799, "loss_nan_ranks": 0, "loss_rank_avg": 0.4075925946235657, "step": 1520, "valid_targets_mean": 1278.0, "valid_targets_min": 673 }, { "epoch": 3.8220551378446115, "grad_norm": 0.7816477296593115, "learning_rate": 1.4053152360532427e-05, "loss": 0.3719, "loss_nan_ranks": 0, "loss_rank_avg": 0.36789825558662415, "step": 1525, "valid_targets_mean": 1411.9, "valid_targets_min": 512 }, { "epoch": 3.8345864661654137, "grad_norm": 0.8184377473400201, "learning_rate": 1.3914059106775604e-05, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.39734745025634766, "step": 1530, "valid_targets_mean": 1389.1, "valid_targets_min": 672 }, { "epoch": 3.8471177944862154, "grad_norm": 0.7515969959020206, "learning_rate": 1.3775289501938416e-05, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.35744476318359375, "step": 1535, "valid_targets_mean": 1552.1, "valid_targets_min": 735 }, { "epoch": 3.8596491228070176, "grad_norm": 1.402932466951827, "learning_rate": 1.3636850925756142e-05, "loss": 0.3929, "loss_nan_ranks": 0, "loss_rank_avg": 0.398654580116272, "step": 1540, "valid_targets_mean": 1618.7, "valid_targets_min": 763 }, { "epoch": 3.8721804511278197, "grad_norm": 0.7241112856924586, "learning_rate": 1.3498750740360027e-05, "loss": 0.3917, "loss_nan_ranks": 0, "loss_rank_avg": 0.3559287190437317, "step": 1545, "valid_targets_mean": 1582.4, "valid_targets_min": 760 }, { "epoch": 3.8847117794486214, "grad_norm": 0.786140387459547, "learning_rate": 1.3360996289885776e-05, "loss": 0.3838, "loss_nan_ranks": 0, "loss_rank_avg": 0.4194572865962982, "step": 1550, "valid_targets_mean": 1645.4, "valid_targets_min": 606 }, { "epoch": 3.8972431077694236, "grad_norm": 0.746696006161874, "learning_rate": 1.322359490008299e-05, "loss": 0.3742, "loss_nan_ranks": 0, "loss_rank_avg": 0.35523080825805664, "step": 1555, "valid_targets_mean": 1703.7, "valid_targets_min": 646 }, { "epoch": 3.909774436090226, "grad_norm": 0.9846950145958907, "learning_rate": 1.3086553877925585e-05, "loss": 0.3714, "loss_nan_ranks": 0, "loss_rank_avg": 0.38349106907844543, "step": 1560, "valid_targets_mean": 1237.5, "valid_targets_min": 705 }, { "epoch": 3.9223057644110275, "grad_norm": 0.8985016618941177, "learning_rate": 1.2949880511223211e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.4175274968147278, "step": 1565, "valid_targets_mean": 1333.2, "valid_targets_min": 630 }, { "epoch": 3.9348370927318297, "grad_norm": 0.6911200210688109, "learning_rate": 1.2813582068233692e-05, "loss": 0.3831, "loss_nan_ranks": 0, "loss_rank_avg": 0.3444245159626007, "step": 1570, "valid_targets_mean": 1834.4, "valid_targets_min": 697 }, { "epoch": 3.9473684210526314, "grad_norm": 0.7919585555911091, "learning_rate": 1.267766579727648e-05, "loss": 0.3713, "loss_nan_ranks": 0, "loss_rank_avg": 0.389361172914505, "step": 1575, "valid_targets_mean": 1583.0, "valid_targets_min": 836 }, { "epoch": 3.9598997493734336, "grad_norm": 0.8157469102876466, "learning_rate": 1.2542138926347219e-05, "loss": 0.379, "loss_nan_ranks": 0, "loss_rank_avg": 0.3919796049594879, "step": 1580, "valid_targets_mean": 1358.5, "valid_targets_min": 680 }, { "epoch": 3.9724310776942353, "grad_norm": 0.7821339820816429, "learning_rate": 1.2407008662733354e-05, "loss": 0.3702, "loss_nan_ranks": 0, "loss_rank_avg": 0.3739144206047058, "step": 1585, "valid_targets_mean": 1608.3, "valid_targets_min": 637 }, { "epoch": 3.9849624060150375, "grad_norm": 0.868931464158729, "learning_rate": 1.227228219263084e-05, "loss": 0.3906, "loss_nan_ranks": 0, "loss_rank_avg": 0.38923490047454834, "step": 1590, "valid_targets_mean": 1265.2, "valid_targets_min": 569 }, { "epoch": 3.9974937343358397, "grad_norm": 0.8489429245422577, "learning_rate": 1.213796668076198e-05, "loss": 0.3873, "loss_nan_ranks": 0, "loss_rank_avg": 0.3736661970615387, "step": 1595, "valid_targets_mean": 1199.9, "valid_targets_min": 675 }, { "epoch": 4.010025062656641, "grad_norm": 0.8281878570880064, "learning_rate": 1.2004069269994428e-05, "loss": 0.3628, "loss_nan_ranks": 0, "loss_rank_avg": 0.3639451563358307, "step": 1600, "valid_targets_mean": 1318.1, "valid_targets_min": 770 }, { "epoch": 4.022556390977444, "grad_norm": 0.7895190631839228, "learning_rate": 1.1870597080961308e-05, "loss": 0.3557, "loss_nan_ranks": 0, "loss_rank_avg": 0.3259652853012085, "step": 1605, "valid_targets_mean": 1762.9, "valid_targets_min": 642 }, { "epoch": 4.035087719298246, "grad_norm": 0.802426459799358, "learning_rate": 1.173755721168256e-05, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.3714388310909271, "step": 1610, "valid_targets_mean": 1549.3, "valid_targets_min": 690 }, { "epoch": 4.0476190476190474, "grad_norm": 0.8143506173545653, "learning_rate": 1.160495673718744e-05, "loss": 0.3525, "loss_nan_ranks": 0, "loss_rank_avg": 0.35231781005859375, "step": 1615, "valid_targets_mean": 1594.6, "valid_targets_min": 712 }, { "epoch": 4.06015037593985, "grad_norm": 0.9111334577543276, "learning_rate": 1.1472802709138335e-05, "loss": 0.3706, "loss_nan_ranks": 0, "loss_rank_avg": 0.3951305150985718, "step": 1620, "valid_targets_mean": 1291.2, "valid_targets_min": 668 }, { "epoch": 4.072681704260652, "grad_norm": 0.8425868781426432, "learning_rate": 1.1341102155455656e-05, "loss": 0.3578, "loss_nan_ranks": 0, "loss_rank_avg": 0.39576777815818787, "step": 1625, "valid_targets_mean": 1467.6, "valid_targets_min": 749 }, { "epoch": 4.0852130325814535, "grad_norm": 0.7205858154186265, "learning_rate": 1.1209862079944198e-05, "loss": 0.3638, "loss_nan_ranks": 0, "loss_rank_avg": 0.34877854585647583, "step": 1630, "valid_targets_mean": 1918.1, "valid_targets_min": 615 }, { "epoch": 4.097744360902255, "grad_norm": 0.817307445046329, "learning_rate": 1.107908946192061e-05, "loss": 0.3663, "loss_nan_ranks": 0, "loss_rank_avg": 0.3607422411441803, "step": 1635, "valid_targets_mean": 1482.1, "valid_targets_min": 670 }, { "epoch": 4.110275689223058, "grad_norm": 0.6775906170894619, "learning_rate": 1.094879125584228e-05, "loss": 0.3534, "loss_nan_ranks": 0, "loss_rank_avg": 0.3350442051887512, "step": 1640, "valid_targets_mean": 2074.7, "valid_targets_min": 768 }, { "epoch": 4.12280701754386, "grad_norm": 0.803939930552659, "learning_rate": 1.081897439093746e-05, "loss": 0.3625, "loss_nan_ranks": 0, "loss_rank_avg": 0.35282477736473083, "step": 1645, "valid_targets_mean": 1837.1, "valid_targets_min": 623 }, { "epoch": 4.135338345864661, "grad_norm": 0.6719027490110735, "learning_rate": 1.06896457708368e-05, "loss": 0.3599, "loss_nan_ranks": 0, "loss_rank_avg": 0.30827414989471436, "step": 1650, "valid_targets_mean": 1859.6, "valid_targets_min": 770 }, { "epoch": 4.147869674185464, "grad_norm": 0.8890942790175431, "learning_rate": 1.0560812273206215e-05, "loss": 0.3603, "loss_nan_ranks": 0, "loss_rank_avg": 0.35805994272232056, "step": 1655, "valid_targets_mean": 1179.4, "valid_targets_min": 584 }, { "epoch": 4.160401002506266, "grad_norm": 0.9347570311165075, "learning_rate": 1.0432480749381093e-05, "loss": 0.3651, "loss_nan_ranks": 0, "loss_rank_avg": 0.38746821880340576, "step": 1660, "valid_targets_mean": 1310.9, "valid_targets_min": 547 }, { "epoch": 4.172932330827067, "grad_norm": 0.7661173205906099, "learning_rate": 1.0304658024002002e-05, "loss": 0.3572, "loss_nan_ranks": 0, "loss_rank_avg": 0.36425966024398804, "step": 1665, "valid_targets_mean": 1738.4, "valid_targets_min": 829 }, { "epoch": 4.18546365914787, "grad_norm": 0.8060874639241118, "learning_rate": 1.0177350894651708e-05, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.35910189151763916, "step": 1670, "valid_targets_mean": 1575.8, "valid_targets_min": 601 }, { "epoch": 4.197994987468672, "grad_norm": 0.8596879937640446, "learning_rate": 1.005056613149371e-05, "loss": 0.3636, "loss_nan_ranks": 0, "loss_rank_avg": 0.3838065564632416, "step": 1675, "valid_targets_mean": 1479.6, "valid_targets_min": 809 }, { "epoch": 4.2105263157894735, "grad_norm": 0.8946285345544369, "learning_rate": 9.924310476912196e-06, "loss": 0.3631, "loss_nan_ranks": 0, "loss_rank_avg": 0.37626439332962036, "step": 1680, "valid_targets_mean": 1487.1, "valid_targets_min": 497 }, { "epoch": 4.223057644110276, "grad_norm": 0.8825349942645182, "learning_rate": 9.798590645153473e-06, "loss": 0.381, "loss_nan_ranks": 0, "loss_rank_avg": 0.3923253118991852, "step": 1685, "valid_targets_mean": 1455.2, "valid_targets_min": 845 }, { "epoch": 4.235588972431078, "grad_norm": 0.93306267674495, "learning_rate": 9.673413321968934e-06, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.38771435618400574, "step": 1690, "valid_targets_mean": 1282.9, "valid_targets_min": 669 }, { "epoch": 4.2481203007518795, "grad_norm": 0.7779130043424681, "learning_rate": 9.548785164259461e-06, "loss": 0.3712, "loss_nan_ranks": 0, "loss_rank_avg": 0.3406516909599304, "step": 1695, "valid_targets_mean": 1632.7, "valid_targets_min": 732 }, { "epoch": 4.260651629072681, "grad_norm": 0.8720036417507798, "learning_rate": 9.424712799721472e-06, "loss": 0.35, "loss_nan_ranks": 0, "loss_rank_avg": 0.33976292610168457, "step": 1700, "valid_targets_mean": 1479.0, "valid_targets_min": 870 }, { "epoch": 4.273182957393484, "grad_norm": 0.7464452343967485, "learning_rate": 9.301202826494428e-06, "loss": 0.337, "loss_nan_ranks": 0, "loss_rank_avg": 0.3533982038497925, "step": 1705, "valid_targets_mean": 1797.8, "valid_targets_min": 731 }, { "epoch": 4.285714285714286, "grad_norm": 0.7435681280302439, "learning_rate": 9.178261812809934e-06, "loss": 0.3678, "loss_nan_ranks": 0, "loss_rank_avg": 0.3589174747467041, "step": 1710, "valid_targets_mean": 1821.1, "valid_targets_min": 726 }, { "epoch": 4.298245614035087, "grad_norm": 0.778889350577403, "learning_rate": 9.055896296642481e-06, "loss": 0.3518, "loss_nan_ranks": 0, "loss_rank_avg": 0.33703145384788513, "step": 1715, "valid_targets_mean": 1610.9, "valid_targets_min": 675 }, { "epoch": 4.31077694235589, "grad_norm": 0.8506286418125527, "learning_rate": 8.934112785361726e-06, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.34579887986183167, "step": 1720, "valid_targets_mean": 1332.0, "valid_targets_min": 688 }, { "epoch": 4.323308270676692, "grad_norm": 0.9291089907595611, "learning_rate": 8.81291775538644e-06, "loss": 0.3556, "loss_nan_ranks": 0, "loss_rank_avg": 0.38011759519577026, "step": 1725, "valid_targets_mean": 1284.8, "valid_targets_min": 630 }, { "epoch": 4.335839598997493, "grad_norm": 0.6690325258072783, "learning_rate": 8.692317651840101e-06, "loss": 0.3555, "loss_nan_ranks": 0, "loss_rank_avg": 0.32536908984184265, "step": 1730, "valid_targets_mean": 1997.7, "valid_targets_min": 786 }, { "epoch": 4.348370927318296, "grad_norm": 0.9074516079828592, "learning_rate": 8.572318888208135e-06, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.3661980628967285, "step": 1735, "valid_targets_mean": 1338.8, "valid_targets_min": 616 }, { "epoch": 4.360902255639098, "grad_norm": 0.8527642895752466, "learning_rate": 8.452927845996854e-06, "loss": 0.362, "loss_nan_ranks": 0, "loss_rank_avg": 0.37586355209350586, "step": 1740, "valid_targets_mean": 1442.7, "valid_targets_min": 720 }, { "epoch": 4.3734335839598995, "grad_norm": 0.8219078557591193, "learning_rate": 8.334150874394075e-06, "loss": 0.3684, "loss_nan_ranks": 0, "loss_rank_avg": 0.3594920039176941, "step": 1745, "valid_targets_mean": 1493.9, "valid_targets_min": 801 }, { "epoch": 4.385964912280702, "grad_norm": 0.8353204864275154, "learning_rate": 8.215994289931495e-06, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.35578057169914246, "step": 1750, "valid_targets_mean": 1527.2, "valid_targets_min": 541 }, { "epoch": 4.398496240601504, "grad_norm": 0.7956210259824289, "learning_rate": 8.098464376148765e-06, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.3480678200721741, "step": 1755, "valid_targets_mean": 1577.4, "valid_targets_min": 695 }, { "epoch": 4.4110275689223055, "grad_norm": 0.7360349691169649, "learning_rate": 7.981567383259332e-06, "loss": 0.3556, "loss_nan_ranks": 0, "loss_rank_avg": 0.3734129071235657, "step": 1760, "valid_targets_mean": 1928.4, "valid_targets_min": 955 }, { "epoch": 4.423558897243108, "grad_norm": 0.7466972215988599, "learning_rate": 7.865309527818063e-06, "loss": 0.3588, "loss_nan_ranks": 0, "loss_rank_avg": 0.36618131399154663, "step": 1765, "valid_targets_mean": 1647.6, "valid_targets_min": 687 }, { "epoch": 4.43609022556391, "grad_norm": 0.7337253381376839, "learning_rate": 7.749696992390639e-06, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.35264647006988525, "step": 1770, "valid_targets_mean": 1900.6, "valid_targets_min": 762 }, { "epoch": 4.448621553884712, "grad_norm": 0.8256948835805943, "learning_rate": 7.634735925224775e-06, "loss": 0.3592, "loss_nan_ranks": 0, "loss_rank_avg": 0.3599795401096344, "step": 1775, "valid_targets_mean": 1649.9, "valid_targets_min": 759 }, { "epoch": 4.461152882205514, "grad_norm": 0.7054242837878253, "learning_rate": 7.52043243992326e-06, "loss": 0.359, "loss_nan_ranks": 0, "loss_rank_avg": 0.3563918173313141, "step": 1780, "valid_targets_mean": 1888.7, "valid_targets_min": 648 }, { "epoch": 4.473684210526316, "grad_norm": 0.7727784292442016, "learning_rate": 7.406792615118817e-06, "loss": 0.3604, "loss_nan_ranks": 0, "loss_rank_avg": 0.34456440806388855, "step": 1785, "valid_targets_mean": 1713.8, "valid_targets_min": 859 }, { "epoch": 4.486215538847118, "grad_norm": 0.899362312646131, "learning_rate": 7.293822494150873e-06, "loss": 0.3626, "loss_nan_ranks": 0, "loss_rank_avg": 0.3481305241584778, "step": 1790, "valid_targets_mean": 1350.6, "valid_targets_min": 647 }, { "epoch": 4.498746867167919, "grad_norm": 1.0030617001629305, "learning_rate": 7.181528084744158e-06, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.3706076145172119, "step": 1795, "valid_targets_mean": 1212.2, "valid_targets_min": 813 }, { "epoch": 4.511278195488722, "grad_norm": 0.8685403109225178, "learning_rate": 7.069915358689217e-06, "loss": 0.3562, "loss_nan_ranks": 0, "loss_rank_avg": 0.35512638092041016, "step": 1800, "valid_targets_mean": 1506.2, "valid_targets_min": 674 }, { "epoch": 4.523809523809524, "grad_norm": 0.791391902227632, "learning_rate": 6.958990251524835e-06, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.3524198830127716, "step": 1805, "valid_targets_mean": 1708.7, "valid_targets_min": 694 }, { "epoch": 4.5363408521303255, "grad_norm": 0.850539866019582, "learning_rate": 6.848758662222383e-06, "loss": 0.3581, "loss_nan_ranks": 0, "loss_rank_avg": 0.3824390172958374, "step": 1810, "valid_targets_mean": 1577.4, "valid_targets_min": 696 }, { "epoch": 4.548872180451128, "grad_norm": 0.7417616786020198, "learning_rate": 6.7392264528721115e-06, "loss": 0.3572, "loss_nan_ranks": 0, "loss_rank_avg": 0.35435038805007935, "step": 1815, "valid_targets_mean": 1835.0, "valid_targets_min": 641 }, { "epoch": 4.56140350877193, "grad_norm": 0.7298033849882459, "learning_rate": 6.630399448371416e-06, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.3553701937198639, "step": 1820, "valid_targets_mean": 1834.9, "valid_targets_min": 762 }, { "epoch": 4.5739348370927315, "grad_norm": 0.7386646813073992, "learning_rate": 6.522283436115046e-06, "loss": 0.3656, "loss_nan_ranks": 0, "loss_rank_avg": 0.34987321496009827, "step": 1825, "valid_targets_mean": 1897.1, "valid_targets_min": 681 }, { "epoch": 4.586466165413534, "grad_norm": 0.9117333312529112, "learning_rate": 6.4148841656873675e-06, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.37728363275527954, "step": 1830, "valid_targets_mean": 1299.4, "valid_targets_min": 752 }, { "epoch": 4.598997493734336, "grad_norm": 0.8585767700709204, "learning_rate": 6.30820734855657e-06, "loss": 0.3608, "loss_nan_ranks": 0, "loss_rank_avg": 0.3458079695701599, "step": 1835, "valid_targets_mean": 1372.4, "valid_targets_min": 607 }, { "epoch": 4.611528822055138, "grad_norm": 0.8891984018071964, "learning_rate": 6.20225865777095e-06, "loss": 0.3552, "loss_nan_ranks": 0, "loss_rank_avg": 0.3820083439350128, "step": 1840, "valid_targets_mean": 1449.1, "valid_targets_min": 680 }, { "epoch": 4.62406015037594, "grad_norm": 0.8609457925615936, "learning_rate": 6.097043727657217e-06, "loss": 0.342, "loss_nan_ranks": 0, "loss_rank_avg": 0.3439421057701111, "step": 1845, "valid_targets_mean": 1339.6, "valid_targets_min": 673 }, { "epoch": 4.636591478696742, "grad_norm": 0.7270110211202997, "learning_rate": 5.992568153520857e-06, "loss": 0.348, "loss_nan_ranks": 0, "loss_rank_avg": 0.37063953280448914, "step": 1850, "valid_targets_mean": 2134.8, "valid_targets_min": 765 }, { "epoch": 4.649122807017544, "grad_norm": 1.0500038015228268, "learning_rate": 5.888837491348571e-06, "loss": 0.371, "loss_nan_ranks": 0, "loss_rank_avg": 0.3767479360103607, "step": 1855, "valid_targets_mean": 1030.8, "valid_targets_min": 573 }, { "epoch": 4.661654135338345, "grad_norm": 0.847254023798705, "learning_rate": 5.785857257512832e-06, "loss": 0.3629, "loss_nan_ranks": 0, "loss_rank_avg": 0.3626944422721863, "step": 1860, "valid_targets_mean": 1471.9, "valid_targets_min": 527 }, { "epoch": 4.674185463659148, "grad_norm": 0.9635967545009322, "learning_rate": 5.683632928478482e-06, "loss": 0.3618, "loss_nan_ranks": 0, "loss_rank_avg": 0.3814685344696045, "step": 1865, "valid_targets_mean": 1185.8, "valid_targets_min": 594 }, { "epoch": 4.68671679197995, "grad_norm": 0.9070771054657724, "learning_rate": 5.582169940511544e-06, "loss": 0.3541, "loss_nan_ranks": 0, "loss_rank_avg": 0.40833860635757446, "step": 1870, "valid_targets_mean": 1570.8, "valid_targets_min": 779 }, { "epoch": 4.6992481203007515, "grad_norm": 0.9110570136084019, "learning_rate": 5.481473689390091e-06, "loss": 0.3673, "loss_nan_ranks": 0, "loss_rank_avg": 0.3828704059123993, "step": 1875, "valid_targets_mean": 1317.1, "valid_targets_min": 593 }, { "epoch": 4.711779448621554, "grad_norm": 0.7552289878852424, "learning_rate": 5.381549530117316e-06, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.34786558151245117, "step": 1880, "valid_targets_mean": 1746.8, "valid_targets_min": 684 }, { "epoch": 4.724310776942356, "grad_norm": 0.7974717999699457, "learning_rate": 5.28240277663674e-06, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.3812224268913269, "step": 1885, "valid_targets_mean": 1763.7, "valid_targets_min": 749 }, { "epoch": 4.7368421052631575, "grad_norm": 0.8018221996566985, "learning_rate": 5.184038701549625e-06, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.34907105565071106, "step": 1890, "valid_targets_mean": 1535.9, "valid_targets_min": 611 }, { "epoch": 4.74937343358396, "grad_norm": 1.0612817335584024, "learning_rate": 5.0864625358345905e-06, "loss": 0.3479, "loss_nan_ranks": 0, "loss_rank_avg": 0.3929625451564789, "step": 1895, "valid_targets_mean": 1216.8, "valid_targets_min": 630 }, { "epoch": 4.761904761904762, "grad_norm": 0.8051135149931052, "learning_rate": 4.989679468569397e-06, "loss": 0.3629, "loss_nan_ranks": 0, "loss_rank_avg": 0.363945871591568, "step": 1900, "valid_targets_mean": 1474.8, "valid_targets_min": 620 }, { "epoch": 4.774436090225564, "grad_norm": 0.9182485899219046, "learning_rate": 4.8936946466550314e-06, "loss": 0.3677, "loss_nan_ranks": 0, "loss_rank_avg": 0.3759042024612427, "step": 1905, "valid_targets_mean": 1297.1, "valid_targets_min": 743 }, { "epoch": 4.786967418546366, "grad_norm": 0.7657705069544768, "learning_rate": 4.7985131745419745e-06, "loss": 0.357, "loss_nan_ranks": 0, "loss_rank_avg": 0.32689371705055237, "step": 1910, "valid_targets_mean": 1497.1, "valid_targets_min": 839 }, { "epoch": 4.799498746867168, "grad_norm": 0.8497598450408903, "learning_rate": 4.70414011395875e-06, "loss": 0.3765, "loss_nan_ranks": 0, "loss_rank_avg": 0.37474486231803894, "step": 1915, "valid_targets_mean": 1450.2, "valid_targets_min": 877 }, { "epoch": 4.81203007518797, "grad_norm": 0.7728175275071781, "learning_rate": 4.610580483642748e-06, "loss": 0.3415, "loss_nan_ranks": 0, "loss_rank_avg": 0.3417680263519287, "step": 1920, "valid_targets_mean": 1643.1, "valid_targets_min": 546 }, { "epoch": 4.824561403508772, "grad_norm": 0.9532676991958982, "learning_rate": 4.517839259073322e-06, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.36030858755111694, "step": 1925, "valid_targets_mean": 1275.1, "valid_targets_min": 808 }, { "epoch": 4.837092731829574, "grad_norm": 0.9345157903304709, "learning_rate": 4.425921372207201e-06, "loss": 0.3461, "loss_nan_ranks": 0, "loss_rank_avg": 0.3792099952697754, "step": 1930, "valid_targets_mean": 1430.7, "valid_targets_min": 624 }, { "epoch": 4.849624060150376, "grad_norm": 0.7539611030052837, "learning_rate": 4.334831711216209e-06, "loss": 0.3657, "loss_nan_ranks": 0, "loss_rank_avg": 0.34493887424468994, "step": 1935, "valid_targets_mean": 1758.8, "valid_targets_min": 710 }, { "epoch": 4.862155388471178, "grad_norm": 0.9159733465774322, "learning_rate": 4.244575120227303e-06, "loss": 0.359, "loss_nan_ranks": 0, "loss_rank_avg": 0.36277949810028076, "step": 1940, "valid_targets_mean": 1285.8, "valid_targets_min": 644 }, { "epoch": 4.87468671679198, "grad_norm": 0.9917940035130076, "learning_rate": 4.155156399064977e-06, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.38035470247268677, "step": 1945, "valid_targets_mean": 1096.4, "valid_targets_min": 714 }, { "epoch": 4.887218045112782, "grad_norm": 0.8015299461290246, "learning_rate": 4.066580302996004e-06, "loss": 0.3595, "loss_nan_ranks": 0, "loss_rank_avg": 0.3660500943660736, "step": 1950, "valid_targets_mean": 1779.2, "valid_targets_min": 633 }, { "epoch": 4.899749373433584, "grad_norm": 0.7730826390487343, "learning_rate": 3.978851542476547e-06, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.35395899415016174, "step": 1955, "valid_targets_mean": 1528.4, "valid_targets_min": 717 }, { "epoch": 4.912280701754386, "grad_norm": 0.8322096620940707, "learning_rate": 3.891974782901666e-06, "loss": 0.3748, "loss_nan_ranks": 0, "loss_rank_avg": 0.3833672106266022, "step": 1960, "valid_targets_mean": 1523.0, "valid_targets_min": 649 }, { "epoch": 4.924812030075188, "grad_norm": 0.8417191866894002, "learning_rate": 3.805954644357206e-06, "loss": 0.3562, "loss_nan_ranks": 0, "loss_rank_avg": 0.36396360397338867, "step": 1965, "valid_targets_mean": 1521.4, "valid_targets_min": 677 }, { "epoch": 4.93734335839599, "grad_norm": 0.7291255713954147, "learning_rate": 3.720795701374109e-06, "loss": 0.3693, "loss_nan_ranks": 0, "loss_rank_avg": 0.3539201617240906, "step": 1970, "valid_targets_mean": 1917.8, "valid_targets_min": 843 }, { "epoch": 4.949874686716792, "grad_norm": 0.765912521584318, "learning_rate": 3.636502482685125e-06, "loss": 0.359, "loss_nan_ranks": 0, "loss_rank_avg": 0.34452271461486816, "step": 1975, "valid_targets_mean": 1644.4, "valid_targets_min": 790 }, { "epoch": 4.962406015037594, "grad_norm": 0.9446530016204546, "learning_rate": 3.553079470984002e-06, "loss": 0.3639, "loss_nan_ranks": 0, "loss_rank_avg": 0.36807239055633545, "step": 1980, "valid_targets_mean": 1425.4, "valid_targets_min": 639 }, { "epoch": 4.974937343358396, "grad_norm": 0.865026454724945, "learning_rate": 3.4705311026870848e-06, "loss": 0.3639, "loss_nan_ranks": 0, "loss_rank_avg": 0.3669584393501282, "step": 1985, "valid_targets_mean": 1342.0, "valid_targets_min": 616 }, { "epoch": 4.987468671679198, "grad_norm": 0.685032837279059, "learning_rate": 3.3888617676973645e-06, "loss": 0.3487, "loss_nan_ranks": 0, "loss_rank_avg": 0.3004816174507141, "step": 1990, "valid_targets_mean": 1637.9, "valid_targets_min": 668 }, { "epoch": 5.0, "grad_norm": 0.7752928593373746, "learning_rate": 3.3080758091710676e-06, "loss": 0.3592, "loss_nan_ranks": 0, "loss_rank_avg": 0.32106631994247437, "step": 1995, "valid_targets_mean": 1756.9, "valid_targets_min": 707 }, { "epoch": 5.012531328320802, "grad_norm": 0.7859655644778784, "learning_rate": 3.22817752328666e-06, "loss": 0.3555, "loss_nan_ranks": 0, "loss_rank_avg": 0.3647751808166504, "step": 2000, "valid_targets_mean": 1628.6, "valid_targets_min": 698 }, { "epoch": 5.025062656641604, "grad_norm": 0.8381726272050811, "learning_rate": 3.1491711590163777e-06, "loss": 0.3522, "loss_nan_ranks": 0, "loss_rank_avg": 0.3480392098426819, "step": 2005, "valid_targets_mean": 1342.1, "valid_targets_min": 710 }, { "epoch": 5.037593984962406, "grad_norm": 0.7949118559237107, "learning_rate": 3.071060917900277e-06, "loss": 0.3535, "loss_nan_ranks": 0, "loss_rank_avg": 0.36498141288757324, "step": 2010, "valid_targets_mean": 1858.8, "valid_targets_min": 750 }, { "epoch": 5.050125313283208, "grad_norm": 0.7779825433802904, "learning_rate": 2.9938509538227944e-06, "loss": 0.3511, "loss_nan_ranks": 0, "loss_rank_avg": 0.31160634756088257, "step": 2015, "valid_targets_mean": 1482.8, "valid_targets_min": 669 }, { "epoch": 5.06265664160401, "grad_norm": 0.8058330367764247, "learning_rate": 2.9175453727918478e-06, "loss": 0.3369, "loss_nan_ranks": 0, "loss_rank_avg": 0.3252519965171814, "step": 2020, "valid_targets_mean": 1552.9, "valid_targets_min": 635 }, { "epoch": 5.075187969924812, "grad_norm": 0.8859191425899697, "learning_rate": 2.8421482327204565e-06, "loss": 0.3569, "loss_nan_ranks": 0, "loss_rank_avg": 0.36055076122283936, "step": 2025, "valid_targets_mean": 1473.4, "valid_targets_min": 623 }, { "epoch": 5.087719298245614, "grad_norm": 0.8268710147356328, "learning_rate": 2.7676635432109813e-06, "loss": 0.3399, "loss_nan_ranks": 0, "loss_rank_avg": 0.3586581349372864, "step": 2030, "valid_targets_mean": 1494.9, "valid_targets_min": 603 }, { "epoch": 5.100250626566416, "grad_norm": 0.7794366089521846, "learning_rate": 2.6940952653418674e-06, "loss": 0.347, "loss_nan_ranks": 0, "loss_rank_avg": 0.3325762152671814, "step": 2035, "valid_targets_mean": 1564.1, "valid_targets_min": 502 }, { "epoch": 5.112781954887218, "grad_norm": 0.789354683665489, "learning_rate": 2.6214473114569995e-06, "loss": 0.3445, "loss_nan_ranks": 0, "loss_rank_avg": 0.3338083028793335, "step": 2040, "valid_targets_mean": 1670.4, "valid_targets_min": 728 }, { "epoch": 5.12531328320802, "grad_norm": 0.87114480518287, "learning_rate": 2.549723544957652e-06, "loss": 0.3597, "loss_nan_ranks": 0, "loss_rank_avg": 0.37028050422668457, "step": 2045, "valid_targets_mean": 1722.1, "valid_targets_min": 657 }, { "epoch": 5.137844611528822, "grad_norm": 0.9067749222243434, "learning_rate": 2.4789277800970247e-06, "loss": 0.3489, "loss_nan_ranks": 0, "loss_rank_avg": 0.363986611366272, "step": 2050, "valid_targets_mean": 1255.3, "valid_targets_min": 622 }, { "epoch": 5.150375939849624, "grad_norm": 0.8809799469692904, "learning_rate": 2.409063781777412e-06, "loss": 0.3579, "loss_nan_ranks": 0, "loss_rank_avg": 0.35490089654922485, "step": 2055, "valid_targets_mean": 1388.8, "valid_targets_min": 600 }, { "epoch": 5.162907268170426, "grad_norm": 0.8021691453837716, "learning_rate": 2.3401352653499785e-06, "loss": 0.349, "loss_nan_ranks": 0, "loss_rank_avg": 0.34150004386901855, "step": 2060, "valid_targets_mean": 1621.1, "valid_targets_min": 678 }, { "epoch": 5.175438596491228, "grad_norm": 1.0492221755553988, "learning_rate": 2.2721458964171704e-06, "loss": 0.3566, "loss_nan_ranks": 0, "loss_rank_avg": 0.3937349021434784, "step": 2065, "valid_targets_mean": 1146.8, "valid_targets_min": 649 }, { "epoch": 5.18796992481203, "grad_norm": 0.8387344530484291, "learning_rate": 2.2050992906378023e-06, "loss": 0.342, "loss_nan_ranks": 0, "loss_rank_avg": 0.3412122130393982, "step": 2070, "valid_targets_mean": 1392.2, "valid_targets_min": 637 }, { "epoch": 5.200501253132832, "grad_norm": 1.0394180201727279, "learning_rate": 2.1389990135347593e-06, "loss": 0.3486, "loss_nan_ranks": 0, "loss_rank_avg": 0.397787868976593, "step": 2075, "valid_targets_mean": 1076.6, "valid_targets_min": 665 }, { "epoch": 5.213032581453634, "grad_norm": 0.8677433903945426, "learning_rate": 2.073848580305382e-06, "loss": 0.3484, "loss_nan_ranks": 0, "loss_rank_avg": 0.35740649700164795, "step": 2080, "valid_targets_mean": 1429.4, "valid_targets_min": 768 }, { "epoch": 5.225563909774436, "grad_norm": 0.8209101680229015, "learning_rate": 2.0096514556345448e-06, "loss": 0.3383, "loss_nan_ranks": 0, "loss_rank_avg": 0.35316136479377747, "step": 2085, "valid_targets_mean": 1729.0, "valid_targets_min": 767 }, { "epoch": 5.238095238095238, "grad_norm": 0.7997489355459917, "learning_rate": 1.9464110535103885e-06, "loss": 0.355, "loss_nan_ranks": 0, "loss_rank_avg": 0.33274149894714355, "step": 2090, "valid_targets_mean": 1570.3, "valid_targets_min": 635 }, { "epoch": 5.25062656641604, "grad_norm": 0.8865845295441469, "learning_rate": 1.8841307370427708e-06, "loss": 0.3474, "loss_nan_ranks": 0, "loss_rank_avg": 0.35728734731674194, "step": 2095, "valid_targets_mean": 1383.3, "valid_targets_min": 703 }, { "epoch": 5.2631578947368425, "grad_norm": 0.9292445555776634, "learning_rate": 1.822813818284428e-06, "loss": 0.3551, "loss_nan_ranks": 0, "loss_rank_avg": 0.3874461054801941, "step": 2100, "valid_targets_mean": 1317.5, "valid_targets_min": 676 }, { "epoch": 5.275689223057644, "grad_norm": 0.8077993677313544, "learning_rate": 1.76246355805481e-06, "loss": 0.3345, "loss_nan_ranks": 0, "loss_rank_avg": 0.3541961908340454, "step": 2105, "valid_targets_mean": 1608.7, "valid_targets_min": 580 }, { "epoch": 5.288220551378446, "grad_norm": 0.8473451759624002, "learning_rate": 1.7030831657667125e-06, "loss": 0.3548, "loss_nan_ranks": 0, "loss_rank_avg": 0.35867220163345337, "step": 2110, "valid_targets_mean": 1477.7, "valid_targets_min": 693 }, { "epoch": 5.3007518796992485, "grad_norm": 0.8117634827136613, "learning_rate": 1.6446757992555662e-06, "loss": 0.353, "loss_nan_ranks": 0, "loss_rank_avg": 0.33366554975509644, "step": 2115, "valid_targets_mean": 1496.1, "valid_targets_min": 688 }, { "epoch": 5.31328320802005, "grad_norm": 0.7969470271219807, "learning_rate": 1.5872445646115253e-06, "loss": 0.3608, "loss_nan_ranks": 0, "loss_rank_avg": 0.3518367409706116, "step": 2120, "valid_targets_mean": 1646.6, "valid_targets_min": 749 }, { "epoch": 5.325814536340852, "grad_norm": 0.9442497476680294, "learning_rate": 1.5307925160142767e-06, "loss": 0.3527, "loss_nan_ranks": 0, "loss_rank_avg": 0.3696008324623108, "step": 2125, "valid_targets_mean": 1246.6, "valid_targets_min": 618 }, { "epoch": 5.338345864661654, "grad_norm": 0.890713206901813, "learning_rate": 1.4753226555706169e-06, "loss": 0.3601, "loss_nan_ranks": 0, "loss_rank_avg": 0.36075615882873535, "step": 2130, "valid_targets_mean": 1409.6, "valid_targets_min": 696 }, { "epoch": 5.350877192982456, "grad_norm": 0.7836357760961904, "learning_rate": 1.4208379331548127e-06, "loss": 0.344, "loss_nan_ranks": 0, "loss_rank_avg": 0.32944750785827637, "step": 2135, "valid_targets_mean": 1811.9, "valid_targets_min": 761 }, { "epoch": 5.363408521303258, "grad_norm": 0.790615433618982, "learning_rate": 1.3673412462517165e-06, "loss": 0.347, "loss_nan_ranks": 0, "loss_rank_avg": 0.3488330841064453, "step": 2140, "valid_targets_mean": 1668.8, "valid_targets_min": 718 }, { "epoch": 5.37593984962406, "grad_norm": 0.9850953675065485, "learning_rate": 1.3148354398026753e-06, "loss": 0.3601, "loss_nan_ranks": 0, "loss_rank_avg": 0.3874185085296631, "step": 2145, "valid_targets_mean": 1317.6, "valid_targets_min": 784 }, { "epoch": 5.388471177944862, "grad_norm": 0.7871735563246215, "learning_rate": 1.2633233060542538e-06, "loss": 0.3378, "loss_nan_ranks": 0, "loss_rank_avg": 0.3537478744983673, "step": 2150, "valid_targets_mean": 1825.2, "valid_targets_min": 818 }, { "epoch": 5.401002506265664, "grad_norm": 0.8663311136018653, "learning_rate": 1.2128075844097321e-06, "loss": 0.3641, "loss_nan_ranks": 0, "loss_rank_avg": 0.34863555431365967, "step": 2155, "valid_targets_mean": 1305.8, "valid_targets_min": 613 }, { "epoch": 5.413533834586466, "grad_norm": 0.8444399292374044, "learning_rate": 1.163290961283423e-06, "loss": 0.3504, "loss_nan_ranks": 0, "loss_rank_avg": 0.3386199474334717, "step": 2160, "valid_targets_mean": 1479.9, "valid_targets_min": 732 }, { "epoch": 5.4260651629072685, "grad_norm": 0.7455079817083609, "learning_rate": 1.114776069957817e-06, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.3595254421234131, "step": 2165, "valid_targets_mean": 1820.9, "valid_targets_min": 559 }, { "epoch": 5.43859649122807, "grad_norm": 0.8420039526065428, "learning_rate": 1.0672654904435364e-06, "loss": 0.3531, "loss_nan_ranks": 0, "loss_rank_avg": 0.329376757144928, "step": 2170, "valid_targets_mean": 1556.8, "valid_targets_min": 686 }, { "epoch": 5.451127819548872, "grad_norm": 0.798992177534451, "learning_rate": 1.0207617493421385e-06, "loss": 0.3302, "loss_nan_ranks": 0, "loss_rank_avg": 0.3616739511489868, "step": 2175, "valid_targets_mean": 1766.6, "valid_targets_min": 726 }, { "epoch": 5.4636591478696745, "grad_norm": 0.7437629572079977, "learning_rate": 9.752673197117456e-07, "loss": 0.3404, "loss_nan_ranks": 0, "loss_rank_avg": 0.34114813804626465, "step": 2180, "valid_targets_mean": 1888.8, "valid_targets_min": 800 }, { "epoch": 5.476190476190476, "grad_norm": 0.8454039260128821, "learning_rate": 9.307846209355342e-07, "loss": 0.3504, "loss_nan_ranks": 0, "loss_rank_avg": 0.3508765995502472, "step": 2185, "valid_targets_mean": 1470.9, "valid_targets_min": 840 }, { "epoch": 5.488721804511278, "grad_norm": 1.0242145963904694, "learning_rate": 8.873160185930674e-07, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.3790890574455261, "step": 2190, "valid_targets_mean": 1180.7, "valid_targets_min": 512 }, { "epoch": 5.50125313283208, "grad_norm": 0.8818078968821532, "learning_rate": 8.448638243344942e-07, "loss": 0.3529, "loss_nan_ranks": 0, "loss_rank_avg": 0.3618394136428833, "step": 2195, "valid_targets_mean": 1515.6, "valid_targets_min": 734 }, { "epoch": 5.513784461152882, "grad_norm": 1.038219375367169, "learning_rate": 8.034302957576234e-07, "loss": 0.364, "loss_nan_ranks": 0, "loss_rank_avg": 0.37004348635673523, "step": 2200, "valid_targets_mean": 1204.1, "valid_targets_min": 768 }, { "epoch": 5.526315789473684, "grad_norm": 0.9570498930145379, "learning_rate": 7.63017636287855e-07, "loss": 0.3666, "loss_nan_ranks": 0, "loss_rank_avg": 0.3601672649383545, "step": 2205, "valid_targets_mean": 1163.2, "valid_targets_min": 527 }, { "epoch": 5.538847117794486, "grad_norm": 0.8361698631636353, "learning_rate": 7.236279950610136e-07, "loss": 0.352, "loss_nan_ranks": 0, "loss_rank_avg": 0.36759644746780396, "step": 2210, "valid_targets_mean": 1655.2, "valid_targets_min": 576 }, { "epoch": 5.551378446115288, "grad_norm": 0.9285533875940273, "learning_rate": 6.852634668090452e-07, "loss": 0.3431, "loss_nan_ranks": 0, "loss_rank_avg": 0.36507898569107056, "step": 2215, "valid_targets_mean": 1249.8, "valid_targets_min": 697 }, { "epoch": 5.56390977443609, "grad_norm": 0.9419414098830893, "learning_rate": 6.479260917486296e-07, "loss": 0.3498, "loss_nan_ranks": 0, "loss_rank_avg": 0.36119532585144043, "step": 2220, "valid_targets_mean": 1252.7, "valid_targets_min": 670 }, { "epoch": 5.576441102756892, "grad_norm": 0.8711529899588126, "learning_rate": 6.116178554726771e-07, "loss": 0.3376, "loss_nan_ranks": 0, "loss_rank_avg": 0.3531295657157898, "step": 2225, "valid_targets_mean": 1507.8, "valid_targets_min": 705 }, { "epoch": 5.5889724310776945, "grad_norm": 0.8284481667772925, "learning_rate": 5.763406888447432e-07, "loss": 0.3356, "loss_nan_ranks": 0, "loss_rank_avg": 0.3536621332168579, "step": 2230, "valid_targets_mean": 1475.7, "valid_targets_min": 668 }, { "epoch": 5.601503759398496, "grad_norm": 0.8662173388424959, "learning_rate": 5.420964678963314e-07, "loss": 0.3533, "loss_nan_ranks": 0, "loss_rank_avg": 0.3492635488510132, "step": 2235, "valid_targets_mean": 1494.6, "valid_targets_min": 804 }, { "epoch": 5.614035087719298, "grad_norm": 0.9354045755972811, "learning_rate": 5.088870137271396e-07, "loss": 0.374, "loss_nan_ranks": 0, "loss_rank_avg": 0.3668023347854614, "step": 2240, "valid_targets_mean": 1324.1, "valid_targets_min": 668 }, { "epoch": 5.6265664160401005, "grad_norm": 0.7423454160245688, "learning_rate": 4.767140924082059e-07, "loss": 0.3425, "loss_nan_ranks": 0, "loss_rank_avg": 0.3354753255844116, "step": 2245, "valid_targets_mean": 1914.1, "valid_targets_min": 751 }, { "epoch": 5.639097744360902, "grad_norm": 0.879259246955938, "learning_rate": 4.4557941488799995e-07, "loss": 0.3384, "loss_nan_ranks": 0, "loss_rank_avg": 0.35705995559692383, "step": 2250, "valid_targets_mean": 1369.6, "valid_targets_min": 692 }, { "epoch": 5.651629072681704, "grad_norm": 1.0842382888757587, "learning_rate": 4.154846369014198e-07, "loss": 0.3424, "loss_nan_ranks": 0, "loss_rank_avg": 0.33172884583473206, "step": 2255, "valid_targets_mean": 1276.8, "valid_targets_min": 640 }, { "epoch": 5.664160401002507, "grad_norm": 0.9302183552170346, "learning_rate": 3.8643135888175145e-07, "loss": 0.3463, "loss_nan_ranks": 0, "loss_rank_avg": 0.3444492816925049, "step": 2260, "valid_targets_mean": 1480.1, "valid_targets_min": 718 }, { "epoch": 5.676691729323308, "grad_norm": 0.908184468618399, "learning_rate": 3.5842112587555213e-07, "loss": 0.3589, "loss_nan_ranks": 0, "loss_rank_avg": 0.35247802734375, "step": 2265, "valid_targets_mean": 1239.0, "valid_targets_min": 670 }, { "epoch": 5.68922305764411, "grad_norm": 1.2712211577471397, "learning_rate": 3.314554274604964e-07, "loss": 0.3497, "loss_nan_ranks": 0, "loss_rank_avg": 0.3652925193309784, "step": 2270, "valid_targets_mean": 1641.1, "valid_targets_min": 734 }, { "epoch": 5.701754385964913, "grad_norm": 0.7963337935187879, "learning_rate": 3.055356976661417e-07, "loss": 0.3449, "loss_nan_ranks": 0, "loss_rank_avg": 0.33966144919395447, "step": 2275, "valid_targets_mean": 1846.9, "valid_targets_min": 506 }, { "epoch": 5.714285714285714, "grad_norm": 0.8327621927793852, "learning_rate": 2.8066331489768894e-07, "loss": 0.3628, "loss_nan_ranks": 0, "loss_rank_avg": 0.3613581657409668, "step": 2280, "valid_targets_mean": 1714.9, "valid_targets_min": 654 }, { "epoch": 5.726817042606516, "grad_norm": 0.8144928766077396, "learning_rate": 2.5683960186265954e-07, "loss": 0.3453, "loss_nan_ranks": 0, "loss_rank_avg": 0.31915366649627686, "step": 2285, "valid_targets_mean": 1746.2, "valid_targets_min": 755 }, { "epoch": 5.739348370927319, "grad_norm": 0.6630996787867633, "learning_rate": 2.3406582550056455e-07, "loss": 0.3366, "loss_nan_ranks": 0, "loss_rank_avg": 0.31774675846099854, "step": 2290, "valid_targets_mean": 2273.2, "valid_targets_min": 908 }, { "epoch": 5.7518796992481205, "grad_norm": 0.8547740880071824, "learning_rate": 2.1234319691553206e-07, "loss": 0.3493, "loss_nan_ranks": 0, "loss_rank_avg": 0.3441549241542816, "step": 2295, "valid_targets_mean": 1516.8, "valid_targets_min": 633 }, { "epoch": 5.764411027568922, "grad_norm": 0.9343549299288464, "learning_rate": 1.9167287131188982e-07, "loss": 0.3547, "loss_nan_ranks": 0, "loss_rank_avg": 0.39410120248794556, "step": 2300, "valid_targets_mean": 1385.6, "valid_targets_min": 738 }, { "epoch": 5.776942355889724, "grad_norm": 0.7370330092462523, "learning_rate": 1.7205594793273882e-07, "loss": 0.3609, "loss_nan_ranks": 0, "loss_rank_avg": 0.3407435715198517, "step": 2305, "valid_targets_mean": 1888.4, "valid_targets_min": 743 }, { "epoch": 5.7894736842105265, "grad_norm": 0.8021220836906139, "learning_rate": 1.5349347000149784e-07, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.3627573251724243, "step": 2310, "valid_targets_mean": 1671.4, "valid_targets_min": 788 }, { "epoch": 5.802005012531328, "grad_norm": 0.8883295497800556, "learning_rate": 1.359864246664233e-07, "loss": 0.3584, "loss_nan_ranks": 0, "loss_rank_avg": 0.3748079538345337, "step": 2315, "valid_targets_mean": 1306.4, "valid_targets_min": 697 }, { "epoch": 5.81453634085213, "grad_norm": 0.8790652000994345, "learning_rate": 1.19535742948107e-07, "loss": 0.3489, "loss_nan_ranks": 0, "loss_rank_avg": 0.3644014596939087, "step": 2320, "valid_targets_mean": 1427.2, "valid_targets_min": 640 }, { "epoch": 5.827067669172933, "grad_norm": 0.8377812295959398, "learning_rate": 1.0414229968997325e-07, "loss": 0.3518, "loss_nan_ranks": 0, "loss_rank_avg": 0.3724502921104431, "step": 2325, "valid_targets_mean": 1496.2, "valid_targets_min": 792 }, { "epoch": 5.839598997493734, "grad_norm": 0.7607147698350342, "learning_rate": 8.980691351174964e-08, "loss": 0.3545, "loss_nan_ranks": 0, "loss_rank_avg": 0.3551217019557953, "step": 2330, "valid_targets_mean": 1877.3, "valid_targets_min": 766 }, { "epoch": 5.852130325814536, "grad_norm": 0.9043120198314769, "learning_rate": 7.65303467659373e-08, "loss": 0.35, "loss_nan_ranks": 0, "loss_rank_avg": 0.3573995530605316, "step": 2335, "valid_targets_mean": 1316.8, "valid_targets_min": 691 }, { "epoch": 5.864661654135339, "grad_norm": 0.8347596516353907, "learning_rate": 6.431330549726555e-08, "loss": 0.3584, "loss_nan_ranks": 0, "loss_rank_avg": 0.359147846698761, "step": 2340, "valid_targets_mean": 1556.8, "valid_targets_min": 707 }, { "epoch": 5.87719298245614, "grad_norm": 0.8303183935456322, "learning_rate": 5.3156439405139817e-08, "loss": 0.339, "loss_nan_ranks": 0, "loss_rank_avg": 0.3554539084434509, "step": 2345, "valid_targets_mean": 1561.7, "valid_targets_min": 761 }, { "epoch": 5.889724310776942, "grad_norm": 0.8175782660251781, "learning_rate": 4.306034180910246e-08, "loss": 0.3503, "loss_nan_ranks": 0, "loss_rank_avg": 0.3494277596473694, "step": 2350, "valid_targets_mean": 1573.6, "valid_targets_min": 726 }, { "epoch": 5.902255639097744, "grad_norm": 0.7580462701515597, "learning_rate": 3.402554961727367e-08, "loss": 0.3558, "loss_nan_ranks": 0, "loss_rank_avg": 0.3425142168998718, "step": 2355, "valid_targets_mean": 1730.6, "valid_targets_min": 549 }, { "epoch": 5.9147869674185465, "grad_norm": 0.8314422303569725, "learning_rate": 2.6052543297800937e-08, "loss": 0.3559, "loss_nan_ranks": 0, "loss_rank_avg": 0.3679964542388916, "step": 2360, "valid_targets_mean": 1604.4, "valid_targets_min": 751 }, { "epoch": 5.927318295739348, "grad_norm": 0.960324016846512, "learning_rate": 1.9141746853299504e-08, "loss": 0.3555, "loss_nan_ranks": 0, "loss_rank_avg": 0.3805701732635498, "step": 2365, "valid_targets_mean": 1361.2, "valid_targets_min": 699 }, { "epoch": 5.93984962406015, "grad_norm": 0.9825903898086894, "learning_rate": 1.3293527798317051e-08, "loss": 0.3528, "loss_nan_ranks": 0, "loss_rank_avg": 0.36922788619995117, "step": 2370, "valid_targets_mean": 1131.9, "valid_targets_min": 602 }, { "epoch": 5.9523809523809526, "grad_norm": 0.8195342765652477, "learning_rate": 8.508197139782682e-09, "loss": 0.3405, "loss_nan_ranks": 0, "loss_rank_avg": 0.3507591784000397, "step": 2375, "valid_targets_mean": 1598.8, "valid_targets_min": 756 }, { "epoch": 5.964912280701754, "grad_norm": 0.7958920436004321, "learning_rate": 4.786009360464583e-09, "loss": 0.3416, "loss_nan_ranks": 0, "loss_rank_avg": 0.34905338287353516, "step": 2380, "valid_targets_mean": 1636.8, "valid_targets_min": 722 }, { "epoch": 5.977443609022556, "grad_norm": 0.868391349668869, "learning_rate": 2.127162405443084e-09, "loss": 0.3422, "loss_nan_ranks": 0, "loss_rank_avg": 0.35776710510253906, "step": 2385, "valid_targets_mean": 1458.3, "valid_targets_min": 573 }, { "epoch": 5.989974937343359, "grad_norm": 0.8621316894150931, "learning_rate": 5.317976715790707e-10, "loss": 0.3624, "loss_nan_ranks": 0, "loss_rank_avg": 0.3662847876548767, "step": 2390, "valid_targets_mean": 1573.8, "valid_targets_min": 767 }, { "epoch": 6.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.33341556787490845, "step": 2394, "total_flos": 235219454066688.0, "train_loss": 0.41497956364475497, "train_runtime": 8697.8458, "train_samples_per_second": 4.4, "train_steps_per_second": 0.275, "valid_targets_mean": 1707.1, "valid_targets_min": 654 } ], "logging_steps": 5, "max_steps": 2394, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 235219454066688.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }