diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8979 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4060, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008620689655172414, + "grad_norm": 9.24550457643599, + "learning_rate": 3.9408866995073894e-07, + "loss": 0.9873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43203049898147583, + "step": 5, + "valid_targets_mean": 3287.9, + "valid_targets_min": 901 + }, + { + "epoch": 0.017241379310344827, + "grad_norm": 7.335533460597348, + "learning_rate": 8.866995073891626e-07, + "loss": 1.0212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5148377418518066, + "step": 10, + "valid_targets_mean": 3855.8, + "valid_targets_min": 1359 + }, + { + "epoch": 0.02586206896551724, + "grad_norm": 7.854592614384845, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.0819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5726603865623474, + "step": 15, + "valid_targets_mean": 2505.0, + "valid_targets_min": 764 + }, + { + "epoch": 0.034482758620689655, + "grad_norm": 4.719130380535508, + "learning_rate": 1.8719211822660098e-06, + "loss": 1.0174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6978442668914795, + "step": 20, + "valid_targets_mean": 5433.6, + "valid_targets_min": 502 + }, + { + "epoch": 0.04310344827586207, + "grad_norm": 4.179075473275818, + "learning_rate": 2.3645320197044334e-06, + "loss": 0.9749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37579989433288574, + "step": 25, + "valid_targets_mean": 2919.2, + "valid_targets_min": 695 + }, + { + "epoch": 0.05172413793103448, + "grad_norm": 3.2866656076565355, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.8866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3706856369972229, + "step": 30, + "valid_targets_mean": 3500.5, + "valid_targets_min": 643 + }, + { + "epoch": 0.0603448275862069, + "grad_norm": 2.5424063865240134, + "learning_rate": 3.349753694581281e-06, + "loss": 0.9536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4894193708896637, + "step": 35, + "valid_targets_mean": 3924.0, + "valid_targets_min": 1668 + }, + { + "epoch": 0.06896551724137931, + "grad_norm": 2.0545621588279888, + "learning_rate": 3.842364532019705e-06, + "loss": 0.8279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41704902052879333, + "step": 40, + "valid_targets_mean": 2984.0, + "valid_targets_min": 840 + }, + { + "epoch": 0.07758620689655173, + "grad_norm": 1.287657343571009, + "learning_rate": 4.334975369458129e-06, + "loss": 0.8506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40100449323654175, + "step": 45, + "valid_targets_mean": 4369.2, + "valid_targets_min": 619 + }, + { + "epoch": 0.08620689655172414, + "grad_norm": 1.1918469853583982, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.8647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4594336450099945, + "step": 50, + "valid_targets_mean": 4147.5, + "valid_targets_min": 1578 + }, + { + "epoch": 0.09482758620689655, + "grad_norm": 0.9441218490186954, + "learning_rate": 5.320197044334976e-06, + "loss": 0.7806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4251787066459656, + "step": 55, + "valid_targets_mean": 5269.2, + "valid_targets_min": 2325 + }, + { + "epoch": 0.10344827586206896, + "grad_norm": 1.0388853131448057, + "learning_rate": 5.812807881773399e-06, + "loss": 0.7767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3979467749595642, + "step": 60, + "valid_targets_mean": 2872.6, + "valid_targets_min": 1303 + }, + { + "epoch": 0.11206896551724138, + "grad_norm": 0.9660370233248622, + "learning_rate": 6.305418719211823e-06, + "loss": 0.8294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32068926095962524, + "step": 65, + "valid_targets_mean": 2370.2, + "valid_targets_min": 737 + }, + { + "epoch": 0.1206896551724138, + "grad_norm": 0.8930917300505489, + "learning_rate": 6.798029556650246e-06, + "loss": 0.8083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3917607069015503, + "step": 70, + "valid_targets_mean": 3463.4, + "valid_targets_min": 1465 + }, + { + "epoch": 0.12931034482758622, + "grad_norm": 0.7389990882620856, + "learning_rate": 7.290640394088671e-06, + "loss": 0.7757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3763599991798401, + "step": 75, + "valid_targets_mean": 4467.6, + "valid_targets_min": 772 + }, + { + "epoch": 0.13793103448275862, + "grad_norm": 0.7472214279722299, + "learning_rate": 7.783251231527095e-06, + "loss": 0.7342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3860183656215668, + "step": 80, + "valid_targets_mean": 3997.8, + "valid_targets_min": 1795 + }, + { + "epoch": 0.14655172413793102, + "grad_norm": 0.7218339941398133, + "learning_rate": 8.275862068965518e-06, + "loss": 0.7617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4559746980667114, + "step": 85, + "valid_targets_mean": 5640.1, + "valid_targets_min": 274 + }, + { + "epoch": 0.15517241379310345, + "grad_norm": 0.7425167868253314, + "learning_rate": 8.768472906403942e-06, + "loss": 0.7697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43732425570487976, + "step": 90, + "valid_targets_mean": 4439.5, + "valid_targets_min": 1082 + }, + { + "epoch": 0.16379310344827586, + "grad_norm": 0.8784728083131768, + "learning_rate": 9.261083743842364e-06, + "loss": 0.6829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3236698806285858, + "step": 95, + "valid_targets_mean": 2221.5, + "valid_targets_min": 583 + }, + { + "epoch": 0.1724137931034483, + "grad_norm": 0.668610432068481, + "learning_rate": 9.75369458128079e-06, + "loss": 0.7028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3175450563430786, + "step": 100, + "valid_targets_mean": 4900.4, + "valid_targets_min": 412 + }, + { + "epoch": 0.1810344827586207, + "grad_norm": 0.762868705154042, + "learning_rate": 1.0246305418719214e-05, + "loss": 0.7046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33831289410591125, + "step": 105, + "valid_targets_mean": 3072.5, + "valid_targets_min": 1802 + }, + { + "epoch": 0.1896551724137931, + "grad_norm": 0.771147983381667, + "learning_rate": 1.0738916256157637e-05, + "loss": 0.6963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3214723765850067, + "step": 110, + "valid_targets_mean": 3625.2, + "valid_targets_min": 756 + }, + { + "epoch": 0.19827586206896552, + "grad_norm": 1.640623528697079, + "learning_rate": 1.123152709359606e-05, + "loss": 0.7089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23830479383468628, + "step": 115, + "valid_targets_mean": 3252.0, + "valid_targets_min": 679 + }, + { + "epoch": 0.20689655172413793, + "grad_norm": 0.7019602281567701, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.7265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32230842113494873, + "step": 120, + "valid_targets_mean": 4071.5, + "valid_targets_min": 1946 + }, + { + "epoch": 0.21551724137931033, + "grad_norm": 0.7339495236828748, + "learning_rate": 1.2216748768472909e-05, + "loss": 0.7463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3007355332374573, + "step": 125, + "valid_targets_mean": 2948.8, + "valid_targets_min": 1718 + }, + { + "epoch": 0.22413793103448276, + "grad_norm": 0.5110196587530477, + "learning_rate": 1.2709359605911331e-05, + "loss": 0.6675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22014601528644562, + "step": 130, + "valid_targets_mean": 4781.5, + "valid_targets_min": 1996 + }, + { + "epoch": 0.23275862068965517, + "grad_norm": 0.6846953537064482, + "learning_rate": 1.3201970443349755e-05, + "loss": 0.6993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17798146605491638, + "step": 135, + "valid_targets_mean": 2210.0, + "valid_targets_min": 728 + }, + { + "epoch": 0.2413793103448276, + "grad_norm": 0.8179902129138006, + "learning_rate": 1.369458128078818e-05, + "loss": 0.7097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47994717955589294, + "step": 140, + "valid_targets_mean": 5659.4, + "valid_targets_min": 890 + }, + { + "epoch": 0.25, + "grad_norm": 0.5915590286522948, + "learning_rate": 1.4187192118226602e-05, + "loss": 0.6907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24744611978530884, + "step": 145, + "valid_targets_mean": 3455.5, + "valid_targets_min": 553 + }, + { + "epoch": 0.25862068965517243, + "grad_norm": 0.7578130027178461, + "learning_rate": 1.4679802955665026e-05, + "loss": 0.7072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4918016195297241, + "step": 150, + "valid_targets_mean": 3785.9, + "valid_targets_min": 708 + }, + { + "epoch": 0.2672413793103448, + "grad_norm": 0.8118636484819712, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.7223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3037732243537903, + "step": 155, + "valid_targets_mean": 2365.0, + "valid_targets_min": 338 + }, + { + "epoch": 0.27586206896551724, + "grad_norm": 0.8417411826914287, + "learning_rate": 1.5665024630541875e-05, + "loss": 0.7179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25301000475883484, + "step": 160, + "valid_targets_mean": 2006.4, + "valid_targets_min": 754 + }, + { + "epoch": 0.28448275862068967, + "grad_norm": 1.0078060923241636, + "learning_rate": 1.6157635467980298e-05, + "loss": 0.6841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5394257307052612, + "step": 165, + "valid_targets_mean": 3262.0, + "valid_targets_min": 946 + }, + { + "epoch": 0.29310344827586204, + "grad_norm": 0.8509294072085292, + "learning_rate": 1.665024630541872e-05, + "loss": 0.6869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.293990820646286, + "step": 170, + "valid_targets_mean": 2501.8, + "valid_targets_min": 606 + }, + { + "epoch": 0.3017241379310345, + "grad_norm": 1.5110699028438686, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42483076453208923, + "step": 175, + "valid_targets_mean": 5015.8, + "valid_targets_min": 571 + }, + { + "epoch": 0.3103448275862069, + "grad_norm": 0.829361033344782, + "learning_rate": 1.7635467980295567e-05, + "loss": 0.6593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3968130350112915, + "step": 180, + "valid_targets_mean": 4155.6, + "valid_targets_min": 1366 + }, + { + "epoch": 0.31896551724137934, + "grad_norm": 2.2132799203163036, + "learning_rate": 1.8128078817733993e-05, + "loss": 0.6668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41700565814971924, + "step": 185, + "valid_targets_mean": 3660.1, + "valid_targets_min": 1274 + }, + { + "epoch": 0.3275862068965517, + "grad_norm": 0.7201258507954451, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.6377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3403915762901306, + "step": 190, + "valid_targets_mean": 3456.1, + "valid_targets_min": 811 + }, + { + "epoch": 0.33620689655172414, + "grad_norm": 0.7496831909974342, + "learning_rate": 1.911330049261084e-05, + "loss": 0.6633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3048527240753174, + "step": 195, + "valid_targets_mean": 3633.9, + "valid_targets_min": 1775 + }, + { + "epoch": 0.3448275862068966, + "grad_norm": 1.0466424956418392, + "learning_rate": 1.9605911330049263e-05, + "loss": 0.6995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32808586955070496, + "step": 200, + "valid_targets_mean": 3535.2, + "valid_targets_min": 586 + }, + { + "epoch": 0.35344827586206895, + "grad_norm": 0.8218255448633348, + "learning_rate": 2.0098522167487688e-05, + "loss": 0.6917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2928812801837921, + "step": 205, + "valid_targets_mean": 3240.9, + "valid_targets_min": 834 + }, + { + "epoch": 0.3620689655172414, + "grad_norm": 0.8029280949690537, + "learning_rate": 2.059113300492611e-05, + "loss": 0.7323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30598652362823486, + "step": 210, + "valid_targets_mean": 2593.9, + "valid_targets_min": 623 + }, + { + "epoch": 0.3706896551724138, + "grad_norm": 1.2032672924578944, + "learning_rate": 2.1083743842364536e-05, + "loss": 0.6665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2592845559120178, + "step": 215, + "valid_targets_mean": 4767.8, + "valid_targets_min": 1062 + }, + { + "epoch": 0.3793103448275862, + "grad_norm": 0.7836032078966709, + "learning_rate": 2.1576354679802954e-05, + "loss": 0.6655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27402517199516296, + "step": 220, + "valid_targets_mean": 2645.8, + "valid_targets_min": 955 + }, + { + "epoch": 0.3879310344827586, + "grad_norm": 0.7223820678613246, + "learning_rate": 2.206896551724138e-05, + "loss": 0.6182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2808302640914917, + "step": 225, + "valid_targets_mean": 2963.4, + "valid_targets_min": 1475 + }, + { + "epoch": 0.39655172413793105, + "grad_norm": 0.6050302790875435, + "learning_rate": 2.2561576354679805e-05, + "loss": 0.6178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4045502543449402, + "step": 230, + "valid_targets_mean": 5649.1, + "valid_targets_min": 1876 + }, + { + "epoch": 0.4051724137931034, + "grad_norm": 0.6999663949451717, + "learning_rate": 2.3054187192118228e-05, + "loss": 0.633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23481211066246033, + "step": 235, + "valid_targets_mean": 3052.0, + "valid_targets_min": 1318 + }, + { + "epoch": 0.41379310344827586, + "grad_norm": 0.7423530530074394, + "learning_rate": 2.3546798029556653e-05, + "loss": 0.6828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3373368978500366, + "step": 240, + "valid_targets_mean": 3396.6, + "valid_targets_min": 464 + }, + { + "epoch": 0.4224137931034483, + "grad_norm": 0.7379312877310925, + "learning_rate": 2.403940886699508e-05, + "loss": 0.7037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21787698566913605, + "step": 245, + "valid_targets_mean": 2163.5, + "valid_targets_min": 534 + }, + { + "epoch": 0.43103448275862066, + "grad_norm": 0.8063509127084881, + "learning_rate": 2.4532019704433497e-05, + "loss": 0.6382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2792884111404419, + "step": 250, + "valid_targets_mean": 3017.1, + "valid_targets_min": 1032 + }, + { + "epoch": 0.4396551724137931, + "grad_norm": 0.6706813958825989, + "learning_rate": 2.5024630541871923e-05, + "loss": 0.657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3086356520652771, + "step": 255, + "valid_targets_mean": 4803.4, + "valid_targets_min": 357 + }, + { + "epoch": 0.4482758620689655, + "grad_norm": 0.9310469728000066, + "learning_rate": 2.551724137931035e-05, + "loss": 0.7131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2496461719274521, + "step": 260, + "valid_targets_mean": 1765.0, + "valid_targets_min": 470 + }, + { + "epoch": 0.45689655172413796, + "grad_norm": 0.7289154684935432, + "learning_rate": 2.600985221674877e-05, + "loss": 0.6566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2873595654964447, + "step": 265, + "valid_targets_mean": 3518.4, + "valid_targets_min": 785 + }, + { + "epoch": 0.46551724137931033, + "grad_norm": 0.7491930663891365, + "learning_rate": 2.6502463054187196e-05, + "loss": 0.6191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3568354845046997, + "step": 270, + "valid_targets_mean": 3725.4, + "valid_targets_min": 937 + }, + { + "epoch": 0.47413793103448276, + "grad_norm": 0.6869919899460102, + "learning_rate": 2.6995073891625615e-05, + "loss": 0.6547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.300902396440506, + "step": 275, + "valid_targets_mean": 3223.5, + "valid_targets_min": 1041 + }, + { + "epoch": 0.4827586206896552, + "grad_norm": 0.7219971094287305, + "learning_rate": 2.748768472906404e-05, + "loss": 0.6283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22700543701648712, + "step": 280, + "valid_targets_mean": 2332.5, + "valid_targets_min": 1321 + }, + { + "epoch": 0.49137931034482757, + "grad_norm": 0.7840522944810209, + "learning_rate": 2.7980295566502466e-05, + "loss": 0.6794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.429918497800827, + "step": 285, + "valid_targets_mean": 4372.2, + "valid_targets_min": 2070 + }, + { + "epoch": 0.5, + "grad_norm": 0.7168472411052416, + "learning_rate": 2.8472906403940888e-05, + "loss": 0.6222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3090544641017914, + "step": 290, + "valid_targets_mean": 3569.8, + "valid_targets_min": 1793 + }, + { + "epoch": 0.5086206896551724, + "grad_norm": 0.6401065575292432, + "learning_rate": 2.8965517241379313e-05, + "loss": 0.6535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3025968670845032, + "step": 295, + "valid_targets_mean": 4973.6, + "valid_targets_min": 917 + }, + { + "epoch": 0.5172413793103449, + "grad_norm": 0.5902560458840668, + "learning_rate": 2.945812807881774e-05, + "loss": 0.6428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35170048475265503, + "step": 300, + "valid_targets_mean": 6022.5, + "valid_targets_min": 1054 + }, + { + "epoch": 0.5258620689655172, + "grad_norm": 0.7483408537338351, + "learning_rate": 2.9950738916256158e-05, + "loss": 0.6253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22863270342350006, + "step": 305, + "valid_targets_mean": 3410.6, + "valid_targets_min": 1894 + }, + { + "epoch": 0.5344827586206896, + "grad_norm": 0.8333505264909917, + "learning_rate": 3.0443349753694583e-05, + "loss": 0.6544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38335931301116943, + "step": 310, + "valid_targets_mean": 3636.6, + "valid_targets_min": 1483 + }, + { + "epoch": 0.5431034482758621, + "grad_norm": 0.7569810094427996, + "learning_rate": 3.093596059113301e-05, + "loss": 0.5966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2640477418899536, + "step": 315, + "valid_targets_mean": 3758.2, + "valid_targets_min": 1159 + }, + { + "epoch": 0.5517241379310345, + "grad_norm": 0.8703377303462938, + "learning_rate": 3.142857142857143e-05, + "loss": 0.6108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3582964241504669, + "step": 320, + "valid_targets_mean": 2754.5, + "valid_targets_min": 1437 + }, + { + "epoch": 0.5603448275862069, + "grad_norm": 0.6746278227062145, + "learning_rate": 3.1921182266009856e-05, + "loss": 0.6329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2840624749660492, + "step": 325, + "valid_targets_mean": 3434.4, + "valid_targets_min": 534 + }, + { + "epoch": 0.5689655172413793, + "grad_norm": 0.6661913423851306, + "learning_rate": 3.2413793103448275e-05, + "loss": 0.6354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3681085407733917, + "step": 330, + "valid_targets_mean": 4702.4, + "valid_targets_min": 1024 + }, + { + "epoch": 0.5775862068965517, + "grad_norm": 0.6413649397859758, + "learning_rate": 3.29064039408867e-05, + "loss": 0.6488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29207906126976013, + "step": 335, + "valid_targets_mean": 3462.1, + "valid_targets_min": 1725 + }, + { + "epoch": 0.5862068965517241, + "grad_norm": 0.8651344005129311, + "learning_rate": 3.3399014778325126e-05, + "loss": 0.6268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3141361176967621, + "step": 340, + "valid_targets_mean": 2540.4, + "valid_targets_min": 1181 + }, + { + "epoch": 0.5948275862068966, + "grad_norm": 0.7769867467422841, + "learning_rate": 3.389162561576355e-05, + "loss": 0.5976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33586743474006653, + "step": 345, + "valid_targets_mean": 3471.5, + "valid_targets_min": 2102 + }, + { + "epoch": 0.603448275862069, + "grad_norm": 0.7670895081874297, + "learning_rate": 3.438423645320197e-05, + "loss": 0.6091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30461496114730835, + "step": 350, + "valid_targets_mean": 3081.5, + "valid_targets_min": 1388 + }, + { + "epoch": 0.6120689655172413, + "grad_norm": 0.7050722350888959, + "learning_rate": 3.4876847290640396e-05, + "loss": 0.6474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4162551164627075, + "step": 355, + "valid_targets_mean": 4125.9, + "valid_targets_min": 1195 + }, + { + "epoch": 0.6206896551724138, + "grad_norm": 0.6332661299318986, + "learning_rate": 3.536945812807882e-05, + "loss": 0.6514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37823486328125, + "step": 360, + "valid_targets_mean": 6009.8, + "valid_targets_min": 1362 + }, + { + "epoch": 0.6293103448275862, + "grad_norm": 0.6793271986969113, + "learning_rate": 3.586206896551725e-05, + "loss": 0.6152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2917017936706543, + "step": 365, + "valid_targets_mean": 4666.1, + "valid_targets_min": 379 + }, + { + "epoch": 0.6379310344827587, + "grad_norm": 0.6450087890703107, + "learning_rate": 3.6354679802955665e-05, + "loss": 0.6331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29983556270599365, + "step": 370, + "valid_targets_mean": 4020.2, + "valid_targets_min": 940 + }, + { + "epoch": 0.646551724137931, + "grad_norm": 0.7471314078072796, + "learning_rate": 3.684729064039409e-05, + "loss": 0.6443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3147204518318176, + "step": 375, + "valid_targets_mean": 3499.0, + "valid_targets_min": 636 + }, + { + "epoch": 0.6551724137931034, + "grad_norm": 0.709612006208998, + "learning_rate": 3.7339901477832516e-05, + "loss": 0.6724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23856517672538757, + "step": 380, + "valid_targets_mean": 3519.8, + "valid_targets_min": 1031 + }, + { + "epoch": 0.6637931034482759, + "grad_norm": 0.697296078256357, + "learning_rate": 3.7832512315270935e-05, + "loss": 0.6477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26525959372520447, + "step": 385, + "valid_targets_mean": 3800.0, + "valid_targets_min": 636 + }, + { + "epoch": 0.6724137931034483, + "grad_norm": 0.7964944660649016, + "learning_rate": 3.832512315270936e-05, + "loss": 0.6174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2790738344192505, + "step": 390, + "valid_targets_mean": 3369.2, + "valid_targets_min": 2124 + }, + { + "epoch": 0.6810344827586207, + "grad_norm": 0.7391646482654692, + "learning_rate": 3.8817733990147786e-05, + "loss": 0.6382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2945958375930786, + "step": 395, + "valid_targets_mean": 3164.6, + "valid_targets_min": 1496 + }, + { + "epoch": 0.6896551724137931, + "grad_norm": 0.7017054297069276, + "learning_rate": 3.931034482758621e-05, + "loss": 0.5758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24679553508758545, + "step": 400, + "valid_targets_mean": 3879.5, + "valid_targets_min": 1781 + }, + { + "epoch": 0.6982758620689655, + "grad_norm": 0.784443324981124, + "learning_rate": 3.980295566502464e-05, + "loss": 0.6421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3295668959617615, + "step": 405, + "valid_targets_mean": 3656.9, + "valid_targets_min": 2311 + }, + { + "epoch": 0.7068965517241379, + "grad_norm": 0.6669603608079032, + "learning_rate": 3.999993347192948e-05, + "loss": 0.6026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2392359972000122, + "step": 410, + "valid_targets_mean": 2878.8, + "valid_targets_min": 786 + }, + { + "epoch": 0.7155172413793104, + "grad_norm": 0.5869402914374651, + "learning_rate": 3.9999526913101334e-05, + "loss": 0.6044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2842877507209778, + "step": 415, + "valid_targets_mean": 4819.2, + "valid_targets_min": 1604 + }, + { + "epoch": 0.7241379310344828, + "grad_norm": 0.712850243180447, + "learning_rate": 3.999875076298832e-05, + "loss": 0.6093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45204976201057434, + "step": 420, + "valid_targets_mean": 5218.8, + "valid_targets_min": 980 + }, + { + "epoch": 0.7327586206896551, + "grad_norm": 0.6287529479538014, + "learning_rate": 3.9997605035933704e-05, + "loss": 0.6427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23647578060626984, + "step": 425, + "valid_targets_mean": 3042.9, + "valid_targets_min": 1884 + }, + { + "epoch": 0.7413793103448276, + "grad_norm": 0.613243349326708, + "learning_rate": 3.99960897531105e-05, + "loss": 0.6482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23709192872047424, + "step": 430, + "valid_targets_mean": 3435.1, + "valid_targets_min": 1558 + }, + { + "epoch": 0.75, + "grad_norm": 0.6562955349817724, + "learning_rate": 3.999420494252116e-05, + "loss": 0.5951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3045341968536377, + "step": 435, + "valid_targets_mean": 4663.6, + "valid_targets_min": 1470 + }, + { + "epoch": 0.7586206896551724, + "grad_norm": 0.7627463901707322, + "learning_rate": 3.9991950638996976e-05, + "loss": 0.6301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4783356785774231, + "step": 440, + "valid_targets_mean": 4428.6, + "valid_targets_min": 2142 + }, + { + "epoch": 0.7672413793103449, + "grad_norm": 0.646915086795381, + "learning_rate": 3.998932688419748e-05, + "loss": 0.6333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2336660921573639, + "step": 445, + "valid_targets_mean": 3231.2, + "valid_targets_min": 1201 + }, + { + "epoch": 0.7758620689655172, + "grad_norm": 0.6989419952214646, + "learning_rate": 3.9986333726609674e-05, + "loss": 0.5952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34037575125694275, + "step": 450, + "valid_targets_mean": 4055.6, + "valid_targets_min": 2316 + }, + { + "epoch": 0.7844827586206896, + "grad_norm": 0.6629583343519574, + "learning_rate": 3.99829712215471e-05, + "loss": 0.6399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17064107954502106, + "step": 455, + "valid_targets_mean": 2590.6, + "valid_targets_min": 1968 + }, + { + "epoch": 0.7931034482758621, + "grad_norm": 0.78890938920922, + "learning_rate": 3.997923943114886e-05, + "loss": 0.6104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29810309410095215, + "step": 460, + "valid_targets_mean": 2791.9, + "valid_targets_min": 379 + }, + { + "epoch": 0.8017241379310345, + "grad_norm": 0.7272042315596238, + "learning_rate": 3.997513842437845e-05, + "loss": 0.6096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.364572137594223, + "step": 465, + "valid_targets_mean": 3626.4, + "valid_targets_min": 426 + }, + { + "epoch": 0.8103448275862069, + "grad_norm": 0.6577433648661997, + "learning_rate": 3.997066827702248e-05, + "loss": 0.6063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37066060304641724, + "step": 470, + "valid_targets_mean": 4684.1, + "valid_targets_min": 656 + }, + { + "epoch": 0.8189655172413793, + "grad_norm": 0.9374920706442889, + "learning_rate": 3.996582907168928e-05, + "loss": 0.6019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33479398488998413, + "step": 475, + "valid_targets_mean": 2754.0, + "valid_targets_min": 819 + }, + { + "epoch": 0.8275862068965517, + "grad_norm": 0.6973024921577812, + "learning_rate": 3.996062089780737e-05, + "loss": 0.611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3128424286842346, + "step": 480, + "valid_targets_mean": 5124.0, + "valid_targets_min": 1016 + }, + { + "epoch": 0.8362068965517241, + "grad_norm": 0.8674122403376363, + "learning_rate": 3.99550438516238e-05, + "loss": 0.6519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37676799297332764, + "step": 485, + "valid_targets_mean": 2540.9, + "valid_targets_min": 1171 + }, + { + "epoch": 0.8448275862068966, + "grad_norm": 0.8139547580639379, + "learning_rate": 3.994909803620241e-05, + "loss": 0.6309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30208778381347656, + "step": 490, + "valid_targets_mean": 3834.2, + "valid_targets_min": 1779 + }, + { + "epoch": 0.853448275862069, + "grad_norm": 0.6703455605697158, + "learning_rate": 3.994278356142187e-05, + "loss": 0.5802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3729390501976013, + "step": 495, + "valid_targets_mean": 4426.1, + "valid_targets_min": 1333 + }, + { + "epoch": 0.8620689655172413, + "grad_norm": 0.7773046757336888, + "learning_rate": 3.993610054397368e-05, + "loss": 0.6127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28395596146583557, + "step": 500, + "valid_targets_mean": 2704.4, + "valid_targets_min": 492 + }, + { + "epoch": 0.8706896551724138, + "grad_norm": 0.6933830157899125, + "learning_rate": 3.992904910736001e-05, + "loss": 0.6275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3321034908294678, + "step": 505, + "valid_targets_mean": 2750.9, + "valid_targets_min": 781 + }, + { + "epoch": 0.8793103448275862, + "grad_norm": 0.6899491978796316, + "learning_rate": 3.9921629381891425e-05, + "loss": 0.6128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2895788550376892, + "step": 510, + "valid_targets_mean": 3737.8, + "valid_targets_min": 1953 + }, + { + "epoch": 0.8879310344827587, + "grad_norm": 0.6461401658273619, + "learning_rate": 3.991384150468445e-05, + "loss": 0.632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2809222936630249, + "step": 515, + "valid_targets_mean": 3917.4, + "valid_targets_min": 2273 + }, + { + "epoch": 0.896551724137931, + "grad_norm": 1.2440232141350356, + "learning_rate": 3.9905685619659074e-05, + "loss": 0.6162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2755066752433777, + "step": 520, + "valid_targets_mean": 2479.9, + "valid_targets_min": 935 + }, + { + "epoch": 0.9051724137931034, + "grad_norm": 0.8153879328655858, + "learning_rate": 3.9897161877536076e-05, + "loss": 0.6054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3150935173034668, + "step": 525, + "valid_targets_mean": 2985.4, + "valid_targets_min": 1461 + }, + { + "epoch": 0.9137931034482759, + "grad_norm": 0.610080365395153, + "learning_rate": 3.9888270435834196e-05, + "loss": 0.5662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2674276828765869, + "step": 530, + "valid_targets_mean": 4293.4, + "valid_targets_min": 854 + }, + { + "epoch": 0.9224137931034483, + "grad_norm": 0.7289184888115173, + "learning_rate": 3.987901145886731e-05, + "loss": 0.6252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40158599615097046, + "step": 535, + "valid_targets_mean": 3920.5, + "valid_targets_min": 1234 + }, + { + "epoch": 0.9310344827586207, + "grad_norm": 0.9065353440528913, + "learning_rate": 3.9869385117741314e-05, + "loss": 0.6004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.262065052986145, + "step": 540, + "valid_targets_mean": 2458.5, + "valid_targets_min": 1032 + }, + { + "epoch": 0.9396551724137931, + "grad_norm": 0.8075182485003022, + "learning_rate": 3.985939159035101e-05, + "loss": 0.6231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27840715646743774, + "step": 545, + "valid_targets_mean": 2088.2, + "valid_targets_min": 815 + }, + { + "epoch": 0.9482758620689655, + "grad_norm": 0.8932299994242785, + "learning_rate": 3.98490310613768e-05, + "loss": 0.6197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31527039408683777, + "step": 550, + "valid_targets_mean": 2544.9, + "valid_targets_min": 377 + }, + { + "epoch": 0.9568965517241379, + "grad_norm": 0.600637076017398, + "learning_rate": 3.983830372228127e-05, + "loss": 0.6074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.384111225605011, + "step": 555, + "valid_targets_mean": 5281.6, + "valid_targets_min": 1035 + }, + { + "epoch": 0.9655172413793104, + "grad_norm": 0.6461798471784225, + "learning_rate": 3.982720977130567e-05, + "loss": 0.6061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39894336462020874, + "step": 560, + "valid_targets_mean": 4568.5, + "valid_targets_min": 594 + }, + { + "epoch": 0.9741379310344828, + "grad_norm": 0.785625646117963, + "learning_rate": 3.9815749413466204e-05, + "loss": 0.6221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33625924587249756, + "step": 565, + "valid_targets_mean": 2459.2, + "valid_targets_min": 494 + }, + { + "epoch": 0.9827586206896551, + "grad_norm": 0.6826434190898227, + "learning_rate": 3.980392286055033e-05, + "loss": 0.613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2937217354774475, + "step": 570, + "valid_targets_mean": 3124.0, + "valid_targets_min": 1630 + }, + { + "epoch": 0.9913793103448276, + "grad_norm": 0.7113889514712743, + "learning_rate": 3.979173033111275e-05, + "loss": 0.5836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29601383209228516, + "step": 575, + "valid_targets_mean": 3815.6, + "valid_targets_min": 1390 + }, + { + "epoch": 1.0, + "grad_norm": 0.7978906605998085, + "learning_rate": 3.977917205047142e-05, + "loss": 0.6345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4608190059661865, + "step": 580, + "valid_targets_mean": 3223.1, + "valid_targets_min": 706 + }, + { + "epoch": 1.0086206896551724, + "grad_norm": 0.6005717129590061, + "learning_rate": 3.976624825070339e-05, + "loss": 0.5683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41373294591903687, + "step": 585, + "valid_targets_mean": 5931.6, + "valid_targets_min": 1471 + }, + { + "epoch": 1.0172413793103448, + "grad_norm": 0.6552336478289915, + "learning_rate": 3.97529591706405e-05, + "loss": 0.5455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23430368304252625, + "step": 590, + "valid_targets_mean": 2999.2, + "valid_targets_min": 941 + }, + { + "epoch": 1.0258620689655173, + "grad_norm": 0.7195300818386714, + "learning_rate": 3.973930505586496e-05, + "loss": 0.5861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23183020949363708, + "step": 595, + "valid_targets_mean": 2522.2, + "valid_targets_min": 734 + }, + { + "epoch": 1.0344827586206897, + "grad_norm": 0.6737625154821479, + "learning_rate": 3.972528615870483e-05, + "loss": 0.5762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3226555585861206, + "step": 600, + "valid_targets_mean": 3511.2, + "valid_targets_min": 1281 + }, + { + "epoch": 1.043103448275862, + "grad_norm": 0.6574075084965194, + "learning_rate": 3.9710902738229354e-05, + "loss": 0.576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3020402193069458, + "step": 605, + "valid_targets_mean": 4075.6, + "valid_targets_min": 592 + }, + { + "epoch": 1.0517241379310345, + "grad_norm": 0.6361634659488868, + "learning_rate": 3.9696155060244166e-05, + "loss": 0.6074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31071919202804565, + "step": 610, + "valid_targets_mean": 4625.1, + "valid_targets_min": 1577 + }, + { + "epoch": 1.0603448275862069, + "grad_norm": 0.6865688750734892, + "learning_rate": 3.968104339728636e-05, + "loss": 0.5529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22631359100341797, + "step": 615, + "valid_targets_mean": 3810.9, + "valid_targets_min": 1016 + }, + { + "epoch": 1.0689655172413792, + "grad_norm": 0.7847925685737638, + "learning_rate": 3.966556802861951e-05, + "loss": 0.5722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21570219099521637, + "step": 620, + "valid_targets_mean": 3329.9, + "valid_targets_min": 541 + }, + { + "epoch": 1.0775862068965518, + "grad_norm": 0.6508403733170468, + "learning_rate": 3.964972924022843e-05, + "loss": 0.5917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25959932804107666, + "step": 625, + "valid_targets_mean": 3226.6, + "valid_targets_min": 544 + }, + { + "epoch": 1.0862068965517242, + "grad_norm": 0.6393748831001188, + "learning_rate": 3.963352732481396e-05, + "loss": 0.5649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24320468306541443, + "step": 630, + "valid_targets_mean": 3234.1, + "valid_targets_min": 1606 + }, + { + "epoch": 1.0948275862068966, + "grad_norm": 0.6464744061619221, + "learning_rate": 3.961696258178752e-05, + "loss": 0.56, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23860612511634827, + "step": 635, + "valid_targets_mean": 3424.6, + "valid_targets_min": 281 + }, + { + "epoch": 1.103448275862069, + "grad_norm": 0.7130411239086852, + "learning_rate": 3.960003531726559e-05, + "loss": 0.5978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2414039522409439, + "step": 640, + "valid_targets_mean": 2692.5, + "valid_targets_min": 1147 + }, + { + "epoch": 1.1120689655172413, + "grad_norm": 0.6067056941197266, + "learning_rate": 3.958274584406403e-05, + "loss": 0.5889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2696046233177185, + "step": 645, + "valid_targets_mean": 3656.6, + "valid_targets_min": 1580 + }, + { + "epoch": 1.1206896551724137, + "grad_norm": 0.6794563652987848, + "learning_rate": 3.956509448169233e-05, + "loss": 0.5579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25545477867126465, + "step": 650, + "valid_targets_mean": 3386.6, + "valid_targets_min": 1070 + }, + { + "epoch": 1.1293103448275863, + "grad_norm": 0.5907805691902701, + "learning_rate": 3.9547081556347693e-05, + "loss": 0.55, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3312992453575134, + "step": 655, + "valid_targets_mean": 5166.6, + "valid_targets_min": 1067 + }, + { + "epoch": 1.1379310344827587, + "grad_norm": 0.6929733973909208, + "learning_rate": 3.952870740090901e-05, + "loss": 0.5628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20063847303390503, + "step": 660, + "valid_targets_mean": 2974.2, + "valid_targets_min": 1165 + }, + { + "epoch": 1.146551724137931, + "grad_norm": 0.6362204985300633, + "learning_rate": 3.950997235493069e-05, + "loss": 0.5549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19708850979804993, + "step": 665, + "valid_targets_mean": 2985.6, + "valid_targets_min": 753 + }, + { + "epoch": 1.1551724137931034, + "grad_norm": 0.6358110533727936, + "learning_rate": 3.9490876764636414e-05, + "loss": 0.5738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28072255849838257, + "step": 670, + "valid_targets_mean": 3981.6, + "valid_targets_min": 904 + }, + { + "epoch": 1.1637931034482758, + "grad_norm": 0.7492089400612849, + "learning_rate": 3.947142098291272e-05, + "loss": 0.5969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18699270486831665, + "step": 675, + "valid_targets_mean": 2234.5, + "valid_targets_min": 520 + }, + { + "epoch": 1.1724137931034484, + "grad_norm": 0.6757843817064915, + "learning_rate": 3.945160536930247e-05, + "loss": 0.6009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26428648829460144, + "step": 680, + "valid_targets_mean": 3843.4, + "valid_targets_min": 1182 + }, + { + "epoch": 1.1810344827586208, + "grad_norm": 0.634931931587929, + "learning_rate": 3.9431430289998235e-05, + "loss": 0.5489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3300395607948303, + "step": 685, + "valid_targets_mean": 4375.2, + "valid_targets_min": 1305 + }, + { + "epoch": 1.1896551724137931, + "grad_norm": 0.6151851916260583, + "learning_rate": 3.941089611783551e-05, + "loss": 0.5548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18444105982780457, + "step": 690, + "valid_targets_mean": 3105.9, + "valid_targets_min": 1484 + }, + { + "epoch": 1.1982758620689655, + "grad_norm": 0.6068354631280605, + "learning_rate": 3.939000323228583e-05, + "loss": 0.5517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512916922569275, + "step": 695, + "valid_targets_mean": 4222.9, + "valid_targets_min": 1393 + }, + { + "epoch": 1.206896551724138, + "grad_norm": 0.7634427474132905, + "learning_rate": 3.9368752019449744e-05, + "loss": 0.5944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21794426441192627, + "step": 700, + "valid_targets_mean": 2043.1, + "valid_targets_min": 581 + }, + { + "epoch": 1.2155172413793103, + "grad_norm": 0.6191064830788139, + "learning_rate": 3.934714287204969e-05, + "loss": 0.5532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3472103476524353, + "step": 705, + "valid_targets_mean": 4762.0, + "valid_targets_min": 1397 + }, + { + "epoch": 1.2241379310344827, + "grad_norm": 0.7037462902349687, + "learning_rate": 3.932517618942275e-05, + "loss": 0.5717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27132099866867065, + "step": 710, + "valid_targets_mean": 3573.6, + "valid_targets_min": 1746 + }, + { + "epoch": 1.2327586206896552, + "grad_norm": 0.7072106217368744, + "learning_rate": 3.930285237751324e-05, + "loss": 0.6102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2910650670528412, + "step": 715, + "valid_targets_mean": 3287.1, + "valid_targets_min": 663 + }, + { + "epoch": 1.2413793103448276, + "grad_norm": 0.6650108596305829, + "learning_rate": 3.928017184886525e-05, + "loss": 0.5893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2076074481010437, + "step": 720, + "valid_targets_mean": 2658.0, + "valid_targets_min": 1354 + }, + { + "epoch": 1.25, + "grad_norm": 0.7113696936170891, + "learning_rate": 3.925713502261496e-05, + "loss": 0.5793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28718888759613037, + "step": 725, + "valid_targets_mean": 2900.5, + "valid_targets_min": 274 + }, + { + "epoch": 1.2586206896551724, + "grad_norm": 0.6066075532383515, + "learning_rate": 3.9233742324482965e-05, + "loss": 0.5675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.182432159781456, + "step": 730, + "valid_targets_mean": 4086.9, + "valid_targets_min": 599 + }, + { + "epoch": 1.2672413793103448, + "grad_norm": 0.6322789893306922, + "learning_rate": 3.920999418676636e-05, + "loss": 0.5827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.340463250875473, + "step": 735, + "valid_targets_mean": 4901.2, + "valid_targets_min": 1812 + }, + { + "epoch": 1.2758620689655173, + "grad_norm": 0.645804554200195, + "learning_rate": 3.918589104833075e-05, + "loss": 0.5825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30338525772094727, + "step": 740, + "valid_targets_mean": 4204.6, + "valid_targets_min": 1376 + }, + { + "epoch": 1.2844827586206897, + "grad_norm": 0.7050655189214021, + "learning_rate": 3.916143335460218e-05, + "loss": 0.5499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33188706636428833, + "step": 745, + "valid_targets_mean": 3245.9, + "valid_targets_min": 1098 + }, + { + "epoch": 1.293103448275862, + "grad_norm": 0.6445820272212374, + "learning_rate": 3.913662155755885e-05, + "loss": 0.5595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24519510567188263, + "step": 750, + "valid_targets_mean": 3186.8, + "valid_targets_min": 509 + }, + { + "epoch": 1.3017241379310345, + "grad_norm": 0.7975036037110392, + "learning_rate": 3.911145611572282e-05, + "loss": 0.5481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2782052755355835, + "step": 755, + "valid_targets_mean": 2524.6, + "valid_targets_min": 426 + }, + { + "epoch": 1.3103448275862069, + "grad_norm": 0.7281950316071065, + "learning_rate": 3.908593749415148e-05, + "loss": 0.6545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1945173144340515, + "step": 760, + "valid_targets_mean": 2425.2, + "valid_targets_min": 478 + }, + { + "epoch": 1.3189655172413794, + "grad_norm": 0.6366615373153709, + "learning_rate": 3.9060066164428986e-05, + "loss": 0.5524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2992849349975586, + "step": 765, + "valid_targets_mean": 4045.9, + "valid_targets_min": 1069 + }, + { + "epoch": 1.3275862068965516, + "grad_norm": 0.7551529366816231, + "learning_rate": 3.903384260465756e-05, + "loss": 0.58, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27485474944114685, + "step": 770, + "valid_targets_mean": 2762.9, + "valid_targets_min": 377 + }, + { + "epoch": 1.3362068965517242, + "grad_norm": 0.6821396675191562, + "learning_rate": 3.900726729944861e-05, + "loss": 0.5618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30889642238616943, + "step": 775, + "valid_targets_mean": 3370.8, + "valid_targets_min": 438 + }, + { + "epoch": 1.3448275862068966, + "grad_norm": 0.6717238345792034, + "learning_rate": 3.898034073991382e-05, + "loss": 0.5625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41013768315315247, + "step": 780, + "valid_targets_mean": 4358.2, + "valid_targets_min": 1786 + }, + { + "epoch": 1.353448275862069, + "grad_norm": 0.7992752610314047, + "learning_rate": 3.8953063423656055e-05, + "loss": 0.5849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28634825348854065, + "step": 785, + "valid_targets_mean": 2582.8, + "valid_targets_min": 1188 + }, + { + "epoch": 1.3620689655172413, + "grad_norm": 0.6925451862529552, + "learning_rate": 3.892543585476014e-05, + "loss": 0.5691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2935306429862976, + "step": 790, + "valid_targets_mean": 5148.4, + "valid_targets_min": 1771 + }, + { + "epoch": 1.3706896551724137, + "grad_norm": 0.7842287612668596, + "learning_rate": 3.88974585437836e-05, + "loss": 0.5681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19979897141456604, + "step": 795, + "valid_targets_mean": 2904.8, + "valid_targets_min": 547 + }, + { + "epoch": 1.3793103448275863, + "grad_norm": 0.6401163459314333, + "learning_rate": 3.886913200774717e-05, + "loss": 0.5899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28293904662132263, + "step": 800, + "valid_targets_mean": 3463.2, + "valid_targets_min": 1594 + }, + { + "epoch": 1.3879310344827587, + "grad_norm": 0.6807319952123996, + "learning_rate": 3.884045677012528e-05, + "loss": 0.5806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20955102145671844, + "step": 805, + "valid_targets_mean": 2845.8, + "valid_targets_min": 367 + }, + { + "epoch": 1.396551724137931, + "grad_norm": 0.6675497158755671, + "learning_rate": 3.8811433360836364e-05, + "loss": 0.5345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2397671937942505, + "step": 810, + "valid_targets_mean": 3195.6, + "valid_targets_min": 1550 + }, + { + "epoch": 1.4051724137931034, + "grad_norm": 0.742052701853441, + "learning_rate": 3.878206231623306e-05, + "loss": 0.5727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2547149658203125, + "step": 815, + "valid_targets_mean": 2620.4, + "valid_targets_min": 408 + }, + { + "epoch": 1.4137931034482758, + "grad_norm": 0.7070815970934397, + "learning_rate": 3.8752344179092315e-05, + "loss": 0.5722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28830042481422424, + "step": 820, + "valid_targets_mean": 3540.2, + "valid_targets_min": 754 + }, + { + "epoch": 1.4224137931034484, + "grad_norm": 0.6308183596674323, + "learning_rate": 3.8722279498605344e-05, + "loss": 0.5937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23384696245193481, + "step": 825, + "valid_targets_mean": 3138.0, + "valid_targets_min": 682 + }, + { + "epoch": 1.4310344827586206, + "grad_norm": 0.6233028809186425, + "learning_rate": 3.869186883036748e-05, + "loss": 0.5749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2825425863265991, + "step": 830, + "valid_targets_mean": 3813.5, + "valid_targets_min": 1404 + }, + { + "epoch": 1.4396551724137931, + "grad_norm": 0.6315819587869369, + "learning_rate": 3.8661112736367924e-05, + "loss": 0.5418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27093827724456787, + "step": 835, + "valid_targets_mean": 3958.6, + "valid_targets_min": 1813 + }, + { + "epoch": 1.4482758620689655, + "grad_norm": 0.8002481201902519, + "learning_rate": 3.863001178497933e-05, + "loss": 0.5521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3411499261856079, + "step": 840, + "valid_targets_mean": 3449.6, + "valid_targets_min": 1562 + }, + { + "epoch": 1.456896551724138, + "grad_norm": 0.5646014000758687, + "learning_rate": 3.8598566550947316e-05, + "loss": 0.571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.267992228269577, + "step": 845, + "valid_targets_mean": 4902.5, + "valid_targets_min": 1685 + }, + { + "epoch": 1.4655172413793103, + "grad_norm": 0.6276978940722122, + "learning_rate": 3.856677761537986e-05, + "loss": 0.5893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31473422050476074, + "step": 850, + "valid_targets_mean": 4601.8, + "valid_targets_min": 1341 + }, + { + "epoch": 1.4741379310344827, + "grad_norm": 0.7348376830947169, + "learning_rate": 3.853464556573652e-05, + "loss": 0.5466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2999240458011627, + "step": 855, + "valid_targets_mean": 3640.1, + "valid_targets_min": 916 + }, + { + "epoch": 1.4827586206896552, + "grad_norm": 0.8978611329491114, + "learning_rate": 3.850217099581764e-05, + "loss": 0.5616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27713045477867126, + "step": 860, + "valid_targets_mean": 2290.8, + "valid_targets_min": 1201 + }, + { + "epoch": 1.4913793103448276, + "grad_norm": 0.77046321533511, + "learning_rate": 3.8469354505753305e-05, + "loss": 0.5693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24054045975208282, + "step": 865, + "valid_targets_mean": 2118.1, + "valid_targets_min": 938 + }, + { + "epoch": 1.5, + "grad_norm": 0.7222989825813173, + "learning_rate": 3.843619670199229e-05, + "loss": 0.544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27719244360923767, + "step": 870, + "valid_targets_mean": 3316.0, + "valid_targets_min": 300 + }, + { + "epoch": 1.5086206896551724, + "grad_norm": 0.6285334439850424, + "learning_rate": 3.8402698197290865e-05, + "loss": 0.612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40034905076026917, + "step": 875, + "valid_targets_mean": 4616.9, + "valid_targets_min": 279 + }, + { + "epoch": 1.5172413793103448, + "grad_norm": 0.5988467971800734, + "learning_rate": 3.8368859610701443e-05, + "loss": 0.5665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3291173577308655, + "step": 880, + "valid_targets_mean": 5205.2, + "valid_targets_min": 1111 + }, + { + "epoch": 1.5258620689655173, + "grad_norm": 0.6221097934601042, + "learning_rate": 3.833468156756114e-05, + "loss": 0.5594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20184822380542755, + "step": 885, + "valid_targets_mean": 3121.1, + "valid_targets_min": 1996 + }, + { + "epoch": 1.5344827586206895, + "grad_norm": 0.6088881299880774, + "learning_rate": 3.8300164699480246e-05, + "loss": 0.5604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29023054242134094, + "step": 890, + "valid_targets_mean": 4022.2, + "valid_targets_min": 1170 + }, + { + "epoch": 1.543103448275862, + "grad_norm": 0.5851660013826078, + "learning_rate": 3.8265309644330535e-05, + "loss": 0.5475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22081995010375977, + "step": 895, + "valid_targets_mean": 3288.5, + "valid_targets_min": 775 + }, + { + "epoch": 1.5517241379310345, + "grad_norm": 0.5601559917028126, + "learning_rate": 3.823011704623347e-05, + "loss": 0.5356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25190994143486023, + "step": 900, + "valid_targets_mean": 4485.5, + "valid_targets_min": 1709 + }, + { + "epoch": 1.5603448275862069, + "grad_norm": 0.7111127459782094, + "learning_rate": 3.81945875555483e-05, + "loss": 0.6294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37708738446235657, + "step": 905, + "valid_targets_mean": 2716.1, + "valid_targets_min": 754 + }, + { + "epoch": 1.5689655172413794, + "grad_norm": 0.6241213259356158, + "learning_rate": 3.8158721828860094e-05, + "loss": 0.5732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25017252564430237, + "step": 910, + "valid_targets_mean": 2699.0, + "valid_targets_min": 908 + }, + { + "epoch": 1.5775862068965516, + "grad_norm": 1.5933254202486349, + "learning_rate": 3.81225205289675e-05, + "loss": 0.5779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41490495204925537, + "step": 915, + "valid_targets_mean": 4895.0, + "valid_targets_min": 1272 + }, + { + "epoch": 1.5862068965517242, + "grad_norm": 0.5280871052538816, + "learning_rate": 3.808598432487061e-05, + "loss": 0.5613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.259874165058136, + "step": 920, + "valid_targets_mean": 5779.0, + "valid_targets_min": 1994 + }, + { + "epoch": 1.5948275862068966, + "grad_norm": 0.549604024077637, + "learning_rate": 3.8049113891758506e-05, + "loss": 0.5666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.215641051530838, + "step": 925, + "valid_targets_mean": 4589.5, + "valid_targets_min": 856 + }, + { + "epoch": 1.603448275862069, + "grad_norm": 0.764713960290603, + "learning_rate": 3.8011909910996856e-05, + "loss": 0.5534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3771178424358368, + "step": 930, + "valid_targets_mean": 5157.5, + "valid_targets_min": 1677 + }, + { + "epoch": 1.6120689655172413, + "grad_norm": 0.6691714653562654, + "learning_rate": 3.797437307011527e-05, + "loss": 0.5525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23908597230911255, + "step": 935, + "valid_targets_mean": 3168.0, + "valid_targets_min": 1366 + }, + { + "epoch": 1.6206896551724137, + "grad_norm": 0.7097924121846054, + "learning_rate": 3.793650406279463e-05, + "loss": 0.5983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20683707296848297, + "step": 940, + "valid_targets_mean": 2603.0, + "valid_targets_min": 1098 + }, + { + "epoch": 1.6293103448275863, + "grad_norm": 0.6521675166491431, + "learning_rate": 3.789830358885423e-05, + "loss": 0.5752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32541143894195557, + "step": 945, + "valid_targets_mean": 4503.4, + "valid_targets_min": 863 + }, + { + "epoch": 1.6379310344827587, + "grad_norm": 0.6127301186437016, + "learning_rate": 3.7859772354238885e-05, + "loss": 0.5737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24941956996917725, + "step": 950, + "valid_targets_mean": 3997.9, + "valid_targets_min": 1318 + }, + { + "epoch": 1.646551724137931, + "grad_norm": 0.8107486435337788, + "learning_rate": 3.782091107100587e-05, + "loss": 0.5631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38626742362976074, + "step": 955, + "valid_targets_mean": 3198.8, + "valid_targets_min": 1015 + }, + { + "epoch": 1.6551724137931034, + "grad_norm": 0.6709100944181071, + "learning_rate": 3.7781720457311746e-05, + "loss": 0.5458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23467305302619934, + "step": 960, + "valid_targets_mean": 2846.6, + "valid_targets_min": 1638 + }, + { + "epoch": 1.6637931034482758, + "grad_norm": 0.7378895599046713, + "learning_rate": 3.7742201237399105e-05, + "loss": 0.5371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540317177772522, + "step": 965, + "valid_targets_mean": 2416.0, + "valid_targets_min": 752 + }, + { + "epoch": 1.6724137931034484, + "grad_norm": 0.8032206453205488, + "learning_rate": 3.77023541415832e-05, + "loss": 0.5284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24500620365142822, + "step": 970, + "valid_targets_mean": 2129.1, + "valid_targets_min": 338 + }, + { + "epoch": 1.6810344827586206, + "grad_norm": 0.7327397961798686, + "learning_rate": 3.7662179906238405e-05, + "loss": 0.5712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32717061042785645, + "step": 975, + "valid_targets_mean": 3676.8, + "valid_targets_min": 1039 + }, + { + "epoch": 1.6896551724137931, + "grad_norm": 0.6867767089756939, + "learning_rate": 3.762167927378464e-05, + "loss": 0.5624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27892693877220154, + "step": 980, + "valid_targets_mean": 3443.0, + "valid_targets_min": 1785 + }, + { + "epoch": 1.6982758620689655, + "grad_norm": 0.6052211747597286, + "learning_rate": 3.7580852992673656e-05, + "loss": 0.5641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21629683673381805, + "step": 985, + "valid_targets_mean": 3518.9, + "valid_targets_min": 1496 + }, + { + "epoch": 1.706896551724138, + "grad_norm": 0.5922892058837835, + "learning_rate": 3.7539701817375185e-05, + "loss": 0.5866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31934842467308044, + "step": 990, + "valid_targets_mean": 4224.4, + "valid_targets_min": 1416 + }, + { + "epoch": 1.7155172413793105, + "grad_norm": 1.0493692424194028, + "learning_rate": 3.7498226508362996e-05, + "loss": 0.5721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24411314725875854, + "step": 995, + "valid_targets_mean": 3659.2, + "valid_targets_min": 1365 + }, + { + "epoch": 1.7241379310344827, + "grad_norm": 0.6364551146606927, + "learning_rate": 3.7456427832100864e-05, + "loss": 0.5801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3116251826286316, + "step": 1000, + "valid_targets_mean": 3976.9, + "valid_targets_min": 1273 + }, + { + "epoch": 1.7327586206896552, + "grad_norm": 0.6729473015193449, + "learning_rate": 3.7414306561028385e-05, + "loss": 0.5628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3718431890010834, + "step": 1005, + "valid_targets_mean": 3988.6, + "valid_targets_min": 505 + }, + { + "epoch": 1.7413793103448276, + "grad_norm": 0.5651344642180671, + "learning_rate": 3.73718634735467e-05, + "loss": 0.5792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30802202224731445, + "step": 1010, + "valid_targets_mean": 4792.2, + "valid_targets_min": 1894 + }, + { + "epoch": 1.75, + "grad_norm": 0.8160406841638043, + "learning_rate": 3.732909935400412e-05, + "loss": 0.5709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2621057629585266, + "step": 1015, + "valid_targets_mean": 3583.6, + "valid_targets_min": 1468 + }, + { + "epoch": 1.7586206896551724, + "grad_norm": 0.6185222015980905, + "learning_rate": 3.7286014992681645e-05, + "loss": 0.5687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2619808316230774, + "step": 1020, + "valid_targets_mean": 3748.1, + "valid_targets_min": 585 + }, + { + "epoch": 1.7672413793103448, + "grad_norm": 0.640359359009962, + "learning_rate": 3.7242611185778325e-05, + "loss": 0.5421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3694639801979065, + "step": 1025, + "valid_targets_mean": 4001.2, + "valid_targets_min": 449 + }, + { + "epoch": 1.7758620689655173, + "grad_norm": 0.8925661522909636, + "learning_rate": 3.7198888735396574e-05, + "loss": 0.5703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37001776695251465, + "step": 1030, + "valid_targets_mean": 3830.2, + "valid_targets_min": 1482 + }, + { + "epoch": 1.7844827586206895, + "grad_norm": 0.6379804791552203, + "learning_rate": 3.7154848449527334e-05, + "loss": 0.5647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17609938979148865, + "step": 1035, + "valid_targets_mean": 1858.1, + "valid_targets_min": 777 + }, + { + "epoch": 1.793103448275862, + "grad_norm": 0.7359978586481348, + "learning_rate": 3.7110491142035145e-05, + "loss": 0.6291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3263494372367859, + "step": 1040, + "valid_targets_mean": 3026.8, + "valid_targets_min": 782 + }, + { + "epoch": 1.8017241379310345, + "grad_norm": 0.5501378819443856, + "learning_rate": 3.7065817632643115e-05, + "loss": 0.6026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22044426202774048, + "step": 1045, + "valid_targets_mean": 4140.5, + "valid_targets_min": 358 + }, + { + "epoch": 1.8103448275862069, + "grad_norm": 0.5146610646097317, + "learning_rate": 3.702082874691776e-05, + "loss": 0.5362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27138766646385193, + "step": 1050, + "valid_targets_mean": 5316.9, + "valid_targets_min": 1629 + }, + { + "epoch": 1.8189655172413794, + "grad_norm": 0.6383974478414849, + "learning_rate": 3.6975525316253744e-05, + "loss": 0.5958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22577664256095886, + "step": 1055, + "valid_targets_mean": 2754.6, + "valid_targets_min": 1739 + }, + { + "epoch": 1.8275862068965516, + "grad_norm": 0.6577167966442022, + "learning_rate": 3.692990817785853e-05, + "loss": 0.5482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1972791999578476, + "step": 1060, + "valid_targets_mean": 2684.1, + "valid_targets_min": 714 + }, + { + "epoch": 1.8362068965517242, + "grad_norm": 0.762753066575453, + "learning_rate": 3.68839781747369e-05, + "loss": 0.5723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2219727784395218, + "step": 1065, + "valid_targets_mean": 2217.9, + "valid_targets_min": 672 + }, + { + "epoch": 1.8448275862068966, + "grad_norm": 0.5593369703101698, + "learning_rate": 3.683773615567538e-05, + "loss": 0.5535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3015214204788208, + "step": 1070, + "valid_targets_mean": 5070.4, + "valid_targets_min": 1527 + }, + { + "epoch": 1.853448275862069, + "grad_norm": 0.7393362257169361, + "learning_rate": 3.679118297522654e-05, + "loss": 0.5468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.355376273393631, + "step": 1075, + "valid_targets_mean": 3456.8, + "valid_targets_min": 1585 + }, + { + "epoch": 1.8620689655172413, + "grad_norm": 0.5691065100419264, + "learning_rate": 3.674431949369321e-05, + "loss": 0.5659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14589709043502808, + "step": 1080, + "valid_targets_mean": 2719.9, + "valid_targets_min": 1349 + }, + { + "epoch": 1.8706896551724137, + "grad_norm": 0.6450842387622542, + "learning_rate": 3.6697146577112614e-05, + "loss": 0.5337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23650528490543365, + "step": 1085, + "valid_targets_mean": 3103.2, + "valid_targets_min": 1011 + }, + { + "epoch": 1.8793103448275863, + "grad_norm": 0.5385390273419834, + "learning_rate": 3.6649665097240304e-05, + "loss": 0.5297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18768148124217987, + "step": 1090, + "valid_targets_mean": 3738.4, + "valid_targets_min": 946 + }, + { + "epoch": 1.8879310344827587, + "grad_norm": 0.6826042526112265, + "learning_rate": 3.660187593153408e-05, + "loss": 0.5874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2879507541656494, + "step": 1095, + "valid_targets_mean": 3481.0, + "valid_targets_min": 899 + }, + { + "epoch": 1.896551724137931, + "grad_norm": 0.5991426639597468, + "learning_rate": 3.655377996313782e-05, + "loss": 0.5689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3126043379306793, + "step": 1100, + "valid_targets_mean": 3764.6, + "valid_targets_min": 1376 + }, + { + "epoch": 1.9051724137931034, + "grad_norm": 0.7434156428546637, + "learning_rate": 3.6505378080865054e-05, + "loss": 0.5656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29617348313331604, + "step": 1105, + "valid_targets_mean": 3182.0, + "valid_targets_min": 376 + }, + { + "epoch": 1.9137931034482758, + "grad_norm": 0.7376220507013634, + "learning_rate": 3.645667117918265e-05, + "loss": 0.598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2663417458534241, + "step": 1110, + "valid_targets_mean": 2946.8, + "valid_targets_min": 845 + }, + { + "epoch": 1.9224137931034484, + "grad_norm": 0.8093067307919336, + "learning_rate": 3.640766015819423e-05, + "loss": 0.5685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27129626274108887, + "step": 1115, + "valid_targets_mean": 2276.5, + "valid_targets_min": 644 + }, + { + "epoch": 1.9310344827586206, + "grad_norm": 0.5931648105041701, + "learning_rate": 3.6358345923623506e-05, + "loss": 0.5883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597714364528656, + "step": 1120, + "valid_targets_mean": 3566.5, + "valid_targets_min": 360 + }, + { + "epoch": 1.9396551724137931, + "grad_norm": 0.738013686632746, + "learning_rate": 3.630872938679761e-05, + "loss": 0.5421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23417630791664124, + "step": 1125, + "valid_targets_mean": 2241.8, + "valid_targets_min": 578 + }, + { + "epoch": 1.9482758620689655, + "grad_norm": 0.6536051717630208, + "learning_rate": 3.6258811464630215e-05, + "loss": 0.5475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.274051696062088, + "step": 1130, + "valid_targets_mean": 4223.8, + "valid_targets_min": 1954 + }, + { + "epoch": 1.956896551724138, + "grad_norm": 0.6558108915171494, + "learning_rate": 3.620859307960458e-05, + "loss": 0.5642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24242520332336426, + "step": 1135, + "valid_targets_mean": 3583.0, + "valid_targets_min": 573 + }, + { + "epoch": 1.9655172413793105, + "grad_norm": 0.6073167182222721, + "learning_rate": 3.615807515975654e-05, + "loss": 0.5769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3692033290863037, + "step": 1140, + "valid_targets_mean": 4391.9, + "valid_targets_min": 1444 + }, + { + "epoch": 1.9741379310344827, + "grad_norm": 0.6930399535333776, + "learning_rate": 3.6107258638657324e-05, + "loss": 0.6037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3953619599342346, + "step": 1145, + "valid_targets_mean": 3482.9, + "valid_targets_min": 687 + }, + { + "epoch": 1.9827586206896552, + "grad_norm": 0.7208553198716309, + "learning_rate": 3.60561444553963e-05, + "loss": 0.545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.300916850566864, + "step": 1150, + "valid_targets_mean": 2875.0, + "valid_targets_min": 543 + }, + { + "epoch": 1.9913793103448276, + "grad_norm": 0.6244319777954573, + "learning_rate": 3.600473355456366e-05, + "loss": 0.5711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2742713689804077, + "step": 1155, + "valid_targets_mean": 3724.2, + "valid_targets_min": 669 + }, + { + "epoch": 2.0, + "grad_norm": 0.48027221325927555, + "learning_rate": 3.595302688623291e-05, + "loss": 0.5181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2375207096338272, + "step": 1160, + "valid_targets_mean": 5709.9, + "valid_targets_min": 988 + }, + { + "epoch": 2.0086206896551726, + "grad_norm": 0.5856447431662087, + "learning_rate": 3.590102540594337e-05, + "loss": 0.5453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30319711565971375, + "step": 1165, + "valid_targets_mean": 5402.0, + "valid_targets_min": 1635 + }, + { + "epoch": 2.0172413793103448, + "grad_norm": 0.7330528758146855, + "learning_rate": 3.584873007468244e-05, + "loss": 0.5198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27307698130607605, + "step": 1170, + "valid_targets_mean": 2848.5, + "valid_targets_min": 1847 + }, + { + "epoch": 2.0258620689655173, + "grad_norm": 0.7161268600907155, + "learning_rate": 3.5796141858867935e-05, + "loss": 0.5027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34653913974761963, + "step": 1175, + "valid_targets_mean": 4229.5, + "valid_targets_min": 1459 + }, + { + "epoch": 2.0344827586206895, + "grad_norm": 0.5659681020477086, + "learning_rate": 3.5743261730330144e-05, + "loss": 0.525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24164336919784546, + "step": 1180, + "valid_targets_mean": 4535.0, + "valid_targets_min": 1595 + }, + { + "epoch": 2.043103448275862, + "grad_norm": 0.4950635871322631, + "learning_rate": 3.569009066629392e-05, + "loss": 0.4908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23670178651809692, + "step": 1185, + "valid_targets_mean": 6254.8, + "valid_targets_min": 1354 + }, + { + "epoch": 2.0517241379310347, + "grad_norm": 0.6259920795782811, + "learning_rate": 3.56366296493606e-05, + "loss": 0.4903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27048274874687195, + "step": 1190, + "valid_targets_mean": 4885.5, + "valid_targets_min": 746 + }, + { + "epoch": 2.060344827586207, + "grad_norm": 0.6708662248079352, + "learning_rate": 3.558287966748985e-05, + "loss": 0.5362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30487942695617676, + "step": 1195, + "valid_targets_mean": 3506.9, + "valid_targets_min": 699 + }, + { + "epoch": 2.0689655172413794, + "grad_norm": 0.6187149631493275, + "learning_rate": 3.552884171398141e-05, + "loss": 0.5058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3041895925998688, + "step": 1200, + "valid_targets_mean": 4673.4, + "valid_targets_min": 1416 + }, + { + "epoch": 2.0775862068965516, + "grad_norm": 0.6913500596782618, + "learning_rate": 3.547451678745673e-05, + "loss": 0.5435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3575402498245239, + "step": 1205, + "valid_targets_mean": 3261.8, + "valid_targets_min": 585 + }, + { + "epoch": 2.086206896551724, + "grad_norm": 0.5514253157032389, + "learning_rate": 3.541990589184053e-05, + "loss": 0.5061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2519359290599823, + "step": 1210, + "valid_targets_mean": 4331.5, + "valid_targets_min": 1142 + }, + { + "epoch": 2.0948275862068964, + "grad_norm": 0.661119552726637, + "learning_rate": 3.5365010036342245e-05, + "loss": 0.5097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.236103817820549, + "step": 1215, + "valid_targets_mean": 3665.9, + "valid_targets_min": 2115 + }, + { + "epoch": 2.103448275862069, + "grad_norm": 0.8185562556513293, + "learning_rate": 3.530983023543734e-05, + "loss": 0.5356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31141743063926697, + "step": 1220, + "valid_targets_mean": 3175.2, + "valid_targets_min": 553 + }, + { + "epoch": 2.1120689655172415, + "grad_norm": 0.637393506264498, + "learning_rate": 3.525436750884863e-05, + "loss": 0.5785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3377242088317871, + "step": 1225, + "valid_targets_mean": 4718.9, + "valid_targets_min": 1464 + }, + { + "epoch": 2.1206896551724137, + "grad_norm": 0.7000643799771167, + "learning_rate": 3.5198622881527374e-05, + "loss": 0.5361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17588718235492706, + "step": 1230, + "valid_targets_mean": 1794.2, + "valid_targets_min": 634 + }, + { + "epoch": 2.1293103448275863, + "grad_norm": 0.7836403305595876, + "learning_rate": 3.514259738363436e-05, + "loss": 0.5403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22173136472702026, + "step": 1235, + "valid_targets_mean": 2264.4, + "valid_targets_min": 759 + }, + { + "epoch": 2.1379310344827585, + "grad_norm": 0.6896222117398101, + "learning_rate": 3.5086292050520855e-05, + "loss": 0.5224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2105882465839386, + "step": 1240, + "valid_targets_mean": 2616.1, + "valid_targets_min": 829 + }, + { + "epoch": 2.146551724137931, + "grad_norm": 0.5989637638408554, + "learning_rate": 3.502970792270951e-05, + "loss": 0.5352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3733382523059845, + "step": 1245, + "valid_targets_mean": 5749.2, + "valid_targets_min": 943 + }, + { + "epoch": 2.1551724137931036, + "grad_norm": 0.5854317531631298, + "learning_rate": 3.497284604587508e-05, + "loss": 0.5462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21069422364234924, + "step": 1250, + "valid_targets_mean": 3517.9, + "valid_targets_min": 1692 + }, + { + "epoch": 2.163793103448276, + "grad_norm": 0.6059368591518353, + "learning_rate": 3.491570747082512e-05, + "loss": 0.5363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3978424668312073, + "step": 1255, + "valid_targets_mean": 5706.0, + "valid_targets_min": 2893 + }, + { + "epoch": 2.1724137931034484, + "grad_norm": 0.6915074580961807, + "learning_rate": 3.485829325348059e-05, + "loss": 0.5327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25558722019195557, + "step": 1260, + "valid_targets_mean": 3663.6, + "valid_targets_min": 603 + }, + { + "epoch": 2.1810344827586206, + "grad_norm": 0.6260784283173534, + "learning_rate": 3.4800604454856284e-05, + "loss": 0.5269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34905317425727844, + "step": 1265, + "valid_targets_mean": 5479.1, + "valid_targets_min": 1430 + }, + { + "epoch": 2.189655172413793, + "grad_norm": 0.8041527550042902, + "learning_rate": 3.47426421410413e-05, + "loss": 0.5285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2525642216205597, + "step": 1270, + "valid_targets_mean": 1884.0, + "valid_targets_min": 449 + }, + { + "epoch": 2.1982758620689653, + "grad_norm": 0.6165577267512379, + "learning_rate": 3.468440738317926e-05, + "loss": 0.5213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24296146631240845, + "step": 1275, + "valid_targets_mean": 3509.9, + "valid_targets_min": 808 + }, + { + "epoch": 2.206896551724138, + "grad_norm": 0.7388739054714368, + "learning_rate": 3.4625901257448596e-05, + "loss": 0.5153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27573007345199585, + "step": 1280, + "valid_targets_mean": 2770.6, + "valid_targets_min": 1382 + }, + { + "epoch": 2.2155172413793105, + "grad_norm": 0.7213845700851882, + "learning_rate": 3.4567124845042564e-05, + "loss": 0.5097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23074638843536377, + "step": 1285, + "valid_targets_mean": 2694.5, + "valid_targets_min": 590 + }, + { + "epoch": 2.2241379310344827, + "grad_norm": 0.7344427687166307, + "learning_rate": 3.4508079232149354e-05, + "loss": 0.531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18814852833747864, + "step": 1290, + "valid_targets_mean": 2153.9, + "valid_targets_min": 384 + }, + { + "epoch": 2.2327586206896552, + "grad_norm": 0.7193043812909778, + "learning_rate": 3.444876550993198e-05, + "loss": 0.5276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1843530833721161, + "step": 1295, + "valid_targets_mean": 2371.8, + "valid_targets_min": 688 + }, + { + "epoch": 2.2413793103448274, + "grad_norm": 0.6514805960089244, + "learning_rate": 3.4389184774508105e-05, + "loss": 0.5329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27728837728500366, + "step": 1300, + "valid_targets_mean": 3690.1, + "valid_targets_min": 2078 + }, + { + "epoch": 2.25, + "grad_norm": 0.7699933360610283, + "learning_rate": 3.43293381269298e-05, + "loss": 0.5288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1803208887577057, + "step": 1305, + "valid_targets_mean": 1974.6, + "valid_targets_min": 679 + }, + { + "epoch": 2.2586206896551726, + "grad_norm": 0.7015909626972417, + "learning_rate": 3.4269226673163204e-05, + "loss": 0.5561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21470309793949127, + "step": 1310, + "valid_targets_mean": 2602.1, + "valid_targets_min": 520 + }, + { + "epoch": 2.2672413793103448, + "grad_norm": 0.6252001020266619, + "learning_rate": 3.420885152406805e-05, + "loss": 0.5063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16884250938892365, + "step": 1315, + "valid_targets_mean": 2221.1, + "valid_targets_min": 525 + }, + { + "epoch": 2.2758620689655173, + "grad_norm": 0.6660044917747885, + "learning_rate": 3.4148213795377194e-05, + "loss": 0.499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23926129937171936, + "step": 1320, + "valid_targets_mean": 3184.8, + "valid_targets_min": 1308 + }, + { + "epoch": 2.2844827586206895, + "grad_norm": 0.5942881743308523, + "learning_rate": 3.408731460767593e-05, + "loss": 0.5297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2458754926919937, + "step": 1325, + "valid_targets_mean": 3705.9, + "valid_targets_min": 224 + }, + { + "epoch": 2.293103448275862, + "grad_norm": 0.6559455552008154, + "learning_rate": 3.402615508638134e-05, + "loss": 0.5145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22481057047843933, + "step": 1330, + "valid_targets_mean": 3710.4, + "valid_targets_min": 775 + }, + { + "epoch": 2.3017241379310347, + "grad_norm": 0.5791109072786359, + "learning_rate": 3.396473636172146e-05, + "loss": 0.5553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390901982784271, + "step": 1335, + "valid_targets_mean": 5454.2, + "valid_targets_min": 699 + }, + { + "epoch": 2.310344827586207, + "grad_norm": 0.6528879026655229, + "learning_rate": 3.3903059568714406e-05, + "loss": 0.5262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2803526222705841, + "step": 1340, + "valid_targets_mean": 4691.2, + "valid_targets_min": 1937 + }, + { + "epoch": 2.3189655172413794, + "grad_norm": 0.7334531238595307, + "learning_rate": 3.384112584714739e-05, + "loss": 0.5669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25998979806900024, + "step": 1345, + "valid_targets_mean": 2414.1, + "valid_targets_min": 872 + }, + { + "epoch": 2.3275862068965516, + "grad_norm": 0.6706388706657296, + "learning_rate": 3.377893634155568e-05, + "loss": 0.4946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20304179191589355, + "step": 1350, + "valid_targets_mean": 2520.6, + "valid_targets_min": 548 + }, + { + "epoch": 2.336206896551724, + "grad_norm": 0.6070209292783567, + "learning_rate": 3.371649220120143e-05, + "loss": 0.5372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23858051002025604, + "step": 1355, + "valid_targets_mean": 3398.2, + "valid_targets_min": 1239 + }, + { + "epoch": 2.344827586206897, + "grad_norm": 0.7403730502951553, + "learning_rate": 3.365379458005243e-05, + "loss": 0.5054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22786666452884674, + "step": 1360, + "valid_targets_mean": 2695.2, + "valid_targets_min": 743 + }, + { + "epoch": 2.353448275862069, + "grad_norm": 0.6160438267365825, + "learning_rate": 3.35908446367608e-05, + "loss": 0.5121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2835865020751953, + "step": 1365, + "valid_targets_mean": 4868.5, + "valid_targets_min": 2282 + }, + { + "epoch": 2.3620689655172415, + "grad_norm": 0.7046709820942064, + "learning_rate": 3.35276435346416e-05, + "loss": 0.5285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3344838321208954, + "step": 1370, + "valid_targets_mean": 3689.5, + "valid_targets_min": 827 + }, + { + "epoch": 2.3706896551724137, + "grad_norm": 0.6216160994750745, + "learning_rate": 3.346419244165127e-05, + "loss": 0.5405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3214781880378723, + "step": 1375, + "valid_targets_mean": 4100.1, + "valid_targets_min": 1151 + }, + { + "epoch": 2.3793103448275863, + "grad_norm": 0.7168615045877246, + "learning_rate": 3.3400492530366086e-05, + "loss": 0.5204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20914702117443085, + "step": 1380, + "valid_targets_mean": 2650.0, + "valid_targets_min": 1076 + }, + { + "epoch": 2.3879310344827585, + "grad_norm": 0.5642488910557313, + "learning_rate": 3.333654497796051e-05, + "loss": 0.533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19634991884231567, + "step": 1385, + "valid_targets_mean": 4127.8, + "valid_targets_min": 2108 + }, + { + "epoch": 2.396551724137931, + "grad_norm": 0.6683303924854348, + "learning_rate": 3.32723509661854e-05, + "loss": 0.5526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20605990290641785, + "step": 1390, + "valid_targets_mean": 2370.1, + "valid_targets_min": 727 + }, + { + "epoch": 2.405172413793103, + "grad_norm": 0.6217914813865608, + "learning_rate": 3.320791168134617e-05, + "loss": 0.5063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21512621641159058, + "step": 1395, + "valid_targets_mean": 3568.4, + "valid_targets_min": 1538 + }, + { + "epoch": 2.413793103448276, + "grad_norm": 0.7781917849824482, + "learning_rate": 3.31432283142809e-05, + "loss": 0.5619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29964110255241394, + "step": 1400, + "valid_targets_mean": 2767.4, + "valid_targets_min": 1923 + }, + { + "epoch": 2.4224137931034484, + "grad_norm": 0.6605249390307784, + "learning_rate": 3.307830206033831e-05, + "loss": 0.4859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29924869537353516, + "step": 1405, + "valid_targets_mean": 3953.2, + "valid_targets_min": 1292 + }, + { + "epoch": 2.4310344827586206, + "grad_norm": 0.7248729187206819, + "learning_rate": 3.301313411935565e-05, + "loss": 0.5529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2112955003976822, + "step": 1410, + "valid_targets_mean": 3034.6, + "valid_targets_min": 1475 + }, + { + "epoch": 2.439655172413793, + "grad_norm": 0.7278902760761354, + "learning_rate": 3.294772569563656e-05, + "loss": 0.5258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18192905187606812, + "step": 1415, + "valid_targets_mean": 2044.2, + "valid_targets_min": 601 + }, + { + "epoch": 2.4482758620689653, + "grad_norm": 0.64996168644576, + "learning_rate": 3.28820779979288e-05, + "loss": 0.5494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32071739435195923, + "step": 1420, + "valid_targets_mean": 4041.8, + "valid_targets_min": 722 + }, + { + "epoch": 2.456896551724138, + "grad_norm": 0.7663835356371171, + "learning_rate": 3.281619223940192e-05, + "loss": 0.5397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2934093475341797, + "step": 1425, + "valid_targets_mean": 2963.6, + "valid_targets_min": 868 + }, + { + "epoch": 2.4655172413793105, + "grad_norm": 0.6175608448597791, + "learning_rate": 3.2750069637624826e-05, + "loss": 0.5451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3377595543861389, + "step": 1430, + "valid_targets_mean": 4578.6, + "valid_targets_min": 826 + }, + { + "epoch": 2.4741379310344827, + "grad_norm": 0.7053303469776532, + "learning_rate": 3.2683711414543295e-05, + "loss": 0.5272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21549789607524872, + "step": 1435, + "valid_targets_mean": 2838.2, + "valid_targets_min": 1878 + }, + { + "epoch": 2.4827586206896552, + "grad_norm": 0.7720525074341179, + "learning_rate": 3.261711879645737e-05, + "loss": 0.5284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23706737160682678, + "step": 1440, + "valid_targets_mean": 2927.0, + "valid_targets_min": 1586 + }, + { + "epoch": 2.4913793103448274, + "grad_norm": 0.6610418282529897, + "learning_rate": 3.255029301399873e-05, + "loss": 0.5382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.260223925113678, + "step": 1445, + "valid_targets_mean": 3151.2, + "valid_targets_min": 971 + }, + { + "epoch": 2.5, + "grad_norm": 0.6761189439890816, + "learning_rate": 3.248323530210793e-05, + "loss": 0.5382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32373419404029846, + "step": 1450, + "valid_targets_mean": 3626.5, + "valid_targets_min": 1237 + }, + { + "epoch": 2.5086206896551726, + "grad_norm": 0.6059901254993909, + "learning_rate": 3.241594690001157e-05, + "loss": 0.5212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2417319416999817, + "step": 1455, + "valid_targets_mean": 4077.8, + "valid_targets_min": 1227 + }, + { + "epoch": 2.5172413793103448, + "grad_norm": 0.6563136263005875, + "learning_rate": 3.2348429051199424e-05, + "loss": 0.5293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2823067605495453, + "step": 1460, + "valid_targets_mean": 3091.4, + "valid_targets_min": 1724 + }, + { + "epoch": 2.5258620689655173, + "grad_norm": 0.7011191203587458, + "learning_rate": 3.228068300340142e-05, + "loss": 0.4991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21909786760807037, + "step": 1465, + "valid_targets_mean": 2642.6, + "valid_targets_min": 333 + }, + { + "epoch": 2.5344827586206895, + "grad_norm": 0.5963350141222057, + "learning_rate": 3.221271000856462e-05, + "loss": 0.5457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24788910150527954, + "step": 1470, + "valid_targets_mean": 4478.2, + "valid_targets_min": 1019 + }, + { + "epoch": 2.543103448275862, + "grad_norm": 0.593167616912684, + "learning_rate": 3.214451132283006e-05, + "loss": 0.527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1979059875011444, + "step": 1475, + "valid_targets_mean": 3345.1, + "valid_targets_min": 447 + }, + { + "epoch": 2.5517241379310347, + "grad_norm": 0.8516404992092461, + "learning_rate": 3.207608820650955e-05, + "loss": 0.5231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16317282617092133, + "step": 1480, + "valid_targets_mean": 1854.1, + "valid_targets_min": 572 + }, + { + "epoch": 2.560344827586207, + "grad_norm": 0.7011631826201656, + "learning_rate": 3.2007441924062374e-05, + "loss": 0.5337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26514554023742676, + "step": 1485, + "valid_targets_mean": 3083.2, + "valid_targets_min": 1483 + }, + { + "epoch": 2.5689655172413794, + "grad_norm": 0.6304731407800173, + "learning_rate": 3.193857374407192e-05, + "loss": 0.48, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2470451146364212, + "step": 1490, + "valid_targets_mean": 4148.5, + "valid_targets_min": 1957 + }, + { + "epoch": 2.5775862068965516, + "grad_norm": 0.6114733267413521, + "learning_rate": 3.186948493922225e-05, + "loss": 0.547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3491957187652588, + "step": 1495, + "valid_targets_mean": 5381.4, + "valid_targets_min": 1451 + }, + { + "epoch": 2.586206896551724, + "grad_norm": 0.6816944170488414, + "learning_rate": 3.180017678627458e-05, + "loss": 0.5967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2270176112651825, + "step": 1500, + "valid_targets_mean": 3054.8, + "valid_targets_min": 1323 + }, + { + "epoch": 2.594827586206897, + "grad_norm": 0.7494842689929292, + "learning_rate": 3.173065056604366e-05, + "loss": 0.5136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2226772904396057, + "step": 1505, + "valid_targets_mean": 2615.2, + "valid_targets_min": 1722 + }, + { + "epoch": 2.603448275862069, + "grad_norm": 0.6433063056386409, + "learning_rate": 3.166090756337415e-05, + "loss": 0.5177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21712681651115417, + "step": 1510, + "valid_targets_mean": 2954.0, + "valid_targets_min": 1400 + }, + { + "epoch": 2.612068965517241, + "grad_norm": 0.6961184844546833, + "learning_rate": 3.159094906711683e-05, + "loss": 0.5239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3057723343372345, + "step": 1515, + "valid_targets_mean": 3615.5, + "valid_targets_min": 1354 + }, + { + "epoch": 2.6206896551724137, + "grad_norm": 0.545448810661674, + "learning_rate": 3.15207763701048e-05, + "loss": 0.5392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17520517110824585, + "step": 1520, + "valid_targets_mean": 3273.5, + "valid_targets_min": 810 + }, + { + "epoch": 2.6293103448275863, + "grad_norm": 0.7398151880436875, + "learning_rate": 3.14503907691296e-05, + "loss": 0.5192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3340669572353363, + "step": 1525, + "valid_targets_mean": 4266.0, + "valid_targets_min": 1420 + }, + { + "epoch": 2.637931034482759, + "grad_norm": 0.6137448794815791, + "learning_rate": 3.1379793564917235e-05, + "loss": 0.5231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2983524799346924, + "step": 1530, + "valid_targets_mean": 3807.5, + "valid_targets_min": 1186 + }, + { + "epoch": 2.646551724137931, + "grad_norm": 0.6892239692209475, + "learning_rate": 3.130898606210414e-05, + "loss": 0.5334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3036305010318756, + "step": 1535, + "valid_targets_mean": 3669.1, + "valid_targets_min": 1027 + }, + { + "epoch": 2.655172413793103, + "grad_norm": 0.6042307659721222, + "learning_rate": 3.1237969569213056e-05, + "loss": 0.5045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24306747317314148, + "step": 1540, + "valid_targets_mean": 4035.2, + "valid_targets_min": 542 + }, + { + "epoch": 2.663793103448276, + "grad_norm": 0.8334671686011501, + "learning_rate": 3.1166745398628874e-05, + "loss": 0.5805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19789639115333557, + "step": 1545, + "valid_targets_mean": 2123.8, + "valid_targets_min": 1039 + }, + { + "epoch": 2.6724137931034484, + "grad_norm": 0.6708117196991292, + "learning_rate": 3.109531486657437e-05, + "loss": 0.5227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20612336695194244, + "step": 1550, + "valid_targets_mean": 2896.1, + "valid_targets_min": 525 + }, + { + "epoch": 2.6810344827586206, + "grad_norm": 0.648789931136324, + "learning_rate": 3.102367929308586e-05, + "loss": 0.4925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2443523108959198, + "step": 1555, + "valid_targets_mean": 3337.5, + "valid_targets_min": 355 + }, + { + "epoch": 2.689655172413793, + "grad_norm": 0.6629620042366543, + "learning_rate": 3.0951840001988854e-05, + "loss": 0.5205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35953301191329956, + "step": 1560, + "valid_targets_mean": 4085.9, + "valid_targets_min": 1662 + }, + { + "epoch": 2.6982758620689653, + "grad_norm": 0.6425392266887242, + "learning_rate": 3.0879798320873546e-05, + "loss": 0.5215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3335888683795929, + "step": 1565, + "valid_targets_mean": 4582.8, + "valid_targets_min": 305 + }, + { + "epoch": 2.706896551724138, + "grad_norm": 0.6788939491162826, + "learning_rate": 3.0807555581070304e-05, + "loss": 0.5386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21099033951759338, + "step": 1570, + "valid_targets_mean": 2514.8, + "valid_targets_min": 379 + }, + { + "epoch": 2.7155172413793105, + "grad_norm": 0.5663003368589072, + "learning_rate": 3.0735113117625045e-05, + "loss": 0.5191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2197110652923584, + "step": 1575, + "valid_targets_mean": 3289.1, + "valid_targets_min": 1913 + }, + { + "epoch": 2.7241379310344827, + "grad_norm": 0.6638782454802991, + "learning_rate": 3.0662472269274617e-05, + "loss": 0.5152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2275673747062683, + "step": 1580, + "valid_targets_mean": 2688.1, + "valid_targets_min": 808 + }, + { + "epoch": 2.7327586206896552, + "grad_norm": 0.6255538446405725, + "learning_rate": 3.058963437842198e-05, + "loss": 0.5289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22095435857772827, + "step": 1585, + "valid_targets_mean": 3796.5, + "valid_targets_min": 278 + }, + { + "epoch": 2.7413793103448274, + "grad_norm": 0.7320428985273902, + "learning_rate": 3.0516600791111465e-05, + "loss": 0.5589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32439008355140686, + "step": 1590, + "valid_targets_mean": 3684.9, + "valid_targets_min": 1568 + }, + { + "epoch": 2.75, + "grad_norm": 0.6438464021090996, + "learning_rate": 3.0443372857003857e-05, + "loss": 0.54, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23027420043945312, + "step": 1595, + "valid_targets_mean": 3581.9, + "valid_targets_min": 824 + }, + { + "epoch": 2.7586206896551726, + "grad_norm": 0.5545857332698958, + "learning_rate": 3.036995192935149e-05, + "loss": 0.5087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14524194598197937, + "step": 1600, + "valid_targets_mean": 2816.8, + "valid_targets_min": 856 + }, + { + "epoch": 2.7672413793103448, + "grad_norm": 0.5086626682547694, + "learning_rate": 3.029633936497321e-05, + "loss": 0.5171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1898607611656189, + "step": 1605, + "valid_targets_mean": 4273.4, + "valid_targets_min": 1829 + }, + { + "epoch": 2.7758620689655173, + "grad_norm": 0.6651291467249401, + "learning_rate": 3.0222536524229293e-05, + "loss": 0.4846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34460657835006714, + "step": 1610, + "valid_targets_mean": 4023.6, + "valid_targets_min": 422 + }, + { + "epoch": 2.7844827586206895, + "grad_norm": 0.6564925258448345, + "learning_rate": 3.0148544770996343e-05, + "loss": 0.5208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18299517035484314, + "step": 1615, + "valid_targets_mean": 2348.9, + "valid_targets_min": 815 + }, + { + "epoch": 2.793103448275862, + "grad_norm": 0.6070334989595733, + "learning_rate": 3.007436547264207e-05, + "loss": 0.5325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28292137384414673, + "step": 1620, + "valid_targets_mean": 4357.8, + "valid_targets_min": 1053 + }, + { + "epoch": 2.8017241379310347, + "grad_norm": 0.6572055384794816, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.5248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29250192642211914, + "step": 1625, + "valid_targets_mean": 5173.9, + "valid_targets_min": 2061 + }, + { + "epoch": 2.810344827586207, + "grad_norm": 0.6925557917291159, + "learning_rate": 2.9925449727344184e-05, + "loss": 0.5264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30762213468551636, + "step": 1630, + "valid_targets_mean": 3761.1, + "valid_targets_min": 1011 + }, + { + "epoch": 2.8189655172413794, + "grad_norm": 0.6649410248775828, + "learning_rate": 2.985071603236374e-05, + "loss": 0.5271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22346317768096924, + "step": 1635, + "valid_targets_mean": 2541.2, + "valid_targets_min": 464 + }, + { + "epoch": 2.8275862068965516, + "grad_norm": 0.6368963124979056, + "learning_rate": 2.9775800296137474e-05, + "loss": 0.5056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19894252717494965, + "step": 1640, + "valid_targets_mean": 2954.8, + "valid_targets_min": 507 + }, + { + "epoch": 2.836206896551724, + "grad_norm": 0.6563286820648637, + "learning_rate": 2.970070390310828e-05, + "loss": 0.5365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22569561004638672, + "step": 1645, + "valid_targets_mean": 3002.4, + "valid_targets_min": 858 + }, + { + "epoch": 2.844827586206897, + "grad_norm": 0.735343254781571, + "learning_rate": 2.962542824105762e-05, + "loss": 0.5358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2973644733428955, + "step": 1650, + "valid_targets_mean": 4036.9, + "valid_targets_min": 753 + }, + { + "epoch": 2.853448275862069, + "grad_norm": 0.5751232737303497, + "learning_rate": 2.954997470107982e-05, + "loss": 0.4989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16975465416908264, + "step": 1655, + "valid_targets_mean": 2872.4, + "valid_targets_min": 563 + }, + { + "epoch": 2.862068965517241, + "grad_norm": 0.7517885308967944, + "learning_rate": 2.947434467755641e-05, + "loss": 0.5731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31603339314460754, + "step": 1660, + "valid_targets_mean": 2926.6, + "valid_targets_min": 1582 + }, + { + "epoch": 2.8706896551724137, + "grad_norm": 0.6477449192786452, + "learning_rate": 2.9398539568130327e-05, + "loss": 0.5898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31569886207580566, + "step": 1665, + "valid_targets_mean": 2998.9, + "valid_targets_min": 350 + }, + { + "epoch": 2.8793103448275863, + "grad_norm": 0.6412838960401286, + "learning_rate": 2.9322560773680087e-05, + "loss": 0.4843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672387361526489, + "step": 1670, + "valid_targets_mean": 3486.6, + "valid_targets_min": 464 + }, + { + "epoch": 2.887931034482759, + "grad_norm": 0.6584277587208057, + "learning_rate": 2.924640969829393e-05, + "loss": 0.5575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27888116240501404, + "step": 1675, + "valid_targets_mean": 3456.2, + "valid_targets_min": 1280 + }, + { + "epoch": 2.896551724137931, + "grad_norm": 0.6355034785984488, + "learning_rate": 2.9170087749243832e-05, + "loss": 0.5556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2856561541557312, + "step": 1680, + "valid_targets_mean": 4470.8, + "valid_targets_min": 777 + }, + { + "epoch": 2.905172413793103, + "grad_norm": 0.5522129444874521, + "learning_rate": 2.9093596336959513e-05, + "loss": 0.5217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21474653482437134, + "step": 1685, + "valid_targets_mean": 3683.0, + "valid_targets_min": 556 + }, + { + "epoch": 2.913793103448276, + "grad_norm": 0.6079117430065913, + "learning_rate": 2.9016936875002377e-05, + "loss": 0.5326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18614447116851807, + "step": 1690, + "valid_targets_mean": 2769.2, + "valid_targets_min": 1703 + }, + { + "epoch": 2.9224137931034484, + "grad_norm": 0.8154640803310477, + "learning_rate": 2.8940110780039385e-05, + "loss": 0.5368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3220832049846649, + "step": 1695, + "valid_targets_mean": 4347.4, + "valid_targets_min": 455 + }, + { + "epoch": 2.9310344827586206, + "grad_norm": 0.7008249840337234, + "learning_rate": 2.8863119471816878e-05, + "loss": 0.5359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.307187557220459, + "step": 1700, + "valid_targets_mean": 3782.1, + "valid_targets_min": 1743 + }, + { + "epoch": 2.939655172413793, + "grad_norm": 1.8383581207079867, + "learning_rate": 2.878596437313434e-05, + "loss": 0.5455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3547545075416565, + "step": 1705, + "valid_targets_mean": 3102.1, + "valid_targets_min": 1146 + }, + { + "epoch": 2.9482758620689653, + "grad_norm": 0.6890582013953978, + "learning_rate": 2.87086469098181e-05, + "loss": 0.5464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3087565302848816, + "step": 1710, + "valid_targets_mean": 3642.0, + "valid_targets_min": 785 + }, + { + "epoch": 2.956896551724138, + "grad_norm": 0.6014663654543025, + "learning_rate": 2.863116851069499e-05, + "loss": 0.5198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3157251477241516, + "step": 1715, + "valid_targets_mean": 4566.4, + "valid_targets_min": 1703 + }, + { + "epoch": 2.9655172413793105, + "grad_norm": 0.6183718750559749, + "learning_rate": 2.855353060756593e-05, + "loss": 0.5583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3171611428260803, + "step": 1720, + "valid_targets_mean": 4044.4, + "valid_targets_min": 921 + }, + { + "epoch": 2.9741379310344827, + "grad_norm": 0.6770199017058159, + "learning_rate": 2.8475734635179472e-05, + "loss": 0.5231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20549774169921875, + "step": 1725, + "valid_targets_mean": 2707.2, + "valid_targets_min": 786 + }, + { + "epoch": 2.9827586206896552, + "grad_norm": 0.6267781432228309, + "learning_rate": 2.8397782031205295e-05, + "loss": 0.5278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3000187277793884, + "step": 1730, + "valid_targets_mean": 4574.9, + "valid_targets_min": 382 + }, + { + "epoch": 2.9913793103448274, + "grad_norm": 0.6597096565482661, + "learning_rate": 2.8319674236207634e-05, + "loss": 0.492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2720503807067871, + "step": 1735, + "valid_targets_mean": 3237.6, + "valid_targets_min": 315 + }, + { + "epoch": 3.0, + "grad_norm": 0.6370988041102025, + "learning_rate": 2.8241412693618638e-05, + "loss": 0.5817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47008463740348816, + "step": 1740, + "valid_targets_mean": 5323.2, + "valid_targets_min": 791 + }, + { + "epoch": 3.0086206896551726, + "grad_norm": 0.5006687241913529, + "learning_rate": 2.816299884971173e-05, + "loss": 0.5026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24396368861198425, + "step": 1745, + "valid_targets_mean": 5369.8, + "valid_targets_min": 2674 + }, + { + "epoch": 3.0172413793103448, + "grad_norm": 0.5939923129308482, + "learning_rate": 2.8084434153574847e-05, + "loss": 0.5129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22303283214569092, + "step": 1750, + "valid_targets_mean": 3715.1, + "valid_targets_min": 1836 + }, + { + "epoch": 3.0258620689655173, + "grad_norm": 0.7216174616024418, + "learning_rate": 2.8005720057083685e-05, + "loss": 0.5277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28930047154426575, + "step": 1755, + "valid_targets_mean": 3260.1, + "valid_targets_min": 1785 + }, + { + "epoch": 3.0344827586206895, + "grad_norm": 0.7138077000722072, + "learning_rate": 2.792685801487486e-05, + "loss": 0.5313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21426135301589966, + "step": 1760, + "valid_targets_mean": 2808.0, + "valid_targets_min": 1549 + }, + { + "epoch": 3.043103448275862, + "grad_norm": 0.6304834277842427, + "learning_rate": 2.7847849484319008e-05, + "loss": 0.469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.240925133228302, + "step": 1765, + "valid_targets_mean": 4177.4, + "valid_targets_min": 1711 + }, + { + "epoch": 3.0517241379310347, + "grad_norm": 0.7227466624603867, + "learning_rate": 2.7768695925493897e-05, + "loss": 0.5099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26144564151763916, + "step": 1770, + "valid_targets_mean": 3339.5, + "valid_targets_min": 306 + }, + { + "epoch": 3.060344827586207, + "grad_norm": 0.7157335610062132, + "learning_rate": 2.7689398801157393e-05, + "loss": 0.494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26888376474380493, + "step": 1775, + "valid_targets_mean": 3690.0, + "valid_targets_min": 1804 + }, + { + "epoch": 3.0689655172413794, + "grad_norm": 0.5782389133302738, + "learning_rate": 2.7609959576720467e-05, + "loss": 0.4676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19823557138442993, + "step": 1780, + "valid_targets_mean": 3715.9, + "valid_targets_min": 505 + }, + { + "epoch": 3.0775862068965516, + "grad_norm": 0.7773493115682202, + "learning_rate": 2.7530379720220096e-05, + "loss": 0.4946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1992146074771881, + "step": 1785, + "valid_targets_mean": 2262.2, + "valid_targets_min": 663 + }, + { + "epoch": 3.086206896551724, + "grad_norm": 0.741058590026973, + "learning_rate": 2.7450660702292132e-05, + "loss": 0.4946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29747503995895386, + "step": 1790, + "valid_targets_mean": 3664.2, + "valid_targets_min": 754 + }, + { + "epoch": 3.0948275862068964, + "grad_norm": 0.6655355159285354, + "learning_rate": 2.7370803996144143e-05, + "loss": 0.5224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30806177854537964, + "step": 1795, + "valid_targets_mean": 4508.5, + "valid_targets_min": 1059 + }, + { + "epoch": 3.103448275862069, + "grad_norm": 0.7684487595267243, + "learning_rate": 2.7290811077528166e-05, + "loss": 0.5114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24772027134895325, + "step": 1800, + "valid_targets_mean": 2500.2, + "valid_targets_min": 1039 + }, + { + "epoch": 3.1120689655172415, + "grad_norm": 0.5519635037547266, + "learning_rate": 2.7210683424713447e-05, + "loss": 0.5154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18051207065582275, + "step": 1805, + "valid_targets_mean": 4038.2, + "valid_targets_min": 1334 + }, + { + "epoch": 3.1206896551724137, + "grad_norm": 0.7459536824641055, + "learning_rate": 2.7130422518459113e-05, + "loss": 0.4894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261766254901886, + "step": 1810, + "valid_targets_mean": 3044.9, + "valid_targets_min": 1218 + }, + { + "epoch": 3.1293103448275863, + "grad_norm": 0.5779130633158371, + "learning_rate": 2.705002984198684e-05, + "loss": 0.4887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20895667374134064, + "step": 1815, + "valid_targets_mean": 4266.2, + "valid_targets_min": 1510 + }, + { + "epoch": 3.1379310344827585, + "grad_norm": 0.6672732033247093, + "learning_rate": 2.6969506880953384e-05, + "loss": 0.4987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21796973049640656, + "step": 1820, + "valid_targets_mean": 3656.4, + "valid_targets_min": 1407 + }, + { + "epoch": 3.146551724137931, + "grad_norm": 0.6777179149459249, + "learning_rate": 2.688885512342318e-05, + "loss": 0.4933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26772722601890564, + "step": 1825, + "valid_targets_mean": 4573.4, + "valid_targets_min": 570 + }, + { + "epoch": 3.1551724137931036, + "grad_norm": 0.7747681268810057, + "learning_rate": 2.680807605984082e-05, + "loss": 0.503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.354011595249176, + "step": 1830, + "valid_targets_mean": 3500.1, + "valid_targets_min": 877 + }, + { + "epoch": 3.163793103448276, + "grad_norm": 0.6555975732510367, + "learning_rate": 2.6727171183003502e-05, + "loss": 0.4637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22615736722946167, + "step": 1835, + "valid_targets_mean": 3957.8, + "valid_targets_min": 1602 + }, + { + "epoch": 3.1724137931034484, + "grad_norm": 0.6815734089237001, + "learning_rate": 2.6646141988033475e-05, + "loss": 0.5291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22214952111244202, + "step": 1840, + "valid_targets_mean": 2964.4, + "valid_targets_min": 781 + }, + { + "epoch": 3.1810344827586206, + "grad_norm": 0.6380099116738308, + "learning_rate": 2.6564989972350364e-05, + "loss": 0.4798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.229619562625885, + "step": 1845, + "valid_targets_mean": 4561.1, + "valid_targets_min": 1693 + }, + { + "epoch": 3.189655172413793, + "grad_norm": 0.604404711071115, + "learning_rate": 2.6483716635643535e-05, + "loss": 0.5054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33196941018104553, + "step": 1850, + "valid_targets_mean": 5268.6, + "valid_targets_min": 854 + }, + { + "epoch": 3.1982758620689653, + "grad_norm": 0.6726535401662077, + "learning_rate": 2.6402323479844364e-05, + "loss": 0.5099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2879241406917572, + "step": 1855, + "valid_targets_mean": 3671.5, + "valid_targets_min": 674 + }, + { + "epoch": 3.206896551724138, + "grad_norm": 0.6445858526652752, + "learning_rate": 2.6320812009098472e-05, + "loss": 0.4887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1965891420841217, + "step": 1860, + "valid_targets_mean": 4424.1, + "valid_targets_min": 644 + }, + { + "epoch": 3.2155172413793105, + "grad_norm": 0.7543347211262738, + "learning_rate": 2.6239183729737957e-05, + "loss": 0.5032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20700570940971375, + "step": 1865, + "valid_targets_mean": 3542.0, + "valid_targets_min": 1627 + }, + { + "epoch": 3.2241379310344827, + "grad_norm": 0.635457218042516, + "learning_rate": 2.6157440150253535e-05, + "loss": 0.4891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22845527529716492, + "step": 1870, + "valid_targets_mean": 3959.0, + "valid_targets_min": 422 + }, + { + "epoch": 3.2327586206896552, + "grad_norm": 0.7224438201544804, + "learning_rate": 2.6075582781266665e-05, + "loss": 0.4711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23363882303237915, + "step": 1875, + "valid_targets_mean": 3106.1, + "valid_targets_min": 348 + }, + { + "epoch": 3.2413793103448274, + "grad_norm": 0.7469170275128653, + "learning_rate": 2.5993613135501643e-05, + "loss": 0.526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26454028487205505, + "step": 1880, + "valid_targets_mean": 3685.9, + "valid_targets_min": 1384 + }, + { + "epoch": 3.25, + "grad_norm": 0.8467283740580998, + "learning_rate": 2.5911532727757625e-05, + "loss": 0.5166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2143116295337677, + "step": 1885, + "valid_targets_mean": 2019.8, + "valid_targets_min": 594 + }, + { + "epoch": 3.2586206896551726, + "grad_norm": 0.714157813357512, + "learning_rate": 2.582934307488067e-05, + "loss": 0.489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25460219383239746, + "step": 1890, + "valid_targets_mean": 2973.9, + "valid_targets_min": 1539 + }, + { + "epoch": 3.2672413793103448, + "grad_norm": 0.6119264269910555, + "learning_rate": 2.5747045695735674e-05, + "loss": 0.512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3362063765525818, + "step": 1895, + "valid_targets_mean": 5818.9, + "valid_targets_min": 1128 + }, + { + "epoch": 3.2758620689655173, + "grad_norm": 0.6722720740574156, + "learning_rate": 2.5664642111178312e-05, + "loss": 0.4917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25751858949661255, + "step": 1900, + "valid_targets_mean": 3154.4, + "valid_targets_min": 274 + }, + { + "epoch": 3.2844827586206895, + "grad_norm": 0.6899167832115443, + "learning_rate": 2.5582133844026943e-05, + "loss": 0.5095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2338351607322693, + "step": 1905, + "valid_targets_mean": 3443.5, + "valid_targets_min": 815 + }, + { + "epoch": 3.293103448275862, + "grad_norm": 0.6885008624009163, + "learning_rate": 2.5499522419034462e-05, + "loss": 0.4983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2654280662536621, + "step": 1910, + "valid_targets_mean": 3975.5, + "valid_targets_min": 464 + }, + { + "epoch": 3.3017241379310347, + "grad_norm": 0.693540405884434, + "learning_rate": 2.5416809362860107e-05, + "loss": 0.4699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2041737139225006, + "step": 1915, + "valid_targets_mean": 3395.4, + "valid_targets_min": 1520 + }, + { + "epoch": 3.310344827586207, + "grad_norm": 0.6970513975428828, + "learning_rate": 2.5333996204041276e-05, + "loss": 0.4984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3178594708442688, + "step": 1920, + "valid_targets_mean": 3556.8, + "valid_targets_min": 1146 + }, + { + "epoch": 3.3189655172413794, + "grad_norm": 0.6954967899107553, + "learning_rate": 2.5251084472965257e-05, + "loss": 0.4865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17998085916042328, + "step": 1925, + "valid_targets_mean": 2760.6, + "valid_targets_min": 998 + }, + { + "epoch": 3.3275862068965516, + "grad_norm": 0.6046125422661831, + "learning_rate": 2.5168075701840948e-05, + "loss": 0.5053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15390408039093018, + "step": 1930, + "valid_targets_mean": 3165.2, + "valid_targets_min": 1413 + }, + { + "epoch": 3.336206896551724, + "grad_norm": 0.5960966625837508, + "learning_rate": 2.5084971424670568e-05, + "loss": 0.4869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35475805401802063, + "step": 1935, + "valid_targets_mean": 4783.4, + "valid_targets_min": 1444 + }, + { + "epoch": 3.344827586206897, + "grad_norm": 0.7763661645104593, + "learning_rate": 2.500177317722126e-05, + "loss": 0.5187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23105254769325256, + "step": 1940, + "valid_targets_mean": 2399.2, + "valid_targets_min": 916 + }, + { + "epoch": 3.353448275862069, + "grad_norm": 0.6779936365212638, + "learning_rate": 2.4918482496996757e-05, + "loss": 0.4934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19377613067626953, + "step": 1945, + "valid_targets_mean": 2806.4, + "valid_targets_min": 728 + }, + { + "epoch": 3.3620689655172415, + "grad_norm": 0.605208783856609, + "learning_rate": 2.483510092320895e-05, + "loss": 0.475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1888284683227539, + "step": 1950, + "valid_targets_mean": 3616.1, + "valid_targets_min": 1234 + }, + { + "epoch": 3.3706896551724137, + "grad_norm": 0.6789435419175992, + "learning_rate": 2.4751629996749427e-05, + "loss": 0.5092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2218063771724701, + "step": 1955, + "valid_targets_mean": 3372.0, + "valid_targets_min": 655 + }, + { + "epoch": 3.3793103448275863, + "grad_norm": 0.7475419933802377, + "learning_rate": 2.4668071260161022e-05, + "loss": 0.5067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542641758918762, + "step": 1960, + "valid_targets_mean": 2348.0, + "valid_targets_min": 1132 + }, + { + "epoch": 3.3879310344827585, + "grad_norm": 0.7582425874011535, + "learning_rate": 2.4584426257609315e-05, + "loss": 0.5271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34427642822265625, + "step": 1965, + "valid_targets_mean": 3218.0, + "valid_targets_min": 642 + }, + { + "epoch": 3.396551724137931, + "grad_norm": 0.7337643514205625, + "learning_rate": 2.4500696534854062e-05, + "loss": 0.4802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2051694095134735, + "step": 1970, + "valid_targets_mean": 2417.1, + "valid_targets_min": 367 + }, + { + "epoch": 3.405172413793103, + "grad_norm": 0.6417616249105758, + "learning_rate": 2.4416883639220647e-05, + "loss": 0.4867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26043596863746643, + "step": 1975, + "valid_targets_mean": 3970.5, + "valid_targets_min": 1043 + }, + { + "epoch": 3.413793103448276, + "grad_norm": 0.6730591273563613, + "learning_rate": 2.4332989119571506e-05, + "loss": 0.4553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515748143196106, + "step": 1980, + "valid_targets_mean": 3129.4, + "valid_targets_min": 840 + }, + { + "epoch": 3.4224137931034484, + "grad_norm": 0.6291444527805404, + "learning_rate": 2.4249014526277473e-05, + "loss": 0.5206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3734513521194458, + "step": 1985, + "valid_targets_mean": 4917.1, + "valid_targets_min": 1643 + }, + { + "epoch": 3.4310344827586206, + "grad_norm": 0.6094709452717426, + "learning_rate": 2.416496141118915e-05, + "loss": 0.4911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2604672312736511, + "step": 1990, + "valid_targets_mean": 4694.5, + "valid_targets_min": 1146 + }, + { + "epoch": 3.439655172413793, + "grad_norm": 0.9224302977758335, + "learning_rate": 2.4080831327608224e-05, + "loss": 0.4891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28474128246307373, + "step": 1995, + "valid_targets_mean": 2746.2, + "valid_targets_min": 645 + }, + { + "epoch": 3.4482758620689653, + "grad_norm": 0.6913692015061919, + "learning_rate": 2.3996625830258742e-05, + "loss": 0.4678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24615857005119324, + "step": 2000, + "valid_targets_mean": 3448.2, + "valid_targets_min": 1630 + }, + { + "epoch": 3.456896551724138, + "grad_norm": 0.7680712640173568, + "learning_rate": 2.3912346475258424e-05, + "loss": 0.4757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20221805572509766, + "step": 2005, + "valid_targets_mean": 2574.9, + "valid_targets_min": 565 + }, + { + "epoch": 3.4655172413793105, + "grad_norm": 0.635108180966016, + "learning_rate": 2.3827994820089856e-05, + "loss": 0.5233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32651764154434204, + "step": 2010, + "valid_targets_mean": 4662.1, + "valid_targets_min": 1373 + }, + { + "epoch": 3.4741379310344827, + "grad_norm": 0.740260789953024, + "learning_rate": 2.3743572423571752e-05, + "loss": 0.5045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.287661612033844, + "step": 2015, + "valid_targets_mean": 3843.0, + "valid_targets_min": 1769 + }, + { + "epoch": 3.4827586206896552, + "grad_norm": 0.7038105239975521, + "learning_rate": 2.365908084583011e-05, + "loss": 0.4831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31129515171051025, + "step": 2020, + "valid_targets_mean": 4296.4, + "valid_targets_min": 1580 + }, + { + "epoch": 3.4913793103448274, + "grad_norm": 0.6372501954816124, + "learning_rate": 2.3574521648269406e-05, + "loss": 0.4977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1579384207725525, + "step": 2025, + "valid_targets_mean": 2607.8, + "valid_targets_min": 1195 + }, + { + "epoch": 3.5, + "grad_norm": 0.618822719912611, + "learning_rate": 2.3489896393543717e-05, + "loss": 0.5425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24836230278015137, + "step": 2030, + "valid_targets_mean": 3427.4, + "valid_targets_min": 1491 + }, + { + "epoch": 3.5086206896551726, + "grad_norm": 0.6939708126805756, + "learning_rate": 2.340520664552788e-05, + "loss": 0.4841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540481150150299, + "step": 2035, + "valid_targets_mean": 2188.2, + "valid_targets_min": 853 + }, + { + "epoch": 3.5172413793103448, + "grad_norm": 0.6632351331782165, + "learning_rate": 2.3320453969288553e-05, + "loss": 0.5268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2977678179740906, + "step": 2040, + "valid_targets_mean": 4092.8, + "valid_targets_min": 1544 + }, + { + "epoch": 3.5258620689655173, + "grad_norm": 0.7073877534358624, + "learning_rate": 2.32356399310553e-05, + "loss": 0.4762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23784738779067993, + "step": 2045, + "valid_targets_mean": 2905.9, + "valid_targets_min": 1383 + }, + { + "epoch": 3.5344827586206895, + "grad_norm": 0.6296400085161705, + "learning_rate": 2.3150766098191667e-05, + "loss": 0.5148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32230597734451294, + "step": 2050, + "valid_targets_mean": 5635.5, + "valid_targets_min": 1954 + }, + { + "epoch": 3.543103448275862, + "grad_norm": 0.6033249092992661, + "learning_rate": 2.3065834039166212e-05, + "loss": 0.4714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18579596281051636, + "step": 2055, + "valid_targets_mean": 3727.8, + "valid_targets_min": 1319 + }, + { + "epoch": 3.5517241379310347, + "grad_norm": 0.6645454654059848, + "learning_rate": 2.2980845323523487e-05, + "loss": 0.5103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31462427973747253, + "step": 2060, + "valid_targets_mean": 4143.9, + "valid_targets_min": 1388 + }, + { + "epoch": 3.560344827586207, + "grad_norm": 0.7143907035845465, + "learning_rate": 2.2895801521855096e-05, + "loss": 0.4634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23505531251430511, + "step": 2065, + "valid_targets_mean": 3194.8, + "valid_targets_min": 767 + }, + { + "epoch": 3.5689655172413794, + "grad_norm": 0.6500578607981675, + "learning_rate": 2.2810704205770587e-05, + "loss": 0.5166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32650500535964966, + "step": 2070, + "valid_targets_mean": 4477.2, + "valid_targets_min": 492 + }, + { + "epoch": 3.5775862068965516, + "grad_norm": 0.7229860852185133, + "learning_rate": 2.2725554947868495e-05, + "loss": 0.5121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2526986002922058, + "step": 2075, + "valid_targets_mean": 3047.9, + "valid_targets_min": 1887 + }, + { + "epoch": 3.586206896551724, + "grad_norm": 0.7627338670987832, + "learning_rate": 2.2640355321707218e-05, + "loss": 0.4954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.332661509513855, + "step": 2080, + "valid_targets_mean": 3330.1, + "valid_targets_min": 786 + }, + { + "epoch": 3.594827586206897, + "grad_norm": 0.6869482734837352, + "learning_rate": 2.2555106901775955e-05, + "loss": 0.5149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26278042793273926, + "step": 2085, + "valid_targets_mean": 3234.6, + "valid_targets_min": 1015 + }, + { + "epoch": 3.603448275862069, + "grad_norm": 0.718849337496441, + "learning_rate": 2.246981126346564e-05, + "loss": 0.488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774282157421112, + "step": 2090, + "valid_targets_mean": 2628.5, + "valid_targets_min": 449 + }, + { + "epoch": 3.612068965517241, + "grad_norm": 0.6200784407794563, + "learning_rate": 2.238446998303977e-05, + "loss": 0.5258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27215301990509033, + "step": 2095, + "valid_targets_mean": 3960.6, + "valid_targets_min": 1863 + }, + { + "epoch": 3.6206896551724137, + "grad_norm": 0.7332072518118599, + "learning_rate": 2.2299084637605343e-05, + "loss": 0.481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048092931509018, + "step": 2100, + "valid_targets_mean": 2481.8, + "valid_targets_min": 728 + }, + { + "epoch": 3.6293103448275863, + "grad_norm": 0.7172691824388733, + "learning_rate": 2.221365680508364e-05, + "loss": 0.4647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23591715097427368, + "step": 2105, + "valid_targets_mean": 2745.5, + "valid_targets_min": 837 + }, + { + "epoch": 3.637931034482759, + "grad_norm": 0.7339486648404044, + "learning_rate": 2.2128188064181143e-05, + "loss": 0.4893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19370971620082855, + "step": 2110, + "valid_targets_mean": 3371.1, + "valid_targets_min": 1676 + }, + { + "epoch": 3.646551724137931, + "grad_norm": 0.6893113432210646, + "learning_rate": 2.2042679994360296e-05, + "loss": 0.4478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32415205240249634, + "step": 2115, + "valid_targets_mean": 3909.2, + "valid_targets_min": 1739 + }, + { + "epoch": 3.655172413793103, + "grad_norm": 0.7281686902973696, + "learning_rate": 2.195713417581033e-05, + "loss": 0.4897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24168765544891357, + "step": 2120, + "valid_targets_mean": 4198.9, + "valid_targets_min": 1165 + }, + { + "epoch": 3.663793103448276, + "grad_norm": 0.6199759210387455, + "learning_rate": 2.1871552189418113e-05, + "loss": 0.4874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26401397585868835, + "step": 2125, + "valid_targets_mean": 4291.9, + "valid_targets_min": 2534 + }, + { + "epoch": 3.6724137931034484, + "grad_norm": 0.8452521663580951, + "learning_rate": 2.1785935616738855e-05, + "loss": 0.4733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19978295266628265, + "step": 2130, + "valid_targets_mean": 2012.5, + "valid_targets_min": 603 + }, + { + "epoch": 3.6810344827586206, + "grad_norm": 0.5985029280720291, + "learning_rate": 2.170028603996695e-05, + "loss": 0.4938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24260295927524567, + "step": 2135, + "valid_targets_mean": 5161.2, + "valid_targets_min": 2410 + }, + { + "epoch": 3.689655172413793, + "grad_norm": 0.648017450021513, + "learning_rate": 2.161460504190668e-05, + "loss": 0.4949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2743348777294159, + "step": 2140, + "valid_targets_mean": 4066.6, + "valid_targets_min": 2103 + }, + { + "epoch": 3.6982758620689653, + "grad_norm": 0.5452730933756482, + "learning_rate": 2.1528894205943017e-05, + "loss": 0.5007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28508615493774414, + "step": 2145, + "valid_targets_mean": 5361.5, + "valid_targets_min": 1637 + }, + { + "epoch": 3.706896551724138, + "grad_norm": 0.6408652868463778, + "learning_rate": 2.1443155116012328e-05, + "loss": 0.5788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25458598136901855, + "step": 2150, + "valid_targets_mean": 3966.1, + "valid_targets_min": 2205 + }, + { + "epoch": 3.7155172413793105, + "grad_norm": 0.6158141746313681, + "learning_rate": 2.1357389356573098e-05, + "loss": 0.4627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16511517763137817, + "step": 2155, + "valid_targets_mean": 2890.8, + "valid_targets_min": 1528 + }, + { + "epoch": 3.7241379310344827, + "grad_norm": 0.6024470063685289, + "learning_rate": 2.1271598512576705e-05, + "loss": 0.482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34891843795776367, + "step": 2160, + "valid_targets_mean": 5724.5, + "valid_targets_min": 2872 + }, + { + "epoch": 3.7327586206896552, + "grad_norm": 0.6510061779600028, + "learning_rate": 2.1185784169438047e-05, + "loss": 0.4756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25346675515174866, + "step": 2165, + "valid_targets_mean": 3543.5, + "valid_targets_min": 1662 + }, + { + "epoch": 3.7413793103448274, + "grad_norm": 0.6673234066879691, + "learning_rate": 2.1099947913006303e-05, + "loss": 0.4898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3024991452693939, + "step": 2170, + "valid_targets_mean": 4123.0, + "valid_targets_min": 862 + }, + { + "epoch": 3.75, + "grad_norm": 0.713682007673058, + "learning_rate": 2.1014091329535618e-05, + "loss": 0.5008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28074443340301514, + "step": 2175, + "valid_targets_mean": 3506.8, + "valid_targets_min": 1235 + }, + { + "epoch": 3.7586206896551726, + "grad_norm": 0.6602595641335219, + "learning_rate": 2.0928216005655762e-05, + "loss": 0.4734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2678144574165344, + "step": 2180, + "valid_targets_mean": 3774.0, + "valid_targets_min": 858 + }, + { + "epoch": 3.7672413793103448, + "grad_norm": 0.6043947084246732, + "learning_rate": 2.084232352834285e-05, + "loss": 0.4949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.237286776304245, + "step": 2185, + "valid_targets_mean": 4557.8, + "valid_targets_min": 1117 + }, + { + "epoch": 3.7758620689655173, + "grad_norm": 0.7429037899492916, + "learning_rate": 2.0756415484889975e-05, + "loss": 0.5154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23691049218177795, + "step": 2190, + "valid_targets_mean": 2794.6, + "valid_targets_min": 831 + }, + { + "epoch": 3.7844827586206895, + "grad_norm": 0.6505918685977937, + "learning_rate": 2.0670493462877897e-05, + "loss": 0.4931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2736482620239258, + "step": 2195, + "valid_targets_mean": 3816.5, + "valid_targets_min": 1517 + }, + { + "epoch": 3.793103448275862, + "grad_norm": 0.6440464847958695, + "learning_rate": 2.0584559050145706e-05, + "loss": 0.4699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20258024334907532, + "step": 2200, + "valid_targets_mean": 2998.4, + "valid_targets_min": 355 + }, + { + "epoch": 3.8017241379310347, + "grad_norm": 0.6236608397078807, + "learning_rate": 2.0498613834761462e-05, + "loss": 0.5048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3027684688568115, + "step": 2205, + "valid_targets_mean": 3835.9, + "valid_targets_min": 1036 + }, + { + "epoch": 3.810344827586207, + "grad_norm": 0.6808291010026204, + "learning_rate": 2.0412659404992862e-05, + "loss": 0.4713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17417572438716888, + "step": 2210, + "valid_targets_mean": 2092.0, + "valid_targets_min": 960 + }, + { + "epoch": 3.8189655172413794, + "grad_norm": 0.7056179085462992, + "learning_rate": 2.0326697349277893e-05, + "loss": 0.5039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2949260473251343, + "step": 2215, + "valid_targets_mean": 3186.6, + "valid_targets_min": 1249 + }, + { + "epoch": 3.8275862068965516, + "grad_norm": 0.651368163793838, + "learning_rate": 2.024072925619546e-05, + "loss": 0.4871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24330437183380127, + "step": 2220, + "valid_targets_mean": 3891.6, + "valid_targets_min": 1039 + }, + { + "epoch": 3.836206896551724, + "grad_norm": 0.7410681072909164, + "learning_rate": 2.0154756714436043e-05, + "loss": 0.4915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2660757899284363, + "step": 2225, + "valid_targets_mean": 3078.1, + "valid_targets_min": 1328 + }, + { + "epoch": 3.844827586206897, + "grad_norm": 0.7170639487897909, + "learning_rate": 2.006878131277233e-05, + "loss": 0.4991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18728289008140564, + "step": 2230, + "valid_targets_mean": 2276.6, + "valid_targets_min": 999 + }, + { + "epoch": 3.853448275862069, + "grad_norm": 0.6547821115723866, + "learning_rate": 1.9982804640029864e-05, + "loss": 0.4948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17102737724781036, + "step": 2235, + "valid_targets_mean": 2813.9, + "valid_targets_min": 468 + }, + { + "epoch": 3.862068965517241, + "grad_norm": 0.8628243763616128, + "learning_rate": 1.989682828505767e-05, + "loss": 0.5214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25767794251441956, + "step": 2240, + "valid_targets_mean": 2402.8, + "valid_targets_min": 1147 + }, + { + "epoch": 3.8706896551724137, + "grad_norm": 0.6296546190582638, + "learning_rate": 1.9810853836698913e-05, + "loss": 0.4841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32976585626602173, + "step": 2245, + "valid_targets_mean": 4604.4, + "valid_targets_min": 1119 + }, + { + "epoch": 3.8793103448275863, + "grad_norm": 0.5633331197906519, + "learning_rate": 1.972488288376151e-05, + "loss": 0.5211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2464699000120163, + "step": 2250, + "valid_targets_mean": 4274.8, + "valid_targets_min": 734 + }, + { + "epoch": 3.887931034482759, + "grad_norm": 0.5908706096835258, + "learning_rate": 1.963891701498879e-05, + "loss": 0.5147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24134039878845215, + "step": 2255, + "valid_targets_mean": 4620.5, + "valid_targets_min": 1978 + }, + { + "epoch": 3.896551724137931, + "grad_norm": 0.6552321287100807, + "learning_rate": 1.955295781903014e-05, + "loss": 0.4751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21362680196762085, + "step": 2260, + "valid_targets_mean": 3177.8, + "valid_targets_min": 248 + }, + { + "epoch": 3.905172413793103, + "grad_norm": 0.6877670110487286, + "learning_rate": 1.9467006884411605e-05, + "loss": 0.475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27820611000061035, + "step": 2265, + "valid_targets_mean": 3651.4, + "valid_targets_min": 844 + }, + { + "epoch": 3.913793103448276, + "grad_norm": 0.7938588084907477, + "learning_rate": 1.9381065799506583e-05, + "loss": 0.4931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19728420674800873, + "step": 2270, + "valid_targets_mean": 3676.1, + "valid_targets_min": 1032 + }, + { + "epoch": 3.9224137931034484, + "grad_norm": 0.6484184754715151, + "learning_rate": 1.929513615250643e-05, + "loss": 0.4796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2930006980895996, + "step": 2275, + "valid_targets_mean": 3896.8, + "valid_targets_min": 560 + }, + { + "epoch": 3.9310344827586206, + "grad_norm": 0.7110467870098871, + "learning_rate": 1.9209219531391155e-05, + "loss": 0.5081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18151012063026428, + "step": 2280, + "valid_targets_mean": 2306.0, + "valid_targets_min": 1641 + }, + { + "epoch": 3.939655172413793, + "grad_norm": 0.6470127936020821, + "learning_rate": 1.9123317523900015e-05, + "loss": 0.5244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20861583948135376, + "step": 2285, + "valid_targets_mean": 2988.9, + "valid_targets_min": 927 + }, + { + "epoch": 3.9482758620689653, + "grad_norm": 0.709579187138778, + "learning_rate": 1.9037431717502253e-05, + "loss": 0.5476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3209431767463684, + "step": 2290, + "valid_targets_mean": 4697.9, + "valid_targets_min": 1726 + }, + { + "epoch": 3.956896551724138, + "grad_norm": 0.6240167958052882, + "learning_rate": 1.8951563699367673e-05, + "loss": 0.5076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25870320200920105, + "step": 2295, + "valid_targets_mean": 4539.9, + "valid_targets_min": 727 + }, + { + "epoch": 3.9655172413793105, + "grad_norm": 0.6988806889464962, + "learning_rate": 1.886571505633737e-05, + "loss": 0.4913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23148533701896667, + "step": 2300, + "valid_targets_mean": 3224.5, + "valid_targets_min": 295 + }, + { + "epoch": 3.9741379310344827, + "grad_norm": 0.7110833448191198, + "learning_rate": 1.8779887374894384e-05, + "loss": 0.4968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3375265896320343, + "step": 2305, + "valid_targets_mean": 3697.4, + "valid_targets_min": 333 + }, + { + "epoch": 3.9827586206896552, + "grad_norm": 0.7142197646340462, + "learning_rate": 1.8694082241134385e-05, + "loss": 0.4925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31442028284072876, + "step": 2310, + "valid_targets_mean": 4799.8, + "valid_targets_min": 1452 + }, + { + "epoch": 3.9913793103448274, + "grad_norm": 0.649630590659874, + "learning_rate": 1.8608301240736378e-05, + "loss": 0.484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37650713324546814, + "step": 2315, + "valid_targets_mean": 4964.6, + "valid_targets_min": 1840 + }, + { + "epoch": 4.0, + "grad_norm": 0.6968453928792222, + "learning_rate": 1.852254595893335e-05, + "loss": 0.5214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32762742042541504, + "step": 2320, + "valid_targets_mean": 3752.6, + "valid_targets_min": 930 + }, + { + "epoch": 4.008620689655173, + "grad_norm": 0.7291286990476985, + "learning_rate": 1.8436817980483035e-05, + "loss": 0.4788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3126080632209778, + "step": 2325, + "valid_targets_mean": 3247.2, + "valid_targets_min": 1659 + }, + { + "epoch": 4.017241379310345, + "grad_norm": 0.7057735186382891, + "learning_rate": 1.835111888963859e-05, + "loss": 0.4538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2033781111240387, + "step": 2330, + "valid_targets_mean": 2532.8, + "valid_targets_min": 937 + }, + { + "epoch": 4.025862068965517, + "grad_norm": 0.6541556624040162, + "learning_rate": 1.8265450270119335e-05, + "loss": 0.4746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3108016550540924, + "step": 2335, + "valid_targets_mean": 4034.9, + "valid_targets_min": 1548 + }, + { + "epoch": 4.0344827586206895, + "grad_norm": 0.6445427177162534, + "learning_rate": 1.8179813705081468e-05, + "loss": 0.4941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12971869111061096, + "step": 2340, + "valid_targets_mean": 2487.0, + "valid_targets_min": 627 + }, + { + "epoch": 4.043103448275862, + "grad_norm": 0.7042487745553745, + "learning_rate": 1.8094210777088833e-05, + "loss": 0.505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2815548777580261, + "step": 2345, + "valid_targets_mean": 4024.0, + "valid_targets_min": 2054 + }, + { + "epoch": 4.051724137931035, + "grad_norm": 0.6020174178834994, + "learning_rate": 1.800864306808367e-05, + "loss": 0.4802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21823859214782715, + "step": 2350, + "valid_targets_mean": 4244.8, + "valid_targets_min": 1761 + }, + { + "epoch": 4.060344827586207, + "grad_norm": 0.6238354016392303, + "learning_rate": 1.7923112159357344e-05, + "loss": 0.4504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2578839659690857, + "step": 2355, + "valid_targets_mean": 4613.9, + "valid_targets_min": 907 + }, + { + "epoch": 4.068965517241379, + "grad_norm": 0.7187931969464216, + "learning_rate": 1.783761963152117e-05, + "loss": 0.4586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20894671976566315, + "step": 2360, + "valid_targets_mean": 3244.1, + "valid_targets_min": 1825 + }, + { + "epoch": 4.077586206896552, + "grad_norm": 0.6994976820358275, + "learning_rate": 1.7752167064477173e-05, + "loss": 0.4528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23919735848903656, + "step": 2365, + "valid_targets_mean": 3916.8, + "valid_targets_min": 1761 + }, + { + "epoch": 4.086206896551724, + "grad_norm": 0.5850465646262993, + "learning_rate": 1.7666756037388923e-05, + "loss": 0.4525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1654837727546692, + "step": 2370, + "valid_targets_mean": 3629.4, + "valid_targets_min": 447 + }, + { + "epoch": 4.094827586206897, + "grad_norm": 0.7177392536005954, + "learning_rate": 1.7581388128652315e-05, + "loss": 0.4606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21581028401851654, + "step": 2375, + "valid_targets_mean": 3316.8, + "valid_targets_min": 1135 + }, + { + "epoch": 4.103448275862069, + "grad_norm": 0.5731165600563709, + "learning_rate": 1.7496064915866414e-05, + "loss": 0.509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3835183382034302, + "step": 2380, + "valid_targets_mean": 6331.9, + "valid_targets_min": 437 + }, + { + "epoch": 4.112068965517241, + "grad_norm": 0.7698613386016276, + "learning_rate": 1.7410787975804314e-05, + "loss": 0.5003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34291595220565796, + "step": 2385, + "valid_targets_mean": 2412.0, + "valid_targets_min": 447 + }, + { + "epoch": 4.120689655172414, + "grad_norm": 0.781265529624121, + "learning_rate": 1.732555888438398e-05, + "loss": 0.4557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2012426108121872, + "step": 2390, + "valid_targets_mean": 2611.0, + "valid_targets_min": 663 + }, + { + "epoch": 4.129310344827586, + "grad_norm": 0.7510692376144248, + "learning_rate": 1.7240379216639136e-05, + "loss": 0.4786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21286329627037048, + "step": 2395, + "valid_targets_mean": 2835.6, + "valid_targets_min": 1229 + }, + { + "epoch": 4.137931034482759, + "grad_norm": 0.7969516781579605, + "learning_rate": 1.7155250546690173e-05, + "loss": 0.4848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15521761775016785, + "step": 2400, + "valid_targets_mean": 2185.4, + "valid_targets_min": 781 + }, + { + "epoch": 4.146551724137931, + "grad_norm": 0.7440613379174275, + "learning_rate": 1.707017444771502e-05, + "loss": 0.4606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2337280809879303, + "step": 2405, + "valid_targets_mean": 2963.6, + "valid_targets_min": 500 + }, + { + "epoch": 4.155172413793103, + "grad_norm": 0.606456216137015, + "learning_rate": 1.6985152491920103e-05, + "loss": 0.4822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23560550808906555, + "step": 2410, + "valid_targets_mean": 4323.4, + "valid_targets_min": 1376 + }, + { + "epoch": 4.163793103448276, + "grad_norm": 0.7113548099234891, + "learning_rate": 1.690018625051128e-05, + "loss": 0.4629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24127575755119324, + "step": 2415, + "valid_targets_mean": 3512.8, + "valid_targets_min": 546 + }, + { + "epoch": 4.172413793103448, + "grad_norm": 0.7335560913283429, + "learning_rate": 1.681527729366481e-05, + "loss": 0.5255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.227250874042511, + "step": 2420, + "valid_targets_mean": 3176.8, + "valid_targets_min": 1879 + }, + { + "epoch": 4.181034482758621, + "grad_norm": 0.6575969011970396, + "learning_rate": 1.673042719049834e-05, + "loss": 0.4494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23015271127223969, + "step": 2425, + "valid_targets_mean": 3656.1, + "valid_targets_min": 1887 + }, + { + "epoch": 4.189655172413793, + "grad_norm": 0.8112593380488805, + "learning_rate": 1.664563750904188e-05, + "loss": 0.4945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2517668902873993, + "step": 2430, + "valid_targets_mean": 3671.8, + "valid_targets_min": 1683 + }, + { + "epoch": 4.198275862068965, + "grad_norm": 0.6753752628064544, + "learning_rate": 1.656090981620888e-05, + "loss": 0.4388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25802406668663025, + "step": 2435, + "valid_targets_mean": 3962.8, + "valid_targets_min": 1314 + }, + { + "epoch": 4.206896551724138, + "grad_norm": 0.7788552162943313, + "learning_rate": 1.64762456777672e-05, + "loss": 0.4838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3255932629108429, + "step": 2440, + "valid_targets_mean": 3356.9, + "valid_targets_min": 727 + }, + { + "epoch": 4.2155172413793105, + "grad_norm": 0.8393009126080171, + "learning_rate": 1.6391646658310242e-05, + "loss": 0.4838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2931306064128876, + "step": 2445, + "valid_targets_mean": 2868.1, + "valid_targets_min": 1304 + }, + { + "epoch": 4.224137931034483, + "grad_norm": 0.571986417301678, + "learning_rate": 1.6307114321227996e-05, + "loss": 0.4586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17009538412094116, + "step": 2450, + "valid_targets_mean": 4011.8, + "valid_targets_min": 1762 + }, + { + "epoch": 4.232758620689655, + "grad_norm": 0.7702861431778673, + "learning_rate": 1.622265022867818e-05, + "loss": 0.4839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.259504109621048, + "step": 2455, + "valid_targets_mean": 3427.9, + "valid_targets_min": 1832 + }, + { + "epoch": 4.241379310344827, + "grad_norm": 0.5842087243862168, + "learning_rate": 1.6138255941557336e-05, + "loss": 0.4806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24924680590629578, + "step": 2460, + "valid_targets_mean": 5294.4, + "valid_targets_min": 2306 + }, + { + "epoch": 4.25, + "grad_norm": 0.612414463039784, + "learning_rate": 1.6053933019472003e-05, + "loss": 0.5153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31655654311180115, + "step": 2465, + "valid_targets_mean": 4829.4, + "valid_targets_min": 1225 + }, + { + "epoch": 4.258620689655173, + "grad_norm": 0.6508360003084009, + "learning_rate": 1.5969683020709902e-05, + "loss": 0.4653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17931914329528809, + "step": 2470, + "valid_targets_mean": 3591.4, + "valid_targets_min": 1719 + }, + { + "epoch": 4.267241379310345, + "grad_norm": 0.704264317057782, + "learning_rate": 1.5885507502211108e-05, + "loss": 0.4618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20509269833564758, + "step": 2475, + "valid_targets_mean": 3139.9, + "valid_targets_min": 1111 + }, + { + "epoch": 4.275862068965517, + "grad_norm": 0.6722079858073913, + "learning_rate": 1.5801408019539345e-05, + "loss": 0.4737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22740936279296875, + "step": 2480, + "valid_targets_mean": 3398.4, + "valid_targets_min": 1330 + }, + { + "epoch": 4.2844827586206895, + "grad_norm": 0.8060161994387373, + "learning_rate": 1.5717386126853156e-05, + "loss": 0.4649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24399414658546448, + "step": 2485, + "valid_targets_mean": 3238.5, + "valid_targets_min": 764 + }, + { + "epoch": 4.293103448275862, + "grad_norm": 0.6985011079923761, + "learning_rate": 1.5633443376877236e-05, + "loss": 0.4582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16938087344169617, + "step": 2490, + "valid_targets_mean": 2844.6, + "valid_targets_min": 758 + }, + { + "epoch": 4.301724137931035, + "grad_norm": 0.7439431964854526, + "learning_rate": 1.5549581320873715e-05, + "loss": 0.4437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2602732479572296, + "step": 2495, + "valid_targets_mean": 3250.0, + "valid_targets_min": 703 + }, + { + "epoch": 4.310344827586207, + "grad_norm": 0.6282633270553464, + "learning_rate": 1.546580150861351e-05, + "loss": 0.4574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25479474663734436, + "step": 2500, + "valid_targets_mean": 4628.0, + "valid_targets_min": 1693 + }, + { + "epoch": 4.318965517241379, + "grad_norm": 0.8246874265708705, + "learning_rate": 1.5382105488347654e-05, + "loss": 0.4666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22745488584041595, + "step": 2505, + "valid_targets_mean": 1733.8, + "valid_targets_min": 619 + }, + { + "epoch": 4.327586206896552, + "grad_norm": 0.7846829027462743, + "learning_rate": 1.5298494806778733e-05, + "loss": 0.4657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22082726657390594, + "step": 2510, + "valid_targets_mean": 3654.4, + "valid_targets_min": 1465 + }, + { + "epoch": 4.336206896551724, + "grad_norm": 0.6296582314989281, + "learning_rate": 1.5214971009032251e-05, + "loss": 0.4521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15524858236312866, + "step": 2515, + "valid_targets_mean": 2274.6, + "valid_targets_min": 1343 + }, + { + "epoch": 4.344827586206897, + "grad_norm": 0.7560024210938446, + "learning_rate": 1.51315356386281e-05, + "loss": 0.4347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1853024959564209, + "step": 2520, + "valid_targets_mean": 2265.1, + "valid_targets_min": 770 + }, + { + "epoch": 4.353448275862069, + "grad_norm": 0.7486129314963273, + "learning_rate": 1.5048190237452052e-05, + "loss": 0.46, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26965516805648804, + "step": 2525, + "valid_targets_mean": 3411.0, + "valid_targets_min": 770 + }, + { + "epoch": 4.362068965517241, + "grad_norm": 0.6807770197203692, + "learning_rate": 1.4964936345727217e-05, + "loss": 0.4841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3339846432209015, + "step": 2530, + "valid_targets_mean": 4331.9, + "valid_targets_min": 1783 + }, + { + "epoch": 4.370689655172414, + "grad_norm": 0.6803166630329365, + "learning_rate": 1.4881775501985645e-05, + "loss": 0.4546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28594276309013367, + "step": 2535, + "valid_targets_mean": 4092.5, + "valid_targets_min": 1539 + }, + { + "epoch": 4.379310344827586, + "grad_norm": 0.5757965352729172, + "learning_rate": 1.4798709243039842e-05, + "loss": 0.4644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1932697892189026, + "step": 2540, + "valid_targets_mean": 5489.0, + "valid_targets_min": 1267 + }, + { + "epoch": 4.387931034482759, + "grad_norm": 0.5942994221260147, + "learning_rate": 1.4715739103954375e-05, + "loss": 0.4508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21481472253799438, + "step": 2545, + "valid_targets_mean": 5184.6, + "valid_targets_min": 1271 + }, + { + "epoch": 4.396551724137931, + "grad_norm": 0.6178769092364461, + "learning_rate": 1.4632866618017543e-05, + "loss": 0.5295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18912258744239807, + "step": 2550, + "valid_targets_mean": 3871.4, + "valid_targets_min": 1208 + }, + { + "epoch": 4.405172413793103, + "grad_norm": 0.8134877119006606, + "learning_rate": 1.4550093316712987e-05, + "loss": 0.469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20429262518882751, + "step": 2555, + "valid_targets_mean": 2489.2, + "valid_targets_min": 507 + }, + { + "epoch": 4.413793103448276, + "grad_norm": 0.5891826115646406, + "learning_rate": 1.4467420729691433e-05, + "loss": 0.4544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17067211866378784, + "step": 2560, + "valid_targets_mean": 3092.2, + "valid_targets_min": 906 + }, + { + "epoch": 4.422413793103448, + "grad_norm": 0.9293318381152055, + "learning_rate": 1.4384850384742412e-05, + "loss": 0.489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2481684684753418, + "step": 2565, + "valid_targets_mean": 2614.8, + "valid_targets_min": 1022 + }, + { + "epoch": 4.431034482758621, + "grad_norm": 0.6806302768850274, + "learning_rate": 1.4302383807766003e-05, + "loss": 0.4664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2130582630634308, + "step": 2570, + "valid_targets_mean": 3661.5, + "valid_targets_min": 494 + }, + { + "epoch": 4.439655172413793, + "grad_norm": 0.697118422325438, + "learning_rate": 1.4220022522744667e-05, + "loss": 0.477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19902461767196655, + "step": 2575, + "valid_targets_mean": 1975.0, + "valid_targets_min": 930 + }, + { + "epoch": 4.448275862068965, + "grad_norm": 0.7070799859650805, + "learning_rate": 1.4137768051715059e-05, + "loss": 0.484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23186877369880676, + "step": 2580, + "valid_targets_mean": 2672.6, + "valid_targets_min": 853 + }, + { + "epoch": 4.456896551724138, + "grad_norm": 0.575984761040613, + "learning_rate": 1.4055621914739915e-05, + "loss": 0.4524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1701734960079193, + "step": 2585, + "valid_targets_mean": 4939.8, + "valid_targets_min": 839 + }, + { + "epoch": 4.4655172413793105, + "grad_norm": 0.7282253361677753, + "learning_rate": 1.3973585629879973e-05, + "loss": 0.5063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23230789601802826, + "step": 2590, + "valid_targets_mean": 2770.1, + "valid_targets_min": 338 + }, + { + "epoch": 4.474137931034483, + "grad_norm": 0.6912665051596748, + "learning_rate": 1.3891660713165873e-05, + "loss": 0.4468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2850716710090637, + "step": 2595, + "valid_targets_mean": 4440.0, + "valid_targets_min": 2110 + }, + { + "epoch": 4.482758620689655, + "grad_norm": 0.7127927872180074, + "learning_rate": 1.3809848678570204e-05, + "loss": 0.5011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266356647014618, + "step": 2600, + "valid_targets_mean": 3722.6, + "valid_targets_min": 476 + }, + { + "epoch": 4.491379310344827, + "grad_norm": 0.6644441685191089, + "learning_rate": 1.3728151037979468e-05, + "loss": 0.4326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1618672013282776, + "step": 2605, + "valid_targets_mean": 2797.1, + "valid_targets_min": 1354 + }, + { + "epoch": 4.5, + "grad_norm": 0.5986312130885189, + "learning_rate": 1.3646569301166177e-05, + "loss": 0.5185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28881144523620605, + "step": 2610, + "valid_targets_mean": 5293.5, + "valid_targets_min": 1501 + }, + { + "epoch": 4.508620689655173, + "grad_norm": 0.6110587897031703, + "learning_rate": 1.3565104975760936e-05, + "loss": 0.4994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2550974488258362, + "step": 2615, + "valid_targets_mean": 3494.4, + "valid_targets_min": 1619 + }, + { + "epoch": 4.517241379310345, + "grad_norm": 2.095551452059934, + "learning_rate": 1.34837595672246e-05, + "loss": 0.45, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19936276972293854, + "step": 2620, + "valid_targets_mean": 3422.5, + "valid_targets_min": 976 + }, + { + "epoch": 4.525862068965517, + "grad_norm": 0.8689738516206781, + "learning_rate": 1.3402534578820428e-05, + "loss": 0.4567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20421892404556274, + "step": 2625, + "valid_targets_mean": 2142.2, + "valid_targets_min": 869 + }, + { + "epoch": 4.5344827586206895, + "grad_norm": 0.7587423675506256, + "learning_rate": 1.3321431511586308e-05, + "loss": 0.4748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26311057806015015, + "step": 2630, + "valid_targets_mean": 3068.2, + "valid_targets_min": 836 + }, + { + "epoch": 4.543103448275862, + "grad_norm": 0.5919863204622956, + "learning_rate": 1.3240451864307048e-05, + "loss": 0.4819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40301793813705444, + "step": 2635, + "valid_targets_mean": 6527.0, + "valid_targets_min": 1882 + }, + { + "epoch": 4.551724137931035, + "grad_norm": 0.7394470646381965, + "learning_rate": 1.3159597133486628e-05, + "loss": 0.4704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3109036386013031, + "step": 2640, + "valid_targets_mean": 4317.9, + "valid_targets_min": 332 + }, + { + "epoch": 4.560344827586206, + "grad_norm": 0.581128140903931, + "learning_rate": 1.3078868813320594e-05, + "loss": 0.4648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20483621954917908, + "step": 2645, + "valid_targets_mean": 4570.8, + "valid_targets_min": 1850 + }, + { + "epoch": 4.568965517241379, + "grad_norm": 0.6712161614418943, + "learning_rate": 1.2998268395668412e-05, + "loss": 0.4727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14631853997707367, + "step": 2650, + "valid_targets_mean": 2733.5, + "valid_targets_min": 998 + }, + { + "epoch": 4.577586206896552, + "grad_norm": 0.6468590508784885, + "learning_rate": 1.2917797370025908e-05, + "loss": 0.4805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17968174815177917, + "step": 2655, + "valid_targets_mean": 4648.6, + "valid_targets_min": 1836 + }, + { + "epoch": 4.586206896551724, + "grad_norm": 0.6418907774726199, + "learning_rate": 1.2837457223497754e-05, + "loss": 0.4638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17467010021209717, + "step": 2660, + "valid_targets_mean": 3575.1, + "valid_targets_min": 1494 + }, + { + "epoch": 4.594827586206897, + "grad_norm": 0.6893197458456521, + "learning_rate": 1.2757249440769957e-05, + "loss": 0.4872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20580238103866577, + "step": 2665, + "valid_targets_mean": 3706.9, + "valid_targets_min": 916 + }, + { + "epoch": 4.603448275862069, + "grad_norm": 0.7362191975720149, + "learning_rate": 1.2677175504082452e-05, + "loss": 0.4534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3365599513053894, + "step": 2670, + "valid_targets_mean": 4807.8, + "valid_targets_min": 365 + }, + { + "epoch": 4.612068965517241, + "grad_norm": 0.6649827629129201, + "learning_rate": 1.2597236893201712e-05, + "loss": 0.4974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2436644583940506, + "step": 2675, + "valid_targets_mean": 3814.6, + "valid_targets_min": 686 + }, + { + "epoch": 4.620689655172414, + "grad_norm": 0.5922553527915343, + "learning_rate": 1.2517435085393373e-05, + "loss": 0.437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17249786853790283, + "step": 2680, + "valid_targets_mean": 3553.8, + "valid_targets_min": 1067 + }, + { + "epoch": 4.629310344827586, + "grad_norm": 0.795297876094166, + "learning_rate": 1.2437771555394944e-05, + "loss": 0.4819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24163809418678284, + "step": 2685, + "valid_targets_mean": 2616.9, + "valid_targets_min": 1042 + }, + { + "epoch": 4.637931034482759, + "grad_norm": 0.7232979961406071, + "learning_rate": 1.2358247775388578e-05, + "loss": 0.4728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24019086360931396, + "step": 2690, + "valid_targets_mean": 3697.1, + "valid_targets_min": 2034 + }, + { + "epoch": 4.646551724137931, + "grad_norm": 0.7096407435915755, + "learning_rate": 1.227886521497383e-05, + "loss": 0.4479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24640309810638428, + "step": 2695, + "valid_targets_mean": 3368.4, + "valid_targets_min": 298 + }, + { + "epoch": 4.655172413793103, + "grad_norm": 0.7014687573513457, + "learning_rate": 1.2199625341140533e-05, + "loss": 0.485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2635158896446228, + "step": 2700, + "valid_targets_mean": 3276.6, + "valid_targets_min": 1973 + }, + { + "epoch": 4.663793103448276, + "grad_norm": 0.6546970206016567, + "learning_rate": 1.2120529618241665e-05, + "loss": 0.4535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31338346004486084, + "step": 2705, + "valid_targets_mean": 6266.4, + "valid_targets_min": 1717 + }, + { + "epoch": 4.672413793103448, + "grad_norm": 0.6793637217901053, + "learning_rate": 1.2041579507966288e-05, + "loss": 0.4778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24181483685970306, + "step": 2710, + "valid_targets_mean": 3137.2, + "valid_targets_min": 1817 + }, + { + "epoch": 4.681034482758621, + "grad_norm": 0.6821886337532992, + "learning_rate": 1.1962776469312556e-05, + "loss": 0.4735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2773562967777252, + "step": 2715, + "valid_targets_mean": 4600.2, + "valid_targets_min": 1971 + }, + { + "epoch": 4.689655172413794, + "grad_norm": 0.74251794560561, + "learning_rate": 1.1884121958560721e-05, + "loss": 0.5042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2979833781719208, + "step": 2720, + "valid_targets_mean": 3467.6, + "valid_targets_min": 299 + }, + { + "epoch": 4.698275862068965, + "grad_norm": 0.7380207470077343, + "learning_rate": 1.1805617429246254e-05, + "loss": 0.4522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24474026262760162, + "step": 2725, + "valid_targets_mean": 3314.1, + "valid_targets_min": 1689 + }, + { + "epoch": 4.706896551724138, + "grad_norm": 0.6441690010094415, + "learning_rate": 1.1727264332132978e-05, + "loss": 0.4806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11414848268032074, + "step": 2730, + "valid_targets_mean": 1974.6, + "valid_targets_min": 455 + }, + { + "epoch": 4.7155172413793105, + "grad_norm": 0.6516620909898261, + "learning_rate": 1.1649064115186216e-05, + "loss": 0.4883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3251435160636902, + "step": 2735, + "valid_targets_mean": 4367.8, + "valid_targets_min": 1572 + }, + { + "epoch": 4.724137931034483, + "grad_norm": 0.8113136652791311, + "learning_rate": 1.1571018223546095e-05, + "loss": 0.4889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2595599293708801, + "step": 2740, + "valid_targets_mean": 2774.2, + "valid_targets_min": 1540 + }, + { + "epoch": 4.732758620689655, + "grad_norm": 0.6827835506352863, + "learning_rate": 1.1493128099500806e-05, + "loss": 0.4525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19236114621162415, + "step": 2745, + "valid_targets_mean": 3065.4, + "valid_targets_min": 872 + }, + { + "epoch": 4.741379310344827, + "grad_norm": 0.6266580967892691, + "learning_rate": 1.1415395182459925e-05, + "loss": 0.4738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2641719579696655, + "step": 2750, + "valid_targets_mean": 4104.9, + "valid_targets_min": 606 + }, + { + "epoch": 4.75, + "grad_norm": 0.6257424293723131, + "learning_rate": 1.1337820908927891e-05, + "loss": 0.5079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2416507452726364, + "step": 2755, + "valid_targets_mean": 3923.8, + "valid_targets_min": 696 + }, + { + "epoch": 4.758620689655173, + "grad_norm": 0.805541412851283, + "learning_rate": 1.126040671247738e-05, + "loss": 0.458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27597856521606445, + "step": 2760, + "valid_targets_mean": 2572.5, + "valid_targets_min": 616 + }, + { + "epoch": 4.767241379310345, + "grad_norm": 0.8497370331969035, + "learning_rate": 1.1183154023722839e-05, + "loss": 0.4596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24853654205799103, + "step": 2765, + "valid_targets_mean": 3065.8, + "valid_targets_min": 503 + }, + { + "epoch": 4.775862068965517, + "grad_norm": 0.7473644020752989, + "learning_rate": 1.1106064270294068e-05, + "loss": 0.4822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2988268733024597, + "step": 2770, + "valid_targets_mean": 3608.1, + "valid_targets_min": 422 + }, + { + "epoch": 4.7844827586206895, + "grad_norm": 0.8095234401769938, + "learning_rate": 1.1029138876809818e-05, + "loss": 0.4887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27407869696617126, + "step": 2775, + "valid_targets_mean": 2853.9, + "valid_targets_min": 1394 + }, + { + "epoch": 4.793103448275862, + "grad_norm": 0.6752834632880041, + "learning_rate": 1.0952379264851464e-05, + "loss": 0.5027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26760053634643555, + "step": 2780, + "valid_targets_mean": 4063.4, + "valid_targets_min": 637 + }, + { + "epoch": 4.801724137931035, + "grad_norm": 0.544555675768033, + "learning_rate": 1.087578685293674e-05, + "loss": 0.4918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2731075584888458, + "step": 2785, + "valid_targets_mean": 5893.2, + "valid_targets_min": 579 + }, + { + "epoch": 4.810344827586206, + "grad_norm": 0.7388130196480253, + "learning_rate": 1.0799363056493529e-05, + "loss": 0.475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2854580879211426, + "step": 2790, + "valid_targets_mean": 3466.8, + "valid_targets_min": 1664 + }, + { + "epoch": 4.818965517241379, + "grad_norm": 0.6272646450147065, + "learning_rate": 1.0723109287833697e-05, + "loss": 0.4575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2139543890953064, + "step": 2795, + "valid_targets_mean": 4144.4, + "valid_targets_min": 2050 + }, + { + "epoch": 4.827586206896552, + "grad_norm": 0.7794735480091342, + "learning_rate": 1.0647026956126979e-05, + "loss": 0.4628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20534175634384155, + "step": 2800, + "valid_targets_mean": 2368.4, + "valid_targets_min": 464 + }, + { + "epoch": 4.836206896551724, + "grad_norm": 0.6678653155160964, + "learning_rate": 1.0571117467374972e-05, + "loss": 0.4629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15956327319145203, + "step": 2805, + "valid_targets_mean": 2978.4, + "valid_targets_min": 729 + }, + { + "epoch": 4.844827586206897, + "grad_norm": 0.5596544836969336, + "learning_rate": 1.0495382224385154e-05, + "loss": 0.5028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28414013981819153, + "step": 2810, + "valid_targets_mean": 5631.0, + "valid_targets_min": 1747 + }, + { + "epoch": 4.853448275862069, + "grad_norm": 0.6433212251402481, + "learning_rate": 1.0419822626744894e-05, + "loss": 0.4865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21995951235294342, + "step": 2815, + "valid_targets_mean": 3540.8, + "valid_targets_min": 1831 + }, + { + "epoch": 4.862068965517241, + "grad_norm": 0.625422523116903, + "learning_rate": 1.0344440070795671e-05, + "loss": 0.47, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672995328903198, + "step": 2820, + "valid_targets_mean": 4453.2, + "valid_targets_min": 1359 + }, + { + "epoch": 4.870689655172414, + "grad_norm": 0.5794212310915364, + "learning_rate": 1.0269235949607223e-05, + "loss": 0.4825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19607986509799957, + "step": 2825, + "valid_targets_mean": 5040.0, + "valid_targets_min": 2426 + }, + { + "epoch": 4.879310344827586, + "grad_norm": 0.9159213416745643, + "learning_rate": 1.019421165295182e-05, + "loss": 0.4986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16384699940681458, + "step": 2830, + "valid_targets_mean": 2185.4, + "valid_targets_min": 498 + }, + { + "epoch": 4.887931034482759, + "grad_norm": 0.7706544148632299, + "learning_rate": 1.0119368567278545e-05, + "loss": 0.4774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26055097579956055, + "step": 2835, + "valid_targets_mean": 3275.6, + "valid_targets_min": 538 + }, + { + "epoch": 4.896551724137931, + "grad_norm": 0.5879710351602947, + "learning_rate": 1.0044708075687746e-05, + "loss": 0.4681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16971173882484436, + "step": 2840, + "valid_targets_mean": 3531.1, + "valid_targets_min": 598 + }, + { + "epoch": 4.905172413793103, + "grad_norm": 0.5934834566194066, + "learning_rate": 9.97023155790541e-06, + "loss": 0.5148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.217238187789917, + "step": 2845, + "valid_targets_mean": 3915.4, + "valid_targets_min": 978 + }, + { + "epoch": 4.913793103448276, + "grad_norm": 0.8337575764419852, + "learning_rate": 9.895940390257675e-06, + "loss": 0.4738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27576619386672974, + "step": 2850, + "valid_targets_mean": 2578.9, + "valid_targets_min": 1535 + }, + { + "epoch": 4.922413793103448, + "grad_norm": 0.588290288880693, + "learning_rate": 9.821835945645426e-06, + "loss": 0.4361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17642898857593536, + "step": 2855, + "valid_targets_mean": 3636.9, + "valid_targets_min": 990 + }, + { + "epoch": 4.931034482758621, + "grad_norm": 0.7140384238875621, + "learning_rate": 9.747919593518897e-06, + "loss": 0.4873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606832981109619, + "step": 2860, + "valid_targets_mean": 3753.2, + "valid_targets_min": 930 + }, + { + "epoch": 4.939655172413794, + "grad_norm": 0.6297493613480442, + "learning_rate": 9.674192699852397e-06, + "loss": 0.5279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20206616818904877, + "step": 2865, + "valid_targets_mean": 3818.6, + "valid_targets_min": 2442 + }, + { + "epoch": 4.948275862068965, + "grad_norm": 0.6437450236124188, + "learning_rate": 9.600656627119e-06, + "loss": 0.4486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28231048583984375, + "step": 2870, + "valid_targets_mean": 5143.2, + "valid_targets_min": 1743 + }, + { + "epoch": 4.956896551724138, + "grad_norm": 0.7231210514400496, + "learning_rate": 9.52731273426544e-06, + "loss": 0.4665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20072084665298462, + "step": 2875, + "valid_targets_mean": 2665.2, + "valid_targets_min": 1783 + }, + { + "epoch": 4.9655172413793105, + "grad_norm": 0.7200627267648373, + "learning_rate": 9.454162376686959e-06, + "loss": 0.4669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21553944051265717, + "step": 2880, + "valid_targets_mean": 3167.2, + "valid_targets_min": 1382 + }, + { + "epoch": 4.974137931034483, + "grad_norm": 0.7048399069586939, + "learning_rate": 9.381206906202268e-06, + "loss": 0.5094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3543662428855896, + "step": 2885, + "valid_targets_mean": 4687.0, + "valid_targets_min": 1536 + }, + { + "epoch": 4.982758620689655, + "grad_norm": 0.5646407366469018, + "learning_rate": 9.308447671028546e-06, + "loss": 0.5034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24721992015838623, + "step": 2890, + "valid_targets_mean": 5792.9, + "valid_targets_min": 2082 + }, + { + "epoch": 4.991379310344827, + "grad_norm": 0.8018492929350751, + "learning_rate": 9.235886015756579e-06, + "loss": 0.4382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2554394602775574, + "step": 2895, + "valid_targets_mean": 3645.0, + "valid_targets_min": 864 + }, + { + "epoch": 5.0, + "grad_norm": 0.7478721695038638, + "learning_rate": 9.163523281325855e-06, + "loss": 0.497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2047863006591797, + "step": 2900, + "valid_targets_mean": 2364.6, + "valid_targets_min": 1117 + }, + { + "epoch": 5.008620689655173, + "grad_norm": 0.6224544654750317, + "learning_rate": 9.09136080499979e-06, + "loss": 0.4708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23398515582084656, + "step": 2905, + "valid_targets_mean": 3525.2, + "valid_targets_min": 1480 + }, + { + "epoch": 5.017241379310345, + "grad_norm": 0.6724030509673462, + "learning_rate": 9.019399920341056e-06, + "loss": 0.4449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19100379943847656, + "step": 2910, + "valid_targets_mean": 3215.8, + "valid_targets_min": 433 + }, + { + "epoch": 5.025862068965517, + "grad_norm": 0.7583214816177382, + "learning_rate": 8.947641957186901e-06, + "loss": 0.4483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19197000563144684, + "step": 2915, + "valid_targets_mean": 2711.9, + "valid_targets_min": 804 + }, + { + "epoch": 5.0344827586206895, + "grad_norm": 1.654079400156267, + "learning_rate": 8.876088241624581e-06, + "loss": 0.4418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17325645685195923, + "step": 2920, + "valid_targets_mean": 3065.5, + "valid_targets_min": 359 + }, + { + "epoch": 5.043103448275862, + "grad_norm": 0.5891519901032664, + "learning_rate": 8.804740095966854e-06, + "loss": 0.4805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2441479116678238, + "step": 2925, + "valid_targets_mean": 5050.4, + "valid_targets_min": 1819 + }, + { + "epoch": 5.051724137931035, + "grad_norm": 0.8278995394082316, + "learning_rate": 8.733598838727559e-06, + "loss": 0.4434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18562006950378418, + "step": 2930, + "valid_targets_mean": 2430.1, + "valid_targets_min": 1811 + }, + { + "epoch": 5.060344827586207, + "grad_norm": 0.8698667716108178, + "learning_rate": 8.662665784597229e-06, + "loss": 0.4666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3086687922477722, + "step": 2935, + "valid_targets_mean": 4216.1, + "valid_targets_min": 892 + }, + { + "epoch": 5.068965517241379, + "grad_norm": 0.7016494507760576, + "learning_rate": 8.591942244418787e-06, + "loss": 0.4642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21846792101860046, + "step": 2940, + "valid_targets_mean": 3485.5, + "valid_targets_min": 1529 + }, + { + "epoch": 5.077586206896552, + "grad_norm": 0.6630898429377045, + "learning_rate": 8.521429525163353e-06, + "loss": 0.4532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2878398001194, + "step": 2945, + "valid_targets_mean": 4316.1, + "valid_targets_min": 625 + }, + { + "epoch": 5.086206896551724, + "grad_norm": 0.761735138224444, + "learning_rate": 8.451128929906103e-06, + "loss": 0.4497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1884111762046814, + "step": 2950, + "valid_targets_mean": 2733.1, + "valid_targets_min": 1546 + }, + { + "epoch": 5.094827586206897, + "grad_norm": 0.8339171233338399, + "learning_rate": 8.381041757802104e-06, + "loss": 0.4466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26359814405441284, + "step": 2955, + "valid_targets_mean": 2911.4, + "valid_targets_min": 937 + }, + { + "epoch": 5.103448275862069, + "grad_norm": 0.6877757737180021, + "learning_rate": 8.311169304062408e-06, + "loss": 0.4559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2356172502040863, + "step": 2960, + "valid_targets_mean": 3584.0, + "valid_targets_min": 1213 + }, + { + "epoch": 5.112068965517241, + "grad_norm": 0.8051485741023968, + "learning_rate": 8.24151285993005e-06, + "loss": 0.403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16779354214668274, + "step": 2965, + "valid_targets_mean": 2010.1, + "valid_targets_min": 1005 + }, + { + "epoch": 5.120689655172414, + "grad_norm": 0.6175388631001866, + "learning_rate": 8.172073712656217e-06, + "loss": 0.4595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15900662541389465, + "step": 2970, + "valid_targets_mean": 3262.8, + "valid_targets_min": 1201 + }, + { + "epoch": 5.129310344827586, + "grad_norm": 0.8481695814337025, + "learning_rate": 8.102853145476443e-06, + "loss": 0.4956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27707868814468384, + "step": 2975, + "valid_targets_mean": 6042.9, + "valid_targets_min": 1837 + }, + { + "epoch": 5.137931034482759, + "grad_norm": 0.7790714744312073, + "learning_rate": 8.033852437586909e-06, + "loss": 0.4999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4171217978000641, + "step": 2980, + "valid_targets_mean": 4342.9, + "valid_targets_min": 1199 + }, + { + "epoch": 5.146551724137931, + "grad_norm": 0.7171849773714023, + "learning_rate": 7.965072864120795e-06, + "loss": 0.4707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24280916154384613, + "step": 2985, + "valid_targets_mean": 3208.9, + "valid_targets_min": 1568 + }, + { + "epoch": 5.155172413793103, + "grad_norm": 0.7268079620351237, + "learning_rate": 7.896515696124703e-06, + "loss": 0.455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24983307719230652, + "step": 2990, + "valid_targets_mean": 3696.2, + "valid_targets_min": 1525 + }, + { + "epoch": 5.163793103448276, + "grad_norm": 0.7397712005549786, + "learning_rate": 7.828182200535192e-06, + "loss": 0.4805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1951979696750641, + "step": 2995, + "valid_targets_mean": 2562.8, + "valid_targets_min": 593 + }, + { + "epoch": 5.172413793103448, + "grad_norm": 0.6078078960889772, + "learning_rate": 7.760073640155363e-06, + "loss": 0.4299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15697035193443298, + "step": 3000, + "valid_targets_mean": 3641.9, + "valid_targets_min": 1695 + }, + { + "epoch": 5.181034482758621, + "grad_norm": 0.6906741782932729, + "learning_rate": 7.6921912736315e-06, + "loss": 0.4592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2528272867202759, + "step": 3005, + "valid_targets_mean": 3960.2, + "valid_targets_min": 569 + }, + { + "epoch": 5.189655172413793, + "grad_norm": 0.779801729011475, + "learning_rate": 7.624536355429832e-06, + "loss": 0.4482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13327597081661224, + "step": 3010, + "valid_targets_mean": 2014.1, + "valid_targets_min": 595 + }, + { + "epoch": 5.198275862068965, + "grad_norm": 0.6961265560269253, + "learning_rate": 7.557110135813341e-06, + "loss": 0.4799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25486013293266296, + "step": 3015, + "valid_targets_mean": 3800.1, + "valid_targets_min": 692 + }, + { + "epoch": 5.206896551724138, + "grad_norm": 0.7485573343568958, + "learning_rate": 7.489913860818662e-06, + "loss": 0.4767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23183771967887878, + "step": 3020, + "valid_targets_mean": 3628.5, + "valid_targets_min": 1680 + }, + { + "epoch": 5.2155172413793105, + "grad_norm": 0.6081028132914261, + "learning_rate": 7.4229487722330315e-06, + "loss": 0.4706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1672867089509964, + "step": 3025, + "valid_targets_mean": 3773.8, + "valid_targets_min": 1863 + }, + { + "epoch": 5.224137931034483, + "grad_norm": 0.7129702935758114, + "learning_rate": 7.356216107571399e-06, + "loss": 0.4519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27049562335014343, + "step": 3030, + "valid_targets_mean": 3894.4, + "valid_targets_min": 696 + }, + { + "epoch": 5.232758620689655, + "grad_norm": 1.0397970965650016, + "learning_rate": 7.289717100053497e-06, + "loss": 0.5137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1450689136981964, + "step": 3035, + "valid_targets_mean": 2030.2, + "valid_targets_min": 498 + }, + { + "epoch": 5.241379310344827, + "grad_norm": 0.6915057778767586, + "learning_rate": 7.2234529785810645e-06, + "loss": 0.4723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542662024497986, + "step": 3040, + "valid_targets_mean": 3820.6, + "valid_targets_min": 916 + }, + { + "epoch": 5.25, + "grad_norm": 0.7056302244612103, + "learning_rate": 7.157424967715163e-06, + "loss": 0.4573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13803818821907043, + "step": 3045, + "valid_targets_mean": 2330.0, + "valid_targets_min": 704 + }, + { + "epoch": 5.258620689655173, + "grad_norm": 0.6185090500671104, + "learning_rate": 7.091634287653526e-06, + "loss": 0.4722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2878566086292267, + "step": 3050, + "valid_targets_mean": 4588.1, + "valid_targets_min": 1677 + }, + { + "epoch": 5.267241379310345, + "grad_norm": 0.761902630255462, + "learning_rate": 7.026082154208012e-06, + "loss": 0.4416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29359132051467896, + "step": 3055, + "valid_targets_mean": 4224.2, + "valid_targets_min": 1513 + }, + { + "epoch": 5.275862068965517, + "grad_norm": 0.7844949843129767, + "learning_rate": 6.960769778782133e-06, + "loss": 0.4444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2986701726913452, + "step": 3060, + "valid_targets_mean": 3789.8, + "valid_targets_min": 753 + }, + { + "epoch": 5.2844827586206895, + "grad_norm": 0.726504265476168, + "learning_rate": 6.89569836834868e-06, + "loss": 0.47, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24666550755500793, + "step": 3065, + "valid_targets_mean": 3141.1, + "valid_targets_min": 890 + }, + { + "epoch": 5.293103448275862, + "grad_norm": 0.6848726403595511, + "learning_rate": 6.830869125427406e-06, + "loss": 0.4914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35621824860572815, + "step": 3070, + "valid_targets_mean": 3706.6, + "valid_targets_min": 1151 + }, + { + "epoch": 5.301724137931035, + "grad_norm": 0.7941226544749608, + "learning_rate": 6.766283248062817e-06, + "loss": 0.4605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20678937435150146, + "step": 3075, + "valid_targets_mean": 2429.2, + "valid_targets_min": 795 + }, + { + "epoch": 5.310344827586207, + "grad_norm": 0.6736650057815443, + "learning_rate": 6.701941929801996e-06, + "loss": 0.4988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25833404064178467, + "step": 3080, + "valid_targets_mean": 4493.0, + "valid_targets_min": 2396 + }, + { + "epoch": 5.318965517241379, + "grad_norm": 0.7533894846747827, + "learning_rate": 6.637846359672611e-06, + "loss": 0.4833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21882648766040802, + "step": 3085, + "valid_targets_mean": 3435.5, + "valid_targets_min": 1432 + }, + { + "epoch": 5.327586206896552, + "grad_norm": 0.9917710187118872, + "learning_rate": 6.57399772216089e-06, + "loss": 0.4459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2761511206626892, + "step": 3090, + "valid_targets_mean": 4892.8, + "valid_targets_min": 1290 + }, + { + "epoch": 5.336206896551724, + "grad_norm": 0.7004668156294833, + "learning_rate": 6.510397197189724e-06, + "loss": 0.5128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20646989345550537, + "step": 3095, + "valid_targets_mean": 3881.2, + "valid_targets_min": 1376 + }, + { + "epoch": 5.344827586206897, + "grad_norm": 0.6756196982178647, + "learning_rate": 6.447045960096909e-06, + "loss": 0.4487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19709131121635437, + "step": 3100, + "valid_targets_mean": 3093.2, + "valid_targets_min": 760 + }, + { + "epoch": 5.353448275862069, + "grad_norm": 0.8315120005504095, + "learning_rate": 6.383945181613398e-06, + "loss": 0.4206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558245360851288, + "step": 3105, + "valid_targets_mean": 3705.8, + "valid_targets_min": 1318 + }, + { + "epoch": 5.362068965517241, + "grad_norm": 0.7182202828905813, + "learning_rate": 6.32109602784166e-06, + "loss": 0.452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2329225242137909, + "step": 3110, + "valid_targets_mean": 3544.4, + "valid_targets_min": 1795 + }, + { + "epoch": 5.370689655172414, + "grad_norm": 0.7119685951103023, + "learning_rate": 6.258499660234147e-06, + "loss": 0.4502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15800319612026215, + "step": 3115, + "valid_targets_mean": 3172.1, + "valid_targets_min": 1784 + }, + { + "epoch": 5.379310344827586, + "grad_norm": 0.7202358371283981, + "learning_rate": 6.196157235571813e-06, + "loss": 0.4635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982141077518463, + "step": 3120, + "valid_targets_mean": 3674.8, + "valid_targets_min": 357 + }, + { + "epoch": 5.387931034482759, + "grad_norm": 0.7885867904582534, + "learning_rate": 6.134069905942764e-06, + "loss": 0.4675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30231577157974243, + "step": 3125, + "valid_targets_mean": 3849.5, + "valid_targets_min": 320 + }, + { + "epoch": 5.396551724137931, + "grad_norm": 0.7181738901551832, + "learning_rate": 6.072238818720919e-06, + "loss": 0.4318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23303094506263733, + "step": 3130, + "valid_targets_mean": 3753.1, + "valid_targets_min": 999 + }, + { + "epoch": 5.405172413793103, + "grad_norm": 0.593921178442862, + "learning_rate": 6.010665116544858e-06, + "loss": 0.4653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23492135107517242, + "step": 3135, + "valid_targets_mean": 4800.0, + "valid_targets_min": 1120 + }, + { + "epoch": 5.413793103448276, + "grad_norm": 0.7764310266542166, + "learning_rate": 5.9493499372967e-06, + "loss": 0.4358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.225310817360878, + "step": 3140, + "valid_targets_mean": 2939.2, + "valid_targets_min": 581 + }, + { + "epoch": 5.422413793103448, + "grad_norm": 0.7323733956463316, + "learning_rate": 5.888294414081024e-06, + "loss": 0.4852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25720715522766113, + "step": 3145, + "valid_targets_mean": 4409.8, + "valid_targets_min": 1989 + }, + { + "epoch": 5.431034482758621, + "grad_norm": 0.6882597275678037, + "learning_rate": 5.827499675203987e-06, + "loss": 0.421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14771276712417603, + "step": 3150, + "valid_targets_mean": 2556.0, + "valid_targets_min": 1467 + }, + { + "epoch": 5.439655172413793, + "grad_norm": 0.842854165866408, + "learning_rate": 5.76696684415245e-06, + "loss": 0.4494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25888389348983765, + "step": 3155, + "valid_targets_mean": 3178.9, + "valid_targets_min": 1645 + }, + { + "epoch": 5.448275862068965, + "grad_norm": 0.6555092034098002, + "learning_rate": 5.706697039573217e-06, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24546992778778076, + "step": 3160, + "valid_targets_mean": 4539.2, + "valid_targets_min": 1857 + }, + { + "epoch": 5.456896551724138, + "grad_norm": 0.7048492004889653, + "learning_rate": 5.646691375252344e-06, + "loss": 0.4697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22363720834255219, + "step": 3165, + "valid_targets_mean": 3127.9, + "valid_targets_min": 1318 + }, + { + "epoch": 5.4655172413793105, + "grad_norm": 0.6378803177788848, + "learning_rate": 5.586950960094606e-06, + "loss": 0.4376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21243023872375488, + "step": 3170, + "valid_targets_mean": 4441.9, + "valid_targets_min": 1440 + }, + { + "epoch": 5.474137931034483, + "grad_norm": 0.7817408980627574, + "learning_rate": 5.527476898102959e-06, + "loss": 0.4443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24490898847579956, + "step": 3175, + "valid_targets_mean": 3144.2, + "valid_targets_min": 333 + }, + { + "epoch": 5.482758620689655, + "grad_norm": 0.6047332242907153, + "learning_rate": 5.4682702883581395e-06, + "loss": 0.4316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.134861558675766, + "step": 3180, + "valid_targets_mean": 3966.6, + "valid_targets_min": 1314 + }, + { + "epoch": 5.491379310344827, + "grad_norm": 0.636476438778125, + "learning_rate": 5.40933222499838e-06, + "loss": 0.43, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19472798705101013, + "step": 3185, + "valid_targets_mean": 3977.2, + "valid_targets_min": 812 + }, + { + "epoch": 5.5, + "grad_norm": 0.6748452817636509, + "learning_rate": 5.350663797199174e-06, + "loss": 0.4697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19321182370185852, + "step": 3190, + "valid_targets_mean": 3625.8, + "valid_targets_min": 1099 + }, + { + "epoch": 5.508620689655173, + "grad_norm": 0.8353678432176448, + "learning_rate": 5.292266089153149e-06, + "loss": 0.4495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23553451895713806, + "step": 3195, + "valid_targets_mean": 2531.8, + "valid_targets_min": 760 + }, + { + "epoch": 5.517241379310345, + "grad_norm": 0.5708775355976995, + "learning_rate": 5.234140180050029e-06, + "loss": 0.4726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2230847179889679, + "step": 3200, + "valid_targets_mean": 4267.9, + "valid_targets_min": 455 + }, + { + "epoch": 5.525862068965517, + "grad_norm": 0.7399173755970399, + "learning_rate": 5.1762871440566935e-06, + "loss": 0.434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1931067705154419, + "step": 3205, + "valid_targets_mean": 3313.6, + "valid_targets_min": 917 + }, + { + "epoch": 5.5344827586206895, + "grad_norm": 0.7492141521348251, + "learning_rate": 5.118708050297332e-06, + "loss": 0.4385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27809464931488037, + "step": 3210, + "valid_targets_mean": 4113.2, + "valid_targets_min": 1205 + }, + { + "epoch": 5.543103448275862, + "grad_norm": 0.6827907606565726, + "learning_rate": 5.061403962833669e-06, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17418712377548218, + "step": 3215, + "valid_targets_mean": 3184.0, + "valid_targets_min": 976 + }, + { + "epoch": 5.551724137931035, + "grad_norm": 0.7028563981123193, + "learning_rate": 5.004375940645314e-06, + "loss": 0.4878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37116700410842896, + "step": 3220, + "valid_targets_mean": 5722.8, + "valid_targets_min": 1655 + }, + { + "epoch": 5.560344827586206, + "grad_norm": 0.6358136143357161, + "learning_rate": 4.947625037610219e-06, + "loss": 0.5058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26970693469047546, + "step": 3225, + "valid_targets_mean": 5035.5, + "valid_targets_min": 1129 + }, + { + "epoch": 5.568965517241379, + "grad_norm": 0.79905657590774, + "learning_rate": 4.8911523024851295e-06, + "loss": 0.4967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19400912523269653, + "step": 3230, + "valid_targets_mean": 2246.5, + "valid_targets_min": 385 + }, + { + "epoch": 5.577586206896552, + "grad_norm": 0.7661307110427259, + "learning_rate": 4.834958778886271e-06, + "loss": 0.4811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3137957453727722, + "step": 3235, + "valid_targets_mean": 3229.9, + "valid_targets_min": 1368 + }, + { + "epoch": 5.586206896551724, + "grad_norm": 0.6816565028977988, + "learning_rate": 4.779045505270043e-06, + "loss": 0.4576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2541190981864929, + "step": 3240, + "valid_targets_mean": 4194.4, + "valid_targets_min": 563 + }, + { + "epoch": 5.594827586206897, + "grad_norm": 0.6751608561194854, + "learning_rate": 4.723413514913817e-06, + "loss": 0.4557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17950564622879028, + "step": 3245, + "valid_targets_mean": 2918.8, + "valid_targets_min": 1462 + }, + { + "epoch": 5.603448275862069, + "grad_norm": 0.7316269694764956, + "learning_rate": 4.66806383589685e-06, + "loss": 0.4765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2735571563243866, + "step": 3250, + "valid_targets_mean": 3391.2, + "valid_targets_min": 644 + }, + { + "epoch": 5.612068965517241, + "grad_norm": 0.6414697826461783, + "learning_rate": 4.6129974910812855e-06, + "loss": 0.4433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18585994839668274, + "step": 3255, + "valid_targets_mean": 3076.2, + "valid_targets_min": 1818 + }, + { + "epoch": 5.620689655172414, + "grad_norm": 0.6504910454062405, + "learning_rate": 4.558215498093252e-06, + "loss": 0.4478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2893863916397095, + "step": 3260, + "valid_targets_mean": 5735.8, + "valid_targets_min": 415 + }, + { + "epoch": 5.629310344827586, + "grad_norm": 0.7196796255916211, + "learning_rate": 4.503718869304063e-06, + "loss": 0.4711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2017214149236679, + "step": 3265, + "valid_targets_mean": 3032.1, + "valid_targets_min": 437 + }, + { + "epoch": 5.637931034482759, + "grad_norm": 0.8140224360637848, + "learning_rate": 4.449508611811482e-06, + "loss": 0.4655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18502508103847504, + "step": 3270, + "valid_targets_mean": 2054.6, + "valid_targets_min": 897 + }, + { + "epoch": 5.646551724137931, + "grad_norm": 0.6233804054960342, + "learning_rate": 4.395585727421139e-06, + "loss": 0.4717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23045390844345093, + "step": 3275, + "valid_targets_mean": 4667.6, + "valid_targets_min": 2813 + }, + { + "epoch": 5.655172413793103, + "grad_norm": 0.7026030856163301, + "learning_rate": 4.341951212628031e-06, + "loss": 0.4861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3447082042694092, + "step": 3280, + "valid_targets_mean": 4856.2, + "valid_targets_min": 1629 + }, + { + "epoch": 5.663793103448276, + "grad_norm": 0.6809784081813541, + "learning_rate": 4.288606058598048e-06, + "loss": 0.4395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1348533034324646, + "step": 3285, + "valid_targets_mean": 2791.8, + "valid_targets_min": 1492 + }, + { + "epoch": 5.672413793103448, + "grad_norm": 0.613876392573019, + "learning_rate": 4.235551251149714e-06, + "loss": 0.4666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2379133701324463, + "step": 3290, + "valid_targets_mean": 5035.5, + "valid_targets_min": 714 + }, + { + "epoch": 5.681034482758621, + "grad_norm": 0.7094539400481058, + "learning_rate": 4.1827877707359474e-06, + "loss": 0.4482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2683733403682709, + "step": 3295, + "valid_targets_mean": 4671.0, + "valid_targets_min": 1056 + }, + { + "epoch": 5.689655172413794, + "grad_norm": 0.8787456783967276, + "learning_rate": 4.130316592425934e-06, + "loss": 0.4478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19786673784255981, + "step": 3300, + "valid_targets_mean": 2232.8, + "valid_targets_min": 279 + }, + { + "epoch": 5.698275862068965, + "grad_norm": 0.8502083809636145, + "learning_rate": 4.078138685887125e-06, + "loss": 0.4566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2412954866886139, + "step": 3305, + "valid_targets_mean": 3456.1, + "valid_targets_min": 1128 + }, + { + "epoch": 5.706896551724138, + "grad_norm": 0.7472679746715507, + "learning_rate": 4.026255015367302e-06, + "loss": 0.4484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23308929800987244, + "step": 3310, + "valid_targets_mean": 3830.0, + "valid_targets_min": 480 + }, + { + "epoch": 5.7155172413793105, + "grad_norm": 0.75274136107482, + "learning_rate": 3.974666539676774e-06, + "loss": 0.4847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14974159002304077, + "step": 3315, + "valid_targets_mean": 2648.6, + "valid_targets_min": 652 + }, + { + "epoch": 5.724137931034483, + "grad_norm": 0.5774910463846608, + "learning_rate": 3.923374212170634e-06, + "loss": 0.4978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22366955876350403, + "step": 3320, + "valid_targets_mean": 4939.1, + "valid_targets_min": 1908 + }, + { + "epoch": 5.732758620689655, + "grad_norm": 0.9389239094558165, + "learning_rate": 3.872378980731168e-06, + "loss": 0.4579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31157776713371277, + "step": 3325, + "valid_targets_mean": 4375.2, + "valid_targets_min": 2092 + }, + { + "epoch": 5.741379310344827, + "grad_norm": 0.7856183251154304, + "learning_rate": 3.821681787750327e-06, + "loss": 0.4824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2415979951620102, + "step": 3330, + "valid_targets_mean": 3034.4, + "valid_targets_min": 1001 + }, + { + "epoch": 5.75, + "grad_norm": 0.6251658420467967, + "learning_rate": 3.7712835701122985e-06, + "loss": 0.4581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3360128402709961, + "step": 3335, + "valid_targets_mean": 5729.0, + "valid_targets_min": 1965 + }, + { + "epoch": 5.758620689655173, + "grad_norm": 0.7503171399314047, + "learning_rate": 3.721185259176223e-06, + "loss": 0.4509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20740580558776855, + "step": 3340, + "valid_targets_mean": 2710.9, + "valid_targets_min": 1147 + }, + { + "epoch": 5.767241379310345, + "grad_norm": 0.7658794496747393, + "learning_rate": 3.6713877807589503e-06, + "loss": 0.4425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19355687499046326, + "step": 3345, + "valid_targets_mean": 2887.6, + "valid_targets_min": 451 + }, + { + "epoch": 5.775862068965517, + "grad_norm": 0.6818747673397412, + "learning_rate": 3.621892055117955e-06, + "loss": 0.4548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165466994047165, + "step": 3350, + "valid_targets_mean": 4357.8, + "valid_targets_min": 1508 + }, + { + "epoch": 5.7844827586206895, + "grad_norm": 0.576120259663283, + "learning_rate": 3.572698996934303e-06, + "loss": 0.4441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19731038808822632, + "step": 3355, + "valid_targets_mean": 5281.2, + "valid_targets_min": 1702 + }, + { + "epoch": 5.793103448275862, + "grad_norm": 0.7233735140697485, + "learning_rate": 3.5238095152957906e-06, + "loss": 0.4703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16802190244197845, + "step": 3360, + "valid_targets_mean": 2777.1, + "valid_targets_min": 1009 + }, + { + "epoch": 5.801724137931035, + "grad_norm": 0.6935810826398939, + "learning_rate": 3.4752245136801065e-06, + "loss": 0.459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20021113753318787, + "step": 3365, + "valid_targets_mean": 3346.2, + "valid_targets_min": 1778 + }, + { + "epoch": 5.810344827586206, + "grad_norm": 0.6305192634235494, + "learning_rate": 3.4269448899381354e-06, + "loss": 0.4703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24132040143013, + "step": 3370, + "valid_targets_mean": 5659.8, + "valid_targets_min": 468 + }, + { + "epoch": 5.818965517241379, + "grad_norm": 0.6686341734215869, + "learning_rate": 3.3789715362773955e-06, + "loss": 0.4396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2577729821205139, + "step": 3375, + "valid_targets_mean": 4669.1, + "valid_targets_min": 842 + }, + { + "epoch": 5.827586206896552, + "grad_norm": 0.5730865976165508, + "learning_rate": 3.3313053392455317e-06, + "loss": 0.4393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2799406051635742, + "step": 3380, + "valid_targets_mean": 5855.2, + "valid_targets_min": 1536 + }, + { + "epoch": 5.836206896551724, + "grad_norm": 0.6779968652180786, + "learning_rate": 3.2839471797139287e-06, + "loss": 0.441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2712808847427368, + "step": 3385, + "valid_targets_mean": 3816.5, + "valid_targets_min": 1117 + }, + { + "epoch": 5.844827586206897, + "grad_norm": 0.678758331044749, + "learning_rate": 3.236897932861438e-06, + "loss": 0.4575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1380578577518463, + "step": 3390, + "valid_targets_mean": 2670.1, + "valid_targets_min": 1094 + }, + { + "epoch": 5.853448275862069, + "grad_norm": 0.6930942013750735, + "learning_rate": 3.190158468158209e-06, + "loss": 0.4717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26487284898757935, + "step": 3395, + "valid_targets_mean": 4868.8, + "valid_targets_min": 1079 + }, + { + "epoch": 5.862068965517241, + "grad_norm": 0.8138602963483332, + "learning_rate": 3.1437296493496183e-06, + "loss": 0.4437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2084672451019287, + "step": 3400, + "valid_targets_mean": 2761.5, + "valid_targets_min": 1043 + }, + { + "epoch": 5.870689655172414, + "grad_norm": 0.7005704796522405, + "learning_rate": 3.0976123344402897e-06, + "loss": 0.4103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30806052684783936, + "step": 3405, + "valid_targets_mean": 5424.6, + "valid_targets_min": 1519 + }, + { + "epoch": 5.879310344827586, + "grad_norm": 0.5581296805550643, + "learning_rate": 3.0518073756782683e-06, + "loss": 0.4627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23483984172344208, + "step": 3410, + "valid_targets_mean": 6492.5, + "valid_targets_min": 370 + }, + { + "epoch": 5.887931034482759, + "grad_norm": 0.7413985748284263, + "learning_rate": 3.0063156195392685e-06, + "loss": 0.4331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1887524425983429, + "step": 3415, + "valid_targets_mean": 3025.9, + "valid_targets_min": 299 + }, + { + "epoch": 5.896551724137931, + "grad_norm": 0.7227263047030936, + "learning_rate": 2.9611379067109914e-06, + "loss": 0.448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2441166490316391, + "step": 3420, + "valid_targets_mean": 4610.4, + "valid_targets_min": 837 + }, + { + "epoch": 5.905172413793103, + "grad_norm": 0.7828328616055023, + "learning_rate": 2.9162750720776366e-06, + "loss": 0.4469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19237752258777618, + "step": 3425, + "valid_targets_mean": 2687.2, + "valid_targets_min": 1850 + }, + { + "epoch": 5.913793103448276, + "grad_norm": 0.7107936465243866, + "learning_rate": 2.871727944704452e-06, + "loss": 0.4617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891714334487915, + "step": 3430, + "valid_targets_mean": 3769.1, + "valid_targets_min": 1792 + }, + { + "epoch": 5.922413793103448, + "grad_norm": 0.7382472128070807, + "learning_rate": 2.8274973478224167e-06, + "loss": 0.4566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17400340735912323, + "step": 3435, + "valid_targets_mean": 2444.0, + "valid_targets_min": 862 + }, + { + "epoch": 5.931034482758621, + "grad_norm": 0.6571277220139973, + "learning_rate": 2.783584098813006e-06, + "loss": 0.4282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19393205642700195, + "step": 3440, + "valid_targets_mean": 3712.8, + "valid_targets_min": 1681 + }, + { + "epoch": 5.939655172413794, + "grad_norm": 0.7312043134669541, + "learning_rate": 2.739989009193138e-06, + "loss": 0.4772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2664768099784851, + "step": 3445, + "valid_targets_mean": 5313.1, + "valid_targets_min": 1496 + }, + { + "epoch": 5.948275862068965, + "grad_norm": 0.7966953008777837, + "learning_rate": 2.6967128846001234e-06, + "loss": 0.4134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16477492451667786, + "step": 3450, + "valid_targets_mean": 2826.6, + "valid_targets_min": 398 + }, + { + "epoch": 5.956896551724138, + "grad_norm": 0.7505723326591668, + "learning_rate": 2.6537565247768094e-06, + "loss": 0.4649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1759946495294571, + "step": 3455, + "valid_targets_mean": 2627.0, + "valid_targets_min": 328 + }, + { + "epoch": 5.9655172413793105, + "grad_norm": 0.7730918698032901, + "learning_rate": 2.611120723556775e-06, + "loss": 0.4524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2330639362335205, + "step": 3460, + "valid_targets_mean": 3213.0, + "valid_targets_min": 317 + }, + { + "epoch": 5.974137931034483, + "grad_norm": 0.6871752561410659, + "learning_rate": 2.568806268849684e-06, + "loss": 0.4427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2211509346961975, + "step": 3465, + "valid_targets_mean": 4685.8, + "valid_targets_min": 358 + }, + { + "epoch": 5.982758620689655, + "grad_norm": 0.6866564277500524, + "learning_rate": 2.526813942626736e-06, + "loss": 0.4428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20986169576644897, + "step": 3470, + "valid_targets_mean": 4101.9, + "valid_targets_min": 1247 + }, + { + "epoch": 5.991379310344827, + "grad_norm": 0.6407646096396032, + "learning_rate": 2.4851445209061574e-06, + "loss": 0.4476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18139518797397614, + "step": 3475, + "valid_targets_mean": 3582.2, + "valid_targets_min": 1577 + }, + { + "epoch": 6.0, + "grad_norm": 0.5858103373326182, + "learning_rate": 2.4437987737389277e-06, + "loss": 0.4322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1381668597459793, + "step": 3480, + "valid_targets_mean": 2870.6, + "valid_targets_min": 1488 + }, + { + "epoch": 6.008620689655173, + "grad_norm": 0.7740135978527747, + "learning_rate": 2.40277746519451e-06, + "loss": 0.4717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22488614916801453, + "step": 3485, + "valid_targets_mean": 3303.1, + "valid_targets_min": 789 + }, + { + "epoch": 6.017241379310345, + "grad_norm": 0.6718129793968958, + "learning_rate": 2.362081353346746e-06, + "loss": 0.463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22049200534820557, + "step": 3490, + "valid_targets_mean": 3427.2, + "valid_targets_min": 1670 + }, + { + "epoch": 6.025862068965517, + "grad_norm": 0.6108999358440294, + "learning_rate": 2.3217111902598298e-06, + "loss": 0.4345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25082069635391235, + "step": 3495, + "valid_targets_mean": 5472.6, + "valid_targets_min": 2368 + }, + { + "epoch": 6.0344827586206895, + "grad_norm": 0.7142070390712623, + "learning_rate": 2.2816677219744388e-06, + "loss": 0.429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18685954809188843, + "step": 3500, + "valid_targets_mean": 3964.9, + "valid_targets_min": 1503 + }, + { + "epoch": 6.043103448275862, + "grad_norm": 0.6773698288330054, + "learning_rate": 2.241951688493924e-06, + "loss": 0.4551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644187808036804, + "step": 3505, + "valid_targets_mean": 4625.0, + "valid_targets_min": 1480 + }, + { + "epoch": 6.051724137931035, + "grad_norm": 0.8091413060885396, + "learning_rate": 2.2025638237706294e-06, + "loss": 0.4631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22907666862010956, + "step": 3510, + "valid_targets_mean": 2675.4, + "valid_targets_min": 1267 + }, + { + "epoch": 6.060344827586207, + "grad_norm": 0.6849568569146559, + "learning_rate": 2.1635048556923555e-06, + "loss": 0.496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3542017340660095, + "step": 3515, + "valid_targets_mean": 4052.4, + "valid_targets_min": 432 + }, + { + "epoch": 6.068965517241379, + "grad_norm": 0.676675971064016, + "learning_rate": 2.1247755060688856e-06, + "loss": 0.4691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13447147607803345, + "step": 3520, + "valid_targets_mean": 2988.2, + "valid_targets_min": 1243 + }, + { + "epoch": 6.077586206896552, + "grad_norm": 0.7520571476765067, + "learning_rate": 2.0863764906186514e-06, + "loss": 0.4516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25384098291397095, + "step": 3525, + "valid_targets_mean": 3574.5, + "valid_targets_min": 1700 + }, + { + "epoch": 6.086206896551724, + "grad_norm": 0.716713637131033, + "learning_rate": 2.048308518955515e-06, + "loss": 0.4705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3154298663139343, + "step": 3530, + "valid_targets_mean": 5037.4, + "valid_targets_min": 429 + }, + { + "epoch": 6.094827586206897, + "grad_norm": 0.7694399803659958, + "learning_rate": 2.010572294575641e-06, + "loss": 0.4698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17910689115524292, + "step": 3535, + "valid_targets_mean": 3081.2, + "valid_targets_min": 1551 + }, + { + "epoch": 6.103448275862069, + "grad_norm": 0.7868404699942033, + "learning_rate": 1.9731685148445168e-06, + "loss": 0.4561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2528718113899231, + "step": 3540, + "valid_targets_mean": 4245.1, + "valid_targets_min": 2071 + }, + { + "epoch": 6.112068965517241, + "grad_norm": 0.7576280365207806, + "learning_rate": 1.9360978709840304e-06, + "loss": 0.4174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1939217746257782, + "step": 3545, + "valid_targets_mean": 2826.0, + "valid_targets_min": 543 + }, + { + "epoch": 6.120689655172414, + "grad_norm": 0.717658482106765, + "learning_rate": 1.8993610480597359e-06, + "loss": 0.4367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32581982016563416, + "step": 3550, + "valid_targets_mean": 5192.1, + "valid_targets_min": 1572 + }, + { + "epoch": 6.129310344827586, + "grad_norm": 0.6379593260805476, + "learning_rate": 1.8629587249681802e-06, + "loss": 0.4901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2898372411727905, + "step": 3555, + "valid_targets_mean": 4503.4, + "valid_targets_min": 233 + }, + { + "epoch": 6.137931034482759, + "grad_norm": 0.8367663752524243, + "learning_rate": 1.8268915744243321e-06, + "loss": 0.4469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21554884314537048, + "step": 3560, + "valid_targets_mean": 2432.1, + "valid_targets_min": 895 + }, + { + "epoch": 6.146551724137931, + "grad_norm": 0.7054337218833375, + "learning_rate": 1.7911602629491876e-06, + "loss": 0.4659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18869292736053467, + "step": 3565, + "valid_targets_mean": 3975.1, + "valid_targets_min": 470 + }, + { + "epoch": 6.155172413793103, + "grad_norm": 0.7631519820066042, + "learning_rate": 1.7557654508574339e-06, + "loss": 0.4637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16359178721904755, + "step": 3570, + "valid_targets_mean": 1903.8, + "valid_targets_min": 383 + }, + { + "epoch": 6.163793103448276, + "grad_norm": 0.7024009414184341, + "learning_rate": 1.7207077922452465e-06, + "loss": 0.4322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18459674715995789, + "step": 3575, + "valid_targets_mean": 3362.8, + "valid_targets_min": 1424 + }, + { + "epoch": 6.172413793103448, + "grad_norm": 0.77426605189279, + "learning_rate": 1.6859879349782016e-06, + "loss": 0.4596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14893782138824463, + "step": 3580, + "valid_targets_mean": 1882.2, + "valid_targets_min": 710 + }, + { + "epoch": 6.181034482758621, + "grad_norm": 0.7511156023426865, + "learning_rate": 1.6516065206793142e-06, + "loss": 0.4586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3090694546699524, + "step": 3585, + "valid_targets_mean": 3560.6, + "valid_targets_min": 655 + }, + { + "epoch": 6.189655172413793, + "grad_norm": 0.7063002930277574, + "learning_rate": 1.6175641847171687e-06, + "loss": 0.4822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2065710723400116, + "step": 3590, + "valid_targets_mean": 3442.1, + "valid_targets_min": 686 + }, + { + "epoch": 6.198275862068965, + "grad_norm": 0.7527738601886692, + "learning_rate": 1.5838615561941705e-06, + "loss": 0.4457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19831009209156036, + "step": 3595, + "valid_targets_mean": 3206.9, + "valid_targets_min": 305 + }, + { + "epoch": 6.206896551724138, + "grad_norm": 0.7563126410614768, + "learning_rate": 1.550499257934952e-06, + "loss": 0.4661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2772877514362335, + "step": 3600, + "valid_targets_mean": 3702.5, + "valid_targets_min": 1528 + }, + { + "epoch": 6.2155172413793105, + "grad_norm": 0.6758864017574707, + "learning_rate": 1.5174779064748246e-06, + "loss": 0.453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3183794617652893, + "step": 3605, + "valid_targets_mean": 5107.1, + "valid_targets_min": 546 + }, + { + "epoch": 6.224137931034483, + "grad_norm": 0.5562754795731977, + "learning_rate": 1.4847981120484089e-06, + "loss": 0.425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17494836449623108, + "step": 3610, + "valid_targets_mean": 5317.8, + "valid_targets_min": 1689 + }, + { + "epoch": 6.232758620689655, + "grad_norm": 0.5664500974521548, + "learning_rate": 1.4524604785783548e-06, + "loss": 0.4335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948354482650757, + "step": 3615, + "valid_targets_mean": 6150.6, + "valid_targets_min": 1373 + }, + { + "epoch": 6.241379310344827, + "grad_norm": 0.8404985064381851, + "learning_rate": 1.4204656036641717e-06, + "loss": 0.428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2410782277584076, + "step": 3620, + "valid_targets_mean": 2847.4, + "valid_targets_min": 703 + }, + { + "epoch": 6.25, + "grad_norm": 0.6389203413177802, + "learning_rate": 1.3888140785711945e-06, + "loss": 0.4408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20249323546886444, + "step": 3625, + "valid_targets_mean": 3856.8, + "valid_targets_min": 1831 + }, + { + "epoch": 6.258620689655173, + "grad_norm": 0.8460556546997163, + "learning_rate": 1.3575064882196398e-06, + "loss": 0.4386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1973244547843933, + "step": 3630, + "valid_targets_mean": 2522.9, + "valid_targets_min": 917 + }, + { + "epoch": 6.267241379310345, + "grad_norm": 0.7198328827499566, + "learning_rate": 1.326543411173833e-06, + "loss": 0.427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749674677848816, + "step": 3635, + "valid_targets_mean": 2668.4, + "valid_targets_min": 812 + }, + { + "epoch": 6.275862068965517, + "grad_norm": 0.7067874613966607, + "learning_rate": 1.295925419631474e-06, + "loss": 0.4557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20972105860710144, + "step": 3640, + "valid_targets_mean": 3294.1, + "valid_targets_min": 1135 + }, + { + "epoch": 6.2844827586206895, + "grad_norm": 0.737266550604988, + "learning_rate": 1.265653079413094e-06, + "loss": 0.4409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23832815885543823, + "step": 3645, + "valid_targets_mean": 2836.0, + "valid_targets_min": 836 + }, + { + "epoch": 6.293103448275862, + "grad_norm": 0.7808341732396372, + "learning_rate": 1.2357269499515745e-06, + "loss": 0.4365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26828426122665405, + "step": 3650, + "valid_targets_mean": 3709.5, + "valid_targets_min": 888 + }, + { + "epoch": 6.301724137931035, + "grad_norm": 0.8072417550739505, + "learning_rate": 1.2061475842818337e-06, + "loss": 0.4984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2538772225379944, + "step": 3655, + "valid_targets_mean": 3380.8, + "valid_targets_min": 647 + }, + { + "epoch": 6.310344827586207, + "grad_norm": 0.7006576081958799, + "learning_rate": 1.176915529030589e-06, + "loss": 0.4348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29240888357162476, + "step": 3660, + "valid_targets_mean": 4889.4, + "valid_targets_min": 2397 + }, + { + "epoch": 6.318965517241379, + "grad_norm": 0.7591513606843514, + "learning_rate": 1.1480313244062603e-06, + "loss": 0.45, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28072088956832886, + "step": 3665, + "valid_targets_mean": 3284.6, + "valid_targets_min": 1211 + }, + { + "epoch": 6.327586206896552, + "grad_norm": 0.6552167771838345, + "learning_rate": 1.1194955041889898e-06, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17346346378326416, + "step": 3670, + "valid_targets_mean": 3683.0, + "valid_targets_min": 1142 + }, + { + "epoch": 6.336206896551724, + "grad_norm": 0.6798357918420773, + "learning_rate": 1.0913085957207748e-06, + "loss": 0.4396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14828425645828247, + "step": 3675, + "valid_targets_mean": 2677.5, + "valid_targets_min": 571 + }, + { + "epoch": 6.344827586206897, + "grad_norm": 0.7887644233319003, + "learning_rate": 1.063471119895727e-06, + "loss": 0.4294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.227344810962677, + "step": 3680, + "valid_targets_mean": 3479.6, + "valid_targets_min": 1237 + }, + { + "epoch": 6.353448275862069, + "grad_norm": 0.8053988429305725, + "learning_rate": 1.0359835911504246e-06, + "loss": 0.4455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19495242834091187, + "step": 3685, + "valid_targets_mean": 3586.6, + "valid_targets_min": 1418 + }, + { + "epoch": 6.362068965517241, + "grad_norm": 0.8122518920294817, + "learning_rate": 1.0088465174544514e-06, + "loss": 0.4733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2196522206068039, + "step": 3690, + "valid_targets_mean": 2701.8, + "valid_targets_min": 1681 + }, + { + "epoch": 6.370689655172414, + "grad_norm": 0.753020543651917, + "learning_rate": 9.820604003009614e-07, + "loss": 0.4691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19198468327522278, + "step": 3695, + "valid_targets_mean": 3232.6, + "valid_targets_min": 582 + }, + { + "epoch": 6.379310344827586, + "grad_norm": 0.7618572578862354, + "learning_rate": 9.556257346974319e-07, + "loss": 0.42, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21314972639083862, + "step": 3700, + "valid_targets_mean": 3102.0, + "valid_targets_min": 839 + }, + { + "epoch": 6.387931034482759, + "grad_norm": 0.765904355774887, + "learning_rate": 9.295430091565261e-07, + "loss": 0.4612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19799397885799408, + "step": 3705, + "valid_targets_mean": 2865.5, + "valid_targets_min": 1475 + }, + { + "epoch": 6.396551724137931, + "grad_norm": 0.609585669709035, + "learning_rate": 9.038127056870416e-07, + "loss": 0.4525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21491311490535736, + "step": 3710, + "valid_targets_mean": 5568.1, + "valid_targets_min": 563 + }, + { + "epoch": 6.405172413793103, + "grad_norm": 0.7690352570878218, + "learning_rate": 8.784352997850277e-07, + "loss": 0.404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20269158482551575, + "step": 3715, + "valid_targets_mean": 4081.6, + "valid_targets_min": 1503 + }, + { + "epoch": 6.413793103448276, + "grad_norm": 0.7846482935070382, + "learning_rate": 8.534112604249789e-07, + "loss": 0.4087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2106572985649109, + "step": 3720, + "valid_targets_mean": 3615.6, + "valid_targets_min": 348 + }, + { + "epoch": 6.422413793103448, + "grad_norm": 0.5584770451232427, + "learning_rate": 8.287410500511739e-07, + "loss": 0.4232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2852216362953186, + "step": 3725, + "valid_targets_mean": 7787.0, + "valid_targets_min": 4802 + }, + { + "epoch": 6.431034482758621, + "grad_norm": 0.6558759014342717, + "learning_rate": 8.044251245691393e-07, + "loss": 0.4837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19151388108730316, + "step": 3730, + "valid_targets_mean": 3860.0, + "valid_targets_min": 1447 + }, + { + "epoch": 6.439655172413793, + "grad_norm": 0.6171395789005942, + "learning_rate": 7.804639333372077e-07, + "loss": 0.4506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16960985958576202, + "step": 3735, + "valid_targets_mean": 4072.1, + "valid_targets_min": 1878 + }, + { + "epoch": 6.448275862068965, + "grad_norm": 0.7816495350034629, + "learning_rate": 7.568579191582248e-07, + "loss": 0.423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17788684368133545, + "step": 3740, + "valid_targets_mean": 3884.8, + "valid_targets_min": 1158 + }, + { + "epoch": 6.456896551724138, + "grad_norm": 0.6529206048125584, + "learning_rate": 7.336075182713708e-07, + "loss": 0.4358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18897415697574615, + "step": 3745, + "valid_targets_mean": 4003.6, + "valid_targets_min": 565 + }, + { + "epoch": 6.4655172413793105, + "grad_norm": 0.6449389338492015, + "learning_rate": 7.107131603440809e-07, + "loss": 0.4252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2346837818622589, + "step": 3750, + "valid_targets_mean": 5214.0, + "valid_targets_min": 1501 + }, + { + "epoch": 6.474137931034483, + "grad_norm": 0.7811946298032911, + "learning_rate": 6.881752684641219e-07, + "loss": 0.4527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19682791829109192, + "step": 3755, + "valid_targets_mean": 2777.1, + "valid_targets_min": 645 + }, + { + "epoch": 6.482758620689655, + "grad_norm": 0.749142145074001, + "learning_rate": 6.659942591317703e-07, + "loss": 0.4234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24652616679668427, + "step": 3760, + "valid_targets_mean": 3532.0, + "valid_targets_min": 497 + }, + { + "epoch": 6.491379310344827, + "grad_norm": 0.7214616295655278, + "learning_rate": 6.441705422521072e-07, + "loss": 0.4656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21576929092407227, + "step": 3765, + "valid_targets_mean": 3651.8, + "valid_targets_min": 447 + }, + { + "epoch": 6.5, + "grad_norm": 0.8278176356345619, + "learning_rate": 6.22704521127444e-07, + "loss": 0.4623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2633417844772339, + "step": 3770, + "valid_targets_mean": 4134.5, + "valid_targets_min": 1746 + }, + { + "epoch": 6.508620689655173, + "grad_norm": 0.7594236851050343, + "learning_rate": 6.015965924498912e-07, + "loss": 0.4218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20567096769809723, + "step": 3775, + "valid_targets_mean": 2847.8, + "valid_targets_min": 1944 + }, + { + "epoch": 6.517241379310345, + "grad_norm": 0.668955142683864, + "learning_rate": 5.808471462939946e-07, + "loss": 0.5166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2435295730829239, + "step": 3780, + "valid_targets_mean": 4341.1, + "valid_targets_min": 1338 + }, + { + "epoch": 6.525862068965517, + "grad_norm": 0.7095589321551677, + "learning_rate": 5.604565661095484e-07, + "loss": 0.4735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26538217067718506, + "step": 3785, + "valid_targets_mean": 3807.8, + "valid_targets_min": 1789 + }, + { + "epoch": 6.5344827586206895, + "grad_norm": 0.8061244528120283, + "learning_rate": 5.404252287145006e-07, + "loss": 0.4308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15442270040512085, + "step": 3790, + "valid_targets_mean": 1825.9, + "valid_targets_min": 649 + }, + { + "epoch": 6.543103448275862, + "grad_norm": 0.8048087082616983, + "learning_rate": 5.207535042879963e-07, + "loss": 0.4358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24893686175346375, + "step": 3795, + "valid_targets_mean": 2585.2, + "valid_targets_min": 811 + }, + { + "epoch": 6.551724137931035, + "grad_norm": 0.7251230246204804, + "learning_rate": 5.014417563635276e-07, + "loss": 0.4444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2101002335548401, + "step": 3800, + "valid_targets_mean": 3319.1, + "valid_targets_min": 1090 + }, + { + "epoch": 6.560344827586206, + "grad_norm": 0.7553348462583848, + "learning_rate": 4.824903418222259e-07, + "loss": 0.4565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1768944263458252, + "step": 3805, + "valid_targets_mean": 2879.6, + "valid_targets_min": 1292 + }, + { + "epoch": 6.568965517241379, + "grad_norm": 0.6256257986197964, + "learning_rate": 4.638996108862559e-07, + "loss": 0.4851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13639257848262787, + "step": 3810, + "valid_targets_mean": 2789.2, + "valid_targets_min": 1803 + }, + { + "epoch": 6.577586206896552, + "grad_norm": 0.6782534867126325, + "learning_rate": 4.456699071123538e-07, + "loss": 0.4755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28413069248199463, + "step": 3815, + "valid_targets_mean": 3424.9, + "valid_targets_min": 791 + }, + { + "epoch": 6.586206896551724, + "grad_norm": 0.8068441567212525, + "learning_rate": 4.2780156738546407e-07, + "loss": 0.4407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23791375756263733, + "step": 3820, + "valid_targets_mean": 3495.1, + "valid_targets_min": 592 + }, + { + "epoch": 6.594827586206897, + "grad_norm": 0.7618897463733958, + "learning_rate": 4.1029492191253296e-07, + "loss": 0.4332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3005632758140564, + "step": 3825, + "valid_targets_mean": 3915.5, + "valid_targets_min": 756 + }, + { + "epoch": 6.603448275862069, + "grad_norm": 0.6790164115588616, + "learning_rate": 3.931502942163956e-07, + "loss": 0.4281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24386459589004517, + "step": 3830, + "valid_targets_mean": 4130.9, + "valid_targets_min": 1192 + }, + { + "epoch": 6.612068965517241, + "grad_norm": 0.6898721639387323, + "learning_rate": 3.763680011297921e-07, + "loss": 0.4393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19996516406536102, + "step": 3835, + "valid_targets_mean": 3800.0, + "valid_targets_min": 1547 + }, + { + "epoch": 6.620689655172414, + "grad_norm": 0.7915362044920929, + "learning_rate": 3.599483527895231e-07, + "loss": 0.436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447966992855072, + "step": 3840, + "valid_targets_mean": 2010.4, + "valid_targets_min": 840 + }, + { + "epoch": 6.629310344827586, + "grad_norm": 0.7086048153772613, + "learning_rate": 3.4389165263071233e-07, + "loss": 0.4355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24716392159461975, + "step": 3845, + "valid_targets_mean": 3861.5, + "valid_targets_min": 1355 + }, + { + "epoch": 6.637931034482759, + "grad_norm": 0.7407607649314212, + "learning_rate": 3.2819819738119983e-07, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21087799966335297, + "step": 3850, + "valid_targets_mean": 3448.4, + "valid_targets_min": 1410 + }, + { + "epoch": 6.646551724137931, + "grad_norm": 0.7862129023418445, + "learning_rate": 3.1286827705605984e-07, + "loss": 0.3987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18787223100662231, + "step": 3855, + "valid_targets_mean": 2740.4, + "valid_targets_min": 1036 + }, + { + "epoch": 6.655172413793103, + "grad_norm": 0.9583946478618197, + "learning_rate": 2.979021749522448e-07, + "loss": 0.4737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20167484879493713, + "step": 3860, + "valid_targets_mean": 2912.0, + "valid_targets_min": 858 + }, + { + "epoch": 6.663793103448276, + "grad_norm": 0.7463481814937221, + "learning_rate": 2.833001676433367e-07, + "loss": 0.4301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26120278239250183, + "step": 3865, + "valid_targets_mean": 4619.1, + "valid_targets_min": 1401 + }, + { + "epoch": 6.672413793103448, + "grad_norm": 0.6642070716787235, + "learning_rate": 2.690625249744572e-07, + "loss": 0.4469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23449347913265228, + "step": 3870, + "valid_targets_mean": 4213.8, + "valid_targets_min": 1849 + }, + { + "epoch": 6.681034482758621, + "grad_norm": 0.915332604744956, + "learning_rate": 2.551895100572566e-07, + "loss": 0.4496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29583102464675903, + "step": 3875, + "valid_targets_mean": 2902.1, + "valid_targets_min": 1314 + }, + { + "epoch": 6.689655172413794, + "grad_norm": 0.8360269400695622, + "learning_rate": 2.4168137926506854e-07, + "loss": 0.455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2830885648727417, + "step": 3880, + "valid_targets_mean": 2553.8, + "valid_targets_min": 724 + }, + { + "epoch": 6.698275862068965, + "grad_norm": 0.6291602674642697, + "learning_rate": 2.2853838222817616e-07, + "loss": 0.4299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1453477144241333, + "step": 3885, + "valid_targets_mean": 3349.6, + "valid_targets_min": 1724 + }, + { + "epoch": 6.706896551724138, + "grad_norm": 0.6820000274820427, + "learning_rate": 2.1576076182917794e-07, + "loss": 0.4197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21681362390518188, + "step": 3890, + "valid_targets_mean": 3162.0, + "valid_targets_min": 545 + }, + { + "epoch": 6.7155172413793105, + "grad_norm": 0.7987024941251497, + "learning_rate": 2.0334875419851573e-07, + "loss": 0.4558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1824357658624649, + "step": 3895, + "valid_targets_mean": 2830.0, + "valid_targets_min": 616 + }, + { + "epoch": 6.724137931034483, + "grad_norm": 0.8215637161827768, + "learning_rate": 1.9130258871011165e-07, + "loss": 0.4713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18571743369102478, + "step": 3900, + "valid_targets_mean": 2641.1, + "valid_targets_min": 1586 + }, + { + "epoch": 6.732758620689655, + "grad_norm": 0.6581754518761841, + "learning_rate": 1.7962248797711356e-07, + "loss": 0.4888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15917380154132843, + "step": 3905, + "valid_targets_mean": 2697.8, + "valid_targets_min": 1491 + }, + { + "epoch": 6.741379310344827, + "grad_norm": 0.8997760891262785, + "learning_rate": 1.683086678478074e-07, + "loss": 0.4441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23729611933231354, + "step": 3910, + "valid_targets_mean": 2189.0, + "valid_targets_min": 489 + }, + { + "epoch": 6.75, + "grad_norm": 0.690128316353554, + "learning_rate": 1.573613374015981e-07, + "loss": 0.4293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21301250159740448, + "step": 3915, + "valid_targets_mean": 3811.6, + "valid_targets_min": 787 + }, + { + "epoch": 6.758620689655173, + "grad_norm": 0.783312423194326, + "learning_rate": 1.4678069894517033e-07, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18273267149925232, + "step": 3920, + "valid_targets_mean": 2392.5, + "valid_targets_min": 544 + }, + { + "epoch": 6.767241379310345, + "grad_norm": 0.5708162729229475, + "learning_rate": 1.3656694800873614e-07, + "loss": 0.4403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26748549938201904, + "step": 3925, + "valid_targets_mean": 6344.1, + "valid_targets_min": 2684 + }, + { + "epoch": 6.775862068965517, + "grad_norm": 0.7851430882153787, + "learning_rate": 1.2672027334242887e-07, + "loss": 0.4679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24374479055404663, + "step": 3930, + "valid_targets_mean": 3370.4, + "valid_targets_min": 1127 + }, + { + "epoch": 6.7844827586206895, + "grad_norm": 0.8589422933193868, + "learning_rate": 1.1724085691280806e-07, + "loss": 0.4484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15478235483169556, + "step": 3935, + "valid_targets_mean": 1799.1, + "valid_targets_min": 701 + }, + { + "epoch": 6.793103448275862, + "grad_norm": 0.7017426303676837, + "learning_rate": 1.0812887389950233e-07, + "loss": 0.4769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1417360007762909, + "step": 3940, + "valid_targets_mean": 2816.0, + "valid_targets_min": 394 + }, + { + "epoch": 6.801724137931035, + "grad_norm": 0.7320585288066113, + "learning_rate": 9.938449269197181e-08, + "loss": 0.4539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23600172996520996, + "step": 3945, + "valid_targets_mean": 3293.9, + "valid_targets_min": 1581 + }, + { + "epoch": 6.810344827586206, + "grad_norm": 0.7865361114606387, + "learning_rate": 9.100787488639295e-08, + "loss": 0.4737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16835284233093262, + "step": 3950, + "valid_targets_mean": 2500.4, + "valid_targets_min": 1075 + }, + { + "epoch": 6.818965517241379, + "grad_norm": 0.7598474628110911, + "learning_rate": 8.299917528267198e-08, + "loss": 0.4376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23328222334384918, + "step": 3955, + "valid_targets_mean": 3315.4, + "valid_targets_min": 1230 + }, + { + "epoch": 6.827586206896552, + "grad_norm": 0.6922186966361085, + "learning_rate": 7.535854188159164e-08, + "loss": 0.4561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22030378878116608, + "step": 3960, + "valid_targets_mean": 4447.1, + "valid_targets_min": 1842 + }, + { + "epoch": 6.836206896551724, + "grad_norm": 0.7934710758265647, + "learning_rate": 6.808611588206448e-08, + "loss": 0.4398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18129822611808777, + "step": 3965, + "valid_targets_mean": 2330.4, + "valid_targets_min": 696 + }, + { + "epoch": 6.844827586206897, + "grad_norm": 0.776583820755344, + "learning_rate": 6.11820316785372e-08, + "loss": 0.4452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13943791389465332, + "step": 3970, + "valid_targets_mean": 1962.6, + "valid_targets_min": 479 + }, + { + "epoch": 6.853448275862069, + "grad_norm": 0.7680731809453211, + "learning_rate": 5.464641685849259e-08, + "loss": 0.4619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3107728958129883, + "step": 3975, + "valid_targets_mean": 3925.5, + "valid_targets_min": 1400 + }, + { + "epoch": 6.862068965517241, + "grad_norm": 0.769315800681628, + "learning_rate": 4.8479392200100336e-08, + "loss": 0.4535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2647784352302551, + "step": 3980, + "valid_targets_mean": 3176.2, + "valid_targets_min": 450 + }, + { + "epoch": 6.870689655172414, + "grad_norm": 0.754287459757374, + "learning_rate": 4.268107166998769e-08, + "loss": 0.4386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20756995677947998, + "step": 3985, + "valid_targets_mean": 4493.1, + "valid_targets_min": 1858 + }, + { + "epoch": 6.879310344827586, + "grad_norm": 0.9099113076711162, + "learning_rate": 3.7251562421123375e-08, + "loss": 0.403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22244426608085632, + "step": 3990, + "valid_targets_mean": 2582.2, + "valid_targets_min": 379 + }, + { + "epoch": 6.887931034482759, + "grad_norm": 0.6508460492899365, + "learning_rate": 3.219096479084804e-08, + "loss": 0.4508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32543838024139404, + "step": 3995, + "valid_targets_mean": 5302.9, + "valid_targets_min": 1996 + }, + { + "epoch": 6.896551724137931, + "grad_norm": 0.6718668504076304, + "learning_rate": 2.749937229901134e-08, + "loss": 0.4699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28725236654281616, + "step": 4000, + "valid_targets_mean": 4658.1, + "valid_targets_min": 1064 + }, + { + "epoch": 6.905172413793103, + "grad_norm": 0.6116223856311377, + "learning_rate": 2.317687164624882e-08, + "loss": 0.4503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.211561381816864, + "step": 4005, + "valid_targets_mean": 5063.1, + "valid_targets_min": 1967 + }, + { + "epoch": 6.913793103448276, + "grad_norm": 0.8881826270754143, + "learning_rate": 1.9223542712381026e-08, + "loss": 0.459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2072024941444397, + "step": 4010, + "valid_targets_mean": 2485.4, + "valid_targets_min": 615 + }, + { + "epoch": 6.922413793103448, + "grad_norm": 0.8659801226831524, + "learning_rate": 1.563945855492799e-08, + "loss": 0.4907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39161914587020874, + "step": 4015, + "valid_targets_mean": 3150.6, + "valid_targets_min": 955 + }, + { + "epoch": 6.931034482758621, + "grad_norm": 0.80563907343615, + "learning_rate": 1.242468540777253e-08, + "loss": 0.4357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18695054948329926, + "step": 4020, + "valid_targets_mean": 2511.8, + "valid_targets_min": 328 + }, + { + "epoch": 6.939655172413794, + "grad_norm": 0.6738769628962994, + "learning_rate": 9.579282679927915e-09, + "loss": 0.4729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2548571825027466, + "step": 4025, + "valid_targets_mean": 4149.2, + "valid_targets_min": 1536 + }, + { + "epoch": 6.948275862068965, + "grad_norm": 0.6701279131603821, + "learning_rate": 7.1033029544365085e-09, + "loss": 0.4602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15617677569389343, + "step": 4030, + "valid_targets_mean": 3156.2, + "valid_targets_min": 534 + }, + { + "epoch": 6.956896551724138, + "grad_norm": 0.7093717573267669, + "learning_rate": 4.996791987410543e-09, + "loss": 0.4497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2574841380119324, + "step": 4035, + "valid_targets_mean": 3534.9, + "valid_targets_min": 524 + }, + { + "epoch": 6.9655172413793105, + "grad_norm": 0.7332674589011584, + "learning_rate": 3.2597887071750266e-09, + "loss": 0.4585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26754799485206604, + "step": 4040, + "valid_targets_mean": 4100.1, + "valid_targets_min": 656 + }, + { + "epoch": 6.974137931034483, + "grad_norm": 0.6754420334215917, + "learning_rate": 1.892325213552759e-09, + "loss": 0.4722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21604621410369873, + "step": 4045, + "valid_targets_mean": 3842.0, + "valid_targets_min": 805 + }, + { + "epoch": 6.982758620689655, + "grad_norm": 0.6285696246936402, + "learning_rate": 8.944267772692527e-10, + "loss": 0.4513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1556059867143631, + "step": 4050, + "valid_targets_mean": 2972.5, + "valid_targets_min": 1132 + }, + { + "epoch": 6.991379310344827, + "grad_norm": 0.8528056566558939, + "learning_rate": 2.66111839490879e-10, + "loss": 0.4412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22985875606536865, + "step": 4055, + "valid_targets_mean": 2739.5, + "valid_targets_min": 1285 + }, + { + "epoch": 7.0, + "grad_norm": 0.7150700292512204, + "learning_rate": 7.392011478479787e-12, + "loss": 0.4477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25698769092559814, + "step": 4060, + "valid_targets_mean": 3496.4, + "valid_targets_min": 809 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25698769092559814, + "step": 4060, + "total_flos": 1.0409209055737283e+18, + "train_loss": 0.5224247261221185, + "train_runtime": 35382.8947, + "train_samples_per_second": 1.835, + "train_steps_per_second": 0.115, + "valid_targets_mean": 3496.4, + "valid_targets_min": 809 + } + ], + "logging_steps": 5, + "max_steps": 4060, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 600, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0409209055737283e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}