diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13346 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 6048, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005790387955993051, + "grad_norm": 16.655819645529608, + "learning_rate": 2.6446280991735537e-07, + "loss": 0.6604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3099028170108795, + "step": 5, + "valid_targets_mean": 7389.0, + "valid_targets_min": 5627 + }, + { + "epoch": 0.011580775911986103, + "grad_norm": 16.101305822612215, + "learning_rate": 5.950413223140496e-07, + "loss": 0.6991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357616424560547, + "step": 10, + "valid_targets_mean": 1294.8, + "valid_targets_min": 422 + }, + { + "epoch": 0.017371163867979156, + "grad_norm": 14.103935692499862, + "learning_rate": 9.256198347107438e-07, + "loss": 0.6739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3202242851257324, + "step": 15, + "valid_targets_mean": 8539.9, + "valid_targets_min": 6761 + }, + { + "epoch": 0.023161551823972205, + "grad_norm": 11.207750997832555, + "learning_rate": 1.2561983471074383e-06, + "loss": 0.5993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2791058421134949, + "step": 20, + "valid_targets_mean": 7579.0, + "valid_targets_min": 6174 + }, + { + "epoch": 0.02895193977996526, + "grad_norm": 7.945461390881987, + "learning_rate": 1.5867768595041324e-06, + "loss": 0.5893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.279413640499115, + "step": 25, + "valid_targets_mean": 7328.2, + "valid_targets_min": 4839 + }, + { + "epoch": 0.03474232773595831, + "grad_norm": 5.946728043074954, + "learning_rate": 1.917355371900827e-06, + "loss": 0.5752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3025493621826172, + "step": 30, + "valid_targets_mean": 6838.2, + "valid_targets_min": 5034 + }, + { + "epoch": 0.04053271569195136, + "grad_norm": 4.920950696141524, + "learning_rate": 2.247933884297521e-06, + "loss": 0.552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2719684839248657, + "step": 35, + "valid_targets_mean": 7248.2, + "valid_targets_min": 5567 + }, + { + "epoch": 0.04632310364794441, + "grad_norm": 5.087298480449038, + "learning_rate": 2.578512396694215e-06, + "loss": 0.4962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23095563054084778, + "step": 40, + "valid_targets_mean": 6234.0, + "valid_targets_min": 5603 + }, + { + "epoch": 0.05211349160393746, + "grad_norm": 3.81594656077192, + "learning_rate": 2.9090909090909093e-06, + "loss": 0.4756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10880223661661148, + "step": 45, + "valid_targets_mean": 602.0, + "valid_targets_min": 152 + }, + { + "epoch": 0.05790387955993052, + "grad_norm": 1.459292133331144, + "learning_rate": 3.2396694214876034e-06, + "loss": 0.4222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20699799060821533, + "step": 50, + "valid_targets_mean": 7299.9, + "valid_targets_min": 6158 + }, + { + "epoch": 0.06369426751592357, + "grad_norm": 1.1003630444312296, + "learning_rate": 3.5702479338842976e-06, + "loss": 0.4171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19563168287277222, + "step": 55, + "valid_targets_mean": 6140.0, + "valid_targets_min": 4648 + }, + { + "epoch": 0.06948465547191662, + "grad_norm": 0.9275014204530865, + "learning_rate": 3.900826446280992e-06, + "loss": 0.4014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19986401498317719, + "step": 60, + "valid_targets_mean": 6603.0, + "valid_targets_min": 4882 + }, + { + "epoch": 0.07527504342790967, + "grad_norm": 0.7033608549870087, + "learning_rate": 4.231404958677686e-06, + "loss": 0.3812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19471821188926697, + "step": 65, + "valid_targets_mean": 8015.0, + "valid_targets_min": 6344 + }, + { + "epoch": 0.08106543138390272, + "grad_norm": 0.6854719385778002, + "learning_rate": 4.56198347107438e-06, + "loss": 0.3691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17885202169418335, + "step": 70, + "valid_targets_mean": 6305.2, + "valid_targets_min": 4622 + }, + { + "epoch": 0.08685581933989578, + "grad_norm": 0.617162174526013, + "learning_rate": 4.892561983471075e-06, + "loss": 0.3722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18935421109199524, + "step": 75, + "valid_targets_mean": 7890.8, + "valid_targets_min": 5485 + }, + { + "epoch": 0.09264620729588882, + "grad_norm": 0.533139623615456, + "learning_rate": 5.223140495867769e-06, + "loss": 0.3666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16596432030200958, + "step": 80, + "valid_targets_mean": 7016.6, + "valid_targets_min": 4960 + }, + { + "epoch": 0.09843659525188188, + "grad_norm": 0.5366119526491792, + "learning_rate": 5.553719008264463e-06, + "loss": 0.3524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16567102074623108, + "step": 85, + "valid_targets_mean": 6527.1, + "valid_targets_min": 5399 + }, + { + "epoch": 0.10422698320787492, + "grad_norm": 0.6389437092982323, + "learning_rate": 5.8842975206611575e-06, + "loss": 0.3516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1750902235507965, + "step": 90, + "valid_targets_mean": 4995.0, + "valid_targets_min": 618 + }, + { + "epoch": 0.11001737116386798, + "grad_norm": 0.5018074528544499, + "learning_rate": 6.214876033057852e-06, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18015700578689575, + "step": 95, + "valid_targets_mean": 7219.4, + "valid_targets_min": 5764 + }, + { + "epoch": 0.11580775911986103, + "grad_norm": 0.5081270494729966, + "learning_rate": 6.545454545454546e-06, + "loss": 0.3409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17924398183822632, + "step": 100, + "valid_targets_mean": 7998.5, + "valid_targets_min": 5902 + }, + { + "epoch": 0.12159814707585408, + "grad_norm": 0.45522046003860905, + "learning_rate": 6.87603305785124e-06, + "loss": 0.312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13894960284233093, + "step": 105, + "valid_targets_mean": 7377.4, + "valid_targets_min": 5015 + }, + { + "epoch": 0.12738853503184713, + "grad_norm": 0.4623081465603883, + "learning_rate": 7.206611570247934e-06, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1380847692489624, + "step": 110, + "valid_targets_mean": 6794.0, + "valid_targets_min": 5283 + }, + { + "epoch": 0.13317892298784018, + "grad_norm": 0.5569607497270949, + "learning_rate": 7.537190082644628e-06, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1848008781671524, + "step": 115, + "valid_targets_mean": 6656.4, + "valid_targets_min": 4364 + }, + { + "epoch": 0.13896931094383325, + "grad_norm": 0.4781151946290797, + "learning_rate": 7.867768595041323e-06, + "loss": 0.3158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1498945653438568, + "step": 120, + "valid_targets_mean": 7178.4, + "valid_targets_min": 4448 + }, + { + "epoch": 0.1447596988998263, + "grad_norm": 0.48824166066226243, + "learning_rate": 8.198347107438017e-06, + "loss": 0.3109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.176904559135437, + "step": 125, + "valid_targets_mean": 7619.8, + "valid_targets_min": 5737 + }, + { + "epoch": 0.15055008685581933, + "grad_norm": 0.4260819057733041, + "learning_rate": 8.528925619834712e-06, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333511471748352, + "step": 130, + "valid_targets_mean": 8754.4, + "valid_targets_min": 5752 + }, + { + "epoch": 0.1563404748118124, + "grad_norm": 0.46330713437709214, + "learning_rate": 8.859504132231406e-06, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14825156331062317, + "step": 135, + "valid_targets_mean": 6341.0, + "valid_targets_min": 4543 + }, + { + "epoch": 0.16213086276780544, + "grad_norm": 0.5057694929436107, + "learning_rate": 9.1900826446281e-06, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494828760623932, + "step": 140, + "valid_targets_mean": 7181.9, + "valid_targets_min": 4563 + }, + { + "epoch": 0.1679212507237985, + "grad_norm": 1.0547789082478412, + "learning_rate": 9.520661157024794e-06, + "loss": 0.3213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11880066245794296, + "step": 145, + "valid_targets_mean": 1104.4, + "valid_targets_min": 161 + }, + { + "epoch": 0.17371163867979156, + "grad_norm": 0.41334982805296444, + "learning_rate": 9.851239669421488e-06, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13268011808395386, + "step": 150, + "valid_targets_mean": 7018.2, + "valid_targets_min": 5424 + }, + { + "epoch": 0.1795020266357846, + "grad_norm": 0.49029004832293016, + "learning_rate": 1.0181818181818182e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1567586362361908, + "step": 155, + "valid_targets_mean": 7738.4, + "valid_targets_min": 5329 + }, + { + "epoch": 0.18529241459177764, + "grad_norm": 0.47121226347035616, + "learning_rate": 1.0512396694214877e-05, + "loss": 0.3125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15164563059806824, + "step": 160, + "valid_targets_mean": 6426.6, + "valid_targets_min": 5560 + }, + { + "epoch": 0.1910828025477707, + "grad_norm": 0.5141163046607614, + "learning_rate": 1.084297520661157e-05, + "loss": 0.3033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15433678030967712, + "step": 165, + "valid_targets_mean": 6169.9, + "valid_targets_min": 3446 + }, + { + "epoch": 0.19687319050376376, + "grad_norm": 0.48615843552385, + "learning_rate": 1.1173553719008265e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489882469177246, + "step": 170, + "valid_targets_mean": 7204.6, + "valid_targets_min": 5381 + }, + { + "epoch": 0.2026635784597568, + "grad_norm": 0.4424369145671002, + "learning_rate": 1.1504132231404959e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15045370161533356, + "step": 175, + "valid_targets_mean": 7600.4, + "valid_targets_min": 5241 + }, + { + "epoch": 0.20845396641574984, + "grad_norm": 0.5025657591779371, + "learning_rate": 1.1834710743801653e-05, + "loss": 0.285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1559857428073883, + "step": 180, + "valid_targets_mean": 7098.5, + "valid_targets_min": 5607 + }, + { + "epoch": 0.2142443543717429, + "grad_norm": 0.5168918509248777, + "learning_rate": 1.2165289256198347e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15138767659664154, + "step": 185, + "valid_targets_mean": 6955.1, + "valid_targets_min": 5530 + }, + { + "epoch": 0.22003474232773595, + "grad_norm": 0.4847154459970703, + "learning_rate": 1.2495867768595043e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14534181356430054, + "step": 190, + "valid_targets_mean": 6748.1, + "valid_targets_min": 5364 + }, + { + "epoch": 0.225825130283729, + "grad_norm": 0.5142233994213916, + "learning_rate": 1.2826446280991736e-05, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14052218198776245, + "step": 195, + "valid_targets_mean": 6273.9, + "valid_targets_min": 5438 + }, + { + "epoch": 0.23161551823972207, + "grad_norm": 0.46607148208472965, + "learning_rate": 1.3157024793388432e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1403881311416626, + "step": 200, + "valid_targets_mean": 6419.5, + "valid_targets_min": 4879 + }, + { + "epoch": 0.2374059061957151, + "grad_norm": 0.46894354953159606, + "learning_rate": 1.3487603305785124e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13953427970409393, + "step": 205, + "valid_targets_mean": 6483.4, + "valid_targets_min": 4309 + }, + { + "epoch": 0.24319629415170815, + "grad_norm": 0.5298027497127495, + "learning_rate": 1.381818181818182e-05, + "loss": 0.2807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15868917107582092, + "step": 210, + "valid_targets_mean": 6825.0, + "valid_targets_min": 5205 + }, + { + "epoch": 0.24898668210770122, + "grad_norm": 0.48339105464033155, + "learning_rate": 1.4148760330578512e-05, + "loss": 0.2769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12778641283512115, + "step": 215, + "valid_targets_mean": 6729.4, + "valid_targets_min": 5200 + }, + { + "epoch": 0.25477707006369427, + "grad_norm": 0.5273655047609594, + "learning_rate": 1.4479338842975208e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13452228903770447, + "step": 220, + "valid_targets_mean": 6094.0, + "valid_targets_min": 5302 + }, + { + "epoch": 0.26056745801968734, + "grad_norm": 0.5545406735891124, + "learning_rate": 1.48099173553719e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445760726928711, + "step": 225, + "valid_targets_mean": 7140.0, + "valid_targets_min": 5270 + }, + { + "epoch": 0.26635784597568035, + "grad_norm": 0.5053786373843324, + "learning_rate": 1.5140495867768596e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12794765830039978, + "step": 230, + "valid_targets_mean": 7425.0, + "valid_targets_min": 5281 + }, + { + "epoch": 0.2721482339316734, + "grad_norm": 0.5117754491840001, + "learning_rate": 1.547107438016529e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1676265448331833, + "step": 235, + "valid_targets_mean": 7063.4, + "valid_targets_min": 5386 + }, + { + "epoch": 0.2779386218876665, + "grad_norm": 0.4967372367737128, + "learning_rate": 1.5801652892561985e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.144246906042099, + "step": 240, + "valid_targets_mean": 6827.0, + "valid_targets_min": 4547 + }, + { + "epoch": 0.2837290098436595, + "grad_norm": 0.7973056364596196, + "learning_rate": 1.613223140495868e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08753351122140884, + "step": 245, + "valid_targets_mean": 2047.0, + "valid_targets_min": 165 + }, + { + "epoch": 0.2895193977996526, + "grad_norm": 0.4943520816825678, + "learning_rate": 1.6462809917355373e-05, + "loss": 0.2642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13315731287002563, + "step": 250, + "valid_targets_mean": 6754.1, + "valid_targets_min": 5196 + }, + { + "epoch": 0.29530978575564565, + "grad_norm": 0.5498952539845848, + "learning_rate": 1.6793388429752067e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13404254615306854, + "step": 255, + "valid_targets_mean": 5985.6, + "valid_targets_min": 5028 + }, + { + "epoch": 0.30110017371163866, + "grad_norm": 0.5479083359283229, + "learning_rate": 1.712396694214876e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1222841888666153, + "step": 260, + "valid_targets_mean": 5963.9, + "valid_targets_min": 5237 + }, + { + "epoch": 0.30689056166763173, + "grad_norm": 0.5438335340635813, + "learning_rate": 1.7454545454545456e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13820022344589233, + "step": 265, + "valid_targets_mean": 6319.0, + "valid_targets_min": 5412 + }, + { + "epoch": 0.3126809496236248, + "grad_norm": 0.4652892213452802, + "learning_rate": 1.778512396694215e-05, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1078779399394989, + "step": 270, + "valid_targets_mean": 7125.8, + "valid_targets_min": 5080 + }, + { + "epoch": 0.3184713375796178, + "grad_norm": 0.5103127506468167, + "learning_rate": 1.8115702479338844e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1312594711780548, + "step": 275, + "valid_targets_mean": 6123.5, + "valid_targets_min": 4521 + }, + { + "epoch": 0.3242617255356109, + "grad_norm": 1.2150630310249255, + "learning_rate": 1.8446280991735538e-05, + "loss": 0.3456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22883066534996033, + "step": 280, + "valid_targets_mean": 6020.6, + "valid_targets_min": 2493 + }, + { + "epoch": 0.33005211349160396, + "grad_norm": 0.7876446225017467, + "learning_rate": 1.8776859504132232e-05, + "loss": 0.426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171472430229187, + "step": 285, + "valid_targets_mean": 3953.5, + "valid_targets_min": 1177 + }, + { + "epoch": 0.335842501447597, + "grad_norm": 0.735005861492704, + "learning_rate": 1.9107438016528926e-05, + "loss": 0.4201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24447613954544067, + "step": 290, + "valid_targets_mean": 4950.8, + "valid_targets_min": 2799 + }, + { + "epoch": 0.34163288940359005, + "grad_norm": 0.7105988732526015, + "learning_rate": 1.943801652892562e-05, + "loss": 0.3965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2215518057346344, + "step": 295, + "valid_targets_mean": 5312.0, + "valid_targets_min": 2973 + }, + { + "epoch": 0.3474232773595831, + "grad_norm": 0.6727827921481346, + "learning_rate": 1.9768595041322315e-05, + "loss": 0.3889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18209363520145416, + "step": 300, + "valid_targets_mean": 3775.5, + "valid_targets_min": 980 + }, + { + "epoch": 0.35321366531557613, + "grad_norm": 0.6668317415236219, + "learning_rate": 2.0099173553719012e-05, + "loss": 0.4069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20714129507541656, + "step": 305, + "valid_targets_mean": 5055.4, + "valid_targets_min": 1512 + }, + { + "epoch": 0.3590040532715692, + "grad_norm": 0.6107245060489721, + "learning_rate": 2.0429752066115703e-05, + "loss": 0.374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14312531054019928, + "step": 310, + "valid_targets_mean": 3600.8, + "valid_targets_min": 1848 + }, + { + "epoch": 0.36479444122756227, + "grad_norm": 0.5509653204998359, + "learning_rate": 2.0760330578512397e-05, + "loss": 0.3772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13793471455574036, + "step": 315, + "valid_targets_mean": 3369.8, + "valid_targets_min": 1381 + }, + { + "epoch": 0.3705848291835553, + "grad_norm": 0.5293595320233865, + "learning_rate": 2.109090909090909e-05, + "loss": 0.3804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19333019852638245, + "step": 320, + "valid_targets_mean": 6283.2, + "valid_targets_min": 3600 + }, + { + "epoch": 0.37637521713954836, + "grad_norm": 0.5779763292618605, + "learning_rate": 2.142148760330579e-05, + "loss": 0.3946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19307959079742432, + "step": 325, + "valid_targets_mean": 5242.5, + "valid_targets_min": 2540 + }, + { + "epoch": 0.3821656050955414, + "grad_norm": 0.5507055471870357, + "learning_rate": 2.1752066115702483e-05, + "loss": 0.376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17400136590003967, + "step": 330, + "valid_targets_mean": 6246.5, + "valid_targets_min": 2157 + }, + { + "epoch": 0.38795599305153444, + "grad_norm": 0.6129232711123003, + "learning_rate": 2.2082644628099174e-05, + "loss": 0.376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19069461524486542, + "step": 335, + "valid_targets_mean": 4556.5, + "valid_targets_min": 1720 + }, + { + "epoch": 0.3937463810075275, + "grad_norm": 0.6201979514739216, + "learning_rate": 2.2413223140495868e-05, + "loss": 0.3798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21241924166679382, + "step": 340, + "valid_targets_mean": 5116.5, + "valid_targets_min": 1982 + }, + { + "epoch": 0.3995367689635206, + "grad_norm": 0.6030358543858219, + "learning_rate": 2.2743801652892566e-05, + "loss": 0.3714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.188879132270813, + "step": 345, + "valid_targets_mean": 4464.5, + "valid_targets_min": 2718 + }, + { + "epoch": 0.4053271569195136, + "grad_norm": 0.6841676419341239, + "learning_rate": 2.307438016528926e-05, + "loss": 0.3853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1824164092540741, + "step": 350, + "valid_targets_mean": 3418.5, + "valid_targets_min": 1451 + }, + { + "epoch": 0.41111754487550667, + "grad_norm": 0.6305854851252951, + "learning_rate": 2.340495867768595e-05, + "loss": 0.3829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1953430473804474, + "step": 355, + "valid_targets_mean": 3990.4, + "valid_targets_min": 1861 + }, + { + "epoch": 0.4169079328314997, + "grad_norm": 0.6879145519243186, + "learning_rate": 2.3735537190082645e-05, + "loss": 0.3742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048986852169037, + "step": 360, + "valid_targets_mean": 4065.4, + "valid_targets_min": 2100 + }, + { + "epoch": 0.42269832078749275, + "grad_norm": 0.670617667592637, + "learning_rate": 2.4066115702479342e-05, + "loss": 0.3864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21338799595832825, + "step": 365, + "valid_targets_mean": 4132.4, + "valid_targets_min": 3234 + }, + { + "epoch": 0.4284887087434858, + "grad_norm": 0.7359672228538012, + "learning_rate": 2.4396694214876036e-05, + "loss": 0.3932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23431096971035004, + "step": 370, + "valid_targets_mean": 4246.2, + "valid_targets_min": 2650 + }, + { + "epoch": 0.43427909669947884, + "grad_norm": 0.7503953347940379, + "learning_rate": 2.4727272727272727e-05, + "loss": 0.3808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593906581401825, + "step": 375, + "valid_targets_mean": 4096.5, + "valid_targets_min": 1719 + }, + { + "epoch": 0.4400694846554719, + "grad_norm": 0.696019831669972, + "learning_rate": 2.505785123966942e-05, + "loss": 0.3851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18997083604335785, + "step": 380, + "valid_targets_mean": 3933.1, + "valid_targets_min": 1020 + }, + { + "epoch": 0.445859872611465, + "grad_norm": 1.0940174628721868, + "learning_rate": 2.538842975206612e-05, + "loss": 0.3627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18985775113105774, + "step": 385, + "valid_targets_mean": 3551.1, + "valid_targets_min": 2130 + }, + { + "epoch": 0.451650260567458, + "grad_norm": 0.6578172297344157, + "learning_rate": 2.5719008264462813e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15262863039970398, + "step": 390, + "valid_targets_mean": 3535.0, + "valid_targets_min": 865 + }, + { + "epoch": 0.45744064852345107, + "grad_norm": 0.9405521225813999, + "learning_rate": 2.6049586776859507e-05, + "loss": 0.3656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.248321533203125, + "step": 395, + "valid_targets_mean": 3716.5, + "valid_targets_min": 3144 + }, + { + "epoch": 0.46323103647944414, + "grad_norm": 1.0432523503468756, + "learning_rate": 2.6380165289256198e-05, + "loss": 0.3739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1557130515575409, + "step": 400, + "valid_targets_mean": 3262.2, + "valid_targets_min": 2050 + }, + { + "epoch": 0.46902142443543715, + "grad_norm": 0.6632192119125181, + "learning_rate": 2.6710743801652895e-05, + "loss": 0.372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18791770935058594, + "step": 405, + "valid_targets_mean": 3296.5, + "valid_targets_min": 1570 + }, + { + "epoch": 0.4748118123914302, + "grad_norm": 0.6008101768878966, + "learning_rate": 2.704132231404959e-05, + "loss": 0.353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17852509021759033, + "step": 410, + "valid_targets_mean": 4806.4, + "valid_targets_min": 1795 + }, + { + "epoch": 0.4806022003474233, + "grad_norm": 0.6184314563964771, + "learning_rate": 2.7371900826446284e-05, + "loss": 0.3566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16312438249588013, + "step": 415, + "valid_targets_mean": 4342.4, + "valid_targets_min": 1934 + }, + { + "epoch": 0.4863925883034163, + "grad_norm": 0.6917524383618637, + "learning_rate": 2.7702479338842974e-05, + "loss": 0.3666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18566149473190308, + "step": 420, + "valid_targets_mean": 3675.4, + "valid_targets_min": 2557 + }, + { + "epoch": 0.4921829762594094, + "grad_norm": 0.6379702051172501, + "learning_rate": 2.8033057851239672e-05, + "loss": 0.3625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20107987523078918, + "step": 425, + "valid_targets_mean": 4550.4, + "valid_targets_min": 2663 + }, + { + "epoch": 0.49797336421540245, + "grad_norm": 0.6626061187107526, + "learning_rate": 2.8363636363636366e-05, + "loss": 0.3522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774175465106964, + "step": 430, + "valid_targets_mean": 4604.0, + "valid_targets_min": 2016 + }, + { + "epoch": 0.5037637521713955, + "grad_norm": 0.703691101341157, + "learning_rate": 2.869421487603306e-05, + "loss": 0.3598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15914887189865112, + "step": 435, + "valid_targets_mean": 2976.8, + "valid_targets_min": 1367 + }, + { + "epoch": 0.5095541401273885, + "grad_norm": 0.7067311357342676, + "learning_rate": 2.902479338842975e-05, + "loss": 0.3676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17488998174667358, + "step": 440, + "valid_targets_mean": 3542.5, + "valid_targets_min": 2026 + }, + { + "epoch": 0.5153445280833816, + "grad_norm": 0.6737400104351488, + "learning_rate": 2.9355371900826452e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16618910431861877, + "step": 445, + "valid_targets_mean": 3247.2, + "valid_targets_min": 2033 + }, + { + "epoch": 0.5211349160393747, + "grad_norm": 0.8326243547681798, + "learning_rate": 2.9685950413223143e-05, + "loss": 0.3699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1832060068845749, + "step": 450, + "valid_targets_mean": 2201.6, + "valid_targets_min": 1283 + }, + { + "epoch": 0.5269253039953677, + "grad_norm": 0.8234864634523447, + "learning_rate": 3.0016528925619837e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18912853300571442, + "step": 455, + "valid_targets_mean": 2824.8, + "valid_targets_min": 1636 + }, + { + "epoch": 0.5327156919513607, + "grad_norm": 0.7135386777132873, + "learning_rate": 3.034710743801653e-05, + "loss": 0.3596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1965169608592987, + "step": 460, + "valid_targets_mean": 3844.4, + "valid_targets_min": 1629 + }, + { + "epoch": 0.5385060799073538, + "grad_norm": 0.7315405541721403, + "learning_rate": 3.067768595041323e-05, + "loss": 0.3494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16616566479206085, + "step": 465, + "valid_targets_mean": 3048.6, + "valid_targets_min": 2080 + }, + { + "epoch": 0.5442964678633468, + "grad_norm": 0.6925498446822687, + "learning_rate": 3.100826446280992e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16240294277668, + "step": 470, + "valid_targets_mean": 3111.8, + "valid_targets_min": 1153 + }, + { + "epoch": 0.5500868558193399, + "grad_norm": 0.6353538526882198, + "learning_rate": 3.133884297520662e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19012531638145447, + "step": 475, + "valid_targets_mean": 4069.0, + "valid_targets_min": 1858 + }, + { + "epoch": 0.555877243775333, + "grad_norm": 0.7238753552976274, + "learning_rate": 3.166942148760331e-05, + "loss": 0.3639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15231537818908691, + "step": 480, + "valid_targets_mean": 3415.2, + "valid_targets_min": 1639 + }, + { + "epoch": 0.561667631731326, + "grad_norm": 0.6665274482956017, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.3569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.145134836435318, + "step": 485, + "valid_targets_mean": 3267.6, + "valid_targets_min": 1394 + }, + { + "epoch": 0.567458019687319, + "grad_norm": 0.6510657131966783, + "learning_rate": 3.2330578512396696e-05, + "loss": 0.3484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18342448770999908, + "step": 490, + "valid_targets_mean": 4775.9, + "valid_targets_min": 1844 + }, + { + "epoch": 0.5732484076433121, + "grad_norm": 1.018435315706297, + "learning_rate": 3.2661157024793394e-05, + "loss": 0.3508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15765246748924255, + "step": 495, + "valid_targets_mean": 3317.1, + "valid_targets_min": 1305 + }, + { + "epoch": 0.5790387955993052, + "grad_norm": 0.6883157544394269, + "learning_rate": 3.2991735537190084e-05, + "loss": 0.3468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1881387084722519, + "step": 500, + "valid_targets_mean": 4290.8, + "valid_targets_min": 2185 + }, + { + "epoch": 0.5848291835552982, + "grad_norm": 0.6721803569935538, + "learning_rate": 3.332231404958678e-05, + "loss": 0.3537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20651760697364807, + "step": 505, + "valid_targets_mean": 4024.9, + "valid_targets_min": 2349 + }, + { + "epoch": 0.5906195715112913, + "grad_norm": 0.6291206567565695, + "learning_rate": 3.365289256198347e-05, + "loss": 0.3359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15432091057300568, + "step": 510, + "valid_targets_mean": 3453.0, + "valid_targets_min": 1607 + }, + { + "epoch": 0.5964099594672843, + "grad_norm": 0.6904459046779862, + "learning_rate": 3.398347107438017e-05, + "loss": 0.362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1796693503856659, + "step": 515, + "valid_targets_mean": 3389.8, + "valid_targets_min": 1507 + }, + { + "epoch": 0.6022003474232773, + "grad_norm": 0.640463878413541, + "learning_rate": 3.431404958677686e-05, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16405197978019714, + "step": 520, + "valid_targets_mean": 3773.9, + "valid_targets_min": 1317 + }, + { + "epoch": 0.6079907353792704, + "grad_norm": 0.6511127283978864, + "learning_rate": 3.464462809917356e-05, + "loss": 0.3493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15977990627288818, + "step": 525, + "valid_targets_mean": 3881.0, + "valid_targets_min": 3355 + }, + { + "epoch": 0.6137811233352635, + "grad_norm": 0.6341328479062885, + "learning_rate": 3.497520661157025e-05, + "loss": 0.3506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16847799718379974, + "step": 530, + "valid_targets_mean": 3951.9, + "valid_targets_min": 2999 + }, + { + "epoch": 0.6195715112912565, + "grad_norm": 0.6419289214078541, + "learning_rate": 3.530578512396695e-05, + "loss": 0.35, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15617629885673523, + "step": 535, + "valid_targets_mean": 4086.5, + "valid_targets_min": 1739 + }, + { + "epoch": 0.6253618992472496, + "grad_norm": 0.6420357082141755, + "learning_rate": 3.563636363636364e-05, + "loss": 0.3502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15255317091941833, + "step": 540, + "valid_targets_mean": 3298.0, + "valid_targets_min": 1140 + }, + { + "epoch": 0.6311522872032426, + "grad_norm": 0.6147364203602501, + "learning_rate": 3.5966942148760335e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18115392327308655, + "step": 545, + "valid_targets_mean": 4012.1, + "valid_targets_min": 2779 + }, + { + "epoch": 0.6369426751592356, + "grad_norm": 0.6415078707723959, + "learning_rate": 3.6297520661157026e-05, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16753660142421722, + "step": 550, + "valid_targets_mean": 3636.9, + "valid_targets_min": 1183 + }, + { + "epoch": 0.6427330631152287, + "grad_norm": 0.6598650414716416, + "learning_rate": 3.6628099173553724e-05, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19901029765605927, + "step": 555, + "valid_targets_mean": 4269.0, + "valid_targets_min": 3041 + }, + { + "epoch": 0.6485234510712218, + "grad_norm": 0.6794989569457508, + "learning_rate": 3.6958677685950414e-05, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17811614274978638, + "step": 560, + "valid_targets_mean": 3350.1, + "valid_targets_min": 1726 + }, + { + "epoch": 0.6543138390272148, + "grad_norm": 0.5301401186579153, + "learning_rate": 3.728925619834711e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15499885380268097, + "step": 565, + "valid_targets_mean": 4747.4, + "valid_targets_min": 1743 + }, + { + "epoch": 0.6601042269832079, + "grad_norm": 0.6146825808496802, + "learning_rate": 3.76198347107438e-05, + "loss": 0.347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16681280732154846, + "step": 570, + "valid_targets_mean": 3702.8, + "valid_targets_min": 1234 + }, + { + "epoch": 0.6658946149392009, + "grad_norm": 0.6199338035415232, + "learning_rate": 3.79504132231405e-05, + "loss": 0.3533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17797984182834625, + "step": 575, + "valid_targets_mean": 4079.0, + "valid_targets_min": 2303 + }, + { + "epoch": 0.671685002895194, + "grad_norm": 0.6414527008678907, + "learning_rate": 3.828099173553719e-05, + "loss": 0.353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17539098858833313, + "step": 580, + "valid_targets_mean": 3830.8, + "valid_targets_min": 1651 + }, + { + "epoch": 0.677475390851187, + "grad_norm": 0.7158891749544349, + "learning_rate": 3.861157024793389e-05, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184058278799057, + "step": 585, + "valid_targets_mean": 2876.2, + "valid_targets_min": 1374 + }, + { + "epoch": 0.6832657788071801, + "grad_norm": 0.725484792932723, + "learning_rate": 3.894214876033058e-05, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16789329051971436, + "step": 590, + "valid_targets_mean": 3467.5, + "valid_targets_min": 1625 + }, + { + "epoch": 0.6890561667631732, + "grad_norm": 0.6355075281081544, + "learning_rate": 3.927272727272728e-05, + "loss": 0.3299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1683839112520218, + "step": 595, + "valid_targets_mean": 4089.5, + "valid_targets_min": 997 + }, + { + "epoch": 0.6948465547191662, + "grad_norm": 0.5717834677186259, + "learning_rate": 3.960330578512397e-05, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14123469591140747, + "step": 600, + "valid_targets_mean": 3618.6, + "valid_targets_min": 1179 + }, + { + "epoch": 0.7006369426751592, + "grad_norm": 0.7698731162915861, + "learning_rate": 3.9933884297520665e-05, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16036216914653778, + "step": 605, + "valid_targets_mean": 3079.1, + "valid_targets_min": 1444 + }, + { + "epoch": 0.7064273306311523, + "grad_norm": 0.6676617529688202, + "learning_rate": 3.9999946698078364e-05, + "loss": 0.317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1451885998249054, + "step": 610, + "valid_targets_mean": 3486.8, + "valid_targets_min": 1504 + }, + { + "epoch": 0.7122177185871453, + "grad_norm": 0.603977760826595, + "learning_rate": 3.9999730159508614e-05, + "loss": 0.3293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16658622026443481, + "step": 615, + "valid_targets_mean": 3998.4, + "valid_targets_min": 2302 + }, + { + "epoch": 0.7180081065431384, + "grad_norm": 0.8047137177662046, + "learning_rate": 3.9999347054722696e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21027225255966187, + "step": 620, + "valid_targets_mean": 4282.5, + "valid_targets_min": 1818 + }, + { + "epoch": 0.7237984944991315, + "grad_norm": 0.605583750962231, + "learning_rate": 3.999879738691128e-05, + "loss": 0.3611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17750391364097595, + "step": 625, + "valid_targets_mean": 4221.9, + "valid_targets_min": 2633 + }, + { + "epoch": 0.7295888824551245, + "grad_norm": 0.6129538675494742, + "learning_rate": 3.999808116065221e-05, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17118681967258453, + "step": 630, + "valid_targets_mean": 3522.4, + "valid_targets_min": 2481 + }, + { + "epoch": 0.7353792704111175, + "grad_norm": 0.6501310721805252, + "learning_rate": 3.999719838191054e-05, + "loss": 0.3458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18486127257347107, + "step": 635, + "valid_targets_mean": 4058.8, + "valid_targets_min": 2013 + }, + { + "epoch": 0.7411696583671106, + "grad_norm": 0.7104788952748324, + "learning_rate": 3.999614905803841e-05, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17827048897743225, + "step": 640, + "valid_targets_mean": 3723.5, + "valid_targets_min": 1744 + }, + { + "epoch": 0.7469600463231036, + "grad_norm": 0.622483649564905, + "learning_rate": 3.999493319777504e-05, + "loss": 0.3418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1791311353445053, + "step": 645, + "valid_targets_mean": 4180.9, + "valid_targets_min": 1914 + }, + { + "epoch": 0.7527504342790967, + "grad_norm": 0.6633582586829129, + "learning_rate": 3.999355081124663e-05, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1443580985069275, + "step": 650, + "valid_targets_mean": 3668.6, + "valid_targets_min": 3065 + }, + { + "epoch": 0.7585408222350898, + "grad_norm": 0.5827334066652042, + "learning_rate": 3.9992001909966284e-05, + "loss": 0.3169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1897888481616974, + "step": 655, + "valid_targets_mean": 4473.2, + "valid_targets_min": 2894 + }, + { + "epoch": 0.7643312101910829, + "grad_norm": 0.7444083935272425, + "learning_rate": 3.99902865068339e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17201346158981323, + "step": 660, + "valid_targets_mean": 3395.6, + "valid_targets_min": 2359 + }, + { + "epoch": 0.7701215981470758, + "grad_norm": 0.7066890557943601, + "learning_rate": 3.998840461613608e-05, + "loss": 0.3302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14887091517448425, + "step": 665, + "valid_targets_mean": 3114.5, + "valid_targets_min": 1463 + }, + { + "epoch": 0.7759119861030689, + "grad_norm": 0.6192222424391265, + "learning_rate": 3.998635625354602e-05, + "loss": 0.3415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16993939876556396, + "step": 670, + "valid_targets_mean": 3588.1, + "valid_targets_min": 1426 + }, + { + "epoch": 0.781702374059062, + "grad_norm": 0.6542474820446244, + "learning_rate": 3.998414143612333e-05, + "loss": 0.3466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16761505603790283, + "step": 675, + "valid_targets_mean": 3405.5, + "valid_targets_min": 1661 + }, + { + "epoch": 0.787492762015055, + "grad_norm": 0.5799377222940094, + "learning_rate": 3.9981760182313957e-05, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17293000221252441, + "step": 680, + "valid_targets_mean": 4457.8, + "valid_targets_min": 1529 + }, + { + "epoch": 0.7932831499710481, + "grad_norm": 0.8486757316641838, + "learning_rate": 3.9979212511949984e-05, + "loss": 0.3693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19929572939872742, + "step": 685, + "valid_targets_mean": 3272.9, + "valid_targets_min": 1450 + }, + { + "epoch": 0.7990735379270412, + "grad_norm": 0.6591616098189284, + "learning_rate": 3.997649844624951e-05, + "loss": 0.3352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14856567978858948, + "step": 690, + "valid_targets_mean": 3103.2, + "valid_targets_min": 1625 + }, + { + "epoch": 0.8048639258830341, + "grad_norm": 0.5972544991840065, + "learning_rate": 3.9973618007816395e-05, + "loss": 0.3427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1420861929655075, + "step": 695, + "valid_targets_mean": 3952.2, + "valid_targets_min": 1174 + }, + { + "epoch": 0.8106543138390272, + "grad_norm": 0.5926035572764118, + "learning_rate": 3.9970571220640164e-05, + "loss": 0.3466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17784333229064941, + "step": 700, + "valid_targets_mean": 4039.2, + "valid_targets_min": 2882 + }, + { + "epoch": 0.8164447017950203, + "grad_norm": 0.7172461283418334, + "learning_rate": 3.996735811009575e-05, + "loss": 0.3482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19199448823928833, + "step": 705, + "valid_targets_mean": 4594.0, + "valid_targets_min": 1946 + }, + { + "epoch": 0.8222350897510133, + "grad_norm": 0.587560988982399, + "learning_rate": 3.996397870294329e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16461029648780823, + "step": 710, + "valid_targets_mean": 4082.6, + "valid_targets_min": 2000 + }, + { + "epoch": 0.8280254777070064, + "grad_norm": 0.6171659688682881, + "learning_rate": 3.996043302732792e-05, + "loss": 0.3458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18425926566123962, + "step": 715, + "valid_targets_mean": 4596.1, + "valid_targets_min": 2012 + }, + { + "epoch": 0.8338158656629994, + "grad_norm": 0.6557809627187827, + "learning_rate": 3.995672111277953e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17367544770240784, + "step": 720, + "valid_targets_mean": 3094.4, + "valid_targets_min": 719 + }, + { + "epoch": 0.8396062536189924, + "grad_norm": 0.5612740562619445, + "learning_rate": 3.995284299021251e-05, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1546105444431305, + "step": 725, + "valid_targets_mean": 4496.2, + "valid_targets_min": 3238 + }, + { + "epoch": 0.8453966415749855, + "grad_norm": 0.6423654246977016, + "learning_rate": 3.994879869192552e-05, + "loss": 0.3242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14627155661582947, + "step": 730, + "valid_targets_mean": 3816.8, + "valid_targets_min": 1372 + }, + { + "epoch": 0.8511870295309786, + "grad_norm": 0.6898680259078053, + "learning_rate": 3.994458825160117e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782332956790924, + "step": 735, + "valid_targets_mean": 3306.1, + "valid_targets_min": 2370 + }, + { + "epoch": 0.8569774174869716, + "grad_norm": 0.6148055706863383, + "learning_rate": 3.99402117043058e-05, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14432159066200256, + "step": 740, + "valid_targets_mean": 2958.1, + "valid_targets_min": 1576 + }, + { + "epoch": 0.8627678054429647, + "grad_norm": 0.655012125079548, + "learning_rate": 3.993566908648914e-05, + "loss": 0.3254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17978647351264954, + "step": 745, + "valid_targets_mean": 3758.5, + "valid_targets_min": 1426 + }, + { + "epoch": 0.8685581933989577, + "grad_norm": 0.7067281742494269, + "learning_rate": 3.993096043598403e-05, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14911097288131714, + "step": 750, + "valid_targets_mean": 3117.0, + "valid_targets_min": 1728 + }, + { + "epoch": 0.8743485813549507, + "grad_norm": 0.8759168561210263, + "learning_rate": 3.992608579200612e-05, + "loss": 0.3339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17803344130516052, + "step": 755, + "valid_targets_mean": 3264.2, + "valid_targets_min": 1362 + }, + { + "epoch": 0.8801389693109438, + "grad_norm": 0.5534589430756651, + "learning_rate": 3.9921045195153493e-05, + "loss": 0.3313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561286449432373, + "step": 760, + "valid_targets_mean": 3836.0, + "valid_targets_min": 852 + }, + { + "epoch": 0.8859293572669369, + "grad_norm": 1.3061158563859285, + "learning_rate": 3.991583868740638e-05, + "loss": 0.3408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21102529764175415, + "step": 765, + "valid_targets_mean": 2764.1, + "valid_targets_min": 1195 + }, + { + "epoch": 0.89171974522293, + "grad_norm": 0.5613147833819194, + "learning_rate": 3.9910466312126786e-05, + "loss": 0.3265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17475545406341553, + "step": 770, + "valid_targets_mean": 4457.5, + "valid_targets_min": 1564 + }, + { + "epoch": 0.897510133178923, + "grad_norm": 0.8717166999384921, + "learning_rate": 3.990492811405811e-05, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13619567453861237, + "step": 775, + "valid_targets_mean": 4371.2, + "valid_targets_min": 1975 + }, + { + "epoch": 0.903300521134916, + "grad_norm": 0.6096546708962742, + "learning_rate": 3.989922413932482e-05, + "loss": 0.3137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16569563746452332, + "step": 780, + "valid_targets_mean": 3965.8, + "valid_targets_min": 2426 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 0.5149683718364065, + "learning_rate": 3.989335443543203e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1620718240737915, + "step": 785, + "valid_targets_mean": 6384.6, + "valid_targets_min": 2678 + }, + { + "epoch": 0.9148812970469021, + "grad_norm": 0.5888450189307368, + "learning_rate": 3.988731905126512e-05, + "loss": 0.328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15674595534801483, + "step": 790, + "valid_targets_mean": 3543.2, + "valid_targets_min": 2826 + }, + { + "epoch": 0.9206716850028952, + "grad_norm": 0.7469580751777335, + "learning_rate": 3.988111803708932e-05, + "loss": 0.3139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13202321529388428, + "step": 795, + "valid_targets_mean": 3155.8, + "valid_targets_min": 993 + }, + { + "epoch": 0.9264620729588883, + "grad_norm": 0.7150531573137404, + "learning_rate": 3.98747514445493e-05, + "loss": 0.3218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17962214350700378, + "step": 800, + "valid_targets_mean": 4166.5, + "valid_targets_min": 2724 + }, + { + "epoch": 0.9322524609148813, + "grad_norm": 0.6615542925583409, + "learning_rate": 3.9868219326668736e-05, + "loss": 0.3224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1518193483352661, + "step": 805, + "valid_targets_mean": 3079.5, + "valid_targets_min": 1360 + }, + { + "epoch": 0.9380428488708743, + "grad_norm": 0.5680095143500292, + "learning_rate": 3.9861521737849874e-05, + "loss": 0.3163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15517696738243103, + "step": 810, + "valid_targets_mean": 4114.9, + "valid_targets_min": 2414 + }, + { + "epoch": 0.9438332368268674, + "grad_norm": 0.6414002936421879, + "learning_rate": 3.985465873387307e-05, + "loss": 0.3425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18613509833812714, + "step": 815, + "valid_targets_mean": 4524.8, + "valid_targets_min": 2425 + }, + { + "epoch": 0.9496236247828604, + "grad_norm": 0.7074773784167145, + "learning_rate": 3.984763037189631e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16936403512954712, + "step": 820, + "valid_targets_mean": 3035.2, + "valid_targets_min": 1720 + }, + { + "epoch": 0.9554140127388535, + "grad_norm": 0.6703087910116017, + "learning_rate": 3.984043671045479e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1667347550392151, + "step": 825, + "valid_targets_mean": 3260.4, + "valid_targets_min": 1621 + }, + { + "epoch": 0.9612044006948466, + "grad_norm": 0.7039481741642971, + "learning_rate": 3.983307780946034e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16874805092811584, + "step": 830, + "valid_targets_mean": 3214.6, + "valid_targets_min": 1858 + }, + { + "epoch": 0.9669947886508397, + "grad_norm": 0.6768188627921073, + "learning_rate": 3.982555373020103e-05, + "loss": 0.3291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15116912126541138, + "step": 835, + "valid_targets_mean": 3254.2, + "valid_targets_min": 1254 + }, + { + "epoch": 0.9727851766068326, + "grad_norm": 0.6297084534327685, + "learning_rate": 3.9817864535340556e-05, + "loss": 0.3185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1747068166732788, + "step": 840, + "valid_targets_mean": 2994.4, + "valid_targets_min": 1039 + }, + { + "epoch": 0.9785755645628257, + "grad_norm": 0.6093952628926999, + "learning_rate": 3.981001028891779e-05, + "loss": 0.3368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20562121272087097, + "step": 845, + "valid_targets_mean": 4625.4, + "valid_targets_min": 2526 + }, + { + "epoch": 0.9843659525188188, + "grad_norm": 0.5915248441168647, + "learning_rate": 3.980199105634623e-05, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17518404126167297, + "step": 850, + "valid_targets_mean": 3571.8, + "valid_targets_min": 2409 + }, + { + "epoch": 0.9901563404748118, + "grad_norm": 0.611861719637808, + "learning_rate": 3.979380690441343e-05, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14133447408676147, + "step": 855, + "valid_targets_mean": 3691.5, + "valid_targets_min": 2918 + }, + { + "epoch": 0.9959467284308049, + "grad_norm": 0.6276245035498778, + "learning_rate": 3.978545790128047e-05, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16311562061309814, + "step": 860, + "valid_targets_mean": 4063.1, + "valid_targets_min": 2179 + }, + { + "epoch": 1.0011580775911986, + "grad_norm": 0.5249869502014928, + "learning_rate": 3.9776944116481385e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13150295615196228, + "step": 865, + "valid_targets_mean": 8215.2, + "valid_targets_min": 6163 + }, + { + "epoch": 1.0069484655471916, + "grad_norm": 0.484117327524638, + "learning_rate": 3.976826562092257e-05, + "loss": 0.2709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13699647784233093, + "step": 870, + "valid_targets_mean": 8594.4, + "valid_targets_min": 6006 + }, + { + "epoch": 1.0127388535031847, + "grad_norm": 0.5948819271463993, + "learning_rate": 3.975942248688222e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.120064377784729, + "step": 875, + "valid_targets_mean": 3578.4, + "valid_targets_min": 289 + }, + { + "epoch": 1.0185292414591778, + "grad_norm": 0.45255503571848066, + "learning_rate": 3.975041478800969e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12774991989135742, + "step": 880, + "valid_targets_mean": 7019.9, + "valid_targets_min": 4985 + }, + { + "epoch": 1.0243196294151709, + "grad_norm": 0.4344176759331777, + "learning_rate": 3.9741242599324904e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12448737025260925, + "step": 885, + "valid_targets_mean": 6956.6, + "valid_targets_min": 1457 + }, + { + "epoch": 1.030110017371164, + "grad_norm": 0.4255312922643231, + "learning_rate": 3.973190599721775e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11625795066356659, + "step": 890, + "valid_targets_mean": 7361.6, + "valid_targets_min": 4961 + }, + { + "epoch": 1.035900405327157, + "grad_norm": 0.4986133150702989, + "learning_rate": 3.972240505944737e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1547217071056366, + "step": 895, + "valid_targets_mean": 7058.5, + "valid_targets_min": 5985 + }, + { + "epoch": 1.04169079328315, + "grad_norm": 0.4433867493523557, + "learning_rate": 3.971273986514162e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13994178175926208, + "step": 900, + "valid_targets_mean": 7664.2, + "valid_targets_min": 5169 + }, + { + "epoch": 1.0474811812391431, + "grad_norm": 0.4274109149772053, + "learning_rate": 3.9702910494796325e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12422806024551392, + "step": 905, + "valid_targets_mean": 7111.8, + "valid_targets_min": 5348 + }, + { + "epoch": 1.053271569195136, + "grad_norm": 0.5117732983445061, + "learning_rate": 3.969291703027463e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11535590887069702, + "step": 910, + "valid_targets_mean": 4238.9, + "valid_targets_min": 152 + }, + { + "epoch": 1.059061957151129, + "grad_norm": 0.4501238789684571, + "learning_rate": 3.968275955480635e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12611274421215057, + "step": 915, + "valid_targets_mean": 7199.4, + "valid_targets_min": 4258 + }, + { + "epoch": 1.0648523451071221, + "grad_norm": 0.4435191699594279, + "learning_rate": 3.967243815298723e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11774446070194244, + "step": 920, + "valid_targets_mean": 7243.4, + "valid_targets_min": 5831 + }, + { + "epoch": 1.0706427330631152, + "grad_norm": 0.46392473348758684, + "learning_rate": 3.966195291077827e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11789186298847198, + "step": 925, + "valid_targets_mean": 6950.6, + "valid_targets_min": 3862 + }, + { + "epoch": 1.0764331210191083, + "grad_norm": 0.4233082561923554, + "learning_rate": 3.965130391550501e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12646175920963287, + "step": 930, + "valid_targets_mean": 6991.9, + "valid_targets_min": 5696 + }, + { + "epoch": 1.0822235089751013, + "grad_norm": 0.4367964173531144, + "learning_rate": 3.9640491255856794e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12679660320281982, + "step": 935, + "valid_targets_mean": 6963.5, + "valid_targets_min": 4947 + }, + { + "epoch": 1.0880138969310944, + "grad_norm": 0.4320528061136158, + "learning_rate": 3.962951502188601e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11090008914470673, + "step": 940, + "valid_targets_mean": 6436.9, + "valid_targets_min": 4988 + }, + { + "epoch": 1.0938042848870875, + "grad_norm": 0.42536386526893744, + "learning_rate": 3.9618375305007394e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1170877069234848, + "step": 945, + "valid_targets_mean": 7518.0, + "valid_targets_min": 5476 + }, + { + "epoch": 1.0995946728430805, + "grad_norm": 0.4582340812491947, + "learning_rate": 3.960707219799722e-05, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10813379287719727, + "step": 950, + "valid_targets_mean": 7422.6, + "valid_targets_min": 5226 + }, + { + "epoch": 1.1053850607990736, + "grad_norm": 0.4671861431249364, + "learning_rate": 3.959560579499254e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15178634226322174, + "step": 955, + "valid_targets_mean": 7080.0, + "valid_targets_min": 3753 + }, + { + "epoch": 1.1111754487550667, + "grad_norm": 0.5169416581408465, + "learning_rate": 3.9583976191490426e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13366922736167908, + "step": 960, + "valid_targets_mean": 6064.6, + "valid_targets_min": 5161 + }, + { + "epoch": 1.1169658367110595, + "grad_norm": 0.418553522605153, + "learning_rate": 3.957218348434711e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11265334486961365, + "step": 965, + "valid_targets_mean": 7388.1, + "valid_targets_min": 5795 + }, + { + "epoch": 1.1227562246670526, + "grad_norm": 0.388992424482383, + "learning_rate": 3.956022777177727e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10571281611919403, + "step": 970, + "valid_targets_mean": 7696.0, + "valid_targets_min": 4658 + }, + { + "epoch": 1.1285466126230457, + "grad_norm": 0.4681809942636834, + "learning_rate": 3.954810915335314e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1254599690437317, + "step": 975, + "valid_targets_mean": 7799.1, + "valid_targets_min": 5508 + }, + { + "epoch": 1.1343370005790387, + "grad_norm": 0.48398865515756395, + "learning_rate": 3.9535827730003686e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11612563580274582, + "step": 980, + "valid_targets_mean": 7341.6, + "valid_targets_min": 6454 + }, + { + "epoch": 1.1401273885350318, + "grad_norm": 0.4490116781150715, + "learning_rate": 3.952338360401382e-05, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10329363495111465, + "step": 985, + "valid_targets_mean": 6818.0, + "valid_targets_min": 4392 + }, + { + "epoch": 1.1459177764910249, + "grad_norm": 0.41953032426909853, + "learning_rate": 3.951077687902347e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10158362239599228, + "step": 990, + "valid_targets_mean": 7857.6, + "valid_targets_min": 5394 + }, + { + "epoch": 1.151708164447018, + "grad_norm": 0.3947288696909538, + "learning_rate": 3.949800766002679e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09270112216472626, + "step": 995, + "valid_targets_mean": 8314.1, + "valid_targets_min": 4837 + }, + { + "epoch": 1.157498552403011, + "grad_norm": 0.36433462079153023, + "learning_rate": 3.9485076053371234e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08710625767707825, + "step": 1000, + "valid_targets_mean": 7368.2, + "valid_targets_min": 6027 + }, + { + "epoch": 1.163288940359004, + "grad_norm": 0.45419044499402866, + "learning_rate": 3.9471982166756686e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11508241295814514, + "step": 1005, + "valid_targets_mean": 6435.0, + "valid_targets_min": 4238 + }, + { + "epoch": 1.1690793283149972, + "grad_norm": 2.111154060538335, + "learning_rate": 3.9458726109234595e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09870345890522003, + "step": 1010, + "valid_targets_mean": 247.8, + "valid_targets_min": 137 + }, + { + "epoch": 1.1748697162709902, + "grad_norm": 0.45137108403630927, + "learning_rate": 3.9445307991206993e-05, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11088582128286362, + "step": 1015, + "valid_targets_mean": 6807.0, + "valid_targets_min": 5165 + }, + { + "epoch": 1.180660104226983, + "grad_norm": 0.8712175642859009, + "learning_rate": 3.943172792442567e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12714217603206635, + "step": 1020, + "valid_targets_mean": 6989.9, + "valid_targets_min": 5104 + }, + { + "epoch": 1.1864504921829764, + "grad_norm": 0.4413781971226211, + "learning_rate": 3.941798602199115e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13100311160087585, + "step": 1025, + "valid_targets_mean": 6692.2, + "valid_targets_min": 5068 + }, + { + "epoch": 1.1922408801389692, + "grad_norm": 0.4435462481029505, + "learning_rate": 3.9404082398351825e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14060164988040924, + "step": 1030, + "valid_targets_mean": 7419.6, + "valid_targets_min": 5578 + }, + { + "epoch": 1.1980312680949623, + "grad_norm": 0.4060121325738929, + "learning_rate": 3.939001716930294e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11470702290534973, + "step": 1035, + "valid_targets_mean": 7536.5, + "valid_targets_min": 5075 + }, + { + "epoch": 1.2038216560509554, + "grad_norm": 0.38409709927516766, + "learning_rate": 3.93757904519857e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10600236058235168, + "step": 1040, + "valid_targets_mean": 7221.5, + "valid_targets_min": 5841 + }, + { + "epoch": 1.2096120440069484, + "grad_norm": 0.4056910276302505, + "learning_rate": 3.93614023648862e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11286841332912445, + "step": 1045, + "valid_targets_mean": 7287.5, + "valid_targets_min": 5151 + }, + { + "epoch": 1.2154024319629415, + "grad_norm": 0.4627842508865308, + "learning_rate": 3.934685302783454e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11054033041000366, + "step": 1050, + "valid_targets_mean": 6738.4, + "valid_targets_min": 4042 + }, + { + "epoch": 1.2211928199189346, + "grad_norm": 0.4534271435797926, + "learning_rate": 3.9332142562003735e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12786555290222168, + "step": 1055, + "valid_targets_mean": 6331.0, + "valid_targets_min": 5496 + }, + { + "epoch": 1.2269832078749277, + "grad_norm": 0.4673448734063373, + "learning_rate": 3.9317271089908784e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1133434921503067, + "step": 1060, + "valid_targets_mean": 6271.1, + "valid_targets_min": 4695 + }, + { + "epoch": 1.2327735958309207, + "grad_norm": 0.49865251236737174, + "learning_rate": 3.93022387354056e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12024211138486862, + "step": 1065, + "valid_targets_mean": 5790.8, + "valid_targets_min": 5021 + }, + { + "epoch": 1.2385639837869138, + "grad_norm": 0.5926834734740676, + "learning_rate": 3.9287045623689985e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09189209342002869, + "step": 1070, + "valid_targets_mean": 3386.8, + "valid_targets_min": 2279 + }, + { + "epoch": 1.2443543717429069, + "grad_norm": 0.4794203925484133, + "learning_rate": 3.927169188129661e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11206869035959244, + "step": 1075, + "valid_targets_mean": 6240.5, + "valid_targets_min": 5093 + }, + { + "epoch": 1.2501447596989, + "grad_norm": 0.4632615561507731, + "learning_rate": 3.9256177636097934e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11752070486545563, + "step": 1080, + "valid_targets_mean": 6329.5, + "valid_targets_min": 4589 + }, + { + "epoch": 1.2559351476548928, + "grad_norm": 2.0458169274072926, + "learning_rate": 3.924050301730316e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12004910409450531, + "step": 1085, + "valid_targets_mean": 7589.2, + "valid_targets_min": 4963 + }, + { + "epoch": 1.2617255356108859, + "grad_norm": 0.42400886081444505, + "learning_rate": 3.9224668155457146e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09688187390565872, + "step": 1090, + "valid_targets_mean": 5577.9, + "valid_targets_min": 4458 + }, + { + "epoch": 1.267515923566879, + "grad_norm": 0.5540750132695128, + "learning_rate": 3.920867318243931e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10729500651359558, + "step": 1095, + "valid_targets_mean": 5432.5, + "valid_targets_min": 4433 + }, + { + "epoch": 1.273306311522872, + "grad_norm": 0.44705611186587985, + "learning_rate": 3.9192518231462585e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11662057787179947, + "step": 1100, + "valid_targets_mean": 6361.8, + "valid_targets_min": 5201 + }, + { + "epoch": 1.279096699478865, + "grad_norm": 0.4549497220381097, + "learning_rate": 3.917620343707221e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1052645817399025, + "step": 1105, + "valid_targets_mean": 5824.4, + "valid_targets_min": 5129 + }, + { + "epoch": 1.2848870874348581, + "grad_norm": 0.9022835962871019, + "learning_rate": 3.915972893514471e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1566964089870453, + "step": 1110, + "valid_targets_mean": 1637.6, + "valid_targets_min": 137 + }, + { + "epoch": 1.2906774753908512, + "grad_norm": 0.46381011488635515, + "learning_rate": 3.9143094862886705e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11298874765634537, + "step": 1115, + "valid_targets_mean": 6474.6, + "valid_targets_min": 4751 + }, + { + "epoch": 1.2964678633468443, + "grad_norm": 0.4472651737046458, + "learning_rate": 3.9126301358833786e-05, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12175101041793823, + "step": 1120, + "valid_targets_mean": 7186.2, + "valid_targets_min": 5358 + }, + { + "epoch": 1.3022582513028373, + "grad_norm": 0.4552798202628588, + "learning_rate": 3.910934856284937e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11371338367462158, + "step": 1125, + "valid_targets_mean": 7126.8, + "valid_targets_min": 4078 + }, + { + "epoch": 1.3080486392588304, + "grad_norm": 0.4564388018494831, + "learning_rate": 3.90922366161235e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10979453474283218, + "step": 1130, + "valid_targets_mean": 7022.1, + "valid_targets_min": 4940 + }, + { + "epoch": 1.3138390272148235, + "grad_norm": 0.44457997970128343, + "learning_rate": 3.907496566117173e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11380468308925629, + "step": 1135, + "valid_targets_mean": 5900.6, + "valid_targets_min": 4779 + }, + { + "epoch": 1.3196294151708163, + "grad_norm": 0.44167393228026747, + "learning_rate": 3.905753584183387e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11499583721160889, + "step": 1140, + "valid_targets_mean": 6290.1, + "valid_targets_min": 4620 + }, + { + "epoch": 1.3254198031268096, + "grad_norm": 0.5428718455705244, + "learning_rate": 3.9039947303272836e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1602226346731186, + "step": 1145, + "valid_targets_mean": 5614.5, + "valid_targets_min": 3446 + }, + { + "epoch": 1.3312101910828025, + "grad_norm": 0.6699882200896025, + "learning_rate": 3.902220019197342e-05, + "loss": 0.3224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16955219209194183, + "step": 1150, + "valid_targets_mean": 4630.0, + "valid_targets_min": 1005 + }, + { + "epoch": 1.3370005790387955, + "grad_norm": 0.5145366457651853, + "learning_rate": 3.900429465574106e-05, + "loss": 0.3115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11790046095848083, + "step": 1155, + "valid_targets_mean": 4669.6, + "valid_targets_min": 2054 + }, + { + "epoch": 1.3427909669947886, + "grad_norm": 0.5699015253947644, + "learning_rate": 3.898623084370066e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340475082397461, + "step": 1160, + "valid_targets_mean": 3745.0, + "valid_targets_min": 1561 + }, + { + "epoch": 1.3485813549507817, + "grad_norm": 0.5755210822032388, + "learning_rate": 3.8968008906295274e-05, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492936909198761, + "step": 1165, + "valid_targets_mean": 4284.0, + "valid_targets_min": 1184 + }, + { + "epoch": 1.3543717429067748, + "grad_norm": 0.4916120473926803, + "learning_rate": 3.894962899528492e-05, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16478881239891052, + "step": 1170, + "valid_targets_mean": 6332.8, + "valid_targets_min": 1651 + }, + { + "epoch": 1.3601621308627678, + "grad_norm": 0.46790533699331943, + "learning_rate": 3.893109126374528e-05, + "loss": 0.3042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14911609888076782, + "step": 1175, + "valid_targets_mean": 6446.5, + "valid_targets_min": 2231 + }, + { + "epoch": 1.365952518818761, + "grad_norm": 0.5381056441690047, + "learning_rate": 3.8912395866066404e-05, + "loss": 0.3065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11903347074985504, + "step": 1180, + "valid_targets_mean": 3367.2, + "valid_targets_min": 467 + }, + { + "epoch": 1.371742906774754, + "grad_norm": 0.6066646913438828, + "learning_rate": 3.8893542957951496e-05, + "loss": 0.3107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1923138052225113, + "step": 1185, + "valid_targets_mean": 7194.1, + "valid_targets_min": 1748 + }, + { + "epoch": 1.377533294730747, + "grad_norm": 0.5901290970032714, + "learning_rate": 3.8874532696415544e-05, + "loss": 0.3193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16743800044059753, + "step": 1190, + "valid_targets_mean": 4078.5, + "valid_targets_min": 1697 + }, + { + "epoch": 1.3833236826867399, + "grad_norm": 0.7113483233393854, + "learning_rate": 3.8855365239784055e-05, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1085222065448761, + "step": 1195, + "valid_targets_mean": 2089.5, + "valid_targets_min": 1223 + }, + { + "epoch": 1.3891140706427332, + "grad_norm": 0.49830543569902624, + "learning_rate": 3.883604074769172e-05, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1353009045124054, + "step": 1200, + "valid_targets_mean": 5404.1, + "valid_targets_min": 1829 + }, + { + "epoch": 1.394904458598726, + "grad_norm": 0.49106905409625246, + "learning_rate": 3.8816559381081077e-05, + "loss": 0.3128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1435467153787613, + "step": 1205, + "valid_targets_mean": 4583.1, + "valid_targets_min": 1596 + }, + { + "epoch": 1.400694846554719, + "grad_norm": 0.599595031027154, + "learning_rate": 3.8796921302201195e-05, + "loss": 0.3076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17589807510375977, + "step": 1210, + "valid_targets_mean": 4666.8, + "valid_targets_min": 2041 + }, + { + "epoch": 1.4064852345107122, + "grad_norm": 0.6023402305027469, + "learning_rate": 3.877712667460631e-05, + "loss": 0.321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17788101732730865, + "step": 1215, + "valid_targets_mean": 4272.1, + "valid_targets_min": 1538 + }, + { + "epoch": 1.4122756224667052, + "grad_norm": 0.6410743256350347, + "learning_rate": 3.875717566315446e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14403772354125977, + "step": 1220, + "valid_targets_mean": 2928.0, + "valid_targets_min": 1739 + }, + { + "epoch": 1.4180660104226983, + "grad_norm": 0.6508045068605425, + "learning_rate": 3.87370684340061e-05, + "loss": 0.3113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14435258507728577, + "step": 1225, + "valid_targets_mean": 3254.5, + "valid_targets_min": 935 + }, + { + "epoch": 1.4238563983786914, + "grad_norm": 0.6830096233724313, + "learning_rate": 3.8716805154622756e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15719226002693176, + "step": 1230, + "valid_targets_mean": 3535.2, + "valid_targets_min": 2704 + }, + { + "epoch": 1.4296467863346844, + "grad_norm": 0.7793111606201882, + "learning_rate": 3.869638599376558e-05, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.156528502702713, + "step": 1235, + "valid_targets_mean": 2987.6, + "valid_targets_min": 1173 + }, + { + "epoch": 1.4354371742906775, + "grad_norm": 0.5927550164432058, + "learning_rate": 3.8675811121494e-05, + "loss": 0.3186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14001598954200745, + "step": 1240, + "valid_targets_mean": 3700.6, + "valid_targets_min": 1670 + }, + { + "epoch": 1.4412275622466706, + "grad_norm": 0.6055360826788108, + "learning_rate": 3.865508070916424e-05, + "loss": 0.3186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14109604060649872, + "step": 1245, + "valid_targets_mean": 3481.6, + "valid_targets_min": 1329 + }, + { + "epoch": 1.4470179502026637, + "grad_norm": 0.629972581855596, + "learning_rate": 3.863419492942797e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16355670988559723, + "step": 1250, + "valid_targets_mean": 3589.8, + "valid_targets_min": 2279 + }, + { + "epoch": 1.4528083381586567, + "grad_norm": 0.5165126632126333, + "learning_rate": 3.861315395623078e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1432054042816162, + "step": 1255, + "valid_targets_mean": 5060.4, + "valid_targets_min": 2222 + }, + { + "epoch": 1.4585987261146496, + "grad_norm": 0.5781860344801439, + "learning_rate": 3.85919579648108e-05, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15431106090545654, + "step": 1260, + "valid_targets_mean": 3898.6, + "valid_targets_min": 1849 + }, + { + "epoch": 1.4643891140706427, + "grad_norm": 0.5956044360840919, + "learning_rate": 3.857060713169724e-05, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13421255350112915, + "step": 1265, + "valid_targets_mean": 3698.9, + "valid_targets_min": 1951 + }, + { + "epoch": 1.4701795020266357, + "grad_norm": 0.5546382516771821, + "learning_rate": 3.854910163470885e-05, + "loss": 0.3201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16746890544891357, + "step": 1270, + "valid_targets_mean": 4506.8, + "valid_targets_min": 1765 + }, + { + "epoch": 1.4759698899826288, + "grad_norm": 0.6775028329226226, + "learning_rate": 3.852744165295252e-05, + "loss": 0.2982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14411398768424988, + "step": 1275, + "valid_targets_mean": 2991.6, + "valid_targets_min": 2087 + }, + { + "epoch": 1.4817602779386219, + "grad_norm": 0.561251134139609, + "learning_rate": 3.850562736682173e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13625818490982056, + "step": 1280, + "valid_targets_mean": 3994.1, + "valid_targets_min": 1603 + }, + { + "epoch": 1.487550665894615, + "grad_norm": 0.6083852437758633, + "learning_rate": 3.8483658957995114e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16182228922843933, + "step": 1285, + "valid_targets_mean": 4391.9, + "valid_targets_min": 2092 + }, + { + "epoch": 1.493341053850608, + "grad_norm": 0.5913661335465749, + "learning_rate": 3.8461536609434855e-05, + "loss": 0.3043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15757182240486145, + "step": 1290, + "valid_targets_mean": 3808.9, + "valid_targets_min": 1410 + }, + { + "epoch": 1.499131441806601, + "grad_norm": 0.5394656768823203, + "learning_rate": 3.843926050538524e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16474880278110504, + "step": 1295, + "valid_targets_mean": 5078.5, + "valid_targets_min": 2110 + }, + { + "epoch": 1.5049218297625941, + "grad_norm": 0.6243755838275791, + "learning_rate": 3.841683083137108e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14915981888771057, + "step": 1300, + "valid_targets_mean": 3447.4, + "valid_targets_min": 2420 + }, + { + "epoch": 1.510712217718587, + "grad_norm": 0.6375167647764206, + "learning_rate": 3.8394247774196206e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14488837122917175, + "step": 1305, + "valid_targets_mean": 4371.1, + "valid_targets_min": 1876 + }, + { + "epoch": 1.5165026056745803, + "grad_norm": 0.613454201006315, + "learning_rate": 3.837151152194187e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17439305782318115, + "step": 1310, + "valid_targets_mean": 3858.2, + "valid_targets_min": 2605 + }, + { + "epoch": 1.5222929936305731, + "grad_norm": 0.6233105779650496, + "learning_rate": 3.83486222639652e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13862144947052002, + "step": 1315, + "valid_targets_mean": 3383.9, + "valid_targets_min": 1177 + }, + { + "epoch": 1.5280833815865664, + "grad_norm": 0.6467891702920507, + "learning_rate": 3.832558019089763e-05, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15471479296684265, + "step": 1320, + "valid_targets_mean": 3455.5, + "valid_targets_min": 1714 + }, + { + "epoch": 1.5338737695425593, + "grad_norm": 0.5289820990421887, + "learning_rate": 3.830238549464329e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16986386477947235, + "step": 1325, + "valid_targets_mean": 5547.5, + "valid_targets_min": 2033 + }, + { + "epoch": 1.5396641574985526, + "grad_norm": 0.7060162824223625, + "learning_rate": 3.8279038368377415e-05, + "loss": 0.297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161880761384964, + "step": 1330, + "valid_targets_mean": 3485.4, + "valid_targets_min": 1924 + }, + { + "epoch": 1.5454545454545454, + "grad_norm": 0.68455855185045, + "learning_rate": 3.825553900654478e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13728755712509155, + "step": 1335, + "valid_targets_mean": 3080.2, + "valid_targets_min": 2098 + }, + { + "epoch": 1.5512449334105385, + "grad_norm": 0.7207851738392093, + "learning_rate": 3.823188760485798e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17264071106910706, + "step": 1340, + "valid_targets_mean": 3393.4, + "valid_targets_min": 1363 + }, + { + "epoch": 1.5570353213665316, + "grad_norm": 0.6036114150424443, + "learning_rate": 3.820808436029593e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16787129640579224, + "step": 1345, + "valid_targets_mean": 4115.4, + "valid_targets_min": 1363 + }, + { + "epoch": 1.5628257093225246, + "grad_norm": 0.595651877586518, + "learning_rate": 3.81841294711021e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17603866755962372, + "step": 1350, + "valid_targets_mean": 4313.8, + "valid_targets_min": 2620 + }, + { + "epoch": 1.5686160972785177, + "grad_norm": 0.6336909981450094, + "learning_rate": 3.8160023136782956e-05, + "loss": 0.3011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15865519642829895, + "step": 1355, + "valid_targets_mean": 3425.1, + "valid_targets_min": 2316 + }, + { + "epoch": 1.5744064852345108, + "grad_norm": 0.6641668751975259, + "learning_rate": 3.813576555810625e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13947749137878418, + "step": 1360, + "valid_targets_mean": 3744.0, + "valid_targets_min": 2022 + }, + { + "epoch": 1.5801968731905038, + "grad_norm": 0.6430114196277706, + "learning_rate": 3.811135693709935e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10136308521032333, + "step": 1365, + "valid_targets_mean": 2445.6, + "valid_targets_min": 1294 + }, + { + "epoch": 1.5859872611464967, + "grad_norm": 0.611201336181961, + "learning_rate": 3.80867974770476e-05, + "loss": 0.3026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14630404114723206, + "step": 1370, + "valid_targets_mean": 3556.1, + "valid_targets_min": 1700 + }, + { + "epoch": 1.59177764910249, + "grad_norm": 0.6115984226936316, + "learning_rate": 3.806208738249257e-05, + "loss": 0.2945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1197759360074997, + "step": 1375, + "valid_targets_mean": 2714.0, + "valid_targets_min": 1586 + }, + { + "epoch": 1.5975680370584828, + "grad_norm": 0.689916065021143, + "learning_rate": 3.803722685923038e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16724331676959991, + "step": 1380, + "valid_targets_mean": 3266.4, + "valid_targets_min": 1358 + }, + { + "epoch": 1.6033584250144761, + "grad_norm": 0.6326874996965296, + "learning_rate": 3.801221611430999e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14491121470928192, + "step": 1385, + "valid_targets_mean": 3782.9, + "valid_targets_min": 2290 + }, + { + "epoch": 1.609148812970469, + "grad_norm": 0.636924161233398, + "learning_rate": 3.798705535603146e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19842056930065155, + "step": 1390, + "valid_targets_mean": 4904.8, + "valid_targets_min": 2189 + }, + { + "epoch": 1.614939200926462, + "grad_norm": 0.617435396839824, + "learning_rate": 3.796174479394425e-05, + "loss": 0.2959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19284765422344208, + "step": 1395, + "valid_targets_mean": 3959.5, + "valid_targets_min": 2121 + }, + { + "epoch": 1.620729588882455, + "grad_norm": 0.6063095285796238, + "learning_rate": 3.7936284638845416e-05, + "loss": 0.3026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15117163956165314, + "step": 1400, + "valid_targets_mean": 3689.4, + "valid_targets_min": 2011 + }, + { + "epoch": 1.6265199768384482, + "grad_norm": 0.5590404864745255, + "learning_rate": 3.791067510277792e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16714683175086975, + "step": 1405, + "valid_targets_mean": 5082.1, + "valid_targets_min": 2674 + }, + { + "epoch": 1.6323103647944412, + "grad_norm": 0.6010907769653863, + "learning_rate": 3.7884916399028796e-05, + "loss": 0.2824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16495585441589355, + "step": 1410, + "valid_targets_mean": 3660.9, + "valid_targets_min": 2266 + }, + { + "epoch": 1.6381007527504343, + "grad_norm": 0.5413580426914476, + "learning_rate": 3.785900874212744e-05, + "loss": 0.2868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1378852128982544, + "step": 1415, + "valid_targets_mean": 4285.5, + "valid_targets_min": 904 + }, + { + "epoch": 1.6438911407064274, + "grad_norm": 0.5985754205236895, + "learning_rate": 3.783295234784378e-05, + "loss": 0.2906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16695967316627502, + "step": 1420, + "valid_targets_mean": 3999.4, + "valid_targets_min": 1194 + }, + { + "epoch": 1.6496815286624202, + "grad_norm": 0.5642800803882113, + "learning_rate": 3.78067474331865e-05, + "loss": 0.2957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13066710531711578, + "step": 1425, + "valid_targets_mean": 4011.8, + "valid_targets_min": 1658 + }, + { + "epoch": 1.6554719166184135, + "grad_norm": 0.6748860615080935, + "learning_rate": 3.778039421640121e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12626111507415771, + "step": 1430, + "valid_targets_mean": 2728.8, + "valid_targets_min": 1423 + }, + { + "epoch": 1.6612623045744064, + "grad_norm": 0.6061991472050072, + "learning_rate": 3.775389291696866e-05, + "loss": 0.2947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1608344465494156, + "step": 1435, + "valid_targets_mean": 4504.4, + "valid_targets_min": 2320 + }, + { + "epoch": 1.6670526925303997, + "grad_norm": 0.6011184485544011, + "learning_rate": 3.7727243755602874e-05, + "loss": 0.2985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11193178594112396, + "step": 1440, + "valid_targets_mean": 3272.5, + "valid_targets_min": 1811 + }, + { + "epoch": 1.6728430804863925, + "grad_norm": 0.5264073883119884, + "learning_rate": 3.770044695424935e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1393248736858368, + "step": 1445, + "valid_targets_mean": 4425.9, + "valid_targets_min": 3026 + }, + { + "epoch": 1.6786334684423856, + "grad_norm": 0.6441290731247149, + "learning_rate": 3.76735027360832e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12856018543243408, + "step": 1450, + "valid_targets_mean": 2719.1, + "valid_targets_min": 644 + }, + { + "epoch": 1.6844238563983787, + "grad_norm": 0.5795525157839163, + "learning_rate": 3.764641132550726e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1311633586883545, + "step": 1455, + "valid_targets_mean": 3369.0, + "valid_targets_min": 1397 + }, + { + "epoch": 1.6902142443543717, + "grad_norm": 0.6501614001474612, + "learning_rate": 3.761917294815028e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14685682952404022, + "step": 1460, + "valid_targets_mean": 4122.6, + "valid_targets_min": 1415 + }, + { + "epoch": 1.6960046323103648, + "grad_norm": 0.6270974962784911, + "learning_rate": 3.759178783086498e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1565871238708496, + "step": 1465, + "valid_targets_mean": 4136.6, + "valid_targets_min": 621 + }, + { + "epoch": 1.7017950202663579, + "grad_norm": 0.6060924529762027, + "learning_rate": 3.75642562017262e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1117183268070221, + "step": 1470, + "valid_targets_mean": 3065.8, + "valid_targets_min": 1290 + }, + { + "epoch": 1.707585408222351, + "grad_norm": 0.6638246415488404, + "learning_rate": 3.7536578290029e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12210803478956223, + "step": 1475, + "valid_targets_mean": 2426.9, + "valid_targets_min": 857 + }, + { + "epoch": 1.7133757961783438, + "grad_norm": 0.7507078491383454, + "learning_rate": 3.750875432628676e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15053801238536835, + "step": 1480, + "valid_targets_mean": 2949.1, + "valid_targets_min": 720 + }, + { + "epoch": 1.719166184134337, + "grad_norm": 0.625818585702717, + "learning_rate": 3.74807845422292e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16986438632011414, + "step": 1485, + "valid_targets_mean": 3579.1, + "valid_targets_min": 1801 + }, + { + "epoch": 1.72495657209033, + "grad_norm": 0.6211865498978847, + "learning_rate": 3.745266917080054e-05, + "loss": 0.3079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14206188917160034, + "step": 1490, + "valid_targets_mean": 3532.6, + "valid_targets_min": 1510 + }, + { + "epoch": 1.7307469600463232, + "grad_norm": 0.6918628290457827, + "learning_rate": 3.742440844615747e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13704082369804382, + "step": 1495, + "valid_targets_mean": 2807.9, + "valid_targets_min": 987 + }, + { + "epoch": 1.736537348002316, + "grad_norm": 0.6662912463540775, + "learning_rate": 3.739600260366728e-05, + "loss": 0.294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12647037208080292, + "step": 1500, + "valid_targets_mean": 3937.4, + "valid_targets_min": 996 + }, + { + "epoch": 1.7423277359583094, + "grad_norm": 0.5716296457229726, + "learning_rate": 3.7367451879905846e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12399876117706299, + "step": 1505, + "valid_targets_mean": 3954.5, + "valid_targets_min": 2001 + }, + { + "epoch": 1.7481181239143022, + "grad_norm": 0.4831572116294239, + "learning_rate": 3.733875651265566e-05, + "loss": 0.2871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11142855882644653, + "step": 1510, + "valid_targets_mean": 5059.6, + "valid_targets_min": 1767 + }, + { + "epoch": 1.7539085118702953, + "grad_norm": 0.5298635253583321, + "learning_rate": 3.7309916740903904e-05, + "loss": 0.2856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1220664381980896, + "step": 1515, + "valid_targets_mean": 4714.0, + "valid_targets_min": 1511 + }, + { + "epoch": 1.7596988998262884, + "grad_norm": 0.6941605178493575, + "learning_rate": 3.728093280484039e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13680237531661987, + "step": 1520, + "valid_targets_mean": 2861.0, + "valid_targets_min": 974 + }, + { + "epoch": 1.7654892877822814, + "grad_norm": 0.6081050330394304, + "learning_rate": 3.725180494585561e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12764519453048706, + "step": 1525, + "valid_targets_mean": 3792.6, + "valid_targets_min": 2339 + }, + { + "epoch": 1.7712796757382745, + "grad_norm": 0.5905461387221194, + "learning_rate": 3.722253340653869e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12954270839691162, + "step": 1530, + "valid_targets_mean": 3333.1, + "valid_targets_min": 1249 + }, + { + "epoch": 1.7770700636942676, + "grad_norm": 0.6502080970822529, + "learning_rate": 3.719311843067541e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14891153573989868, + "step": 1535, + "valid_targets_mean": 3493.1, + "valid_targets_min": 1223 + }, + { + "epoch": 1.7828604516502606, + "grad_norm": 0.6974244607751314, + "learning_rate": 3.7163560263246126e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15846528112888336, + "step": 1540, + "valid_targets_mean": 3285.8, + "valid_targets_min": 1318 + }, + { + "epoch": 1.7886508396062535, + "grad_norm": 0.7366256261342894, + "learning_rate": 3.713385915042376e-05, + "loss": 0.2932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13647408783435822, + "step": 1545, + "valid_targets_mean": 2070.5, + "valid_targets_min": 1002 + }, + { + "epoch": 1.7944412275622468, + "grad_norm": 0.6302534919300808, + "learning_rate": 3.710401533957177e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13620512187480927, + "step": 1550, + "valid_targets_mean": 3328.1, + "valid_targets_min": 845 + }, + { + "epoch": 1.8002316155182396, + "grad_norm": 0.5569959028822243, + "learning_rate": 3.707402907924202e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15685269236564636, + "step": 1555, + "valid_targets_mean": 4657.4, + "valid_targets_min": 2747 + }, + { + "epoch": 1.806022003474233, + "grad_norm": 0.6254433967452091, + "learning_rate": 3.704390061917279e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1573985517024994, + "step": 1560, + "valid_targets_mean": 4166.4, + "valid_targets_min": 1241 + }, + { + "epoch": 1.8118123914302258, + "grad_norm": 0.6605344075486596, + "learning_rate": 3.701363021028663e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16879332065582275, + "step": 1565, + "valid_targets_mean": 3567.1, + "valid_targets_min": 992 + }, + { + "epoch": 1.8176027793862188, + "grad_norm": 0.5992245067450327, + "learning_rate": 3.698321810468834e-05, + "loss": 0.295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1365610510110855, + "step": 1570, + "valid_targets_mean": 3343.5, + "valid_targets_min": 1211 + }, + { + "epoch": 1.823393167342212, + "grad_norm": 0.6939069026436273, + "learning_rate": 3.695266455566277e-05, + "loss": 0.2982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16110128164291382, + "step": 1575, + "valid_targets_mean": 3081.6, + "valid_targets_min": 1889 + }, + { + "epoch": 1.829183555298205, + "grad_norm": 0.658296632984435, + "learning_rate": 3.692196981767284e-05, + "loss": 0.2953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12838894128799438, + "step": 1580, + "valid_targets_mean": 3233.9, + "valid_targets_min": 1624 + }, + { + "epoch": 1.834973943254198, + "grad_norm": 0.5932833946506312, + "learning_rate": 3.689113414635729e-05, + "loss": 0.297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15206484496593475, + "step": 1585, + "valid_targets_mean": 3817.4, + "valid_targets_min": 2763 + }, + { + "epoch": 1.8407643312101911, + "grad_norm": 0.6417579915172227, + "learning_rate": 3.6860157798528655e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1469709575176239, + "step": 1590, + "valid_targets_mean": 3037.0, + "valid_targets_min": 1928 + }, + { + "epoch": 1.8465547191661842, + "grad_norm": 0.5635895780043579, + "learning_rate": 3.6829041032171065e-05, + "loss": 0.2761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1479199230670929, + "step": 1595, + "valid_targets_mean": 4733.2, + "valid_targets_min": 3147 + }, + { + "epoch": 1.852345107122177, + "grad_norm": 0.5927830627612026, + "learning_rate": 3.679778410643812e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12866196036338806, + "step": 1600, + "valid_targets_mean": 3348.4, + "valid_targets_min": 1844 + }, + { + "epoch": 1.8581354950781703, + "grad_norm": 0.6165140209070116, + "learning_rate": 3.676638728165072e-05, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14650055766105652, + "step": 1605, + "valid_targets_mean": 4360.2, + "valid_targets_min": 2330 + }, + { + "epoch": 1.8639258830341632, + "grad_norm": 0.6817824524905477, + "learning_rate": 3.673485081929491e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1639326512813568, + "step": 1610, + "valid_targets_mean": 3574.1, + "valid_targets_min": 900 + }, + { + "epoch": 1.8697162709901565, + "grad_norm": 0.5791877359098891, + "learning_rate": 3.6703174982019696e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15437737107276917, + "step": 1615, + "valid_targets_mean": 5092.8, + "valid_targets_min": 1713 + }, + { + "epoch": 1.8755066589461493, + "grad_norm": 0.602627175280257, + "learning_rate": 3.667136003363486e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16277575492858887, + "step": 1620, + "valid_targets_mean": 4057.1, + "valid_targets_min": 981 + }, + { + "epoch": 1.8812970469021424, + "grad_norm": 0.6112362273386295, + "learning_rate": 3.663940623910874e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648990511894226, + "step": 1625, + "valid_targets_mean": 4127.9, + "valid_targets_min": 2098 + }, + { + "epoch": 1.8870874348581355, + "grad_norm": 0.8037151230076643, + "learning_rate": 3.660731386456607e-05, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1346992403268814, + "step": 1630, + "valid_targets_mean": 3101.1, + "valid_targets_min": 1107 + }, + { + "epoch": 1.8928778228141285, + "grad_norm": 0.5804972757492853, + "learning_rate": 3.6575083177285726e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13580957055091858, + "step": 1635, + "valid_targets_mean": 3718.0, + "valid_targets_min": 2273 + }, + { + "epoch": 1.8986682107701216, + "grad_norm": 0.5905918270634198, + "learning_rate": 3.654271444569851e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12466682493686676, + "step": 1640, + "valid_targets_mean": 3316.0, + "valid_targets_min": 1920 + }, + { + "epoch": 1.9044585987261147, + "grad_norm": 0.5420420764028602, + "learning_rate": 3.651020793938491e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14009517431259155, + "step": 1645, + "valid_targets_mean": 4710.8, + "valid_targets_min": 1687 + }, + { + "epoch": 1.9102489866821077, + "grad_norm": 0.6025644717103477, + "learning_rate": 3.6477563929072876e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12080429494380951, + "step": 1650, + "valid_targets_mean": 3075.8, + "valid_targets_min": 1375 + }, + { + "epoch": 1.9160393746381006, + "grad_norm": 0.6012321362774695, + "learning_rate": 3.6444782686635534e-05, + "loss": 0.285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12920139729976654, + "step": 1655, + "valid_targets_mean": 3776.4, + "valid_targets_min": 906 + }, + { + "epoch": 1.9218297625940939, + "grad_norm": 0.6380750764288287, + "learning_rate": 3.641186448508895e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428179144859314, + "step": 1660, + "valid_targets_mean": 3393.9, + "valid_targets_min": 1874 + }, + { + "epoch": 1.9276201505500867, + "grad_norm": 0.5674294662426445, + "learning_rate": 3.637880959858983e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14175750315189362, + "step": 1665, + "valid_targets_mean": 3503.4, + "valid_targets_min": 1877 + }, + { + "epoch": 1.93341053850608, + "grad_norm": 0.7553571883007889, + "learning_rate": 3.634561830243326e-05, + "loss": 0.2803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13421311974525452, + "step": 1670, + "valid_targets_mean": 3744.4, + "valid_targets_min": 1697 + }, + { + "epoch": 1.9392009264620729, + "grad_norm": 0.6443562711759654, + "learning_rate": 3.631229087305039e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14671412110328674, + "step": 1675, + "valid_targets_mean": 3015.1, + "valid_targets_min": 1679 + }, + { + "epoch": 1.9449913144180662, + "grad_norm": 0.6391854883175944, + "learning_rate": 3.6278827588006175e-05, + "loss": 0.3057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14905647933483124, + "step": 1680, + "valid_targets_mean": 3292.0, + "valid_targets_min": 1695 + }, + { + "epoch": 1.950781702374059, + "grad_norm": 0.6184510237966775, + "learning_rate": 3.6245228725996994e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12810581922531128, + "step": 1685, + "valid_targets_mean": 3148.8, + "valid_targets_min": 2114 + }, + { + "epoch": 1.956572090330052, + "grad_norm": 0.6268276802626227, + "learning_rate": 3.62114945668484e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11713097244501114, + "step": 1690, + "valid_targets_mean": 2852.0, + "valid_targets_min": 993 + }, + { + "epoch": 1.9623624782860452, + "grad_norm": 0.6314414968187866, + "learning_rate": 3.617762539151275e-05, + "loss": 0.2761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318410336971283, + "step": 1695, + "valid_targets_mean": 3059.8, + "valid_targets_min": 1275 + }, + { + "epoch": 1.9681528662420382, + "grad_norm": 0.5568778782912174, + "learning_rate": 3.6143621482066855e-05, + "loss": 0.2821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15411721169948578, + "step": 1700, + "valid_targets_mean": 4479.0, + "valid_targets_min": 2250 + }, + { + "epoch": 1.9739432541980313, + "grad_norm": 0.5822106858978399, + "learning_rate": 3.610948312170966e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17367419600486755, + "step": 1705, + "valid_targets_mean": 4952.2, + "valid_targets_min": 2892 + }, + { + "epoch": 1.9797336421540244, + "grad_norm": 0.5437001699478157, + "learning_rate": 3.60752105947599e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13590948283672333, + "step": 1710, + "valid_targets_mean": 4026.4, + "valid_targets_min": 2935 + }, + { + "epoch": 1.9855240301100174, + "grad_norm": 0.5316143241569335, + "learning_rate": 3.6040804186653664e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13042062520980835, + "step": 1715, + "valid_targets_mean": 4085.2, + "valid_targets_min": 1817 + }, + { + "epoch": 1.9913144180660103, + "grad_norm": 0.5673752637324689, + "learning_rate": 3.600626418394208e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14978323876857758, + "step": 1720, + "valid_targets_mean": 4050.4, + "valid_targets_min": 1727 + }, + { + "epoch": 1.9971048060220036, + "grad_norm": 0.554230279363605, + "learning_rate": 3.597159087428891e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11575158685445786, + "step": 1725, + "valid_targets_mean": 3682.9, + "valid_targets_min": 1622 + }, + { + "epoch": 2.002316155182397, + "grad_norm": 0.4286585060877845, + "learning_rate": 3.593678454646815e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10107292234897614, + "step": 1730, + "valid_targets_mean": 8514.0, + "valid_targets_min": 6788 + }, + { + "epoch": 2.0081065431383904, + "grad_norm": 0.4700487542281608, + "learning_rate": 3.5901845490361636e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11241059750318527, + "step": 1735, + "valid_targets_mean": 6668.0, + "valid_targets_min": 5933 + }, + { + "epoch": 2.0138969310943833, + "grad_norm": 0.5656127248804904, + "learning_rate": 3.586677399695659e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10180117189884186, + "step": 1740, + "valid_targets_mean": 4107.0, + "valid_targets_min": 161 + }, + { + "epoch": 2.0196873190503766, + "grad_norm": 0.4107340656584201, + "learning_rate": 3.583157035834327e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09953369945287704, + "step": 1745, + "valid_targets_mean": 7946.0, + "valid_targets_min": 5596 + }, + { + "epoch": 2.0254777070063694, + "grad_norm": 0.4169057084257508, + "learning_rate": 3.579623486771247e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120513528585434, + "step": 1750, + "valid_targets_mean": 7392.8, + "valid_targets_min": 5629 + }, + { + "epoch": 2.0312680949623623, + "grad_norm": 0.42108322176936563, + "learning_rate": 3.576076781935311e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10831199586391449, + "step": 1755, + "valid_targets_mean": 6879.6, + "valid_targets_min": 5113 + }, + { + "epoch": 2.0370584829183556, + "grad_norm": 0.46891626874187786, + "learning_rate": 3.572516950864977e-05, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11068766564130783, + "step": 1760, + "valid_targets_mean": 6919.5, + "valid_targets_min": 5080 + }, + { + "epoch": 2.0428488708743484, + "grad_norm": 0.45738686416436514, + "learning_rate": 3.568944023208026e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09865154325962067, + "step": 1765, + "valid_targets_mean": 6377.0, + "valid_targets_min": 3939 + }, + { + "epoch": 2.0486392588303417, + "grad_norm": 0.3953598703172133, + "learning_rate": 3.565358028721311e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10655589401721954, + "step": 1770, + "valid_targets_mean": 6392.2, + "valid_targets_min": 4355 + }, + { + "epoch": 2.0544296467863346, + "grad_norm": 0.5105530013505218, + "learning_rate": 3.5617589972705104e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12021344900131226, + "step": 1775, + "valid_targets_mean": 5196.5, + "valid_targets_min": 137 + }, + { + "epoch": 2.060220034742328, + "grad_norm": 0.3802473517239841, + "learning_rate": 3.5581469588298826e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09380616992712021, + "step": 1780, + "valid_targets_mean": 7344.4, + "valid_targets_min": 5244 + }, + { + "epoch": 2.0660104226983207, + "grad_norm": 0.37526809344745743, + "learning_rate": 3.554521943482011e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08865103870630264, + "step": 1785, + "valid_targets_mean": 7921.0, + "valid_targets_min": 5311 + }, + { + "epoch": 2.071800810654314, + "grad_norm": 0.42312765517314455, + "learning_rate": 3.550883981417559e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10647544264793396, + "step": 1790, + "valid_targets_mean": 7931.2, + "valid_targets_min": 6015 + }, + { + "epoch": 2.077591198610307, + "grad_norm": 0.37102703581187674, + "learning_rate": 3.547233102935013e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09514321386814117, + "step": 1795, + "valid_targets_mean": 7640.8, + "valid_targets_min": 5248 + }, + { + "epoch": 2.0833815865663, + "grad_norm": 0.3804558512027849, + "learning_rate": 3.5435693384404346e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0999484658241272, + "step": 1800, + "valid_targets_mean": 7249.6, + "valid_targets_min": 5217 + }, + { + "epoch": 2.089171974522293, + "grad_norm": 0.39961483892181837, + "learning_rate": 3.5398927184472044e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.115473173558712, + "step": 1805, + "valid_targets_mean": 7619.9, + "valid_targets_min": 5094 + }, + { + "epoch": 2.0949623624782863, + "grad_norm": 0.40322220155035593, + "learning_rate": 3.536203273575769e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09707570821046829, + "step": 1810, + "valid_targets_mean": 7732.8, + "valid_targets_min": 5842 + }, + { + "epoch": 2.100752750434279, + "grad_norm": 0.4013736383136356, + "learning_rate": 3.532501034553389e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10486122965812683, + "step": 1815, + "valid_targets_mean": 7952.9, + "valid_targets_min": 5494 + }, + { + "epoch": 2.106543138390272, + "grad_norm": 0.3976795627706171, + "learning_rate": 3.528786032213875e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10951173305511475, + "step": 1820, + "valid_targets_mean": 8001.0, + "valid_targets_min": 5454 + }, + { + "epoch": 2.1123335263462653, + "grad_norm": 0.5536985357272681, + "learning_rate": 3.525058297497339e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08318760991096497, + "step": 1825, + "valid_targets_mean": 2985.1, + "valid_targets_min": 1896 + }, + { + "epoch": 2.118123914302258, + "grad_norm": 0.41262587121815264, + "learning_rate": 3.521317861449935e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08865408599376678, + "step": 1830, + "valid_targets_mean": 6714.6, + "valid_targets_min": 4917 + }, + { + "epoch": 2.1239143022582514, + "grad_norm": 0.42576908508936934, + "learning_rate": 3.517564755223596e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11042719334363937, + "step": 1835, + "valid_targets_mean": 7595.2, + "valid_targets_min": 5488 + }, + { + "epoch": 2.1297046902142442, + "grad_norm": 0.489870656589442, + "learning_rate": 3.513799010075778e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10994037985801697, + "step": 1840, + "valid_targets_mean": 7046.1, + "valid_targets_min": 5579 + }, + { + "epoch": 2.1354950781702375, + "grad_norm": 0.4438672691242358, + "learning_rate": 3.5100206573692016e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10180249065160751, + "step": 1845, + "valid_targets_mean": 7108.0, + "valid_targets_min": 4963 + }, + { + "epoch": 2.1412854661262304, + "grad_norm": 0.42176761912224064, + "learning_rate": 3.506229728571587e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10390205681324005, + "step": 1850, + "valid_targets_mean": 6810.1, + "valid_targets_min": 5666 + }, + { + "epoch": 2.1470758540822237, + "grad_norm": 0.4205231074425438, + "learning_rate": 3.502426255255393e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09647151827812195, + "step": 1855, + "valid_targets_mean": 8068.5, + "valid_targets_min": 5524 + }, + { + "epoch": 2.1528662420382165, + "grad_norm": 0.39913914589973887, + "learning_rate": 3.498610269097554e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08500435948371887, + "step": 1860, + "valid_targets_mean": 6553.8, + "valid_targets_min": 3621 + }, + { + "epoch": 2.15865662999421, + "grad_norm": 0.41904414215616076, + "learning_rate": 3.49478180187922e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09690581262111664, + "step": 1865, + "valid_targets_mean": 6762.5, + "valid_targets_min": 4968 + }, + { + "epoch": 2.1644470179502027, + "grad_norm": 0.4556773026118942, + "learning_rate": 3.490940885485484e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09846589714288712, + "step": 1870, + "valid_targets_mean": 6143.9, + "valid_targets_min": 4917 + }, + { + "epoch": 2.1702374059061955, + "grad_norm": 0.6116632442341243, + "learning_rate": 3.487087551905123e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13917821645736694, + "step": 1875, + "valid_targets_mean": 4583.8, + "valid_targets_min": 215 + }, + { + "epoch": 2.176027793862189, + "grad_norm": 0.39727120128324056, + "learning_rate": 3.483221833230331e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10544075816869736, + "step": 1880, + "valid_targets_mean": 7317.1, + "valid_targets_min": 4948 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4817720608717964, + "learning_rate": 3.4793437616564484e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10634658485651016, + "step": 1885, + "valid_targets_mean": 6437.6, + "valid_targets_min": 4510 + }, + { + "epoch": 2.187608569774175, + "grad_norm": 0.4479444586079385, + "learning_rate": 3.4754533694816964e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11554727703332901, + "step": 1890, + "valid_targets_mean": 7707.9, + "valid_targets_min": 5796 + }, + { + "epoch": 2.193398957730168, + "grad_norm": 0.39456266062547346, + "learning_rate": 3.471550689106907e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09939637780189514, + "step": 1895, + "valid_targets_mean": 7900.9, + "valid_targets_min": 5643 + }, + { + "epoch": 2.199189345686161, + "grad_norm": 0.4314337219750506, + "learning_rate": 3.4676357530352544e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11286012828350067, + "step": 1900, + "valid_targets_mean": 7627.2, + "valid_targets_min": 5742 + }, + { + "epoch": 2.204979733642154, + "grad_norm": 0.39814070122791084, + "learning_rate": 3.463708593871983e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1185033842921257, + "step": 1905, + "valid_targets_mean": 8725.6, + "valid_targets_min": 5141 + }, + { + "epoch": 2.2107701215981472, + "grad_norm": 0.4342438080398501, + "learning_rate": 3.459769244324136e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11473096907138824, + "step": 1910, + "valid_targets_mean": 6967.8, + "valid_targets_min": 6088 + }, + { + "epoch": 2.21656050955414, + "grad_norm": 0.4064856622407791, + "learning_rate": 3.455817737200283e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0970906987786293, + "step": 1915, + "valid_targets_mean": 7140.1, + "valid_targets_min": 4834 + }, + { + "epoch": 2.2223508975101334, + "grad_norm": 0.43104421279326466, + "learning_rate": 3.451854105410248e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.096015565097332, + "step": 1920, + "valid_targets_mean": 6706.9, + "valid_targets_min": 5108 + }, + { + "epoch": 2.228141285466126, + "grad_norm": 0.47819652028982407, + "learning_rate": 3.447878381964834e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10358776897192001, + "step": 1925, + "valid_targets_mean": 5931.4, + "valid_targets_min": 4708 + }, + { + "epoch": 2.233931673422119, + "grad_norm": 0.39514715435346154, + "learning_rate": 3.4438905999755475e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09589406102895737, + "step": 1930, + "valid_targets_mean": 7145.6, + "valid_targets_min": 5280 + }, + { + "epoch": 2.2397220613781124, + "grad_norm": 0.7581947077221936, + "learning_rate": 3.439890792654325e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06750276684761047, + "step": 1935, + "valid_targets_mean": 1554.9, + "valid_targets_min": 592 + }, + { + "epoch": 2.245512449334105, + "grad_norm": 0.49252508143484086, + "learning_rate": 3.435878993313255e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11715425550937653, + "step": 1940, + "valid_targets_mean": 7069.5, + "valid_targets_min": 4765 + }, + { + "epoch": 2.2513028372900985, + "grad_norm": 0.4648454181818097, + "learning_rate": 3.4318552353642994e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10164330899715424, + "step": 1945, + "valid_targets_mean": 6193.4, + "valid_targets_min": 5587 + }, + { + "epoch": 2.2570932252460914, + "grad_norm": 0.43862119955162104, + "learning_rate": 3.427819552319017e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10625051707029343, + "step": 1950, + "valid_targets_mean": 6612.2, + "valid_targets_min": 4894 + }, + { + "epoch": 2.2628836132020846, + "grad_norm": 0.42877814599863134, + "learning_rate": 3.423771977788285e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09473372995853424, + "step": 1955, + "valid_targets_mean": 6605.9, + "valid_targets_min": 3795 + }, + { + "epoch": 2.2686740011580775, + "grad_norm": 0.42031608208073107, + "learning_rate": 3.4197125454820155e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10285250097513199, + "step": 1960, + "valid_targets_mean": 7186.8, + "valid_targets_min": 4890 + }, + { + "epoch": 2.274464389114071, + "grad_norm": 0.4804930665105912, + "learning_rate": 3.4156412892088795e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10341675579547882, + "step": 1965, + "valid_targets_mean": 6544.4, + "valid_targets_min": 4615 + }, + { + "epoch": 2.2802547770700636, + "grad_norm": 0.43505238303162125, + "learning_rate": 3.4115582428760224e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10975189507007599, + "step": 1970, + "valid_targets_mean": 6339.2, + "valid_targets_min": 4798 + }, + { + "epoch": 2.286045165026057, + "grad_norm": 0.551542106228472, + "learning_rate": 3.4074634404887814e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1177409440279007, + "step": 1975, + "valid_targets_mean": 4558.8, + "valid_targets_min": 148 + }, + { + "epoch": 2.2918355529820498, + "grad_norm": 0.4965808093480616, + "learning_rate": 3.403356916150404e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11211363971233368, + "step": 1980, + "valid_targets_mean": 6807.9, + "valid_targets_min": 5164 + }, + { + "epoch": 2.297625940938043, + "grad_norm": 0.4554774656612426, + "learning_rate": 3.3992387040617616e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1016792431473732, + "step": 1985, + "valid_targets_mean": 7011.8, + "valid_targets_min": 4974 + }, + { + "epoch": 2.303416328894036, + "grad_norm": 0.4493838431213323, + "learning_rate": 3.395108838521068e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09250965714454651, + "step": 1990, + "valid_targets_mean": 5794.2, + "valid_targets_min": 4809 + }, + { + "epoch": 2.3092067168500288, + "grad_norm": 0.4323765965599609, + "learning_rate": 3.390967353923591e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09102576971054077, + "step": 1995, + "valid_targets_mean": 6401.8, + "valid_targets_min": 4842 + }, + { + "epoch": 2.314997104806022, + "grad_norm": 0.41963893132513386, + "learning_rate": 3.3868142847613677e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10178063809871674, + "step": 2000, + "valid_targets_mean": 6182.1, + "valid_targets_min": 5116 + }, + { + "epoch": 2.320787492762015, + "grad_norm": 0.4072833090612429, + "learning_rate": 3.382649665622914e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09133348613977432, + "step": 2005, + "valid_targets_mean": 6165.1, + "valid_targets_min": 5764 + }, + { + "epoch": 2.326577880718008, + "grad_norm": 0.5814652127282149, + "learning_rate": 3.378473531192942e-05, + "loss": 0.2801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16335105895996094, + "step": 2010, + "valid_targets_mean": 4151.0, + "valid_targets_min": 1945 + }, + { + "epoch": 2.332368268674001, + "grad_norm": 0.5117966431044899, + "learning_rate": 3.374285916252065e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14518912136554718, + "step": 2015, + "valid_targets_mean": 6084.9, + "valid_targets_min": 1799 + }, + { + "epoch": 2.3381586566299943, + "grad_norm": 0.5520391493076426, + "learning_rate": 3.370086855676514e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16817054152488708, + "step": 2020, + "valid_targets_mean": 6738.0, + "valid_targets_min": 3015 + }, + { + "epoch": 2.343949044585987, + "grad_norm": 0.5090140114428693, + "learning_rate": 3.36587638443784e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1441977322101593, + "step": 2025, + "valid_targets_mean": 5991.5, + "valid_targets_min": 1681 + }, + { + "epoch": 2.3497394325419805, + "grad_norm": 0.5499747140361684, + "learning_rate": 3.361654537602632e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1109984964132309, + "step": 2030, + "valid_targets_mean": 3827.5, + "valid_targets_min": 1044 + }, + { + "epoch": 2.3555298204979733, + "grad_norm": 0.5622165946749246, + "learning_rate": 3.3574213503322146e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1245369240641594, + "step": 2035, + "valid_targets_mean": 3889.4, + "valid_targets_min": 1951 + }, + { + "epoch": 2.361320208453966, + "grad_norm": 0.5041215476638641, + "learning_rate": 3.3531768578823646e-05, + "loss": 0.2689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12516576051712036, + "step": 2040, + "valid_targets_mean": 5299.0, + "valid_targets_min": 2060 + }, + { + "epoch": 2.3671105964099595, + "grad_norm": 0.5007207114227822, + "learning_rate": 3.348921095603011e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12523436546325684, + "step": 2045, + "valid_targets_mean": 5303.0, + "valid_targets_min": 2424 + }, + { + "epoch": 2.3729009843659528, + "grad_norm": 0.5749076294108161, + "learning_rate": 3.344654098937942e-05, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16740959882736206, + "step": 2050, + "valid_targets_mean": 5090.9, + "valid_targets_min": 2077 + }, + { + "epoch": 2.3786913723219456, + "grad_norm": 0.6036836968287327, + "learning_rate": 3.3403759034245117e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12114214897155762, + "step": 2055, + "valid_targets_mean": 4620.9, + "valid_targets_min": 2022 + }, + { + "epoch": 2.3844817602779385, + "grad_norm": 0.5375277920208905, + "learning_rate": 3.336086544693344e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15950371325016022, + "step": 2060, + "valid_targets_mean": 5276.0, + "valid_targets_min": 1499 + }, + { + "epoch": 2.3902721482339317, + "grad_norm": 0.49513745930204944, + "learning_rate": 3.331786058468032e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1467110812664032, + "step": 2065, + "valid_targets_mean": 6256.1, + "valid_targets_min": 1013 + }, + { + "epoch": 2.3960625361899246, + "grad_norm": 0.5783002445562354, + "learning_rate": 3.327474480564846e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12622880935668945, + "step": 2070, + "valid_targets_mean": 3896.4, + "valid_targets_min": 1939 + }, + { + "epoch": 2.401852924145918, + "grad_norm": 0.5769722093369325, + "learning_rate": 3.32315184689243e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15862694382667542, + "step": 2075, + "valid_targets_mean": 4025.5, + "valid_targets_min": 1820 + }, + { + "epoch": 2.4076433121019107, + "grad_norm": 0.6525497480217162, + "learning_rate": 3.318818193451509e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13170477747917175, + "step": 2080, + "valid_targets_mean": 2984.9, + "valid_targets_min": 1589 + }, + { + "epoch": 2.413433700057904, + "grad_norm": 0.5784325309997747, + "learning_rate": 3.31447355633458e-05, + "loss": 0.2723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12977421283721924, + "step": 2085, + "valid_targets_mean": 4404.4, + "valid_targets_min": 2668 + }, + { + "epoch": 2.419224088013897, + "grad_norm": 0.6165233848368223, + "learning_rate": 3.3101179717256214e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.156073197722435, + "step": 2090, + "valid_targets_mean": 3997.1, + "valid_targets_min": 2530 + }, + { + "epoch": 2.42501447596989, + "grad_norm": 0.66620163280296, + "learning_rate": 3.305751475899783e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132912278175354, + "step": 2095, + "valid_targets_mean": 3860.4, + "valid_targets_min": 2170 + }, + { + "epoch": 2.430804863925883, + "grad_norm": 0.6585893355253127, + "learning_rate": 3.3013741052230915e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09839099645614624, + "step": 2100, + "valid_targets_mean": 2333.4, + "valid_targets_min": 1128 + }, + { + "epoch": 2.436595251881876, + "grad_norm": 0.6194017217583334, + "learning_rate": 3.2969858961521426e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14759422838687897, + "step": 2105, + "valid_targets_mean": 3412.4, + "valid_targets_min": 2001 + }, + { + "epoch": 2.442385639837869, + "grad_norm": 0.6290180566419377, + "learning_rate": 3.292586885233797e-05, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1449579894542694, + "step": 2110, + "valid_targets_mean": 3916.6, + "valid_targets_min": 1267 + }, + { + "epoch": 2.448176027793862, + "grad_norm": 0.6830786628543166, + "learning_rate": 3.288177109104879e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1411552131175995, + "step": 2115, + "valid_targets_mean": 3868.5, + "valid_targets_min": 2234 + }, + { + "epoch": 2.4539664157498553, + "grad_norm": 0.6523431302307824, + "learning_rate": 3.2837566044918726e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1432904601097107, + "step": 2120, + "valid_targets_mean": 3154.4, + "valid_targets_min": 1403 + }, + { + "epoch": 2.459756803705848, + "grad_norm": 0.6315305472107878, + "learning_rate": 3.2793254082106086e-05, + "loss": 0.2735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15610270202159882, + "step": 2125, + "valid_targets_mean": 4107.5, + "valid_targets_min": 1588 + }, + { + "epoch": 2.4655471916618414, + "grad_norm": 0.6560897876305025, + "learning_rate": 3.274883557165965e-05, + "loss": 0.2797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15769946575164795, + "step": 2130, + "valid_targets_mean": 3702.9, + "valid_targets_min": 1902 + }, + { + "epoch": 2.4713375796178343, + "grad_norm": 0.5244545462951082, + "learning_rate": 3.270431088351558e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12711435556411743, + "step": 2135, + "valid_targets_mean": 4432.1, + "valid_targets_min": 2171 + }, + { + "epoch": 2.4771279675738276, + "grad_norm": 0.6242616772853598, + "learning_rate": 3.265968038849433e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16617780923843384, + "step": 2140, + "valid_targets_mean": 4749.9, + "valid_targets_min": 2677 + }, + { + "epoch": 2.4829183555298204, + "grad_norm": 0.6069853123821018, + "learning_rate": 3.261494445829752e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648349165916443, + "step": 2145, + "valid_targets_mean": 4314.9, + "valid_targets_min": 2725 + }, + { + "epoch": 2.4887087434858137, + "grad_norm": 0.7004803993263157, + "learning_rate": 3.257010346550493e-05, + "loss": 0.2782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17026188969612122, + "step": 2150, + "valid_targets_mean": 3132.1, + "valid_targets_min": 1685 + }, + { + "epoch": 2.4944991314418066, + "grad_norm": 0.5664274832522125, + "learning_rate": 3.252515778357131e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1272943615913391, + "step": 2155, + "valid_targets_mean": 4106.8, + "valid_targets_min": 2384 + }, + { + "epoch": 2.5002895193978, + "grad_norm": 0.5996482260516004, + "learning_rate": 3.248010778682333e-05, + "loss": 0.266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14419519901275635, + "step": 2160, + "valid_targets_mean": 4175.2, + "valid_targets_min": 2060 + }, + { + "epoch": 2.5060799073537927, + "grad_norm": 0.6708401852808833, + "learning_rate": 3.2434953850456426e-05, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13591818511486053, + "step": 2165, + "valid_targets_mean": 3389.1, + "valid_targets_min": 1458 + }, + { + "epoch": 2.5118702953097856, + "grad_norm": 0.6366477783226726, + "learning_rate": 3.238969635053168e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1480705738067627, + "step": 2170, + "valid_targets_mean": 4349.9, + "valid_targets_min": 1512 + }, + { + "epoch": 2.517660683265779, + "grad_norm": 0.6905097825586773, + "learning_rate": 3.234433566397271e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11380015313625336, + "step": 2175, + "valid_targets_mean": 2479.8, + "valid_targets_min": 1480 + }, + { + "epoch": 2.5234510712217717, + "grad_norm": 0.6197924359922682, + "learning_rate": 3.229887216856251e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13098078966140747, + "step": 2180, + "valid_targets_mean": 3904.8, + "valid_targets_min": 1338 + }, + { + "epoch": 2.529241459177765, + "grad_norm": 0.5762184284428006, + "learning_rate": 3.2253306242940306e-05, + "loss": 0.2706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1326790302991867, + "step": 2185, + "valid_targets_mean": 3684.2, + "valid_targets_min": 1276 + }, + { + "epoch": 2.535031847133758, + "grad_norm": 0.635981487933494, + "learning_rate": 3.22076382665984e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12587392330169678, + "step": 2190, + "valid_targets_mean": 4008.1, + "valid_targets_min": 1991 + }, + { + "epoch": 2.540822235089751, + "grad_norm": 0.6365019756761532, + "learning_rate": 3.216186861987905e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14298924803733826, + "step": 2195, + "valid_targets_mean": 3331.6, + "valid_targets_min": 924 + }, + { + "epoch": 2.546612623045744, + "grad_norm": 0.6169273093000665, + "learning_rate": 3.211599768397121e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13531437516212463, + "step": 2200, + "valid_targets_mean": 4542.5, + "valid_targets_min": 2089 + }, + { + "epoch": 2.5524030110017373, + "grad_norm": 0.5905221275607936, + "learning_rate": 3.2070025840907474e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11663927137851715, + "step": 2205, + "valid_targets_mean": 3307.2, + "valid_targets_min": 1643 + }, + { + "epoch": 2.55819339895773, + "grad_norm": 0.5867800821675817, + "learning_rate": 3.202395347356079e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13914506137371063, + "step": 2210, + "valid_targets_mean": 3690.4, + "valid_targets_min": 1244 + }, + { + "epoch": 2.563983786913723, + "grad_norm": 0.568570395278458, + "learning_rate": 3.1977780965641334e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14504513144493103, + "step": 2215, + "valid_targets_mean": 4846.2, + "valid_targets_min": 2710 + }, + { + "epoch": 2.5697741748697163, + "grad_norm": 0.6412931743418574, + "learning_rate": 3.193150870169329e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1039309948682785, + "step": 2220, + "valid_targets_mean": 2848.8, + "valid_targets_min": 977 + }, + { + "epoch": 2.5755645628257096, + "grad_norm": 0.5723133537467101, + "learning_rate": 3.188513706709165e-05, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14200431108474731, + "step": 2225, + "valid_targets_mean": 4040.8, + "valid_targets_min": 810 + }, + { + "epoch": 2.5813549507817024, + "grad_norm": 0.5793947108625295, + "learning_rate": 3.183866644803901e-05, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1353072226047516, + "step": 2230, + "valid_targets_mean": 3452.1, + "valid_targets_min": 1798 + }, + { + "epoch": 2.5871453387376953, + "grad_norm": 0.9774813400849247, + "learning_rate": 3.1792097231562364e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1235002651810646, + "step": 2235, + "valid_targets_mean": 3939.5, + "valid_targets_min": 2035 + }, + { + "epoch": 2.5929357266936885, + "grad_norm": 0.5872365776559091, + "learning_rate": 3.174542980550984e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12853936851024628, + "step": 2240, + "valid_targets_mean": 3559.2, + "valid_targets_min": 1261 + }, + { + "epoch": 2.5987261146496814, + "grad_norm": 0.6816129912425755, + "learning_rate": 3.1698664558547515e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1662760227918625, + "step": 2245, + "valid_targets_mean": 3653.4, + "valid_targets_min": 1714 + }, + { + "epoch": 2.6045165026056747, + "grad_norm": 0.6388954275905896, + "learning_rate": 3.165180188015616e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10945677757263184, + "step": 2250, + "valid_targets_mean": 3019.4, + "valid_targets_min": 1219 + }, + { + "epoch": 2.6103068905616675, + "grad_norm": 0.6210381392513558, + "learning_rate": 3.160484216062798e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11807023733854294, + "step": 2255, + "valid_targets_mean": 3603.2, + "valid_targets_min": 2058 + }, + { + "epoch": 2.616097278517661, + "grad_norm": 0.6448257402388333, + "learning_rate": 3.1557785791063406e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10894700884819031, + "step": 2260, + "valid_targets_mean": 2962.5, + "valid_targets_min": 916 + }, + { + "epoch": 2.6218876664736537, + "grad_norm": 0.5926187115075681, + "learning_rate": 3.1510633163367786e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11035982519388199, + "step": 2265, + "valid_targets_mean": 3654.0, + "valid_targets_min": 1302 + }, + { + "epoch": 2.627678054429647, + "grad_norm": 0.6634112051804333, + "learning_rate": 3.146338467024816e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13553813099861145, + "step": 2270, + "valid_targets_mean": 3304.9, + "valid_targets_min": 1278 + }, + { + "epoch": 2.63346844238564, + "grad_norm": 0.5932977731351348, + "learning_rate": 3.141604070520998e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1372648924589157, + "step": 2275, + "valid_targets_mean": 3934.6, + "valid_targets_min": 2471 + }, + { + "epoch": 2.6392588303416327, + "grad_norm": 0.5629032334171726, + "learning_rate": 3.136860166255382e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13328105211257935, + "step": 2280, + "valid_targets_mean": 4429.2, + "valid_targets_min": 1922 + }, + { + "epoch": 2.645049218297626, + "grad_norm": 0.5869812911680073, + "learning_rate": 3.132106793737209e-05, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14404839277267456, + "step": 2285, + "valid_targets_mean": 3957.9, + "valid_targets_min": 2672 + }, + { + "epoch": 2.6508396062536193, + "grad_norm": 0.6229051741089654, + "learning_rate": 3.127343992554577e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1221669465303421, + "step": 2290, + "valid_targets_mean": 3290.0, + "valid_targets_min": 1086 + }, + { + "epoch": 2.656629994209612, + "grad_norm": 0.5835055556088607, + "learning_rate": 3.122571802374112e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1405826210975647, + "step": 2295, + "valid_targets_mean": 5206.5, + "valid_targets_min": 1566 + }, + { + "epoch": 2.662420382165605, + "grad_norm": 0.5863525815985173, + "learning_rate": 3.117790262940631e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14586801826953888, + "step": 2300, + "valid_targets_mean": 3793.8, + "valid_targets_min": 2010 + }, + { + "epoch": 2.6682107701215982, + "grad_norm": 0.6796155332616456, + "learning_rate": 3.112999414076818e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1526874303817749, + "step": 2305, + "valid_targets_mean": 3481.2, + "valid_targets_min": 1548 + }, + { + "epoch": 2.674001158077591, + "grad_norm": 0.6658592899639356, + "learning_rate": 3.1081992956828905e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481824517250061, + "step": 2310, + "valid_targets_mean": 3473.6, + "valid_targets_min": 1352 + }, + { + "epoch": 2.6797915460335844, + "grad_norm": 0.5785234103831691, + "learning_rate": 3.103389947736266e-05, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12380445748567581, + "step": 2315, + "valid_targets_mean": 4093.6, + "valid_targets_min": 1483 + }, + { + "epoch": 2.6855819339895772, + "grad_norm": 0.5915985888499322, + "learning_rate": 3.098571410291228e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11295592784881592, + "step": 2320, + "valid_targets_mean": 3756.0, + "valid_targets_min": 2015 + }, + { + "epoch": 2.69137232194557, + "grad_norm": 0.5559748529158701, + "learning_rate": 3.093743723478598e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12268182635307312, + "step": 2325, + "valid_targets_mean": 4435.0, + "valid_targets_min": 1678 + }, + { + "epoch": 2.6971627099015634, + "grad_norm": 0.6503040030028072, + "learning_rate": 3.0889069275053916e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12161822617053986, + "step": 2330, + "valid_targets_mean": 2909.4, + "valid_targets_min": 1568 + }, + { + "epoch": 2.7029530978575567, + "grad_norm": 0.5637056236595298, + "learning_rate": 3.084061062654495e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11895494163036346, + "step": 2335, + "valid_targets_mean": 3716.4, + "valid_targets_min": 1282 + }, + { + "epoch": 2.7087434858135495, + "grad_norm": 0.6519618594888245, + "learning_rate": 3.0792061692843196e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.131838858127594, + "step": 2340, + "valid_targets_mean": 3126.6, + "valid_targets_min": 1312 + }, + { + "epoch": 2.7145338737695424, + "grad_norm": 0.6028390887407604, + "learning_rate": 3.074342287828473e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12123403698205948, + "step": 2345, + "valid_targets_mean": 3599.6, + "valid_targets_min": 1764 + }, + { + "epoch": 2.7203242617255357, + "grad_norm": 0.6838931794867641, + "learning_rate": 3.069469458795418e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11124719679355621, + "step": 2350, + "valid_targets_mean": 2447.4, + "valid_targets_min": 1175 + }, + { + "epoch": 2.7261146496815285, + "grad_norm": 0.5398446575139603, + "learning_rate": 3.064587722768136e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13309673964977264, + "step": 2355, + "valid_targets_mean": 4826.4, + "valid_targets_min": 2546 + }, + { + "epoch": 2.731905037637522, + "grad_norm": 0.7287355653313029, + "learning_rate": 3.059697120403791e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1539398431777954, + "step": 2360, + "valid_targets_mean": 3297.0, + "valid_targets_min": 1244 + }, + { + "epoch": 2.7376954255935146, + "grad_norm": 0.6110697593914238, + "learning_rate": 3.05479769243339e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14367569983005524, + "step": 2365, + "valid_targets_mean": 4389.5, + "valid_targets_min": 1501 + }, + { + "epoch": 2.743485813549508, + "grad_norm": 0.5904899994841355, + "learning_rate": 3.049889479661441e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12652909755706787, + "step": 2370, + "valid_targets_mean": 3887.8, + "valid_targets_min": 2097 + }, + { + "epoch": 2.749276201505501, + "grad_norm": 0.5293444804964664, + "learning_rate": 3.044972522965618e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10903643071651459, + "step": 2375, + "valid_targets_mean": 4151.8, + "valid_targets_min": 1558 + }, + { + "epoch": 2.755066589461494, + "grad_norm": 0.5345690566260798, + "learning_rate": 3.040046863296416e-05, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11641837656497955, + "step": 2380, + "valid_targets_mean": 4776.1, + "valid_targets_min": 1145 + }, + { + "epoch": 2.760856977417487, + "grad_norm": 0.6345446481949528, + "learning_rate": 3.0351125416768155e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1530282199382782, + "step": 2385, + "valid_targets_mean": 3919.2, + "valid_targets_min": 1428 + }, + { + "epoch": 2.7666473653734798, + "grad_norm": 0.5969763049450747, + "learning_rate": 3.0301695992019317e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14372681081295013, + "step": 2390, + "valid_targets_mean": 4515.1, + "valid_targets_min": 2510 + }, + { + "epoch": 2.772437753329473, + "grad_norm": 0.6250162114900181, + "learning_rate": 3.0252180770386843e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120104044675827, + "step": 2395, + "valid_targets_mean": 3257.1, + "valid_targets_min": 1301 + }, + { + "epoch": 2.7782281412854664, + "grad_norm": 0.6445992710336376, + "learning_rate": 3.020258016425443e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13314387202262878, + "step": 2400, + "valid_targets_mean": 3663.1, + "valid_targets_min": 1971 + }, + { + "epoch": 2.784018529241459, + "grad_norm": 0.6483103155518952, + "learning_rate": 3.0152894586716935e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1609051674604416, + "step": 2405, + "valid_targets_mean": 3979.0, + "valid_targets_min": 1054 + }, + { + "epoch": 2.789808917197452, + "grad_norm": 0.6156498937088388, + "learning_rate": 3.0103124451576855e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333981156349182, + "step": 2410, + "valid_targets_mean": 4076.8, + "valid_targets_min": 2206 + }, + { + "epoch": 2.7955993051534453, + "grad_norm": 0.5862048353117455, + "learning_rate": 3.0053270173340946e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12760062515735626, + "step": 2415, + "valid_targets_mean": 3744.9, + "valid_targets_min": 721 + }, + { + "epoch": 2.801389693109438, + "grad_norm": 0.5830520407651704, + "learning_rate": 3.000333216721674e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1376606523990631, + "step": 2420, + "valid_targets_mean": 4235.1, + "valid_targets_min": 1625 + }, + { + "epoch": 2.8071800810654315, + "grad_norm": 0.6455391194137454, + "learning_rate": 2.9953310849109073e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14893652498722076, + "step": 2425, + "valid_targets_mean": 4103.1, + "valid_targets_min": 2589 + }, + { + "epoch": 2.8129704690214243, + "grad_norm": 0.7062520885378663, + "learning_rate": 2.9903206635616656e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12403398752212524, + "step": 2430, + "valid_targets_mean": 2978.0, + "valid_targets_min": 998 + }, + { + "epoch": 2.8187608569774176, + "grad_norm": 0.6206595353746295, + "learning_rate": 2.9853019944028583e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12284980714321136, + "step": 2435, + "valid_targets_mean": 4591.8, + "valid_targets_min": 1186 + }, + { + "epoch": 2.8245512449334105, + "grad_norm": 0.5622116346549204, + "learning_rate": 2.9802751192320855e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11684159189462662, + "step": 2440, + "valid_targets_mean": 3200.6, + "valid_targets_min": 1195 + }, + { + "epoch": 2.8303416328894038, + "grad_norm": 0.5998859708099616, + "learning_rate": 2.9752400799152916e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11318564414978027, + "step": 2445, + "valid_targets_mean": 3047.4, + "valid_targets_min": 1863 + }, + { + "epoch": 2.8361320208453966, + "grad_norm": 0.6486787196368696, + "learning_rate": 2.9701969183864137e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1260605901479721, + "step": 2450, + "valid_targets_mean": 3483.8, + "valid_targets_min": 1483 + }, + { + "epoch": 2.8419224088013895, + "grad_norm": 0.6601872798879775, + "learning_rate": 2.965145676647036e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1035212054848671, + "step": 2455, + "valid_targets_mean": 3146.8, + "valid_targets_min": 1662 + }, + { + "epoch": 2.8477127967573828, + "grad_norm": 0.5953589784047442, + "learning_rate": 2.9600863967660368e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442669928073883, + "step": 2460, + "valid_targets_mean": 4545.2, + "valid_targets_min": 2659 + }, + { + "epoch": 2.853503184713376, + "grad_norm": 0.6766549577613615, + "learning_rate": 2.95501912087924e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13332010805606842, + "step": 2465, + "valid_targets_mean": 3529.8, + "valid_targets_min": 1434 + }, + { + "epoch": 2.859293572669369, + "grad_norm": 0.6037530300473921, + "learning_rate": 2.9499438911890644e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11366799473762512, + "step": 2470, + "valid_targets_mean": 2998.2, + "valid_targets_min": 1074 + }, + { + "epoch": 2.8650839606253617, + "grad_norm": 0.6536835630432294, + "learning_rate": 2.944860749964169e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12703803181648254, + "step": 2475, + "valid_targets_mean": 3511.4, + "valid_targets_min": 1510 + }, + { + "epoch": 2.870874348581355, + "grad_norm": 0.5995340016895321, + "learning_rate": 2.939769739539106e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1322011649608612, + "step": 2480, + "valid_targets_mean": 4068.1, + "valid_targets_min": 1885 + }, + { + "epoch": 2.876664736537348, + "grad_norm": 0.60589407553132, + "learning_rate": 2.934670902313964e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12155967950820923, + "step": 2485, + "valid_targets_mean": 4412.6, + "valid_targets_min": 3139 + }, + { + "epoch": 2.882455124493341, + "grad_norm": 0.6433580210305632, + "learning_rate": 2.929564280754018e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15175604820251465, + "step": 2490, + "valid_targets_mean": 3801.2, + "valid_targets_min": 2598 + }, + { + "epoch": 2.888245512449334, + "grad_norm": 0.6383076431224991, + "learning_rate": 2.924449917389373e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11478400230407715, + "step": 2495, + "valid_targets_mean": 2971.0, + "valid_targets_min": 1460 + }, + { + "epoch": 2.8940359004053273, + "grad_norm": 0.6281639839698074, + "learning_rate": 2.919327854814611e-05, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1256842315196991, + "step": 2500, + "valid_targets_mean": 3538.9, + "valid_targets_min": 1084 + }, + { + "epoch": 2.89982628836132, + "grad_norm": 0.6110228223154738, + "learning_rate": 2.9141981356884367e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13257163763046265, + "step": 2505, + "valid_targets_mean": 3759.2, + "valid_targets_min": 1618 + }, + { + "epoch": 2.9056166763173135, + "grad_norm": 0.65313781157454, + "learning_rate": 2.909060802733322e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13252943754196167, + "step": 2510, + "valid_targets_mean": 3248.8, + "valid_targets_min": 1193 + }, + { + "epoch": 2.9114070642733063, + "grad_norm": 0.5869454134091099, + "learning_rate": 2.90391589873515e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12256360799074173, + "step": 2515, + "valid_targets_mean": 4340.0, + "valid_targets_min": 1564 + }, + { + "epoch": 2.917197452229299, + "grad_norm": 0.6284591952877979, + "learning_rate": 2.8987634665428578e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14555668830871582, + "step": 2520, + "valid_targets_mean": 4032.2, + "valid_targets_min": 1472 + }, + { + "epoch": 2.9229878401852925, + "grad_norm": 0.5602935140755988, + "learning_rate": 2.8936035490680817e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10094116628170013, + "step": 2525, + "valid_targets_mean": 4291.0, + "valid_targets_min": 1246 + }, + { + "epoch": 2.9287782281412853, + "grad_norm": 0.6333361634147205, + "learning_rate": 2.8884361892847974e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10485686361789703, + "step": 2530, + "valid_targets_mean": 3467.9, + "valid_targets_min": 1724 + }, + { + "epoch": 2.9345686160972786, + "grad_norm": 0.6347563240307721, + "learning_rate": 2.8832614302289644e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13671885430812836, + "step": 2535, + "valid_targets_mean": 3630.6, + "valid_targets_min": 1502 + }, + { + "epoch": 2.9403590040532714, + "grad_norm": 0.5703161738148554, + "learning_rate": 2.8780793149981652e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1201121062040329, + "step": 2540, + "valid_targets_mean": 4141.1, + "valid_targets_min": 2134 + }, + { + "epoch": 2.9461493920092647, + "grad_norm": 0.6173130579998207, + "learning_rate": 2.8728898867512493e-05, + "loss": 0.2755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11488363146781921, + "step": 2545, + "valid_targets_mean": 3016.4, + "valid_targets_min": 2271 + }, + { + "epoch": 2.9519397799652576, + "grad_norm": 0.5508192014943009, + "learning_rate": 2.86769318870797e-05, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13369382917881012, + "step": 2550, + "valid_targets_mean": 4785.1, + "valid_targets_min": 2713 + }, + { + "epoch": 2.957730167921251, + "grad_norm": 0.6406012334203832, + "learning_rate": 2.8624892641486294e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12603960931301117, + "step": 2555, + "valid_targets_mean": 3951.6, + "valid_targets_min": 2373 + }, + { + "epoch": 2.9635205558772437, + "grad_norm": 0.6116187219621803, + "learning_rate": 2.8572781564137116e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11695685982704163, + "step": 2560, + "valid_targets_mean": 3756.1, + "valid_targets_min": 2440 + }, + { + "epoch": 2.9693109438332366, + "grad_norm": 0.6592558498956914, + "learning_rate": 2.852059908903529e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1306173950433731, + "step": 2565, + "valid_targets_mean": 3366.6, + "valid_targets_min": 2113 + }, + { + "epoch": 2.97510133178923, + "grad_norm": 0.6351786945052718, + "learning_rate": 2.8468345650778527e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10195891559123993, + "step": 2570, + "valid_targets_mean": 2892.4, + "valid_targets_min": 757 + }, + { + "epoch": 2.980891719745223, + "grad_norm": 0.6244909255403139, + "learning_rate": 2.8416021684555593e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1276107132434845, + "step": 2575, + "valid_targets_mean": 4595.0, + "valid_targets_min": 1928 + }, + { + "epoch": 2.986682107701216, + "grad_norm": 0.6168677811966271, + "learning_rate": 2.836362762614262e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1577070951461792, + "step": 2580, + "valid_targets_mean": 4214.5, + "valid_targets_min": 1800 + }, + { + "epoch": 2.992472495657209, + "grad_norm": 0.6036284614314452, + "learning_rate": 2.8311163911899486e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12150377035140991, + "step": 2585, + "valid_targets_mean": 3766.9, + "valid_targets_min": 2922 + }, + { + "epoch": 2.998262883613202, + "grad_norm": 0.6497477524082161, + "learning_rate": 2.8258630978766208e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14457978308200836, + "step": 2590, + "valid_targets_mean": 3857.8, + "valid_targets_min": 1214 + }, + { + "epoch": 3.0034742327735957, + "grad_norm": 0.5490993236724281, + "learning_rate": 2.820602926425929e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10193323343992233, + "step": 2595, + "valid_targets_mean": 5435.1, + "valid_targets_min": 3516 + }, + { + "epoch": 3.009264620729589, + "grad_norm": 0.42556536290513536, + "learning_rate": 2.815335920646807e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0997970774769783, + "step": 2600, + "valid_targets_mean": 7654.2, + "valid_targets_min": 6354 + }, + { + "epoch": 3.015055008685582, + "grad_norm": 0.42912438060333435, + "learning_rate": 2.810062124405107e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1131233498454094, + "step": 2605, + "valid_targets_mean": 7341.6, + "valid_targets_min": 5754 + }, + { + "epoch": 3.020845396641575, + "grad_norm": 0.4117296149802389, + "learning_rate": 2.8047815816232363e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08657995611429214, + "step": 2610, + "valid_targets_mean": 7666.2, + "valid_targets_min": 5666 + }, + { + "epoch": 3.026635784597568, + "grad_norm": 0.42744506674088706, + "learning_rate": 2.7994943362797906e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10150963068008423, + "step": 2615, + "valid_targets_mean": 7469.0, + "valid_targets_min": 5646 + }, + { + "epoch": 3.0324261725535613, + "grad_norm": 0.49056118722907033, + "learning_rate": 2.794200432409185e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11447674036026001, + "step": 2620, + "valid_targets_mean": 7046.9, + "valid_targets_min": 5571 + }, + { + "epoch": 3.038216560509554, + "grad_norm": 0.44039068925534824, + "learning_rate": 2.788899914101292e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11457256972789764, + "step": 2625, + "valid_targets_mean": 7827.6, + "valid_targets_min": 5405 + }, + { + "epoch": 3.044006948465547, + "grad_norm": 0.41349146941679255, + "learning_rate": 2.7835928255010713e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10325706750154495, + "step": 2630, + "valid_targets_mean": 7023.5, + "valid_targets_min": 5664 + }, + { + "epoch": 3.0497973364215403, + "grad_norm": 0.4593882416922647, + "learning_rate": 2.778279210808203e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0934155136346817, + "step": 2635, + "valid_targets_mean": 7069.8, + "valid_targets_min": 5103 + }, + { + "epoch": 3.055587724377533, + "grad_norm": 0.4196669159756002, + "learning_rate": 2.7729591142767175e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09234358370304108, + "step": 2640, + "valid_targets_mean": 6712.4, + "valid_targets_min": 5469 + }, + { + "epoch": 3.0613781123335264, + "grad_norm": 0.38396037776881536, + "learning_rate": 2.7676325802146306e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09183961898088455, + "step": 2645, + "valid_targets_mean": 7289.2, + "valid_targets_min": 5513 + }, + { + "epoch": 3.0671685002895193, + "grad_norm": 0.4087622775220975, + "learning_rate": 2.762299652983573e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09124772250652313, + "step": 2650, + "valid_targets_mean": 7022.1, + "valid_targets_min": 5691 + }, + { + "epoch": 3.0729588882455126, + "grad_norm": 0.4131668084826757, + "learning_rate": 2.756960376998418e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09983032941818237, + "step": 2655, + "valid_targets_mean": 6736.0, + "valid_targets_min": 5550 + }, + { + "epoch": 3.0787492762015054, + "grad_norm": 0.408189467149742, + "learning_rate": 2.7516147967269163e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0885000228881836, + "step": 2660, + "valid_targets_mean": 6346.9, + "valid_targets_min": 5067 + }, + { + "epoch": 3.0845396641574987, + "grad_norm": 0.4030983371858413, + "learning_rate": 2.746262956689322e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0960773229598999, + "step": 2665, + "valid_targets_mean": 6784.0, + "valid_targets_min": 5618 + }, + { + "epoch": 3.0903300521134915, + "grad_norm": 0.40401695487165545, + "learning_rate": 2.7409049014580244e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1078563779592514, + "step": 2670, + "valid_targets_mean": 7568.4, + "valid_targets_min": 5856 + }, + { + "epoch": 3.096120440069485, + "grad_norm": 0.39748706417143886, + "learning_rate": 2.7355406756571752e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0892835333943367, + "step": 2675, + "valid_targets_mean": 7198.5, + "valid_targets_min": 4793 + }, + { + "epoch": 3.1019108280254777, + "grad_norm": 0.4046911052941595, + "learning_rate": 2.7301703239623152e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09586253762245178, + "step": 2680, + "valid_targets_mean": 6680.5, + "valid_targets_min": 5350 + }, + { + "epoch": 3.1077012159814705, + "grad_norm": 0.42963147337235336, + "learning_rate": 2.7247938911000074e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09971575438976288, + "step": 2685, + "valid_targets_mean": 6770.6, + "valid_targets_min": 5753 + }, + { + "epoch": 3.113491603937464, + "grad_norm": 0.7914296289583164, + "learning_rate": 2.7194114218474584e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07213065773248672, + "step": 2690, + "valid_targets_mean": 1419.8, + "valid_targets_min": 289 + }, + { + "epoch": 3.1192819918934567, + "grad_norm": 0.4415378325560089, + "learning_rate": 2.7140229610321517e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0980992242693901, + "step": 2695, + "valid_targets_mean": 6784.2, + "valid_targets_min": 5042 + }, + { + "epoch": 3.12507237984945, + "grad_norm": 0.39534955301481367, + "learning_rate": 2.7086285535314686e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09090964496135712, + "step": 2700, + "valid_targets_mean": 6774.9, + "valid_targets_min": 3875 + }, + { + "epoch": 3.130862767805443, + "grad_norm": 0.45932881826395955, + "learning_rate": 2.703228244272317e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09747149050235748, + "step": 2705, + "valid_targets_mean": 7047.6, + "valid_targets_min": 6134 + }, + { + "epoch": 3.136653155761436, + "grad_norm": 0.42200901741517993, + "learning_rate": 2.6978220782307575e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10043708980083466, + "step": 2710, + "valid_targets_mean": 6886.6, + "valid_targets_min": 5869 + }, + { + "epoch": 3.142443543717429, + "grad_norm": 0.37704547310224273, + "learning_rate": 2.6924101004316295e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08716057986021042, + "step": 2715, + "valid_targets_mean": 7559.8, + "valid_targets_min": 6145 + }, + { + "epoch": 3.1482339316734222, + "grad_norm": 0.3836760511290801, + "learning_rate": 2.6869923559481743e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08523589372634888, + "step": 2720, + "valid_targets_mean": 8920.5, + "valid_targets_min": 4780 + }, + { + "epoch": 3.154024319629415, + "grad_norm": 0.348547170294601, + "learning_rate": 2.68156888990166e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07461922615766525, + "step": 2725, + "valid_targets_mean": 7393.4, + "valid_targets_min": 5116 + }, + { + "epoch": 3.1598147075854084, + "grad_norm": 0.3728882684422235, + "learning_rate": 2.6761397474610073e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0829453319311142, + "step": 2730, + "valid_targets_mean": 8477.5, + "valid_targets_min": 4033 + }, + { + "epoch": 3.1656050955414012, + "grad_norm": 0.41948282922444835, + "learning_rate": 2.6707049738424117e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09367874264717102, + "step": 2735, + "valid_targets_mean": 6736.6, + "valid_targets_min": 4646 + }, + { + "epoch": 3.1713954834973945, + "grad_norm": 0.43769731812052803, + "learning_rate": 2.665264614308968e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09644682705402374, + "step": 2740, + "valid_targets_mean": 7551.5, + "valid_targets_min": 5461 + }, + { + "epoch": 3.1771858714533874, + "grad_norm": 0.4406770439909477, + "learning_rate": 2.6598187141702923e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11089968681335449, + "step": 2745, + "valid_targets_mean": 7091.0, + "valid_targets_min": 4879 + }, + { + "epoch": 3.1829762594093802, + "grad_norm": 0.4515001973225758, + "learning_rate": 2.6543673187821456e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0904221385717392, + "step": 2750, + "valid_targets_mean": 6269.8, + "valid_targets_min": 4606 + }, + { + "epoch": 3.1887666473653735, + "grad_norm": 0.4363784946771639, + "learning_rate": 2.6489104735460562e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08854199945926666, + "step": 2755, + "valid_targets_mean": 6818.4, + "valid_targets_min": 5525 + }, + { + "epoch": 3.1945570353213664, + "grad_norm": 0.417191450993402, + "learning_rate": 2.6434482239089398e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09214088320732117, + "step": 2760, + "valid_targets_mean": 7365.4, + "valid_targets_min": 5000 + }, + { + "epoch": 3.2003474232773597, + "grad_norm": 0.4337487632877825, + "learning_rate": 2.637980615362723e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.084344781935215, + "step": 2765, + "valid_targets_mean": 6212.1, + "valid_targets_min": 3988 + }, + { + "epoch": 3.2061378112333525, + "grad_norm": 0.3954975501331284, + "learning_rate": 2.6325076934439633e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09320168197154999, + "step": 2770, + "valid_targets_mean": 8396.9, + "valid_targets_min": 6131 + }, + { + "epoch": 3.211928199189346, + "grad_norm": 0.4500355751831564, + "learning_rate": 2.6270295037334713e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09574490040540695, + "step": 2775, + "valid_targets_mean": 7651.0, + "valid_targets_min": 4061 + }, + { + "epoch": 3.2177185871453386, + "grad_norm": 0.3938156686393294, + "learning_rate": 2.6215460918559283e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09371134638786316, + "step": 2780, + "valid_targets_mean": 7090.1, + "valid_targets_min": 5203 + }, + { + "epoch": 3.223508975101332, + "grad_norm": 0.38793490038984524, + "learning_rate": 2.6160575034795087e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08991052210330963, + "step": 2785, + "valid_targets_mean": 7001.0, + "valid_targets_min": 5362 + }, + { + "epoch": 3.229299363057325, + "grad_norm": 0.4173092770685844, + "learning_rate": 2.6105637843155004e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09407326579093933, + "step": 2790, + "valid_targets_mean": 6359.2, + "valid_targets_min": 5236 + }, + { + "epoch": 3.235089751013318, + "grad_norm": 0.45548569764559765, + "learning_rate": 2.60506498011792e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10315366834402084, + "step": 2795, + "valid_targets_mean": 5939.8, + "valid_targets_min": 4693 + }, + { + "epoch": 3.240880138969311, + "grad_norm": 1.238986424920992, + "learning_rate": 2.599561136683136e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036311253905296326, + "step": 2800, + "valid_targets_mean": 208.6, + "valid_targets_min": 134 + }, + { + "epoch": 3.2466705269253042, + "grad_norm": 0.5146729187215359, + "learning_rate": 2.5940522998494863e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09998352825641632, + "step": 2805, + "valid_targets_mean": 6008.5, + "valid_targets_min": 4956 + }, + { + "epoch": 3.252460914881297, + "grad_norm": 0.42168821423053354, + "learning_rate": 2.5885385154968954e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08223581314086914, + "step": 2810, + "valid_targets_mean": 6042.0, + "valid_targets_min": 4817 + }, + { + "epoch": 3.25825130283729, + "grad_norm": 0.4535711701413484, + "learning_rate": 2.5830198295464923e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09312181919813156, + "step": 2815, + "valid_targets_mean": 6608.6, + "valid_targets_min": 5102 + }, + { + "epoch": 3.264041690793283, + "grad_norm": 0.4257103730536391, + "learning_rate": 2.5774962879602285e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09401199221611023, + "step": 2820, + "valid_targets_mean": 6670.1, + "valid_targets_min": 4869 + }, + { + "epoch": 3.269832078749276, + "grad_norm": 0.44522510586129893, + "learning_rate": 2.5719679367404955e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08575493097305298, + "step": 2825, + "valid_targets_mean": 6088.4, + "valid_targets_min": 4946 + }, + { + "epoch": 3.2756224667052694, + "grad_norm": 0.44406409728050933, + "learning_rate": 2.5664348219297432e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08885006606578827, + "step": 2830, + "valid_targets_mean": 6228.4, + "valid_targets_min": 4856 + }, + { + "epoch": 3.281412854661262, + "grad_norm": 0.44972634761371566, + "learning_rate": 2.560896989610093e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09148620069026947, + "step": 2835, + "valid_targets_mean": 5376.6, + "valid_targets_min": 4145 + }, + { + "epoch": 3.2872032426172555, + "grad_norm": 0.4931251776724191, + "learning_rate": 2.555354485902954e-05, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09443989396095276, + "step": 2840, + "valid_targets_mean": 6309.2, + "valid_targets_min": 5305 + }, + { + "epoch": 3.2929936305732483, + "grad_norm": 0.4301848434471346, + "learning_rate": 2.5498073569686444e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08982059359550476, + "step": 2845, + "valid_targets_mean": 6221.5, + "valid_targets_min": 4636 + }, + { + "epoch": 3.2987840185292416, + "grad_norm": 0.41333674292252215, + "learning_rate": 2.5442556490060014e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09725256264209747, + "step": 2850, + "valid_targets_mean": 6831.5, + "valid_targets_min": 5668 + }, + { + "epoch": 3.3045744064852345, + "grad_norm": 0.44385746477258825, + "learning_rate": 2.5386994082519972e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.097258061170578, + "step": 2855, + "valid_targets_mean": 6437.1, + "valid_targets_min": 5004 + }, + { + "epoch": 3.3103647944412273, + "grad_norm": 0.41491139403789895, + "learning_rate": 2.5331386809813565e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08634775131940842, + "step": 2860, + "valid_targets_mean": 6205.8, + "valid_targets_min": 5341 + }, + { + "epoch": 3.3161551823972206, + "grad_norm": 0.4154409527575692, + "learning_rate": 2.5275735135061696e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0978240966796875, + "step": 2865, + "valid_targets_mean": 6748.0, + "valid_targets_min": 5363 + }, + { + "epoch": 3.3219455703532135, + "grad_norm": 0.453739504532685, + "learning_rate": 2.5220039521755056e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09334269911050797, + "step": 2870, + "valid_targets_mean": 5549.8, + "valid_targets_min": 1320 + }, + { + "epoch": 3.3277359583092068, + "grad_norm": 0.5449848364061605, + "learning_rate": 2.5164300433750287e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1472756564617157, + "step": 2875, + "valid_targets_mean": 5766.9, + "valid_targets_min": 746 + }, + { + "epoch": 3.3335263462651996, + "grad_norm": 0.5140311062323946, + "learning_rate": 2.5108518335266104e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13583004474639893, + "step": 2880, + "valid_targets_mean": 7107.5, + "valid_targets_min": 3874 + }, + { + "epoch": 3.339316734221193, + "grad_norm": 0.49982562987867923, + "learning_rate": 2.505269369087941e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11187659204006195, + "step": 2885, + "valid_targets_mean": 5893.2, + "valid_targets_min": 1321 + }, + { + "epoch": 3.3451071221771858, + "grad_norm": 0.5240008941141455, + "learning_rate": 2.499682696552149e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13496187329292297, + "step": 2890, + "valid_targets_mean": 6733.1, + "valid_targets_min": 2373 + }, + { + "epoch": 3.350897510133179, + "grad_norm": 0.5844435666177288, + "learning_rate": 2.4940918624474068e-05, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13430002331733704, + "step": 2895, + "valid_targets_mean": 4825.4, + "valid_targets_min": 2745 + }, + { + "epoch": 3.356687898089172, + "grad_norm": 0.5368504900216172, + "learning_rate": 2.488496913336546e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11826002597808838, + "step": 2900, + "valid_targets_mean": 5337.6, + "valid_targets_min": 1089 + }, + { + "epoch": 3.362478286045165, + "grad_norm": 0.5812257689605878, + "learning_rate": 2.482897895816671e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10885634273290634, + "step": 2905, + "valid_targets_mean": 3264.4, + "valid_targets_min": 627 + }, + { + "epoch": 3.368268674001158, + "grad_norm": 0.5854102363595578, + "learning_rate": 2.477294856518769e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14446288347244263, + "step": 2910, + "valid_targets_mean": 4381.6, + "valid_targets_min": 2605 + }, + { + "epoch": 3.3740590619571513, + "grad_norm": 0.5417087622693278, + "learning_rate": 2.4716878421073224e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13980954885482788, + "step": 2915, + "valid_targets_mean": 5933.9, + "valid_targets_min": 1899 + }, + { + "epoch": 3.379849449913144, + "grad_norm": 0.5465882725499704, + "learning_rate": 2.46607689927992e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.128583624958992, + "step": 2920, + "valid_targets_mean": 4483.6, + "valid_targets_min": 2747 + }, + { + "epoch": 3.385639837869137, + "grad_norm": 0.5592367972407115, + "learning_rate": 2.460462074766868e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12015688419342041, + "step": 2925, + "valid_targets_mean": 5165.4, + "valid_targets_min": 2144 + }, + { + "epoch": 3.3914302258251303, + "grad_norm": 0.5816389165036916, + "learning_rate": 2.4548434153308007e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11116787046194077, + "step": 2930, + "valid_targets_mean": 4143.4, + "valid_targets_min": 1461 + }, + { + "epoch": 3.397220613781123, + "grad_norm": 0.6260987341214326, + "learning_rate": 2.4492209677662923e-05, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10702268779277802, + "step": 2935, + "valid_targets_mean": 4166.4, + "valid_targets_min": 1366 + }, + { + "epoch": 3.4030110017371165, + "grad_norm": 0.6424454665235011, + "learning_rate": 2.4435947788994642e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11391671001911163, + "step": 2940, + "valid_targets_mean": 3409.9, + "valid_targets_min": 2050 + }, + { + "epoch": 3.4088013896931093, + "grad_norm": 0.6973884546334223, + "learning_rate": 2.4379648955875994e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11070425808429718, + "step": 2945, + "valid_targets_mean": 2586.6, + "valid_targets_min": 746 + }, + { + "epoch": 3.4145917776491026, + "grad_norm": 0.6467746768147802, + "learning_rate": 2.4323313647187484e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11300863325595856, + "step": 2950, + "valid_targets_mean": 3737.1, + "valid_targets_min": 1538 + }, + { + "epoch": 3.4203821656050954, + "grad_norm": 0.5682308131029035, + "learning_rate": 2.4266942332113387e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10985110700130463, + "step": 2955, + "valid_targets_mean": 3841.8, + "valid_targets_min": 2006 + }, + { + "epoch": 3.4261725535610887, + "grad_norm": 0.6742796384715897, + "learning_rate": 2.4210535480137892e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12906578183174133, + "step": 2960, + "valid_targets_mean": 3333.8, + "valid_targets_min": 2227 + }, + { + "epoch": 3.4319629415170816, + "grad_norm": 0.5861746371987969, + "learning_rate": 2.415409356104112e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11130131781101227, + "step": 2965, + "valid_targets_mean": 4117.2, + "valid_targets_min": 1323 + }, + { + "epoch": 3.437753329473075, + "grad_norm": 0.6457673256720572, + "learning_rate": 2.409761704489526e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14976799488067627, + "step": 2970, + "valid_targets_mean": 4786.8, + "valid_targets_min": 2809 + }, + { + "epoch": 3.4435437174290677, + "grad_norm": 0.6343807618423737, + "learning_rate": 2.404110640206064e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10772261023521423, + "step": 2975, + "valid_targets_mean": 3456.8, + "valid_targets_min": 1365 + }, + { + "epoch": 3.449334105385061, + "grad_norm": 0.5690937521073623, + "learning_rate": 2.3984562103181803e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13893179595470428, + "step": 2980, + "valid_targets_mean": 5177.6, + "valid_targets_min": 3669 + }, + { + "epoch": 3.455124493341054, + "grad_norm": 0.6937980857106957, + "learning_rate": 2.3927984619183603e-05, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11245802044868469, + "step": 2985, + "valid_targets_mean": 2766.8, + "valid_targets_min": 1505 + }, + { + "epoch": 3.4609148812970467, + "grad_norm": 0.658770834743505, + "learning_rate": 2.387137442126726e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12539096176624298, + "step": 2990, + "valid_targets_mean": 3500.2, + "valid_targets_min": 2541 + }, + { + "epoch": 3.46670526925304, + "grad_norm": 0.613959744188834, + "learning_rate": 2.3814731980906473e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10480717569589615, + "step": 2995, + "valid_targets_mean": 3866.1, + "valid_targets_min": 699 + }, + { + "epoch": 3.472495657209033, + "grad_norm": 0.6400793903784873, + "learning_rate": 2.3758057769843442e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11400254815816879, + "step": 3000, + "valid_targets_mean": 3536.1, + "valid_targets_min": 648 + }, + { + "epoch": 3.478286045165026, + "grad_norm": 0.5860990185111562, + "learning_rate": 2.3701352260084985e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12419473379850388, + "step": 3005, + "valid_targets_mean": 4126.6, + "valid_targets_min": 2511 + }, + { + "epoch": 3.484076433121019, + "grad_norm": 0.692113026787558, + "learning_rate": 2.3644615923898587e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1291700005531311, + "step": 3010, + "valid_targets_mean": 3400.0, + "valid_targets_min": 2831 + }, + { + "epoch": 3.4898668210770123, + "grad_norm": 0.6269413089966849, + "learning_rate": 2.358784923380846e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11353128403425217, + "step": 3015, + "valid_targets_mean": 3609.0, + "valid_targets_min": 1825 + }, + { + "epoch": 3.495657209033005, + "grad_norm": 0.6882797448449455, + "learning_rate": 2.3531052662591626e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13075870275497437, + "step": 3020, + "valid_targets_mean": 3237.6, + "valid_targets_min": 1108 + }, + { + "epoch": 3.5014475969889984, + "grad_norm": 0.6419042933539045, + "learning_rate": 2.347422668327396e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08648437261581421, + "step": 3025, + "valid_targets_mean": 2583.8, + "valid_targets_min": 1501 + }, + { + "epoch": 3.5072379849449913, + "grad_norm": 0.6538370467544622, + "learning_rate": 2.3417371769126266e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.138927161693573, + "step": 3030, + "valid_targets_mean": 3720.9, + "valid_targets_min": 2261 + }, + { + "epoch": 3.513028372900984, + "grad_norm": 0.6223594532534198, + "learning_rate": 2.3360488393660332e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1331544816493988, + "step": 3035, + "valid_targets_mean": 4486.6, + "valid_targets_min": 1716 + }, + { + "epoch": 3.5188187608569774, + "grad_norm": 0.6488372615899336, + "learning_rate": 2.330357703062498e-05, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10495131462812424, + "step": 3040, + "valid_targets_mean": 3092.4, + "valid_targets_min": 1281 + }, + { + "epoch": 3.5246091488129707, + "grad_norm": 0.6667477147551848, + "learning_rate": 2.3246638154002122e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09248582273721695, + "step": 3045, + "valid_targets_mean": 2772.8, + "valid_targets_min": 1697 + }, + { + "epoch": 3.5303995367689636, + "grad_norm": 0.6109343135643314, + "learning_rate": 2.3189672238002813e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10102705657482147, + "step": 3050, + "valid_targets_mean": 4052.0, + "valid_targets_min": 2306 + }, + { + "epoch": 3.5361899247249564, + "grad_norm": 0.5883806614334663, + "learning_rate": 2.3132679757063322e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1355743259191513, + "step": 3055, + "valid_targets_mean": 4673.9, + "valid_targets_min": 1965 + }, + { + "epoch": 3.5419803126809497, + "grad_norm": 0.6526251324104081, + "learning_rate": 2.307566118584114e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09953068941831589, + "step": 3060, + "valid_targets_mean": 2707.0, + "valid_targets_min": 1184 + }, + { + "epoch": 3.5477707006369426, + "grad_norm": 0.6749540950250833, + "learning_rate": 2.3018616999211053e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12256621569395065, + "step": 3065, + "valid_targets_mean": 3146.4, + "valid_targets_min": 1401 + }, + { + "epoch": 3.553561088592936, + "grad_norm": 0.6858757618445532, + "learning_rate": 2.296154767226119e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1241631805896759, + "step": 3070, + "valid_targets_mean": 3128.6, + "valid_targets_min": 2109 + }, + { + "epoch": 3.5593514765489287, + "grad_norm": 0.6661911939882548, + "learning_rate": 2.2904453680289062e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1004391461610794, + "step": 3075, + "valid_targets_mean": 2732.0, + "valid_targets_min": 2061 + }, + { + "epoch": 3.565141864504922, + "grad_norm": 0.6541639028198071, + "learning_rate": 2.2847335498797594e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12782573699951172, + "step": 3080, + "valid_targets_mean": 3449.1, + "valid_targets_min": 1580 + }, + { + "epoch": 3.570932252460915, + "grad_norm": 0.6420817772922169, + "learning_rate": 2.279019360349116e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09694606065750122, + "step": 3085, + "valid_targets_mean": 2976.4, + "valid_targets_min": 1577 + }, + { + "epoch": 3.576722640416908, + "grad_norm": 0.6115369181788524, + "learning_rate": 2.273302847027166e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10978230088949203, + "step": 3090, + "valid_targets_mean": 4427.0, + "valid_targets_min": 2095 + }, + { + "epoch": 3.582513028372901, + "grad_norm": 0.6203970033828395, + "learning_rate": 2.2675840575234505e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10813956707715988, + "step": 3095, + "valid_targets_mean": 3615.0, + "valid_targets_min": 1673 + }, + { + "epoch": 3.588303416328894, + "grad_norm": 0.5707467224855606, + "learning_rate": 2.2618630394664688e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12412257492542267, + "step": 3100, + "valid_targets_mean": 4451.8, + "valid_targets_min": 2964 + }, + { + "epoch": 3.594093804284887, + "grad_norm": 0.5894831777496278, + "learning_rate": 2.2561398405032807e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10445494204759598, + "step": 3105, + "valid_targets_mean": 3968.0, + "valid_targets_min": 2453 + }, + { + "epoch": 3.5998841922408804, + "grad_norm": 0.6194496140271808, + "learning_rate": 2.2504145082991085e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11659243702888489, + "step": 3110, + "valid_targets_mean": 3560.6, + "valid_targets_min": 1766 + }, + { + "epoch": 3.6056745801968733, + "grad_norm": 0.6387257036814868, + "learning_rate": 2.2446870905369416e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12865771353244781, + "step": 3115, + "valid_targets_mean": 4773.9, + "valid_targets_min": 1757 + }, + { + "epoch": 3.611464968152866, + "grad_norm": 0.6765494323792554, + "learning_rate": 2.2389576349171397e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13330453634262085, + "step": 3120, + "valid_targets_mean": 4200.1, + "valid_targets_min": 2203 + }, + { + "epoch": 3.6172553561088594, + "grad_norm": 0.6746232050436143, + "learning_rate": 2.233226189157033e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11752544343471527, + "step": 3125, + "valid_targets_mean": 3093.2, + "valid_targets_min": 1955 + }, + { + "epoch": 3.6230457440648522, + "grad_norm": 0.5915526608420583, + "learning_rate": 2.2274928009905267e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08885739743709564, + "step": 3130, + "valid_targets_mean": 2910.9, + "valid_targets_min": 1221 + }, + { + "epoch": 3.6288361320208455, + "grad_norm": 0.6390249261029404, + "learning_rate": 2.2217575181677048e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09216712415218353, + "step": 3135, + "valid_targets_mean": 3756.5, + "valid_targets_min": 2506 + }, + { + "epoch": 3.6346265199768384, + "grad_norm": 0.6412730416327591, + "learning_rate": 2.2160203884544293e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10596312582492828, + "step": 3140, + "valid_targets_mean": 4419.8, + "valid_targets_min": 2579 + }, + { + "epoch": 3.6404169079328312, + "grad_norm": 0.6832458547802231, + "learning_rate": 2.2102814596319444e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13834300637245178, + "step": 3145, + "valid_targets_mean": 3728.2, + "valid_targets_min": 464 + }, + { + "epoch": 3.6462072958888245, + "grad_norm": 0.621863267559752, + "learning_rate": 2.204540779496477e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11769835650920868, + "step": 3150, + "valid_targets_mean": 3660.8, + "valid_targets_min": 1003 + }, + { + "epoch": 3.651997683844818, + "grad_norm": 0.6072219530140257, + "learning_rate": 2.1987983958588413e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10092820227146149, + "step": 3155, + "valid_targets_mean": 3318.9, + "valid_targets_min": 1446 + }, + { + "epoch": 3.6577880718008107, + "grad_norm": 0.7761619084531957, + "learning_rate": 2.193054356544039e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10936201363801956, + "step": 3160, + "valid_targets_mean": 3807.8, + "valid_targets_min": 1105 + }, + { + "epoch": 3.6635784597568035, + "grad_norm": 0.582808255884101, + "learning_rate": 2.1873087093908588e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11235076189041138, + "step": 3165, + "valid_targets_mean": 4257.2, + "valid_targets_min": 1460 + }, + { + "epoch": 3.669368847712797, + "grad_norm": 0.6301435665504801, + "learning_rate": 2.1815615022514826e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11589612066745758, + "step": 3170, + "valid_targets_mean": 3643.8, + "valid_targets_min": 1172 + }, + { + "epoch": 3.6751592356687897, + "grad_norm": 0.5881241674908854, + "learning_rate": 2.1758127829910835e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11802739650011063, + "step": 3175, + "valid_targets_mean": 4325.5, + "valid_targets_min": 1818 + }, + { + "epoch": 3.680949623624783, + "grad_norm": 0.6692753816102861, + "learning_rate": 2.1700625994874276e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10609833896160126, + "step": 3180, + "valid_targets_mean": 3680.4, + "valid_targets_min": 1535 + }, + { + "epoch": 3.686740011580776, + "grad_norm": 0.6327977443669413, + "learning_rate": 2.1643109996304768e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11642224341630936, + "step": 3185, + "valid_targets_mean": 4250.5, + "valid_targets_min": 1814 + }, + { + "epoch": 3.692530399536769, + "grad_norm": 0.7185575308202862, + "learning_rate": 2.158558031321988e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10359126329421997, + "step": 3190, + "valid_targets_mean": 2750.5, + "valid_targets_min": 1073 + }, + { + "epoch": 3.698320787492762, + "grad_norm": 0.6112249812545777, + "learning_rate": 2.1528037424751176e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11483891308307648, + "step": 3195, + "valid_targets_mean": 4409.8, + "valid_targets_min": 3131 + }, + { + "epoch": 3.7041111754487552, + "grad_norm": 0.622363696526355, + "learning_rate": 2.1470481810140168e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09661306440830231, + "step": 3200, + "valid_targets_mean": 3341.8, + "valid_targets_min": 1079 + }, + { + "epoch": 3.709901563404748, + "grad_norm": 0.6207658574507436, + "learning_rate": 2.1412913948734365e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10788443684577942, + "step": 3205, + "valid_targets_mean": 3970.4, + "valid_targets_min": 1143 + }, + { + "epoch": 3.715691951360741, + "grad_norm": 0.6145454281573289, + "learning_rate": 2.135533431998329e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09064505994319916, + "step": 3210, + "valid_targets_mean": 3327.1, + "valid_targets_min": 1981 + }, + { + "epoch": 3.7214823393167342, + "grad_norm": 0.7142208351524535, + "learning_rate": 2.129774340343446e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10260207951068878, + "step": 3215, + "valid_targets_mean": 2796.6, + "valid_targets_min": 1410 + }, + { + "epoch": 3.7272727272727275, + "grad_norm": 0.6313431287384368, + "learning_rate": 2.1240141678729387e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09830041974782944, + "step": 3220, + "valid_targets_mean": 2989.8, + "valid_targets_min": 1691 + }, + { + "epoch": 3.7330631152287204, + "grad_norm": 0.9358403011904364, + "learning_rate": 2.1182529625599617e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.124494269490242, + "step": 3225, + "valid_targets_mean": 4091.6, + "valid_targets_min": 2318 + }, + { + "epoch": 3.738853503184713, + "grad_norm": 0.5973542139833429, + "learning_rate": 2.1124907723862706e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09477721154689789, + "step": 3230, + "valid_targets_mean": 3572.8, + "valid_targets_min": 1509 + }, + { + "epoch": 3.7446438911407065, + "grad_norm": 0.6398607876411986, + "learning_rate": 2.106727645341824e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11783312261104584, + "step": 3235, + "valid_targets_mean": 3824.9, + "valid_targets_min": 1601 + }, + { + "epoch": 3.7504342790966994, + "grad_norm": 0.6817149155548572, + "learning_rate": 2.100963629424382e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1156693622469902, + "step": 3240, + "valid_targets_mean": 3558.9, + "valid_targets_min": 1281 + }, + { + "epoch": 3.7562246670526926, + "grad_norm": 0.5728050591821362, + "learning_rate": 2.095198772639109e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09249010682106018, + "step": 3245, + "valid_targets_mean": 3256.1, + "valid_targets_min": 1445 + }, + { + "epoch": 3.7620150550086855, + "grad_norm": 0.66348305148334, + "learning_rate": 2.089433122998172e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13942071795463562, + "step": 3250, + "valid_targets_mean": 4249.1, + "valid_targets_min": 3677 + }, + { + "epoch": 3.767805442964679, + "grad_norm": 0.650179732240423, + "learning_rate": 2.0836667285203403e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132462739944458, + "step": 3255, + "valid_targets_mean": 4770.8, + "valid_targets_min": 2121 + }, + { + "epoch": 3.7735958309206716, + "grad_norm": 0.608692207261934, + "learning_rate": 2.077899637230588e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11029618978500366, + "step": 3260, + "valid_targets_mean": 3527.8, + "valid_targets_min": 1202 + }, + { + "epoch": 3.779386218876665, + "grad_norm": 0.6256858394345893, + "learning_rate": 2.0721318971596915e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11849434673786163, + "step": 3265, + "valid_targets_mean": 4491.6, + "valid_targets_min": 3504 + }, + { + "epoch": 3.7851766068326578, + "grad_norm": 0.574664428323433, + "learning_rate": 2.0663635563438306e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1048889011144638, + "step": 3270, + "valid_targets_mean": 4129.0, + "valid_targets_min": 1849 + }, + { + "epoch": 3.7909669947886506, + "grad_norm": 0.7801975367950815, + "learning_rate": 2.0605946628241895e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16848134994506836, + "step": 3275, + "valid_targets_mean": 4321.4, + "valid_targets_min": 3034 + }, + { + "epoch": 3.796757382744644, + "grad_norm": 0.6443689938605106, + "learning_rate": 2.0548252646465544e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12259537726640701, + "step": 3280, + "valid_targets_mean": 4204.4, + "valid_targets_min": 2310 + }, + { + "epoch": 3.802547770700637, + "grad_norm": 0.6629375966318463, + "learning_rate": 2.0490554098609144e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11784237623214722, + "step": 3285, + "valid_targets_mean": 4133.1, + "valid_targets_min": 2310 + }, + { + "epoch": 3.80833815865663, + "grad_norm": 0.6779347834232181, + "learning_rate": 2.0432851465210618e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1057584285736084, + "step": 3290, + "valid_targets_mean": 2686.6, + "valid_targets_min": 802 + }, + { + "epoch": 3.814128546612623, + "grad_norm": 0.6612416426138654, + "learning_rate": 2.0375145226841916e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1279018074274063, + "step": 3295, + "valid_targets_mean": 3936.5, + "valid_targets_min": 1433 + }, + { + "epoch": 3.819918934568616, + "grad_norm": 0.5944207402613865, + "learning_rate": 2.0317435864105017e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12686294317245483, + "step": 3300, + "valid_targets_mean": 4604.6, + "valid_targets_min": 2467 + }, + { + "epoch": 3.825709322524609, + "grad_norm": 0.8029093931170834, + "learning_rate": 2.025972385762791e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10743986815214157, + "step": 3305, + "valid_targets_mean": 3055.6, + "valid_targets_min": 1501 + }, + { + "epoch": 3.8314997104806023, + "grad_norm": 0.7435607132336812, + "learning_rate": 2.0202009688060603e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.140691339969635, + "step": 3310, + "valid_targets_mean": 3669.0, + "valid_targets_min": 2260 + }, + { + "epoch": 3.837290098436595, + "grad_norm": 0.6617232566726545, + "learning_rate": 2.0144293836071132e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11230485141277313, + "step": 3315, + "valid_targets_mean": 3529.6, + "valid_targets_min": 1325 + }, + { + "epoch": 3.843080486392588, + "grad_norm": 0.5750730658739346, + "learning_rate": 2.0086576782341537e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1011202335357666, + "step": 3320, + "valid_targets_mean": 4397.6, + "valid_targets_min": 2754 + }, + { + "epoch": 3.8488708743485813, + "grad_norm": 0.6146915424507076, + "learning_rate": 2.0028859007563857e-05, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09985917806625366, + "step": 3325, + "valid_targets_mean": 3566.8, + "valid_targets_min": 1991 + }, + { + "epoch": 3.8546612623045746, + "grad_norm": 0.6660943821357719, + "learning_rate": 1.997114099243615e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12243643403053284, + "step": 3330, + "valid_targets_mean": 2909.9, + "valid_targets_min": 1076 + }, + { + "epoch": 3.8604516502605675, + "grad_norm": 0.6308468996973874, + "learning_rate": 1.9913423217658466e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08638469874858856, + "step": 3335, + "valid_targets_mean": 3275.1, + "valid_targets_min": 521 + }, + { + "epoch": 3.8662420382165603, + "grad_norm": 0.5950358153672971, + "learning_rate": 1.9855706163928868e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09733293950557709, + "step": 3340, + "valid_targets_mean": 3767.1, + "valid_targets_min": 2185 + }, + { + "epoch": 3.8720324261725536, + "grad_norm": 0.6053995285078753, + "learning_rate": 1.9797990311939394e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1037420853972435, + "step": 3345, + "valid_targets_mean": 3945.8, + "valid_targets_min": 2024 + }, + { + "epoch": 3.8778228141285465, + "grad_norm": 0.6214312013380663, + "learning_rate": 1.97402761423721e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0892077088356018, + "step": 3350, + "valid_targets_mean": 3204.5, + "valid_targets_min": 1777 + }, + { + "epoch": 3.8836132020845398, + "grad_norm": 0.643149933538212, + "learning_rate": 1.968256413589499e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09115341305732727, + "step": 3355, + "valid_targets_mean": 3268.5, + "valid_targets_min": 1965 + }, + { + "epoch": 3.8894035900405326, + "grad_norm": 0.6291381948112222, + "learning_rate": 1.962485477315809e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13257244229316711, + "step": 3360, + "valid_targets_mean": 4630.0, + "valid_targets_min": 1307 + }, + { + "epoch": 3.895193977996526, + "grad_norm": 0.6454990402044124, + "learning_rate": 1.956714853478939e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1468641608953476, + "step": 3365, + "valid_targets_mean": 5090.8, + "valid_targets_min": 2762 + }, + { + "epoch": 3.9009843659525187, + "grad_norm": 0.7135917015837402, + "learning_rate": 1.9509445901390863e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10307130217552185, + "step": 3370, + "valid_targets_mean": 2930.9, + "valid_targets_min": 1035 + }, + { + "epoch": 3.906774753908512, + "grad_norm": 0.6143842180197991, + "learning_rate": 1.9451747353534463e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13558322191238403, + "step": 3375, + "valid_targets_mean": 4369.2, + "valid_targets_min": 1876 + }, + { + "epoch": 3.912565141864505, + "grad_norm": 0.5959476877785364, + "learning_rate": 1.9394053371758108e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09872700273990631, + "step": 3380, + "valid_targets_mean": 3840.5, + "valid_targets_min": 2303 + }, + { + "epoch": 3.9183555298204977, + "grad_norm": 0.6416421146123354, + "learning_rate": 1.93363644365617e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09142628312110901, + "step": 3385, + "valid_targets_mean": 3454.0, + "valid_targets_min": 1434 + }, + { + "epoch": 3.924145917776491, + "grad_norm": 0.5691149807130056, + "learning_rate": 1.9278681028403095e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1255941390991211, + "step": 3390, + "valid_targets_mean": 4752.5, + "valid_targets_min": 1496 + }, + { + "epoch": 3.9299363057324843, + "grad_norm": 0.6745112014980869, + "learning_rate": 1.9221003627694127e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1109854057431221, + "step": 3395, + "valid_targets_mean": 3358.0, + "valid_targets_min": 1518 + }, + { + "epoch": 3.935726693688477, + "grad_norm": 0.6071462519506634, + "learning_rate": 1.9163332714796604e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10561128705739975, + "step": 3400, + "valid_targets_mean": 4146.1, + "valid_targets_min": 1152 + }, + { + "epoch": 3.94151708164447, + "grad_norm": 0.6426584726925665, + "learning_rate": 1.9105668770018287e-05, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11722976714372635, + "step": 3405, + "valid_targets_mean": 3758.4, + "valid_targets_min": 1598 + }, + { + "epoch": 3.9473074696004633, + "grad_norm": 0.617643112012234, + "learning_rate": 1.9048012273608912e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1155039593577385, + "step": 3410, + "valid_targets_mean": 4272.2, + "valid_targets_min": 1611 + }, + { + "epoch": 3.953097857556456, + "grad_norm": 0.6750768602220908, + "learning_rate": 1.8990363705756182e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09991435706615448, + "step": 3415, + "valid_targets_mean": 3249.9, + "valid_targets_min": 1490 + }, + { + "epoch": 3.9588882455124494, + "grad_norm": 0.6683691782809446, + "learning_rate": 1.8932723546581767e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09730266779661179, + "step": 3420, + "valid_targets_mean": 2868.1, + "valid_targets_min": 2355 + }, + { + "epoch": 3.9646786334684423, + "grad_norm": 0.6833310701250812, + "learning_rate": 1.88750922761373e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11862599104642868, + "step": 3425, + "valid_targets_mean": 3776.5, + "valid_targets_min": 1743 + }, + { + "epoch": 3.9704690214244356, + "grad_norm": 0.555511474925617, + "learning_rate": 1.881747037440039e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09805724769830704, + "step": 3430, + "valid_targets_mean": 4739.0, + "valid_targets_min": 2933 + }, + { + "epoch": 3.9762594093804284, + "grad_norm": 0.6519285293424276, + "learning_rate": 1.8759858321270616e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12541580200195312, + "step": 3435, + "valid_targets_mean": 3573.1, + "valid_targets_min": 2433 + }, + { + "epoch": 3.9820497973364217, + "grad_norm": 0.6896268921953539, + "learning_rate": 1.8702256596565547e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12650898098945618, + "step": 3440, + "valid_targets_mean": 3635.6, + "valid_targets_min": 2100 + }, + { + "epoch": 3.9878401852924146, + "grad_norm": 0.7276116433866298, + "learning_rate": 1.864466568001671e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09770257771015167, + "step": 3445, + "valid_targets_mean": 3246.2, + "valid_targets_min": 1365 + }, + { + "epoch": 3.9936305732484074, + "grad_norm": 0.7005066198558177, + "learning_rate": 1.858708605126563e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1187184602022171, + "step": 3450, + "valid_targets_mean": 4794.2, + "valid_targets_min": 1572 + }, + { + "epoch": 3.9994209612044007, + "grad_norm": 1.1531137671857628, + "learning_rate": 1.8529518189859842e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11856088787317276, + "step": 3455, + "valid_targets_mean": 3688.5, + "valid_targets_min": 876 + }, + { + "epoch": 4.004632310364794, + "grad_norm": 0.6338487908223315, + "learning_rate": 1.847196257524883e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1476893126964569, + "step": 3460, + "valid_targets_mean": 8029.9, + "valid_targets_min": 6142 + }, + { + "epoch": 4.010422698320787, + "grad_norm": 0.47487343877733607, + "learning_rate": 1.8414419686780124e-05, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09128034114837646, + "step": 3465, + "valid_targets_mean": 6664.0, + "valid_targets_min": 5470 + }, + { + "epoch": 4.016213086276781, + "grad_norm": 0.4441691540656432, + "learning_rate": 1.8356890003695242e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09573540836572647, + "step": 3470, + "valid_targets_mean": 7126.9, + "valid_targets_min": 5368 + }, + { + "epoch": 4.022003474232774, + "grad_norm": 0.6934913878421209, + "learning_rate": 1.829937400512573e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08647888153791428, + "step": 3475, + "valid_targets_mean": 7634.9, + "valid_targets_min": 5614 + }, + { + "epoch": 4.027793862188767, + "grad_norm": 0.3845706850810253, + "learning_rate": 1.824187217008917e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08422435820102692, + "step": 3480, + "valid_targets_mean": 8630.8, + "valid_targets_min": 6362 + }, + { + "epoch": 4.033584250144759, + "grad_norm": 0.4508861479189168, + "learning_rate": 1.8184384977485177e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09215463697910309, + "step": 3485, + "valid_targets_mean": 7383.6, + "valid_targets_min": 5844 + }, + { + "epoch": 4.039374638100753, + "grad_norm": 0.40329975694327114, + "learning_rate": 1.812691290609142e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10451547056436539, + "step": 3490, + "valid_targets_mean": 8017.0, + "valid_targets_min": 5283 + }, + { + "epoch": 4.045165026056746, + "grad_norm": 0.4056281920802866, + "learning_rate": 1.8069456434559618e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07850828766822815, + "step": 3495, + "valid_targets_mean": 7051.1, + "valid_targets_min": 4135 + }, + { + "epoch": 4.050955414012739, + "grad_norm": 0.4265931631056118, + "learning_rate": 1.8012016041411593e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07563488185405731, + "step": 3500, + "valid_targets_mean": 5370.4, + "valid_targets_min": 3919 + }, + { + "epoch": 4.056745801968732, + "grad_norm": 0.45089862246410817, + "learning_rate": 1.7954592205035235e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07701076567173004, + "step": 3505, + "valid_targets_mean": 6063.6, + "valid_targets_min": 4863 + }, + { + "epoch": 4.0625361899247245, + "grad_norm": 0.43651516381480626, + "learning_rate": 1.7897185403680562e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08604348450899124, + "step": 3510, + "valid_targets_mean": 7001.2, + "valid_targets_min": 6020 + }, + { + "epoch": 4.068326577880718, + "grad_norm": 0.44093517531962334, + "learning_rate": 1.7839796115455707e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.085826575756073, + "step": 3515, + "valid_targets_mean": 6181.9, + "valid_targets_min": 4669 + }, + { + "epoch": 4.074116965836711, + "grad_norm": 0.3999835630513633, + "learning_rate": 1.7782424818322955e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07994566857814789, + "step": 3520, + "valid_targets_mean": 6250.2, + "valid_targets_min": 4900 + }, + { + "epoch": 4.079907353792704, + "grad_norm": 0.40031048525427254, + "learning_rate": 1.7725071990094743e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07790108025074005, + "step": 3525, + "valid_targets_mean": 7156.4, + "valid_targets_min": 5323 + }, + { + "epoch": 4.085697741748697, + "grad_norm": 0.4184529184232665, + "learning_rate": 1.766773810842968e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08983001112937927, + "step": 3530, + "valid_targets_mean": 7215.8, + "valid_targets_min": 4720 + }, + { + "epoch": 4.091488129704691, + "grad_norm": 0.416428854975788, + "learning_rate": 1.761042365082861e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08676470816135406, + "step": 3535, + "valid_targets_mean": 6550.8, + "valid_targets_min": 5739 + }, + { + "epoch": 4.097278517660683, + "grad_norm": 0.4216208509272915, + "learning_rate": 1.755312909463059e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08913668990135193, + "step": 3540, + "valid_targets_mean": 6726.8, + "valid_targets_min": 4540 + }, + { + "epoch": 4.103068905616676, + "grad_norm": 0.5196781964099998, + "learning_rate": 1.749585491700892e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09615586698055267, + "step": 3545, + "valid_targets_mean": 4573.4, + "valid_targets_min": 1039 + }, + { + "epoch": 4.108859293572669, + "grad_norm": 0.43286538062723917, + "learning_rate": 1.7438601594967196e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08920808136463165, + "step": 3550, + "valid_targets_mean": 6430.0, + "valid_targets_min": 5412 + }, + { + "epoch": 4.114649681528663, + "grad_norm": 0.7025560718319794, + "learning_rate": 1.7381369605335312e-05, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02563580311834812, + "step": 3555, + "valid_targets_mean": 583.1, + "valid_targets_min": 158 + }, + { + "epoch": 4.120440069484656, + "grad_norm": 6.598428220612047, + "learning_rate": 1.7324159424765502e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08991201221942902, + "step": 3560, + "valid_targets_mean": 8230.9, + "valid_targets_min": 5061 + }, + { + "epoch": 4.1262304574406485, + "grad_norm": 0.44186789396424986, + "learning_rate": 1.726697152972835e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09638067334890366, + "step": 3565, + "valid_targets_mean": 7238.9, + "valid_targets_min": 5112 + }, + { + "epoch": 4.132020845396641, + "grad_norm": 0.4450781677124384, + "learning_rate": 1.7209806396508847e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07829023897647858, + "step": 3570, + "valid_targets_mean": 6322.8, + "valid_targets_min": 4867 + }, + { + "epoch": 4.137811233352634, + "grad_norm": 0.4122268360572617, + "learning_rate": 1.7152664501202413e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08773788809776306, + "step": 3575, + "valid_targets_mean": 7887.0, + "valid_targets_min": 6004 + }, + { + "epoch": 4.143601621308628, + "grad_norm": 0.39107096983030126, + "learning_rate": 1.7095546319710944e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07779907435178757, + "step": 3580, + "valid_targets_mean": 7090.2, + "valid_targets_min": 4917 + }, + { + "epoch": 4.149392009264621, + "grad_norm": 0.41031771658626737, + "learning_rate": 1.703845232773881e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08271464705467224, + "step": 3585, + "valid_targets_mean": 7441.9, + "valid_targets_min": 4202 + }, + { + "epoch": 4.155182397220614, + "grad_norm": 0.5269944341101026, + "learning_rate": 1.698138300078895e-05, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08067260682582855, + "step": 3590, + "valid_targets_mean": 7901.8, + "valid_targets_min": 5060 + }, + { + "epoch": 4.1609727851766065, + "grad_norm": 0.40568727713688824, + "learning_rate": 1.692433881415887e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07333912700414658, + "step": 3595, + "valid_targets_mean": 8557.6, + "valid_targets_min": 6331 + }, + { + "epoch": 4.1667631731326, + "grad_norm": 0.4594855492171708, + "learning_rate": 1.686732024293668e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08284049481153488, + "step": 3600, + "valid_targets_mean": 4976.4, + "valid_targets_min": 2847 + }, + { + "epoch": 4.172553561088593, + "grad_norm": 0.4848900085631287, + "learning_rate": 1.6810327761997193e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09487798810005188, + "step": 3605, + "valid_targets_mean": 7129.1, + "valid_targets_min": 5331 + }, + { + "epoch": 4.178343949044586, + "grad_norm": 0.45541337635011625, + "learning_rate": 1.6753361845997888e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09323504567146301, + "step": 3610, + "valid_targets_mean": 6864.0, + "valid_targets_min": 4697 + }, + { + "epoch": 4.184134337000579, + "grad_norm": 0.48558524786208335, + "learning_rate": 1.6696422969375027e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08608924597501755, + "step": 3615, + "valid_targets_mean": 6631.9, + "valid_targets_min": 4623 + }, + { + "epoch": 4.1899247249565725, + "grad_norm": 0.4337802573922538, + "learning_rate": 1.6639511606339674e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08183959126472473, + "step": 3620, + "valid_targets_mean": 6330.4, + "valid_targets_min": 5424 + }, + { + "epoch": 4.195715112912565, + "grad_norm": 0.4401264336839986, + "learning_rate": 1.6582628230873737e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08481311798095703, + "step": 3625, + "valid_targets_mean": 6814.8, + "valid_targets_min": 4964 + }, + { + "epoch": 4.201505500868558, + "grad_norm": 0.46448901960485245, + "learning_rate": 1.652577331672605e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09653014689683914, + "step": 3630, + "valid_targets_mean": 6915.8, + "valid_targets_min": 5321 + }, + { + "epoch": 4.207295888824551, + "grad_norm": 0.43792282662310117, + "learning_rate": 1.646894733740838e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09070654213428497, + "step": 3635, + "valid_targets_mean": 6464.5, + "valid_targets_min": 5185 + }, + { + "epoch": 4.213086276780544, + "grad_norm": 0.42338152090227527, + "learning_rate": 1.6412150766191545e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09077668935060501, + "step": 3640, + "valid_targets_mean": 7048.6, + "valid_targets_min": 4913 + }, + { + "epoch": 4.218876664736538, + "grad_norm": 0.4228905286663746, + "learning_rate": 1.635538407610142e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09877828508615494, + "step": 3645, + "valid_targets_mean": 7469.2, + "valid_targets_min": 5426 + }, + { + "epoch": 4.2246670526925305, + "grad_norm": 0.4518896532340265, + "learning_rate": 1.6298647739915018e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09248585999011993, + "step": 3650, + "valid_targets_mean": 6819.4, + "valid_targets_min": 5286 + }, + { + "epoch": 4.230457440648523, + "grad_norm": 0.43493724361595565, + "learning_rate": 1.624194223015656e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09276282787322998, + "step": 3655, + "valid_targets_mean": 7107.5, + "valid_targets_min": 5201 + }, + { + "epoch": 4.236247828604516, + "grad_norm": 0.6009769366191408, + "learning_rate": 1.6185268019093534e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08548557013273239, + "step": 3660, + "valid_targets_mean": 5642.6, + "valid_targets_min": 4838 + }, + { + "epoch": 4.24203821656051, + "grad_norm": 0.9590245633099599, + "learning_rate": 1.612862557873275e-05, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14384236931800842, + "step": 3665, + "valid_targets_mean": 2054.1, + "valid_targets_min": 134 + }, + { + "epoch": 4.247828604516503, + "grad_norm": 0.48201038621071807, + "learning_rate": 1.6072015380816407e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08267612755298615, + "step": 3670, + "valid_targets_mean": 6686.4, + "valid_targets_min": 5256 + }, + { + "epoch": 4.253618992472496, + "grad_norm": 0.44266137339887035, + "learning_rate": 1.6015437896818204e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0936516746878624, + "step": 3675, + "valid_targets_mean": 6216.6, + "valid_targets_min": 5542 + }, + { + "epoch": 4.2594093804284885, + "grad_norm": 0.4319241530250814, + "learning_rate": 1.5958893597939363e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08643292635679245, + "step": 3680, + "valid_targets_mean": 6486.1, + "valid_targets_min": 5531 + }, + { + "epoch": 4.265199768384482, + "grad_norm": 0.44892852652887477, + "learning_rate": 1.5902382955104745e-05, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08577564358711243, + "step": 3685, + "valid_targets_mean": 5797.4, + "valid_targets_min": 3911 + }, + { + "epoch": 4.270990156340475, + "grad_norm": 0.45772014152728996, + "learning_rate": 1.5845906438958884e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08880448341369629, + "step": 3690, + "valid_targets_mean": 6357.5, + "valid_targets_min": 5195 + }, + { + "epoch": 4.276780544296468, + "grad_norm": 0.4873851162289285, + "learning_rate": 1.5789464519862108e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0878804624080658, + "step": 3695, + "valid_targets_mean": 5808.9, + "valid_targets_min": 4959 + }, + { + "epoch": 4.282570932252461, + "grad_norm": 0.5766157379649499, + "learning_rate": 1.5733057667886617e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056061677634716034, + "step": 3700, + "valid_targets_mean": 2591.5, + "valid_targets_min": 842 + }, + { + "epoch": 4.288361320208454, + "grad_norm": 0.4927049290514214, + "learning_rate": 1.567668635281253e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08399844169616699, + "step": 3705, + "valid_targets_mean": 6006.9, + "valid_targets_min": 4769 + }, + { + "epoch": 4.294151708164447, + "grad_norm": 0.5355797089915041, + "learning_rate": 1.5620351044124013e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0904797911643982, + "step": 3710, + "valid_targets_mean": 6764.2, + "valid_targets_min": 5046 + }, + { + "epoch": 4.29994209612044, + "grad_norm": 0.4901600888939512, + "learning_rate": 1.556405221100536e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09228479862213135, + "step": 3715, + "valid_targets_mean": 6055.8, + "valid_targets_min": 5182 + }, + { + "epoch": 4.305732484076433, + "grad_norm": 0.4023050347526533, + "learning_rate": 1.5507790322337087e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08523489534854889, + "step": 3720, + "valid_targets_mean": 7101.6, + "valid_targets_min": 5366 + }, + { + "epoch": 4.311522872032426, + "grad_norm": 0.429715793834325, + "learning_rate": 1.5451565846691997e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0880584716796875, + "step": 3725, + "valid_targets_mean": 7111.9, + "valid_targets_min": 5192 + }, + { + "epoch": 4.31731325998842, + "grad_norm": 0.47100851970652413, + "learning_rate": 1.5395379252331323e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08530952036380768, + "step": 3730, + "valid_targets_mean": 5509.9, + "valid_targets_min": 4790 + }, + { + "epoch": 4.3231036479444125, + "grad_norm": 0.7478325913135395, + "learning_rate": 1.533923100720081e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14090920984745026, + "step": 3735, + "valid_targets_mean": 3874.2, + "valid_targets_min": 1457 + }, + { + "epoch": 4.328894035900405, + "grad_norm": 0.5767617826201782, + "learning_rate": 1.528312157892678e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09905903041362762, + "step": 3740, + "valid_targets_mean": 3920.0, + "valid_targets_min": 533 + }, + { + "epoch": 4.334684423856398, + "grad_norm": 0.5861299199156408, + "learning_rate": 1.5227051434812317e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10419026762247086, + "step": 3745, + "valid_targets_mean": 4077.0, + "valid_targets_min": 1752 + }, + { + "epoch": 4.340474811812391, + "grad_norm": 0.60786331148038, + "learning_rate": 1.5171021041833294e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14355988800525665, + "step": 3750, + "valid_targets_mean": 4813.5, + "valid_targets_min": 1126 + }, + { + "epoch": 4.346265199768385, + "grad_norm": 0.5655458438425296, + "learning_rate": 1.5115030866634542e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10995228588581085, + "step": 3755, + "valid_targets_mean": 4727.9, + "valid_targets_min": 3308 + }, + { + "epoch": 4.352055587724378, + "grad_norm": 0.665312876122124, + "learning_rate": 1.5059081375525937e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14005246758460999, + "step": 3760, + "valid_targets_mean": 5534.2, + "valid_targets_min": 1432 + }, + { + "epoch": 4.3578459756803705, + "grad_norm": 0.5884469602525774, + "learning_rate": 1.500317303447851e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11218597739934921, + "step": 3765, + "valid_targets_mean": 4703.4, + "valid_targets_min": 1541 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 0.530958275217967, + "learning_rate": 1.4947306309120598e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13557025790214539, + "step": 3770, + "valid_targets_mean": 6251.0, + "valid_targets_min": 1855 + }, + { + "epoch": 4.369426751592357, + "grad_norm": 0.5772578330417059, + "learning_rate": 1.489148166473391e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.083745077252388, + "step": 3775, + "valid_targets_mean": 3304.2, + "valid_targets_min": 1024 + }, + { + "epoch": 4.37521713954835, + "grad_norm": 0.5667875048921107, + "learning_rate": 1.483569956624972e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0941530242562294, + "step": 3780, + "valid_targets_mean": 3925.4, + "valid_targets_min": 2309 + }, + { + "epoch": 4.381007527504343, + "grad_norm": 0.6169957824519388, + "learning_rate": 1.4779960478244951e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10853932797908783, + "step": 3785, + "valid_targets_mean": 3988.9, + "valid_targets_min": 1401 + }, + { + "epoch": 4.386797915460336, + "grad_norm": 0.44807236365204745, + "learning_rate": 1.472426486493831e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12988603115081787, + "step": 3790, + "valid_targets_mean": 8196.8, + "valid_targets_min": 3317 + }, + { + "epoch": 4.392588303416328, + "grad_norm": 3.0327013394529097, + "learning_rate": 1.4668613190186436e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11648248136043549, + "step": 3795, + "valid_targets_mean": 4238.2, + "valid_targets_min": 1650 + }, + { + "epoch": 4.398378691372322, + "grad_norm": 0.5537086119802135, + "learning_rate": 1.4613005917480031e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10524934530258179, + "step": 3800, + "valid_targets_mean": 4466.8, + "valid_targets_min": 1412 + }, + { + "epoch": 4.404169079328315, + "grad_norm": 0.6147355430304815, + "learning_rate": 1.4557443509939994e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10133112221956253, + "step": 3805, + "valid_targets_mean": 3129.1, + "valid_targets_min": 1354 + }, + { + "epoch": 4.409959467284308, + "grad_norm": 0.5531691642814615, + "learning_rate": 1.4501926430313563e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10187403112649918, + "step": 3810, + "valid_targets_mean": 4856.8, + "valid_targets_min": 2052 + }, + { + "epoch": 4.415749855240301, + "grad_norm": 0.6814516561894316, + "learning_rate": 1.4446455140970463e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09588038921356201, + "step": 3815, + "valid_targets_mean": 3705.2, + "valid_targets_min": 1210 + }, + { + "epoch": 4.4215402431962945, + "grad_norm": 0.6617520741751552, + "learning_rate": 1.439103010389908e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15332624316215515, + "step": 3820, + "valid_targets_mean": 4497.4, + "valid_targets_min": 1293 + }, + { + "epoch": 4.427330631152287, + "grad_norm": 0.6358424187335716, + "learning_rate": 1.4335651780702571e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12418112903833389, + "step": 3825, + "valid_targets_mean": 4081.0, + "valid_targets_min": 2979 + }, + { + "epoch": 4.43312101910828, + "grad_norm": 0.6854443786877813, + "learning_rate": 1.4280320632595045e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1276114284992218, + "step": 3830, + "valid_targets_mean": 4278.9, + "valid_targets_min": 671 + }, + { + "epoch": 4.438911407064273, + "grad_norm": 0.7096522105030221, + "learning_rate": 1.422503712039772e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08994592726230621, + "step": 3835, + "valid_targets_mean": 2743.2, + "valid_targets_min": 1291 + }, + { + "epoch": 4.444701795020267, + "grad_norm": 0.6418924276012431, + "learning_rate": 1.4169801704535089e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09683261066675186, + "step": 3840, + "valid_targets_mean": 2747.0, + "valid_targets_min": 1285 + }, + { + "epoch": 4.45049218297626, + "grad_norm": 0.6820132674699142, + "learning_rate": 1.4114614845031056e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11021986603736877, + "step": 3845, + "valid_targets_mean": 3813.1, + "valid_targets_min": 1209 + }, + { + "epoch": 4.456282570932252, + "grad_norm": 0.6767122819071981, + "learning_rate": 1.4059477001505142e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11383691430091858, + "step": 3850, + "valid_targets_mean": 4358.9, + "valid_targets_min": 3280 + }, + { + "epoch": 4.462072958888245, + "grad_norm": 0.648493463933508, + "learning_rate": 1.4004388633168644e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08743107318878174, + "step": 3855, + "valid_targets_mean": 3406.6, + "valid_targets_min": 1957 + }, + { + "epoch": 4.467863346844238, + "grad_norm": 0.6544777379199935, + "learning_rate": 1.3949350198820805e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15421587228775024, + "step": 3860, + "valid_targets_mean": 4873.1, + "valid_targets_min": 2833 + }, + { + "epoch": 4.473653734800232, + "grad_norm": 0.6873146316651686, + "learning_rate": 1.3894362156845004e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08594775944948196, + "step": 3865, + "valid_targets_mean": 2926.9, + "valid_targets_min": 1846 + }, + { + "epoch": 4.479444122756225, + "grad_norm": 0.6697088686410119, + "learning_rate": 1.3839424965204913e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09900952130556107, + "step": 3870, + "valid_targets_mean": 3012.8, + "valid_targets_min": 1009 + }, + { + "epoch": 4.485234510712218, + "grad_norm": 0.6780954186588826, + "learning_rate": 1.3784539081440729e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10792041569948196, + "step": 3875, + "valid_targets_mean": 3220.8, + "valid_targets_min": 1264 + }, + { + "epoch": 4.49102489866821, + "grad_norm": 0.5955939245300237, + "learning_rate": 1.3729704962665294e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09530109167098999, + "step": 3880, + "valid_targets_mean": 3489.0, + "valid_targets_min": 1606 + }, + { + "epoch": 4.496815286624204, + "grad_norm": 0.7449723707975259, + "learning_rate": 1.3674923065560375e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10313093662261963, + "step": 3885, + "valid_targets_mean": 3216.2, + "valid_targets_min": 1190 + }, + { + "epoch": 4.502605674580197, + "grad_norm": 0.681314779850815, + "learning_rate": 1.3620193846372778e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1107458770275116, + "step": 3890, + "valid_targets_mean": 3226.9, + "valid_targets_min": 571 + }, + { + "epoch": 4.50839606253619, + "grad_norm": 0.6618093464158726, + "learning_rate": 1.3565517760910605e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11724066734313965, + "step": 3895, + "valid_targets_mean": 4088.4, + "valid_targets_min": 2410 + }, + { + "epoch": 4.514186450492183, + "grad_norm": 0.6208808503309192, + "learning_rate": 1.3510895264539446e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09507305175065994, + "step": 3900, + "valid_targets_mean": 3610.4, + "valid_targets_min": 1110 + }, + { + "epoch": 4.519976838448176, + "grad_norm": 0.7699986741181999, + "learning_rate": 1.3456326812178546e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10935103893280029, + "step": 3905, + "valid_targets_mean": 3476.2, + "valid_targets_min": 513 + }, + { + "epoch": 4.525767226404169, + "grad_norm": 0.7441197830906147, + "learning_rate": 1.3401812858297087e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1089061051607132, + "step": 3910, + "valid_targets_mean": 4105.5, + "valid_targets_min": 2558 + }, + { + "epoch": 4.531557614360162, + "grad_norm": 0.6779694865278227, + "learning_rate": 1.334735385691033e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11115449666976929, + "step": 3915, + "valid_targets_mean": 3982.9, + "valid_targets_min": 3108 + }, + { + "epoch": 4.537348002316155, + "grad_norm": 0.6526369552324457, + "learning_rate": 1.329295026157589e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09470139443874359, + "step": 3920, + "valid_targets_mean": 3392.6, + "valid_targets_min": 1860 + }, + { + "epoch": 4.543138390272148, + "grad_norm": 0.5847112270187543, + "learning_rate": 1.3238602525389937e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12164326012134552, + "step": 3925, + "valid_targets_mean": 5552.2, + "valid_targets_min": 2495 + }, + { + "epoch": 4.548928778228142, + "grad_norm": 0.5814037340512235, + "learning_rate": 1.3184311100983407e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09199385344982147, + "step": 3930, + "valid_targets_mean": 3295.2, + "valid_targets_min": 1292 + }, + { + "epoch": 4.554719166184134, + "grad_norm": 0.6984013547866225, + "learning_rate": 1.313007644051826e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11497661471366882, + "step": 3935, + "valid_targets_mean": 3739.6, + "valid_targets_min": 1655 + }, + { + "epoch": 4.560509554140127, + "grad_norm": 0.6552364897317641, + "learning_rate": 1.3075898995683707e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1044774204492569, + "step": 3940, + "valid_targets_mean": 3523.8, + "valid_targets_min": 1757 + }, + { + "epoch": 4.56629994209612, + "grad_norm": 0.6855739539292031, + "learning_rate": 1.3021779217692432e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120901107788086, + "step": 3945, + "valid_targets_mean": 4563.8, + "valid_targets_min": 2313 + }, + { + "epoch": 4.572090330052114, + "grad_norm": 0.6724289348839543, + "learning_rate": 1.2967717557276841e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09848484396934509, + "step": 3950, + "valid_targets_mean": 3368.1, + "valid_targets_min": 2192 + }, + { + "epoch": 4.577880718008107, + "grad_norm": 0.6404908890629426, + "learning_rate": 1.2913714464685322e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0937051847577095, + "step": 3955, + "valid_targets_mean": 3490.8, + "valid_targets_min": 1833 + }, + { + "epoch": 4.5836711059640995, + "grad_norm": 0.7226581927394442, + "learning_rate": 1.2859770389678485e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09981274604797363, + "step": 3960, + "valid_targets_mean": 2947.1, + "valid_targets_min": 1200 + }, + { + "epoch": 4.589461493920092, + "grad_norm": 0.6019247172317036, + "learning_rate": 1.2805885781525418e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10194752365350723, + "step": 3965, + "valid_targets_mean": 5060.6, + "valid_targets_min": 3501 + }, + { + "epoch": 4.595251881876086, + "grad_norm": 0.6650891905748428, + "learning_rate": 1.2752061088999935e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1218477189540863, + "step": 3970, + "valid_targets_mean": 3855.6, + "valid_targets_min": 2498 + }, + { + "epoch": 4.601042269832079, + "grad_norm": 0.6358089854292314, + "learning_rate": 1.2698296760376851e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09213036298751831, + "step": 3975, + "valid_targets_mean": 3354.5, + "valid_targets_min": 988 + }, + { + "epoch": 4.606832657788072, + "grad_norm": 0.7708131148847596, + "learning_rate": 1.264459324342826e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11673158407211304, + "step": 3980, + "valid_targets_mean": 3658.9, + "valid_targets_min": 1361 + }, + { + "epoch": 4.612623045744065, + "grad_norm": 0.6343915324436624, + "learning_rate": 1.2590950985419759e-05, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11010897159576416, + "step": 3985, + "valid_targets_mean": 3907.2, + "valid_targets_min": 1685 + }, + { + "epoch": 4.6184134337000575, + "grad_norm": 0.7641387693225504, + "learning_rate": 1.2537370433106783e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09839899837970734, + "step": 3990, + "valid_targets_mean": 2536.4, + "valid_targets_min": 474 + }, + { + "epoch": 4.624203821656051, + "grad_norm": 0.6700502378999571, + "learning_rate": 1.2483852032730843e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1001468151807785, + "step": 3995, + "valid_targets_mean": 3377.9, + "valid_targets_min": 2200 + }, + { + "epoch": 4.629994209612044, + "grad_norm": 0.6452149033447232, + "learning_rate": 1.2430396230015825e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10308694839477539, + "step": 4000, + "valid_targets_mean": 4266.8, + "valid_targets_min": 2241 + }, + { + "epoch": 4.635784597568037, + "grad_norm": 0.6567104338943586, + "learning_rate": 1.2377003470164279e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13291510939598083, + "step": 4005, + "valid_targets_mean": 4576.9, + "valid_targets_min": 1478 + }, + { + "epoch": 4.64157498552403, + "grad_norm": 1.1276081511241993, + "learning_rate": 1.2323674197853697e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09665089100599289, + "step": 4010, + "valid_targets_mean": 4326.5, + "valid_targets_min": 1194 + }, + { + "epoch": 4.6473653734800235, + "grad_norm": 0.658002633108222, + "learning_rate": 1.2270408857232834e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11016303300857544, + "step": 4015, + "valid_targets_mean": 3412.9, + "valid_targets_min": 1220 + }, + { + "epoch": 4.653155761436016, + "grad_norm": 0.618944368962821, + "learning_rate": 1.221720789191798e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09453439712524414, + "step": 4020, + "valid_targets_mean": 4365.8, + "valid_targets_min": 1988 + }, + { + "epoch": 4.658946149392009, + "grad_norm": 0.6190869503731473, + "learning_rate": 1.2164071744989294e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09763002395629883, + "step": 4025, + "valid_targets_mean": 3706.8, + "valid_targets_min": 1256 + }, + { + "epoch": 4.664736537348002, + "grad_norm": 0.7315105123119252, + "learning_rate": 1.2111000858987082e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1276649832725525, + "step": 4030, + "valid_targets_mean": 3325.8, + "valid_targets_min": 1608 + }, + { + "epoch": 4.670526925303996, + "grad_norm": 0.6708486543908649, + "learning_rate": 1.2057995675908153e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11391855031251907, + "step": 4035, + "valid_targets_mean": 4343.9, + "valid_targets_min": 2800 + }, + { + "epoch": 4.676317313259989, + "grad_norm": 0.6927673324719886, + "learning_rate": 1.2005056637202098e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10946047306060791, + "step": 4040, + "valid_targets_mean": 3786.2, + "valid_targets_min": 1771 + }, + { + "epoch": 4.6821077012159815, + "grad_norm": 0.6821074198825163, + "learning_rate": 1.1952184183767638e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11058327555656433, + "step": 4045, + "valid_targets_mean": 3600.8, + "valid_targets_min": 2528 + }, + { + "epoch": 4.687898089171974, + "grad_norm": 0.7512276074888579, + "learning_rate": 1.1899378755948931e-05, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0803704783320427, + "step": 4050, + "valid_targets_mean": 2860.0, + "valid_targets_min": 854 + }, + { + "epoch": 4.693688477127967, + "grad_norm": 0.7374796362496391, + "learning_rate": 1.1846640793531941e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11275653541088104, + "step": 4055, + "valid_targets_mean": 3152.2, + "valid_targets_min": 2343 + }, + { + "epoch": 4.699478865083961, + "grad_norm": 0.7851951138416005, + "learning_rate": 1.1793970735740716e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07852757722139359, + "step": 4060, + "valid_targets_mean": 2654.6, + "valid_targets_min": 1026 + }, + { + "epoch": 4.705269253039954, + "grad_norm": 0.6767815923385693, + "learning_rate": 1.1741369021233799e-05, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10796316713094711, + "step": 4065, + "valid_targets_mean": 4003.4, + "valid_targets_min": 1524 + }, + { + "epoch": 4.711059640995947, + "grad_norm": 0.697623902564916, + "learning_rate": 1.1688836088100524e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10594123601913452, + "step": 4070, + "valid_targets_mean": 3186.2, + "valid_targets_min": 1274 + }, + { + "epoch": 4.7168500289519395, + "grad_norm": 0.7168919011091767, + "learning_rate": 1.1636372373857388e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12310782074928284, + "step": 4075, + "valid_targets_mean": 4443.4, + "valid_targets_min": 914 + }, + { + "epoch": 4.722640416907932, + "grad_norm": 0.6519683034317412, + "learning_rate": 1.1583978315444405e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1027081310749054, + "step": 4080, + "valid_targets_mean": 4349.2, + "valid_targets_min": 2093 + }, + { + "epoch": 4.728430804863926, + "grad_norm": 0.6867968732060586, + "learning_rate": 1.1531654349221468e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11880075186491013, + "step": 4085, + "valid_targets_mean": 3341.9, + "valid_targets_min": 1621 + }, + { + "epoch": 4.734221192819919, + "grad_norm": 0.6400500919623512, + "learning_rate": 1.147940091096472e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961160808801651, + "step": 4090, + "valid_targets_mean": 4046.6, + "valid_targets_min": 2253 + }, + { + "epoch": 4.740011580775912, + "grad_norm": 0.6612830128083482, + "learning_rate": 1.1427218435862884e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08661623299121857, + "step": 4095, + "valid_targets_mean": 4462.8, + "valid_targets_min": 1789 + }, + { + "epoch": 4.7458019687319055, + "grad_norm": 0.6781757851379322, + "learning_rate": 1.1375107358513717e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11982538551092148, + "step": 4100, + "valid_targets_mean": 4629.9, + "valid_targets_min": 1586 + }, + { + "epoch": 4.751592356687898, + "grad_norm": 0.7051102633354648, + "learning_rate": 1.1323068112920304e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10887384414672852, + "step": 4105, + "valid_targets_mean": 3739.8, + "valid_targets_min": 2391 + }, + { + "epoch": 4.757382744643891, + "grad_norm": 0.590000679776256, + "learning_rate": 1.1271101132487514e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08773195743560791, + "step": 4110, + "valid_targets_mean": 3735.9, + "valid_targets_min": 2513 + }, + { + "epoch": 4.763173132599884, + "grad_norm": 0.590068763568892, + "learning_rate": 1.1219206850018351e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.081422820687294, + "step": 4115, + "valid_targets_mean": 3797.2, + "valid_targets_min": 2156 + }, + { + "epoch": 4.768963520555877, + "grad_norm": 0.6529370026097409, + "learning_rate": 1.1167385697710361e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0942734032869339, + "step": 4120, + "valid_targets_mean": 3446.1, + "valid_targets_min": 1423 + }, + { + "epoch": 4.774753908511871, + "grad_norm": 0.7988272664256585, + "learning_rate": 1.1115638107152034e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10448580980300903, + "step": 4125, + "valid_targets_mean": 3166.9, + "valid_targets_min": 2126 + }, + { + "epoch": 4.7805442964678635, + "grad_norm": 2.4716700067552764, + "learning_rate": 1.1063964509319188e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10709042847156525, + "step": 4130, + "valid_targets_mean": 3749.2, + "valid_targets_min": 1834 + }, + { + "epoch": 4.786334684423856, + "grad_norm": 0.7725124051827306, + "learning_rate": 1.1012365334571432e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1110750138759613, + "step": 4135, + "valid_targets_mean": 4225.4, + "valid_targets_min": 1571 + }, + { + "epoch": 4.792125072379849, + "grad_norm": 0.6821783963541409, + "learning_rate": 1.0960841012648506e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10442662984132767, + "step": 4140, + "valid_targets_mean": 3508.6, + "valid_targets_min": 2150 + }, + { + "epoch": 4.797915460335842, + "grad_norm": 0.6237896593022638, + "learning_rate": 1.0909391972666783e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10406245291233063, + "step": 4145, + "valid_targets_mean": 4714.1, + "valid_targets_min": 2761 + }, + { + "epoch": 4.803705848291836, + "grad_norm": 0.6539511811303761, + "learning_rate": 1.0858018643115636e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12043410539627075, + "step": 4150, + "valid_targets_mean": 4161.5, + "valid_targets_min": 2492 + }, + { + "epoch": 4.809496236247829, + "grad_norm": 0.6695364647428075, + "learning_rate": 1.0806721451853892e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09142296016216278, + "step": 4155, + "valid_targets_mean": 3178.5, + "valid_targets_min": 1675 + }, + { + "epoch": 4.8152866242038215, + "grad_norm": 0.6371529714283551, + "learning_rate": 1.075550082610628e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08996136486530304, + "step": 4160, + "valid_targets_mean": 3115.8, + "valid_targets_min": 1868 + }, + { + "epoch": 4.821077012159814, + "grad_norm": 0.7399760446823987, + "learning_rate": 1.0704357192459824e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12434670329093933, + "step": 4165, + "valid_targets_mean": 3591.2, + "valid_targets_min": 1969 + }, + { + "epoch": 4.826867400115808, + "grad_norm": 0.6338640690724339, + "learning_rate": 1.065329097686036e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09719884395599365, + "step": 4170, + "valid_targets_mean": 3810.2, + "valid_targets_min": 1878 + }, + { + "epoch": 4.832657788071801, + "grad_norm": 0.7371351517240505, + "learning_rate": 1.0602302604608949e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10729122161865234, + "step": 4175, + "valid_targets_mean": 3476.5, + "valid_targets_min": 1207 + }, + { + "epoch": 4.838448176027794, + "grad_norm": 0.6493557233873885, + "learning_rate": 1.0551392500358317e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10543425381183624, + "step": 4180, + "valid_targets_mean": 4223.1, + "valid_targets_min": 2305 + }, + { + "epoch": 4.844238563983787, + "grad_norm": 0.6871112372096738, + "learning_rate": 1.0500561088109364e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08310510963201523, + "step": 4185, + "valid_targets_mean": 2648.1, + "valid_targets_min": 953 + }, + { + "epoch": 4.85002895193978, + "grad_norm": 0.6055841246914735, + "learning_rate": 1.04498087912076e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09668581187725067, + "step": 4190, + "valid_targets_mean": 5047.8, + "valid_targets_min": 1659 + }, + { + "epoch": 4.855819339895773, + "grad_norm": 0.6417252887209771, + "learning_rate": 1.039913603233964e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08402392268180847, + "step": 4195, + "valid_targets_mean": 3317.6, + "valid_targets_min": 1390 + }, + { + "epoch": 4.861609727851766, + "grad_norm": 0.685651764111974, + "learning_rate": 1.0348543233529647e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10304082930088043, + "step": 4200, + "valid_targets_mean": 3857.2, + "valid_targets_min": 2032 + }, + { + "epoch": 4.867400115807759, + "grad_norm": 0.6221040398072666, + "learning_rate": 1.0298030816135866e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09167119860649109, + "step": 4205, + "valid_targets_mean": 3392.9, + "valid_targets_min": 1464 + }, + { + "epoch": 4.873190503763752, + "grad_norm": 0.7112812688595366, + "learning_rate": 1.024759920084709e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10534010827541351, + "step": 4210, + "valid_targets_mean": 2917.6, + "valid_targets_min": 1826 + }, + { + "epoch": 4.8789808917197455, + "grad_norm": 0.6305949802947574, + "learning_rate": 1.0197248807679152e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10810261964797974, + "step": 4215, + "valid_targets_mean": 4448.1, + "valid_targets_min": 2900 + }, + { + "epoch": 4.884771279675738, + "grad_norm": 0.7233919857839544, + "learning_rate": 1.0146980055971422e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11249633878469467, + "step": 4220, + "valid_targets_mean": 3419.1, + "valid_targets_min": 2194 + }, + { + "epoch": 4.890561667631731, + "grad_norm": 0.6491504770527533, + "learning_rate": 1.0096793364383346e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10216433554887772, + "step": 4225, + "valid_targets_mean": 4196.8, + "valid_targets_min": 1931 + }, + { + "epoch": 4.896352055587724, + "grad_norm": 0.5979581538813098, + "learning_rate": 1.0046689150890935e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09729495644569397, + "step": 4230, + "valid_targets_mean": 5424.6, + "valid_targets_min": 1233 + }, + { + "epoch": 4.902142443543718, + "grad_norm": 0.5635032941313716, + "learning_rate": 9.996667832783266e-06, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10651378333568573, + "step": 4235, + "valid_targets_mean": 5272.1, + "valid_targets_min": 3440 + }, + { + "epoch": 4.907932831499711, + "grad_norm": 0.6161802350596663, + "learning_rate": 9.946729826659056e-06, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09289420396089554, + "step": 4240, + "valid_targets_mean": 3633.9, + "valid_targets_min": 1873 + }, + { + "epoch": 4.9137232194557035, + "grad_norm": 0.6419586212420326, + "learning_rate": 9.896875548423145e-06, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09963817894458771, + "step": 4245, + "valid_targets_mean": 3971.1, + "valid_targets_min": 1614 + }, + { + "epoch": 4.919513607411696, + "grad_norm": 0.6527250029035306, + "learning_rate": 9.847105413283067e-06, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11336039006710052, + "step": 4250, + "valid_targets_mean": 4600.5, + "valid_targets_min": 974 + }, + { + "epoch": 4.92530399536769, + "grad_norm": 0.6875372467737946, + "learning_rate": 9.797419835745572e-06, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09850144386291504, + "step": 4255, + "valid_targets_mean": 3246.1, + "valid_targets_min": 2054 + }, + { + "epoch": 4.931094383323683, + "grad_norm": 0.6776141150310618, + "learning_rate": 9.747819229613163e-06, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10144221782684326, + "step": 4260, + "valid_targets_mean": 4019.6, + "valid_targets_min": 1333 + }, + { + "epoch": 4.936884771279676, + "grad_norm": 0.7006395388235903, + "learning_rate": 9.698304007980688e-06, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08913317322731018, + "step": 4265, + "valid_targets_mean": 3301.9, + "valid_targets_min": 1515 + }, + { + "epoch": 4.942675159235669, + "grad_norm": 0.733734340115638, + "learning_rate": 9.648874583231858e-06, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11472181975841522, + "step": 4270, + "valid_targets_mean": 2966.4, + "valid_targets_min": 1349 + }, + { + "epoch": 4.948465547191661, + "grad_norm": 0.6786184673726235, + "learning_rate": 9.59953136703584e-06, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10117700695991516, + "step": 4275, + "valid_targets_mean": 3284.2, + "valid_targets_min": 1906 + }, + { + "epoch": 4.954255935147655, + "grad_norm": 0.7093575427420685, + "learning_rate": 9.550274770343826e-06, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11812548339366913, + "step": 4280, + "valid_targets_mean": 3551.6, + "valid_targets_min": 2076 + }, + { + "epoch": 4.960046323103648, + "grad_norm": 0.6242435134589384, + "learning_rate": 9.501105203385592e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09157514572143555, + "step": 4285, + "valid_targets_mean": 3506.8, + "valid_targets_min": 1021 + }, + { + "epoch": 4.965836711059641, + "grad_norm": 0.7343590166234107, + "learning_rate": 9.452023075666104e-06, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1187494695186615, + "step": 4290, + "valid_targets_mean": 3709.6, + "valid_targets_min": 2413 + }, + { + "epoch": 4.971627099015634, + "grad_norm": 0.645442375731764, + "learning_rate": 9.403028795962095e-06, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09993511438369751, + "step": 4295, + "valid_targets_mean": 3746.6, + "valid_targets_min": 1605 + }, + { + "epoch": 4.9774174869716274, + "grad_norm": 0.7213539538482706, + "learning_rate": 9.354122772318654e-06, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08953292667865753, + "step": 4300, + "valid_targets_mean": 2676.8, + "valid_targets_min": 815 + }, + { + "epoch": 4.98320787492762, + "grad_norm": 0.6582835715939438, + "learning_rate": 9.305305412045836e-06, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09620843827724457, + "step": 4305, + "valid_targets_mean": 3131.9, + "valid_targets_min": 1878 + }, + { + "epoch": 4.988998262883613, + "grad_norm": 0.6150045532252543, + "learning_rate": 9.256577121715278e-06, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10262969136238098, + "step": 4310, + "valid_targets_mean": 4093.5, + "valid_targets_min": 1793 + }, + { + "epoch": 4.994788650839606, + "grad_norm": 0.6397601890340455, + "learning_rate": 9.207938307156808e-06, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10116622596979141, + "step": 4315, + "valid_targets_mean": 3702.9, + "valid_targets_min": 1690 + }, + { + "epoch": 5.0, + "grad_norm": 0.7476582503069477, + "learning_rate": 9.159389373455055e-06, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16756142675876617, + "step": 4320, + "valid_targets_mean": 5459.5, + "valid_targets_min": 2092 + }, + { + "epoch": 5.005790387955993, + "grad_norm": 0.5170706009126839, + "learning_rate": 9.11093072494608e-06, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08678993582725525, + "step": 4325, + "valid_targets_mean": 7389.0, + "valid_targets_min": 5627 + }, + { + "epoch": 5.011580775911986, + "grad_norm": 0.7348375708355329, + "learning_rate": 9.06256276521402e-06, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04887574166059494, + "step": 4330, + "valid_targets_mean": 1294.8, + "valid_targets_min": 422 + }, + { + "epoch": 5.017371163867979, + "grad_norm": 0.40825306898806885, + "learning_rate": 9.014285897087717e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09006273746490479, + "step": 4335, + "valid_targets_mean": 8539.9, + "valid_targets_min": 6761 + }, + { + "epoch": 5.023161551823972, + "grad_norm": 0.4169264118454129, + "learning_rate": 8.966100522637349e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07914991676807404, + "step": 4340, + "valid_targets_mean": 7579.0, + "valid_targets_min": 6174 + }, + { + "epoch": 5.028951939779965, + "grad_norm": 0.4261254122872559, + "learning_rate": 8.9180070431711e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07706992328166962, + "step": 4345, + "valid_targets_mean": 7328.2, + "valid_targets_min": 4839 + }, + { + "epoch": 5.034742327735958, + "grad_norm": 0.49356145166231324, + "learning_rate": 8.870005859231824e-06, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09671644866466522, + "step": 4350, + "valid_targets_mean": 6838.2, + "valid_targets_min": 5034 + }, + { + "epoch": 5.040532715691952, + "grad_norm": 0.45057468994612204, + "learning_rate": 8.822097370593699e-06, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.092893585562706, + "step": 4355, + "valid_targets_mean": 7248.2, + "valid_targets_min": 5567 + }, + { + "epoch": 5.046323103647945, + "grad_norm": 0.42693244283983495, + "learning_rate": 8.774281976258885e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07744898647069931, + "step": 4360, + "valid_targets_mean": 6234.0, + "valid_targets_min": 5603 + }, + { + "epoch": 5.052113491603937, + "grad_norm": 0.7444283539312915, + "learning_rate": 8.726560074454224e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02484450861811638, + "step": 4365, + "valid_targets_mean": 602.0, + "valid_targets_min": 152 + }, + { + "epoch": 5.05790387955993, + "grad_norm": 0.41468949731358445, + "learning_rate": 8.678932062627919e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07928498834371567, + "step": 4370, + "valid_targets_mean": 7299.9, + "valid_targets_min": 6158 + }, + { + "epoch": 5.063694267515924, + "grad_norm": 0.4539339242355834, + "learning_rate": 8.63139833744619e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08123583346605301, + "step": 4375, + "valid_targets_mean": 6140.0, + "valid_targets_min": 4648 + }, + { + "epoch": 5.069484655471917, + "grad_norm": 0.4534263931745652, + "learning_rate": 8.583959294790024e-06, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08489354699850082, + "step": 4380, + "valid_targets_mean": 6603.0, + "valid_targets_min": 4882 + }, + { + "epoch": 5.07527504342791, + "grad_norm": 0.4213647354140185, + "learning_rate": 8.53661532975184e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08459046483039856, + "step": 4385, + "valid_targets_mean": 8015.0, + "valid_targets_min": 6344 + }, + { + "epoch": 5.0810654313839025, + "grad_norm": 0.4403748874087747, + "learning_rate": 8.489366836632215e-06, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.073912113904953, + "step": 4390, + "valid_targets_mean": 6305.2, + "valid_targets_min": 4622 + }, + { + "epoch": 5.086855819339895, + "grad_norm": 0.4217263365160654, + "learning_rate": 8.442214208936594e-06, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08894698321819305, + "step": 4395, + "valid_targets_mean": 7890.8, + "valid_targets_min": 5485 + }, + { + "epoch": 5.092646207295889, + "grad_norm": 0.41608850779826057, + "learning_rate": 8.395157839372017e-06, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07635587453842163, + "step": 4400, + "valid_targets_mean": 7016.6, + "valid_targets_min": 4960 + }, + { + "epoch": 5.098436595251882, + "grad_norm": 0.46639560747858133, + "learning_rate": 8.348198119843846e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07922901213169098, + "step": 4405, + "valid_targets_mean": 6527.1, + "valid_targets_min": 5399 + }, + { + "epoch": 5.104226983207875, + "grad_norm": 0.4974472656784201, + "learning_rate": 8.301335441452486e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0812421441078186, + "step": 4410, + "valid_targets_mean": 4995.0, + "valid_targets_min": 618 + }, + { + "epoch": 5.110017371163868, + "grad_norm": 0.4384624621750658, + "learning_rate": 8.25457019449016e-06, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08994653075933456, + "step": 4415, + "valid_targets_mean": 7219.4, + "valid_targets_min": 5764 + }, + { + "epoch": 5.115807759119861, + "grad_norm": 0.4506617842709948, + "learning_rate": 8.207902768437643e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08390418440103531, + "step": 4420, + "valid_targets_mean": 7998.5, + "valid_targets_min": 5902 + }, + { + "epoch": 5.121598147075854, + "grad_norm": 0.4223545408283372, + "learning_rate": 8.161333551960993e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06642226874828339, + "step": 4425, + "valid_targets_mean": 7377.4, + "valid_targets_min": 5015 + }, + { + "epoch": 5.127388535031847, + "grad_norm": 0.4166683550696679, + "learning_rate": 8.114862932908356e-06, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0684872716665268, + "step": 4430, + "valid_targets_mean": 6794.0, + "valid_targets_min": 5283 + }, + { + "epoch": 5.13317892298784, + "grad_norm": 0.44850588452073403, + "learning_rate": 8.068491298306718e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0934000313282013, + "step": 4435, + "valid_targets_mean": 6656.4, + "valid_targets_min": 4364 + }, + { + "epoch": 5.138969310943834, + "grad_norm": 0.4045204129415732, + "learning_rate": 8.022219034358676e-06, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07664947211742401, + "step": 4440, + "valid_targets_mean": 7178.4, + "valid_targets_min": 4448 + }, + { + "epoch": 5.1447596988998265, + "grad_norm": 0.4792195984467097, + "learning_rate": 7.976046526439215e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09860777854919434, + "step": 4445, + "valid_targets_mean": 7619.8, + "valid_targets_min": 5737 + }, + { + "epoch": 5.150550086855819, + "grad_norm": 0.38924917775303713, + "learning_rate": 7.929974159092531e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06915652006864548, + "step": 4450, + "valid_targets_mean": 8754.4, + "valid_targets_min": 5752 + }, + { + "epoch": 5.156340474811812, + "grad_norm": 0.4842231172925656, + "learning_rate": 7.884002316028787e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0772770345211029, + "step": 4455, + "valid_targets_mean": 6341.0, + "valid_targets_min": 4543 + }, + { + "epoch": 5.162130862767805, + "grad_norm": 0.43535274367025806, + "learning_rate": 7.838131380120962e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08321338891983032, + "step": 4460, + "valid_targets_mean": 7181.9, + "valid_targets_min": 4563 + }, + { + "epoch": 5.167921250723799, + "grad_norm": 0.7920145323587261, + "learning_rate": 7.792361733401599e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05222175642848015, + "step": 4465, + "valid_targets_mean": 1104.4, + "valid_targets_min": 161 + }, + { + "epoch": 5.173711638679792, + "grad_norm": 0.44403128554587973, + "learning_rate": 7.7466937570597e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07515177875757217, + "step": 4470, + "valid_targets_mean": 7018.2, + "valid_targets_min": 5424 + }, + { + "epoch": 5.1795020266357845, + "grad_norm": 0.4722726033181905, + "learning_rate": 7.701127831437499e-06, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08935241401195526, + "step": 4475, + "valid_targets_mean": 7738.4, + "valid_targets_min": 5329 + }, + { + "epoch": 5.185292414591777, + "grad_norm": 0.45611397186572844, + "learning_rate": 7.655664336027296e-06, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08397034555673599, + "step": 4480, + "valid_targets_mean": 6426.6, + "valid_targets_min": 5560 + }, + { + "epoch": 5.191082802547771, + "grad_norm": 0.49322397234122534, + "learning_rate": 7.610303649468323e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08556440472602844, + "step": 4485, + "valid_targets_mean": 6169.9, + "valid_targets_min": 3446 + }, + { + "epoch": 5.196873190503764, + "grad_norm": 0.4306990762052569, + "learning_rate": 7.5650461495435775e-06, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08588321506977081, + "step": 4490, + "valid_targets_mean": 7204.6, + "valid_targets_min": 5381 + }, + { + "epoch": 5.202663578459757, + "grad_norm": 0.42431598510637303, + "learning_rate": 7.519892213176669e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0885540097951889, + "step": 4495, + "valid_targets_mean": 7600.4, + "valid_targets_min": 5241 + }, + { + "epoch": 5.20845396641575, + "grad_norm": 0.42897458180789355, + "learning_rate": 7.474842216428695e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08960206806659698, + "step": 4500, + "valid_targets_mean": 7098.5, + "valid_targets_min": 5607 + }, + { + "epoch": 5.2142443543717425, + "grad_norm": 0.4657738884299629, + "learning_rate": 7.429896534495076e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09080995619297028, + "step": 4505, + "valid_targets_mean": 6955.1, + "valid_targets_min": 5530 + }, + { + "epoch": 5.220034742327736, + "grad_norm": 0.46053355463201484, + "learning_rate": 7.385055541702489e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08669829368591309, + "step": 4510, + "valid_targets_mean": 6748.1, + "valid_targets_min": 5364 + }, + { + "epoch": 5.225825130283729, + "grad_norm": 0.4683200226871999, + "learning_rate": 7.340319611505682e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0833309143781662, + "step": 4515, + "valid_targets_mean": 6273.9, + "valid_targets_min": 5438 + }, + { + "epoch": 5.231615518239722, + "grad_norm": 0.42873974780150437, + "learning_rate": 7.295689116484419e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07850541174411774, + "step": 4520, + "valid_targets_mean": 6419.5, + "valid_targets_min": 4879 + }, + { + "epoch": 5.237405906195715, + "grad_norm": 0.43734828472291104, + "learning_rate": 7.251164428340349e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07928910851478577, + "step": 4525, + "valid_targets_mean": 6483.4, + "valid_targets_min": 4309 + }, + { + "epoch": 5.2431962941517085, + "grad_norm": 0.4825423523548664, + "learning_rate": 7.206745917893918e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09682394564151764, + "step": 4530, + "valid_targets_mean": 6825.0, + "valid_targets_min": 5205 + }, + { + "epoch": 5.248986682107701, + "grad_norm": 0.49377635032991485, + "learning_rate": 7.162433955081283e-06, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07656551897525787, + "step": 4535, + "valid_targets_mean": 6729.4, + "valid_targets_min": 5200 + }, + { + "epoch": 5.254777070063694, + "grad_norm": 0.4554574464975237, + "learning_rate": 7.1182289089512105e-06, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07981318235397339, + "step": 4540, + "valid_targets_mean": 6094.0, + "valid_targets_min": 5302 + }, + { + "epoch": 5.260567458019687, + "grad_norm": 0.457518491044438, + "learning_rate": 7.074131147662042e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08653154969215393, + "step": 4545, + "valid_targets_mean": 7140.0, + "valid_targets_min": 5270 + }, + { + "epoch": 5.26635784597568, + "grad_norm": 0.4011706541140575, + "learning_rate": 7.030141038478584e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07779765874147415, + "step": 4550, + "valid_targets_mean": 7425.0, + "valid_targets_min": 5281 + }, + { + "epoch": 5.272148233931674, + "grad_norm": 0.4491415779590304, + "learning_rate": 6.986258947769087e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10509377717971802, + "step": 4555, + "valid_targets_mean": 7063.4, + "valid_targets_min": 5386 + }, + { + "epoch": 5.2779386218876665, + "grad_norm": 0.4384591648317973, + "learning_rate": 6.942485241002173e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08581419289112091, + "step": 4560, + "valid_targets_mean": 6827.0, + "valid_targets_min": 4547 + }, + { + "epoch": 5.283729009843659, + "grad_norm": 0.603270492629835, + "learning_rate": 6.898820282743792e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03785271942615509, + "step": 4565, + "valid_targets_mean": 2047.0, + "valid_targets_min": 165 + }, + { + "epoch": 5.289519397799652, + "grad_norm": 0.4338840236261938, + "learning_rate": 6.855264436654199e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07795995473861694, + "step": 4570, + "valid_targets_mean": 6754.1, + "valid_targets_min": 5196 + }, + { + "epoch": 5.295309785755646, + "grad_norm": 0.49540523131501424, + "learning_rate": 6.811818065484918e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08165041357278824, + "step": 4575, + "valid_targets_mean": 5985.6, + "valid_targets_min": 5028 + }, + { + "epoch": 5.301100173711639, + "grad_norm": 0.4669333045816074, + "learning_rate": 6.7684815310756965e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07661072909832001, + "step": 4580, + "valid_targets_mean": 5963.9, + "valid_targets_min": 5237 + }, + { + "epoch": 5.306890561667632, + "grad_norm": 0.4820832943223326, + "learning_rate": 6.72525519435155e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0846099928021431, + "step": 4585, + "valid_targets_mean": 6319.0, + "valid_targets_min": 5412 + }, + { + "epoch": 5.3126809496236245, + "grad_norm": 0.42051650052749845, + "learning_rate": 6.682139415319688e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06711379438638687, + "step": 4590, + "valid_targets_mean": 7125.8, + "valid_targets_min": 5080 + }, + { + "epoch": 5.318471337579618, + "grad_norm": 0.4652762288890063, + "learning_rate": 6.639134553066568e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07977567613124847, + "step": 4595, + "valid_targets_mean": 6123.5, + "valid_targets_min": 4521 + }, + { + "epoch": 5.324261725535611, + "grad_norm": 0.6454278429206071, + "learning_rate": 6.5962409657548875e-06, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12123078107833862, + "step": 4600, + "valid_targets_mean": 6020.6, + "valid_targets_min": 2493 + }, + { + "epoch": 5.330052113491604, + "grad_norm": 0.8629500695343556, + "learning_rate": 6.5534590106205865e-06, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08393067121505737, + "step": 4605, + "valid_targets_mean": 3953.5, + "valid_targets_min": 1177 + }, + { + "epoch": 5.335842501447597, + "grad_norm": 0.6295404053425543, + "learning_rate": 6.5107890439698965e-06, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13039299845695496, + "step": 4610, + "valid_targets_mean": 4950.8, + "valid_targets_min": 2799 + }, + { + "epoch": 5.34163288940359, + "grad_norm": 0.6278474637692992, + "learning_rate": 6.468231421176359e-06, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11538614332675934, + "step": 4615, + "valid_targets_mean": 5312.0, + "valid_targets_min": 2973 + }, + { + "epoch": 5.347423277359583, + "grad_norm": 0.6286400950974823, + "learning_rate": 6.425786496677855e-06, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09965649247169495, + "step": 4620, + "valid_targets_mean": 3775.5, + "valid_targets_min": 980 + }, + { + "epoch": 5.353213665315576, + "grad_norm": 0.5945486249462685, + "learning_rate": 6.3834546239736925e-06, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11746525764465332, + "step": 4625, + "valid_targets_mean": 5055.4, + "valid_targets_min": 1512 + }, + { + "epoch": 5.359004053271569, + "grad_norm": 0.656038142968123, + "learning_rate": 6.3412361556216065e-06, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.079439178109169, + "step": 4630, + "valid_targets_mean": 3600.8, + "valid_targets_min": 1848 + }, + { + "epoch": 5.364794441227562, + "grad_norm": 0.5666988911136864, + "learning_rate": 6.2991314432348714e-06, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07719736546278, + "step": 4635, + "valid_targets_mean": 3369.8, + "valid_targets_min": 1381 + }, + { + "epoch": 5.370584829183556, + "grad_norm": 0.5244894286826159, + "learning_rate": 6.257140837479352e-06, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11062712222337723, + "step": 4640, + "valid_targets_mean": 6283.2, + "valid_targets_min": 3600 + }, + { + "epoch": 5.3763752171395485, + "grad_norm": 0.6203824635854146, + "learning_rate": 6.2152646880705835e-06, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10925555974245071, + "step": 4645, + "valid_targets_mean": 5242.5, + "valid_targets_min": 2540 + }, + { + "epoch": 5.382165605095541, + "grad_norm": 0.5713876724097097, + "learning_rate": 6.173503343770864e-06, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09428058564662933, + "step": 4650, + "valid_targets_mean": 6246.5, + "valid_targets_min": 2157 + }, + { + "epoch": 5.387955993051534, + "grad_norm": 0.6140290374368306, + "learning_rate": 6.131857152386329e-06, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10542860627174377, + "step": 4655, + "valid_targets_mean": 4556.5, + "valid_targets_min": 1720 + }, + { + "epoch": 5.393746381007528, + "grad_norm": 0.6065323078628322, + "learning_rate": 6.0903264607640955e-06, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1147131621837616, + "step": 4660, + "valid_targets_mean": 5116.5, + "valid_targets_min": 1982 + }, + { + "epoch": 5.399536768963521, + "grad_norm": 0.5810831150496794, + "learning_rate": 6.048911614789325e-06, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10943673551082611, + "step": 4665, + "valid_targets_mean": 4464.5, + "valid_targets_min": 2718 + }, + { + "epoch": 5.405327156919514, + "grad_norm": 0.6448830357345737, + "learning_rate": 6.007612959382392e-06, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1034761518239975, + "step": 4670, + "valid_targets_mean": 3418.5, + "valid_targets_min": 1451 + }, + { + "epoch": 5.4111175448755064, + "grad_norm": 0.6646340852402701, + "learning_rate": 5.966430838495969e-06, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10305717587471008, + "step": 4675, + "valid_targets_mean": 3990.4, + "valid_targets_min": 1861 + }, + { + "epoch": 5.416907932831499, + "grad_norm": 0.6787610049432943, + "learning_rate": 5.925365595112189e-06, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11291109025478363, + "step": 4680, + "valid_targets_mean": 4065.4, + "valid_targets_min": 2100 + }, + { + "epoch": 5.422698320787493, + "grad_norm": 0.6468199469370208, + "learning_rate": 5.8844175712397826e-06, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11317706108093262, + "step": 4685, + "valid_targets_mean": 4132.4, + "valid_targets_min": 3234 + }, + { + "epoch": 5.428488708743486, + "grad_norm": 0.7000979053549637, + "learning_rate": 5.843587107911209e-06, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12522569298744202, + "step": 4690, + "valid_targets_mean": 4246.2, + "valid_targets_min": 2650 + }, + { + "epoch": 5.434279096699479, + "grad_norm": 0.6580517542861027, + "learning_rate": 5.802874545179848e-06, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0871807187795639, + "step": 4695, + "valid_targets_mean": 4096.5, + "valid_targets_min": 1719 + }, + { + "epoch": 5.440069484655472, + "grad_norm": 0.6577612345149414, + "learning_rate": 5.76228022211716e-06, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10219301283359528, + "step": 4700, + "valid_targets_mean": 3933.1, + "valid_targets_min": 1020 + }, + { + "epoch": 5.445859872611465, + "grad_norm": 0.659954032330438, + "learning_rate": 5.721804476809836e-06, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10725036263465881, + "step": 4705, + "valid_targets_mean": 3551.1, + "valid_targets_min": 2130 + }, + { + "epoch": 5.451650260567458, + "grad_norm": 0.6312457586493002, + "learning_rate": 5.681447646357012e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07873150706291199, + "step": 4710, + "valid_targets_mean": 3535.0, + "valid_targets_min": 865 + }, + { + "epoch": 5.457440648523451, + "grad_norm": 0.7722552413609229, + "learning_rate": 5.641210066867455e-06, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1350419670343399, + "step": 4715, + "valid_targets_mean": 3716.5, + "valid_targets_min": 3144 + }, + { + "epoch": 5.463231036479444, + "grad_norm": 0.7467110819892256, + "learning_rate": 5.601092073456757e-06, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09063935279846191, + "step": 4720, + "valid_targets_mean": 3262.2, + "valid_targets_min": 2050 + }, + { + "epoch": 5.469021424435438, + "grad_norm": 0.6632300609186226, + "learning_rate": 5.561094000244534e-06, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10219931602478027, + "step": 4725, + "valid_targets_mean": 3296.5, + "valid_targets_min": 1570 + }, + { + "epoch": 5.47481181239143, + "grad_norm": 0.6338761495132271, + "learning_rate": 5.52121618035167e-06, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10076398402452469, + "step": 4730, + "valid_targets_mean": 4806.4, + "valid_targets_min": 1795 + }, + { + "epoch": 5.480602200347423, + "grad_norm": 0.58715133275694, + "learning_rate": 5.481458945897524e-06, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09306555986404419, + "step": 4735, + "valid_targets_mean": 4342.4, + "valid_targets_min": 1934 + }, + { + "epoch": 5.486392588303416, + "grad_norm": 0.678016304337756, + "learning_rate": 5.44182262799718e-06, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1021447628736496, + "step": 4740, + "valid_targets_mean": 3675.4, + "valid_targets_min": 2557 + }, + { + "epoch": 5.492182976259409, + "grad_norm": 0.6811772712489489, + "learning_rate": 5.402307556758648e-06, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11167696118354797, + "step": 4745, + "valid_targets_mean": 4550.4, + "valid_targets_min": 2663 + }, + { + "epoch": 5.497973364215403, + "grad_norm": 0.6371050216182214, + "learning_rate": 5.362914061280173e-06, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08882875740528107, + "step": 4750, + "valid_targets_mean": 4604.0, + "valid_targets_min": 2016 + }, + { + "epoch": 5.503763752171396, + "grad_norm": 0.7119686877592675, + "learning_rate": 5.323642469647457e-06, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08872997760772705, + "step": 4755, + "valid_targets_mean": 2976.8, + "valid_targets_min": 1367 + }, + { + "epoch": 5.509554140127388, + "grad_norm": 0.6917916761818202, + "learning_rate": 5.284493108930935e-06, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09722135961055756, + "step": 4760, + "valid_targets_mean": 3542.5, + "valid_targets_min": 2026 + }, + { + "epoch": 5.515344528083381, + "grad_norm": 0.6799912825803575, + "learning_rate": 5.245466305183042e-06, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08388345688581467, + "step": 4765, + "valid_targets_mean": 3247.2, + "valid_targets_min": 2033 + }, + { + "epoch": 5.521134916039375, + "grad_norm": 0.8200711667381476, + "learning_rate": 5.2065623834355205e-06, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09685410559177399, + "step": 4770, + "valid_targets_mean": 2201.6, + "valid_targets_min": 1283 + }, + { + "epoch": 5.526925303995368, + "grad_norm": 0.773411446394524, + "learning_rate": 5.167781667696692e-06, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10723498463630676, + "step": 4775, + "valid_targets_mean": 2824.8, + "valid_targets_min": 1636 + }, + { + "epoch": 5.532715691951361, + "grad_norm": 0.7105868546737427, + "learning_rate": 5.129124480948775e-06, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10746259987354279, + "step": 4780, + "valid_targets_mean": 3844.4, + "valid_targets_min": 1629 + }, + { + "epoch": 5.5385060799073536, + "grad_norm": 0.7119922314320859, + "learning_rate": 5.090591145145169e-06, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09051588177680969, + "step": 4785, + "valid_targets_mean": 3048.6, + "valid_targets_min": 2080 + }, + { + "epoch": 5.544296467863347, + "grad_norm": 0.7116798621330098, + "learning_rate": 5.0521819812078046e-06, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09174008667469025, + "step": 4790, + "valid_targets_mean": 3111.8, + "valid_targets_min": 1153 + }, + { + "epoch": 5.55008685581934, + "grad_norm": 0.6706055056945278, + "learning_rate": 5.013897309024462e-06, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10536851733922958, + "step": 4795, + "valid_targets_mean": 4069.0, + "valid_targets_min": 1858 + }, + { + "epoch": 5.555877243775333, + "grad_norm": 0.6242996097491735, + "learning_rate": 4.975737447446076e-06, + "loss": 0.2014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08580464869737625, + "step": 4800, + "valid_targets_mean": 3415.2, + "valid_targets_min": 1639 + }, + { + "epoch": 5.561667631731326, + "grad_norm": 0.7034187772300746, + "learning_rate": 4.937702714284132e-06, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07671986520290375, + "step": 4805, + "valid_targets_mean": 3267.6, + "valid_targets_min": 1394 + }, + { + "epoch": 5.567458019687319, + "grad_norm": 0.6517323852977768, + "learning_rate": 4.899793426307982e-06, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10675493627786636, + "step": 4810, + "valid_targets_mean": 4775.9, + "valid_targets_min": 1844 + }, + { + "epoch": 5.573248407643312, + "grad_norm": 0.7018212376921986, + "learning_rate": 4.862009899242219e-06, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08690626919269562, + "step": 4815, + "valid_targets_mean": 3317.1, + "valid_targets_min": 1305 + }, + { + "epoch": 5.579038795599305, + "grad_norm": 0.69114193929696, + "learning_rate": 4.824352447764049e-06, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1058984249830246, + "step": 4820, + "valid_targets_mean": 4290.8, + "valid_targets_min": 2185 + }, + { + "epoch": 5.584829183555298, + "grad_norm": 0.7040668680153952, + "learning_rate": 4.786821385500653e-06, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1182374507188797, + "step": 4825, + "valid_targets_mean": 4024.9, + "valid_targets_min": 2349 + }, + { + "epoch": 5.590619571511291, + "grad_norm": 0.6527180269033891, + "learning_rate": 4.749417025026615e-06, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08381571620702744, + "step": 4830, + "valid_targets_mean": 3453.0, + "valid_targets_min": 1607 + }, + { + "epoch": 5.596409959467284, + "grad_norm": 0.7116519649417133, + "learning_rate": 4.7121396778612606e-06, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10524024069309235, + "step": 4835, + "valid_targets_mean": 3389.8, + "valid_targets_min": 1507 + }, + { + "epoch": 5.6022003474232775, + "grad_norm": 0.65985189052359, + "learning_rate": 4.67498965446612e-06, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08167605847120285, + "step": 4840, + "valid_targets_mean": 3773.9, + "valid_targets_min": 1317 + }, + { + "epoch": 5.60799073537927, + "grad_norm": 0.6503048626054666, + "learning_rate": 4.637967264242309e-06, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09242589771747589, + "step": 4845, + "valid_targets_mean": 3881.0, + "valid_targets_min": 3355 + }, + { + "epoch": 5.613781123335263, + "grad_norm": 0.6594418766777953, + "learning_rate": 4.601072815527961e-06, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.099497951567173, + "step": 4850, + "valid_targets_mean": 3951.9, + "valid_targets_min": 2999 + }, + { + "epoch": 5.619571511291257, + "grad_norm": 0.6860624635409228, + "learning_rate": 4.564306615595657e-06, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08808408677577972, + "step": 4855, + "valid_targets_mean": 4086.5, + "valid_targets_min": 1739 + }, + { + "epoch": 5.62536189924725, + "grad_norm": 0.6535359815334453, + "learning_rate": 4.527668970649875e-06, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08575412631034851, + "step": 4860, + "valid_targets_mean": 3298.0, + "valid_targets_min": 1140 + }, + { + "epoch": 5.631152287203243, + "grad_norm": 0.6448606842611453, + "learning_rate": 4.491160185824419e-06, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0939522311091423, + "step": 4865, + "valid_targets_mean": 4012.1, + "valid_targets_min": 2779 + }, + { + "epoch": 5.6369426751592355, + "grad_norm": 0.6641592030702549, + "learning_rate": 4.454780565179894e-06, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0876445323228836, + "step": 4870, + "valid_targets_mean": 3636.9, + "valid_targets_min": 1183 + }, + { + "epoch": 5.642733063115228, + "grad_norm": 0.6922167993474277, + "learning_rate": 4.418530411701183e-06, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11794327944517136, + "step": 4875, + "valid_targets_mean": 4269.0, + "valid_targets_min": 3041 + }, + { + "epoch": 5.648523451071222, + "grad_norm": 0.6710735679921558, + "learning_rate": 4.382410027294901e-06, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0949588268995285, + "step": 4880, + "valid_targets_mean": 3350.1, + "valid_targets_min": 1726 + }, + { + "epoch": 5.654313839027215, + "grad_norm": 0.5809522620469011, + "learning_rate": 4.346419712786898e-06, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08865835517644882, + "step": 4885, + "valid_targets_mean": 4747.4, + "valid_targets_min": 1743 + }, + { + "epoch": 5.660104226983208, + "grad_norm": 0.6383510256052882, + "learning_rate": 4.3105597679197415e-06, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09135545790195465, + "step": 4890, + "valid_targets_mean": 3702.8, + "valid_targets_min": 1234 + }, + { + "epoch": 5.665894614939201, + "grad_norm": 0.6754634424721645, + "learning_rate": 4.274830491350226e-06, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10354183614253998, + "step": 4895, + "valid_targets_mean": 4079.0, + "valid_targets_min": 2303 + }, + { + "epoch": 5.6716850028951935, + "grad_norm": 0.7095886049137189, + "learning_rate": 4.239232180646895e-06, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09504668414592743, + "step": 4900, + "valid_targets_mean": 3830.8, + "valid_targets_min": 1651 + }, + { + "epoch": 5.677475390851187, + "grad_norm": 0.6960742471213217, + "learning_rate": 4.2037651322875385e-06, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09748727083206177, + "step": 4905, + "valid_targets_mean": 2876.2, + "valid_targets_min": 1374 + }, + { + "epoch": 5.68326577880718, + "grad_norm": 0.7341851009140241, + "learning_rate": 4.168429641656735e-06, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09375683963298798, + "step": 4910, + "valid_targets_mean": 3467.5, + "valid_targets_min": 1625 + }, + { + "epoch": 5.689056166763173, + "grad_norm": 0.6683721274985364, + "learning_rate": 4.133226003043414e-06, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09662937372922897, + "step": 4915, + "valid_targets_mean": 4089.5, + "valid_targets_min": 997 + }, + { + "epoch": 5.694846554719167, + "grad_norm": 0.6481627696138452, + "learning_rate": 4.098154509638372e-06, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0757637545466423, + "step": 4920, + "valid_targets_mean": 3618.6, + "valid_targets_min": 1179 + }, + { + "epoch": 5.7006369426751595, + "grad_norm": 0.7512215381813111, + "learning_rate": 4.06321545353185e-06, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09299871325492859, + "step": 4925, + "valid_targets_mean": 3079.1, + "valid_targets_min": 1444 + }, + { + "epoch": 5.706427330631152, + "grad_norm": 0.6808994582603312, + "learning_rate": 4.02840912571109e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07229427993297577, + "step": 4930, + "valid_targets_mean": 3486.8, + "valid_targets_min": 1504 + }, + { + "epoch": 5.712217718587145, + "grad_norm": 0.6452902106629557, + "learning_rate": 3.993735816057924e-06, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08849136531352997, + "step": 4935, + "valid_targets_mean": 3998.4, + "valid_targets_min": 2302 + }, + { + "epoch": 5.718008106543138, + "grad_norm": 0.7621091499298214, + "learning_rate": 3.959195813346342e-06, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11983069777488708, + "step": 4940, + "valid_targets_mean": 4282.5, + "valid_targets_min": 1818 + }, + { + "epoch": 5.723798494499132, + "grad_norm": 0.7004998749919713, + "learning_rate": 3.924789405240108e-06, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10091252624988556, + "step": 4945, + "valid_targets_mean": 4221.9, + "valid_targets_min": 2633 + }, + { + "epoch": 5.729588882455125, + "grad_norm": 0.7052587606385302, + "learning_rate": 3.89051687829034e-06, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09614616632461548, + "step": 4950, + "valid_targets_mean": 3522.4, + "valid_targets_min": 2481 + }, + { + "epoch": 5.7353792704111175, + "grad_norm": 0.7015131244757292, + "learning_rate": 3.856378517933152e-06, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10637455433607101, + "step": 4955, + "valid_targets_mean": 4058.8, + "valid_targets_min": 2013 + }, + { + "epoch": 5.74116965836711, + "grad_norm": 0.7335595197985654, + "learning_rate": 3.822374608487256e-06, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10042333602905273, + "step": 4960, + "valid_targets_mean": 3723.5, + "valid_targets_min": 1744 + }, + { + "epoch": 5.746960046323103, + "grad_norm": 0.6924352020727813, + "learning_rate": 3.788505433151599e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10071320831775665, + "step": 4965, + "valid_targets_mean": 4180.9, + "valid_targets_min": 1914 + }, + { + "epoch": 5.752750434279097, + "grad_norm": 0.7005163039353128, + "learning_rate": 3.7547712740030085e-06, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08322249352931976, + "step": 4970, + "valid_targets_mean": 3668.6, + "valid_targets_min": 3065 + }, + { + "epoch": 5.75854082223509, + "grad_norm": 0.6471601098849894, + "learning_rate": 3.7211724119938297e-06, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10704165697097778, + "step": 4975, + "valid_targets_mean": 4473.2, + "valid_targets_min": 2894 + }, + { + "epoch": 5.764331210191083, + "grad_norm": 0.732266303878351, + "learning_rate": 3.68770912694961e-06, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09558729827404022, + "step": 4980, + "valid_targets_mean": 3395.6, + "valid_targets_min": 2359 + }, + { + "epoch": 5.7701215981470755, + "grad_norm": 0.6915369674556678, + "learning_rate": 3.654381697566749e-06, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08329612761735916, + "step": 4985, + "valid_targets_mean": 3114.5, + "valid_targets_min": 1463 + }, + { + "epoch": 5.775911986103069, + "grad_norm": 0.7347954782252792, + "learning_rate": 3.6211904014101752e-06, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10167864710092545, + "step": 4990, + "valid_targets_mean": 3588.1, + "valid_targets_min": 1426 + }, + { + "epoch": 5.781702374059062, + "grad_norm": 0.693654917555153, + "learning_rate": 3.5881355149110533e-06, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09456135332584381, + "step": 4995, + "valid_targets_mean": 3405.5, + "valid_targets_min": 1661 + }, + { + "epoch": 5.787492762015055, + "grad_norm": 0.6653941668059742, + "learning_rate": 3.555217313364465e-06, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10260509699583054, + "step": 5000, + "valid_targets_mean": 4457.8, + "valid_targets_min": 1529 + }, + { + "epoch": 5.793283149971048, + "grad_norm": 0.7980848239300522, + "learning_rate": 3.522436070927129e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11328806728124619, + "step": 5005, + "valid_targets_mean": 3272.9, + "valid_targets_min": 1450 + }, + { + "epoch": 5.7990735379270415, + "grad_norm": 0.7329244914999775, + "learning_rate": 3.4897920606150938e-06, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08768180012702942, + "step": 5010, + "valid_targets_mean": 3103.2, + "valid_targets_min": 1625 + }, + { + "epoch": 5.804863925883034, + "grad_norm": 0.6504116880373527, + "learning_rate": 3.457285554301497e-06, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08451417088508606, + "step": 5015, + "valid_targets_mean": 3952.2, + "valid_targets_min": 1174 + }, + { + "epoch": 5.810654313839027, + "grad_norm": 0.7413148612675606, + "learning_rate": 3.424916822714277e-06, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09813237190246582, + "step": 5020, + "valid_targets_mean": 4039.2, + "valid_targets_min": 2882 + }, + { + "epoch": 5.81644470179502, + "grad_norm": 0.6950280348634762, + "learning_rate": 3.3926861354339358e-06, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11226019263267517, + "step": 5025, + "valid_targets_mean": 4594.0, + "valid_targets_min": 1946 + }, + { + "epoch": 5.822235089751013, + "grad_norm": 0.6906324382540923, + "learning_rate": 3.3605937608912666e-06, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09345898032188416, + "step": 5030, + "valid_targets_mean": 4082.6, + "valid_targets_min": 2000 + }, + { + "epoch": 5.828025477707007, + "grad_norm": 0.7142374362982976, + "learning_rate": 3.328639966365148e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10212359577417374, + "step": 5035, + "valid_targets_mean": 4596.1, + "valid_targets_min": 2012 + }, + { + "epoch": 5.8338158656629995, + "grad_norm": 0.751407440395027, + "learning_rate": 3.2968250179803097e-06, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09792028367519379, + "step": 5040, + "valid_targets_mean": 3094.4, + "valid_targets_min": 719 + }, + { + "epoch": 5.839606253618992, + "grad_norm": 0.7400161140259937, + "learning_rate": 3.265149180705096e-06, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09038306772708893, + "step": 5045, + "valid_targets_mean": 4496.2, + "valid_targets_min": 3238 + }, + { + "epoch": 5.845396641574985, + "grad_norm": 0.7274957247678856, + "learning_rate": 3.233612718349286e-06, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08311730623245239, + "step": 5050, + "valid_targets_mean": 3816.8, + "valid_targets_min": 1372 + }, + { + "epoch": 5.851187029530979, + "grad_norm": 0.7813908012380086, + "learning_rate": 3.202215893561884e-06, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10442608594894409, + "step": 5055, + "valid_targets_mean": 3306.1, + "valid_targets_min": 2370 + }, + { + "epoch": 5.856977417486972, + "grad_norm": 0.6230264597997414, + "learning_rate": 3.170958967828941e-06, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08658678084611893, + "step": 5060, + "valid_targets_mean": 2958.1, + "valid_targets_min": 1576 + }, + { + "epoch": 5.862767805442965, + "grad_norm": 0.7117685524553994, + "learning_rate": 3.13984220147135e-06, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10298854857683182, + "step": 5065, + "valid_targets_mean": 3758.5, + "valid_targets_min": 1426 + }, + { + "epoch": 5.8685581933989575, + "grad_norm": 0.7330685461651261, + "learning_rate": 3.108865853642715e-06, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08899010717868805, + "step": 5070, + "valid_targets_mean": 3117.0, + "valid_targets_min": 1728 + }, + { + "epoch": 5.874348581354951, + "grad_norm": 0.766333671014308, + "learning_rate": 3.078030182327174e-06, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1059906929731369, + "step": 5075, + "valid_targets_mean": 3264.2, + "valid_targets_min": 1362 + }, + { + "epoch": 5.880138969310944, + "grad_norm": 0.6316516046760946, + "learning_rate": 3.0473354443372337e-06, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09446334838867188, + "step": 5080, + "valid_targets_mean": 3836.0, + "valid_targets_min": 852 + }, + { + "epoch": 5.885929357266937, + "grad_norm": 0.8278546280629885, + "learning_rate": 3.016781895311671e-06, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12963464856147766, + "step": 5085, + "valid_targets_mean": 2764.1, + "valid_targets_min": 1195 + }, + { + "epoch": 5.89171974522293, + "grad_norm": 0.6676519894325554, + "learning_rate": 2.986369789713368e-06, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.104510597884655, + "step": 5090, + "valid_targets_mean": 4457.5, + "valid_targets_min": 1564 + }, + { + "epoch": 5.897510133178923, + "grad_norm": 0.7287009986225004, + "learning_rate": 2.956099380827213e-06, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09116792678833008, + "step": 5095, + "valid_targets_mean": 4371.2, + "valid_targets_min": 1975 + }, + { + "epoch": 5.903300521134916, + "grad_norm": 0.7364861255216155, + "learning_rate": 2.9259709207579835e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09816905856132507, + "step": 5100, + "valid_targets_mean": 3965.8, + "valid_targets_min": 2426 + }, + { + "epoch": 5.909090909090909, + "grad_norm": 0.5730343476125825, + "learning_rate": 2.895984660428235e-06, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09755247831344604, + "step": 5105, + "valid_targets_mean": 6384.6, + "valid_targets_min": 2678 + }, + { + "epoch": 5.914881297046902, + "grad_norm": 0.6370394301993322, + "learning_rate": 2.8661408495762423e-06, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0927160382270813, + "step": 5110, + "valid_targets_mean": 3543.2, + "valid_targets_min": 2826 + }, + { + "epoch": 5.920671685002895, + "grad_norm": 0.6731370557606815, + "learning_rate": 2.836439736753882e-06, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07997622340917587, + "step": 5115, + "valid_targets_mean": 3155.8, + "valid_targets_min": 993 + }, + { + "epoch": 5.926462072958889, + "grad_norm": 0.6599899199310425, + "learning_rate": 2.8068815693245976e-06, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10311765968799591, + "step": 5120, + "valid_targets_mean": 4166.5, + "valid_targets_min": 2724 + }, + { + "epoch": 5.9322524609148815, + "grad_norm": 0.7027299860479045, + "learning_rate": 2.777466593461313e-06, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09111426025629044, + "step": 5125, + "valid_targets_mean": 3079.5, + "valid_targets_min": 1360 + }, + { + "epoch": 5.938042848870874, + "grad_norm": 0.6522068975482174, + "learning_rate": 2.7481950541443957e-06, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09260795265436172, + "step": 5130, + "valid_targets_mean": 4114.9, + "valid_targets_min": 2414 + }, + { + "epoch": 5.943833236826867, + "grad_norm": 0.7388334717337071, + "learning_rate": 2.7190671951596124e-06, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11505642533302307, + "step": 5135, + "valid_targets_mean": 4524.8, + "valid_targets_min": 2425 + }, + { + "epoch": 5.949623624782861, + "grad_norm": 0.8018275911931491, + "learning_rate": 2.6900832590961033e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09761355817317963, + "step": 5140, + "valid_targets_mean": 3035.2, + "valid_targets_min": 1720 + }, + { + "epoch": 5.955414012738854, + "grad_norm": 0.7394931936417326, + "learning_rate": 2.6612434873443425e-06, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09674566984176636, + "step": 5145, + "valid_targets_mean": 3260.4, + "valid_targets_min": 1621 + }, + { + "epoch": 5.961204400694847, + "grad_norm": 0.7282135820448314, + "learning_rate": 2.632548120094167e-06, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09710457921028137, + "step": 5150, + "valid_targets_mean": 3214.6, + "valid_targets_min": 1858 + }, + { + "epoch": 5.966994788650839, + "grad_norm": 0.7297952067172664, + "learning_rate": 2.6039973963327245e-06, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09201544523239136, + "step": 5155, + "valid_targets_mean": 3254.2, + "valid_targets_min": 1254 + }, + { + "epoch": 5.972785176606832, + "grad_norm": 0.6918489073968943, + "learning_rate": 2.5755915538425315e-06, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10040745884180069, + "step": 5160, + "valid_targets_mean": 2994.4, + "valid_targets_min": 1039 + }, + { + "epoch": 5.978575564562826, + "grad_norm": 0.6954095340384442, + "learning_rate": 2.5473308291994657e-06, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12377315014600754, + "step": 5165, + "valid_targets_mean": 4625.4, + "valid_targets_min": 2526 + }, + { + "epoch": 5.984365952518819, + "grad_norm": 0.6709528127757199, + "learning_rate": 2.5192154577707983e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10651902109384537, + "step": 5170, + "valid_targets_mean": 3571.8, + "valid_targets_min": 2409 + }, + { + "epoch": 5.990156340474812, + "grad_norm": 0.6603236277053569, + "learning_rate": 2.4912456737132428e-06, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08352956175804138, + "step": 5175, + "valid_targets_mean": 3691.5, + "valid_targets_min": 2918 + }, + { + "epoch": 5.995946728430805, + "grad_norm": 0.682482921886044, + "learning_rate": 2.463421709971001e-06, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0977027416229248, + "step": 5180, + "valid_targets_mean": 4063.1, + "valid_targets_min": 2179 + }, + { + "epoch": 6.001158077591199, + "grad_norm": 0.6295607542604371, + "learning_rate": 2.4357437982738107e-06, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0902089774608612, + "step": 5185, + "valid_targets_mean": 8215.2, + "valid_targets_min": 6163 + }, + { + "epoch": 6.006948465547191, + "grad_norm": 0.6689994894967519, + "learning_rate": 2.408212169135031e-06, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09361856430768967, + "step": 5190, + "valid_targets_mean": 8594.4, + "valid_targets_min": 6006 + }, + { + "epoch": 6.012738853503185, + "grad_norm": 0.645912983139216, + "learning_rate": 2.3808270518497258e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07413487136363983, + "step": 5195, + "valid_targets_mean": 3578.4, + "valid_targets_min": 289 + }, + { + "epoch": 6.018529241459178, + "grad_norm": 0.5057537450377909, + "learning_rate": 2.3535886744927373e-06, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08606113493442535, + "step": 5200, + "valid_targets_mean": 7019.9, + "valid_targets_min": 4985 + }, + { + "epoch": 6.024319629415171, + "grad_norm": 0.6413968314620463, + "learning_rate": 2.3264972639168005e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08514834195375443, + "step": 5205, + "valid_targets_mean": 6956.6, + "valid_targets_min": 1457 + }, + { + "epoch": 6.030110017371164, + "grad_norm": 0.43875479622506836, + "learning_rate": 2.2995530457506463e-06, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07854004204273224, + "step": 5210, + "valid_targets_mean": 7361.6, + "valid_targets_min": 4961 + }, + { + "epoch": 6.0359004053271565, + "grad_norm": 0.5345119958764212, + "learning_rate": 2.272756244397132e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10500028729438782, + "step": 5215, + "valid_targets_mean": 7058.5, + "valid_targets_min": 5985 + }, + { + "epoch": 6.04169079328315, + "grad_norm": 0.44174736329293024, + "learning_rate": 2.2461070830313457e-06, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09389522671699524, + "step": 5220, + "valid_targets_mean": 7664.2, + "valid_targets_min": 5169 + }, + { + "epoch": 6.047481181239143, + "grad_norm": 0.4695724589920154, + "learning_rate": 2.219605783598795e-06, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08354824036359787, + "step": 5225, + "valid_targets_mean": 7111.8, + "valid_targets_min": 5348 + }, + { + "epoch": 6.053271569195136, + "grad_norm": 0.5107050185858826, + "learning_rate": 2.193252566813504e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0697193592786789, + "step": 5230, + "valid_targets_mean": 4238.9, + "valid_targets_min": 152 + }, + { + "epoch": 6.059061957151129, + "grad_norm": 0.4670961151455812, + "learning_rate": 2.1670476521562202e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08529293537139893, + "step": 5235, + "valid_targets_mean": 7199.4, + "valid_targets_min": 4258 + }, + { + "epoch": 6.064852345107123, + "grad_norm": 0.5173124026480072, + "learning_rate": 2.1409912578725624e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0789559930562973, + "step": 5240, + "valid_targets_mean": 7243.4, + "valid_targets_min": 5831 + }, + { + "epoch": 6.070642733063115, + "grad_norm": 0.4614128874803108, + "learning_rate": 2.115083600971206e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0823356881737709, + "step": 5245, + "valid_targets_mean": 6950.6, + "valid_targets_min": 3862 + }, + { + "epoch": 6.076433121019108, + "grad_norm": 0.5532390387354605, + "learning_rate": 2.089324897222087e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08550730347633362, + "step": 5250, + "valid_targets_mean": 6991.9, + "valid_targets_min": 5696 + }, + { + "epoch": 6.082223508975101, + "grad_norm": 0.44910028703040206, + "learning_rate": 2.0637153611545835e-06, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0870165005326271, + "step": 5255, + "valid_targets_mean": 6963.5, + "valid_targets_min": 4947 + }, + { + "epoch": 6.088013896931094, + "grad_norm": 0.4295536500825577, + "learning_rate": 2.0382552060557525e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07566852867603302, + "step": 5260, + "valid_targets_mean": 6436.9, + "valid_targets_min": 4988 + }, + { + "epoch": 6.093804284887088, + "grad_norm": 0.43376542972026405, + "learning_rate": 2.012944643968542e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07929432392120361, + "step": 5265, + "valid_targets_mean": 7518.0, + "valid_targets_min": 5476 + }, + { + "epoch": 6.0995946728430805, + "grad_norm": 0.39792800727485456, + "learning_rate": 1.9877838856900177e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07714049518108368, + "step": 5270, + "valid_targets_mean": 7422.6, + "valid_targets_min": 5226 + }, + { + "epoch": 6.105385060799073, + "grad_norm": 0.4879186264265735, + "learning_rate": 1.962773140769627e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10403196513652802, + "step": 5275, + "valid_targets_mean": 7080.0, + "valid_targets_min": 3753 + }, + { + "epoch": 6.111175448755066, + "grad_norm": 0.5775563776522831, + "learning_rate": 1.9379126175074338e-06, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09144257009029388, + "step": 5280, + "valid_targets_mean": 6064.6, + "valid_targets_min": 5161 + }, + { + "epoch": 6.11696583671106, + "grad_norm": 0.44043326178371406, + "learning_rate": 1.9132025229524045e-06, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07593751698732376, + "step": 5285, + "valid_targets_mean": 7388.1, + "valid_targets_min": 5795 + }, + { + "epoch": 6.122756224667053, + "grad_norm": 0.43469795273500295, + "learning_rate": 1.8886430629006524e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07400475442409515, + "step": 5290, + "valid_targets_mean": 7696.0, + "valid_targets_min": 4658 + }, + { + "epoch": 6.128546612623046, + "grad_norm": 0.45002670649485194, + "learning_rate": 1.8642344418937597e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08555644005537033, + "step": 5295, + "valid_targets_mean": 7799.1, + "valid_targets_min": 5508 + }, + { + "epoch": 6.1343370005790385, + "grad_norm": 0.4678842267897686, + "learning_rate": 1.83997686321705e-06, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08118224889039993, + "step": 5300, + "valid_targets_mean": 7341.6, + "valid_targets_min": 6454 + }, + { + "epoch": 6.140127388535032, + "grad_norm": 0.4525273642891633, + "learning_rate": 1.8158705288979073e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07110708206892014, + "step": 5305, + "valid_targets_mean": 6818.0, + "valid_targets_min": 4392 + }, + { + "epoch": 6.145917776491025, + "grad_norm": 0.4298419040599829, + "learning_rate": 1.7919156397040782e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06920880824327469, + "step": 5310, + "valid_targets_mean": 7857.6, + "valid_targets_min": 5394 + }, + { + "epoch": 6.151708164447018, + "grad_norm": 0.39182340853570996, + "learning_rate": 1.768112395142021e-06, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06716249883174896, + "step": 5315, + "valid_targets_mean": 8314.1, + "valid_targets_min": 4837 + }, + { + "epoch": 6.157498552403011, + "grad_norm": 0.3775500815963387, + "learning_rate": 1.7444609934552326e-06, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06070984899997711, + "step": 5320, + "valid_targets_mean": 7368.2, + "valid_targets_min": 6027 + }, + { + "epoch": 6.163288940359004, + "grad_norm": 0.4483137843277887, + "learning_rate": 1.7209616316225863e-06, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07993191480636597, + "step": 5325, + "valid_targets_mean": 6435.0, + "valid_targets_min": 4238 + }, + { + "epoch": 6.169079328314997, + "grad_norm": 1.4199087279438916, + "learning_rate": 1.6976145053567172e-06, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04002588987350464, + "step": 5330, + "valid_targets_mean": 247.8, + "valid_targets_min": 137 + }, + { + "epoch": 6.17486971627099, + "grad_norm": 0.45861727354707577, + "learning_rate": 1.6744198091023745e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07827004045248032, + "step": 5335, + "valid_targets_mean": 6807.0, + "valid_targets_min": 5165 + }, + { + "epoch": 6.180660104226983, + "grad_norm": 0.5518915576183956, + "learning_rate": 1.6513777360348005e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09153981506824493, + "step": 5340, + "valid_targets_mean": 6989.9, + "valid_targets_min": 5104 + }, + { + "epoch": 6.186450492182976, + "grad_norm": 0.48283612871721937, + "learning_rate": 1.6284884780581345e-06, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09221628308296204, + "step": 5345, + "valid_targets_mean": 6692.2, + "valid_targets_min": 5068 + }, + { + "epoch": 6.19224088013897, + "grad_norm": 0.47390055367830936, + "learning_rate": 1.6057522258037982e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09916602820158005, + "step": 5350, + "valid_targets_mean": 7419.6, + "valid_targets_min": 5578 + }, + { + "epoch": 6.1980312680949625, + "grad_norm": 0.43306006927650187, + "learning_rate": 1.5831691686289263e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08104896545410156, + "step": 5355, + "valid_targets_mean": 7536.5, + "valid_targets_min": 5075 + }, + { + "epoch": 6.203821656050955, + "grad_norm": 0.4064542024075362, + "learning_rate": 1.5607394946147713e-06, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07590160518884659, + "step": 5360, + "valid_targets_mean": 7221.5, + "valid_targets_min": 5841 + }, + { + "epoch": 6.209612044006948, + "grad_norm": 0.40126692667989083, + "learning_rate": 1.5384633905651524e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07777977734804153, + "step": 5365, + "valid_targets_mean": 7287.5, + "valid_targets_min": 5151 + }, + { + "epoch": 6.215402431962941, + "grad_norm": 0.46137222872969186, + "learning_rate": 1.5163410420048897e-06, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07954985648393631, + "step": 5370, + "valid_targets_mean": 6738.4, + "valid_targets_min": 4042 + }, + { + "epoch": 6.221192819918935, + "grad_norm": 0.4409684532102382, + "learning_rate": 1.4943726331782672e-06, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0905829519033432, + "step": 5375, + "valid_targets_mean": 6331.0, + "valid_targets_min": 5496 + }, + { + "epoch": 6.226983207874928, + "grad_norm": 0.4553367903867736, + "learning_rate": 1.4725583470474835e-06, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0791117399930954, + "step": 5380, + "valid_targets_mean": 6271.1, + "valid_targets_min": 4695 + }, + { + "epoch": 6.2327735958309205, + "grad_norm": 0.48629704290044623, + "learning_rate": 1.4508983652911558e-06, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08450581133365631, + "step": 5385, + "valid_targets_mean": 5790.8, + "valid_targets_min": 5021 + }, + { + "epoch": 6.238563983786913, + "grad_norm": 0.530857423794274, + "learning_rate": 1.4293928683027681e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0609971359372139, + "step": 5390, + "valid_targets_mean": 3386.8, + "valid_targets_min": 2279 + }, + { + "epoch": 6.244354371742907, + "grad_norm": 0.43948413393234587, + "learning_rate": 1.408042035189201e-06, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07962469756603241, + "step": 5395, + "valid_targets_mean": 6240.5, + "valid_targets_min": 5093 + }, + { + "epoch": 6.2501447596989, + "grad_norm": 0.4649474702410835, + "learning_rate": 1.3868460437692298e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08120064437389374, + "step": 5400, + "valid_targets_mean": 6329.5, + "valid_targets_min": 4589 + }, + { + "epoch": 6.255935147654893, + "grad_norm": 0.44995107882935337, + "learning_rate": 1.3658050705720417e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09052136540412903, + "step": 5405, + "valid_targets_mean": 7589.2, + "valid_targets_min": 4963 + }, + { + "epoch": 6.261725535610886, + "grad_norm": 0.441803859460047, + "learning_rate": 1.3449192908357622e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06894394010305405, + "step": 5410, + "valid_targets_mean": 5577.9, + "valid_targets_min": 4458 + }, + { + "epoch": 6.267515923566879, + "grad_norm": 0.4786237029391992, + "learning_rate": 1.3241888785060076e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07872607558965683, + "step": 5415, + "valid_targets_mean": 5432.5, + "valid_targets_min": 4433 + }, + { + "epoch": 6.273306311522872, + "grad_norm": 0.46161869669093514, + "learning_rate": 1.3036140062344215e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08250191807746887, + "step": 5420, + "valid_targets_mean": 6361.8, + "valid_targets_min": 5201 + }, + { + "epoch": 6.279096699478865, + "grad_norm": 0.48405430754178314, + "learning_rate": 1.2831948453772514e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07658439129590988, + "step": 5425, + "valid_targets_mean": 5824.4, + "valid_targets_min": 5129 + }, + { + "epoch": 6.284887087434858, + "grad_norm": 0.8564807909910727, + "learning_rate": 1.2629315659939056e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07406964898109436, + "step": 5430, + "valid_targets_mean": 1637.6, + "valid_targets_min": 137 + }, + { + "epoch": 6.290677475390851, + "grad_norm": 0.4606197263140385, + "learning_rate": 1.2428243368455472e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07980707287788391, + "step": 5435, + "valid_targets_mean": 6474.6, + "valid_targets_min": 4751 + }, + { + "epoch": 6.2964678633468445, + "grad_norm": 0.4596236677668687, + "learning_rate": 1.2228733253936942e-06, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08767077326774597, + "step": 5440, + "valid_targets_mean": 7186.2, + "valid_targets_min": 5358 + }, + { + "epoch": 6.302258251302837, + "grad_norm": 0.4448274823182163, + "learning_rate": 1.2030786977988095e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08086740225553513, + "step": 5445, + "valid_targets_mean": 7126.8, + "valid_targets_min": 4078 + }, + { + "epoch": 6.30804863925883, + "grad_norm": 0.4338103596308614, + "learning_rate": 1.1834406189189297e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07633671909570694, + "step": 5450, + "valid_targets_mean": 7022.1, + "valid_targets_min": 4940 + }, + { + "epoch": 6.313839027214823, + "grad_norm": 0.47759713239926815, + "learning_rate": 1.1639592523082865e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08213866502046585, + "step": 5455, + "valid_targets_mean": 5900.6, + "valid_targets_min": 4779 + }, + { + "epoch": 6.319629415170817, + "grad_norm": 0.45111566647701096, + "learning_rate": 1.1446347602159459e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08208338916301727, + "step": 5460, + "valid_targets_mean": 6290.1, + "valid_targets_min": 4620 + }, + { + "epoch": 6.32541980312681, + "grad_norm": 0.6232495525960332, + "learning_rate": 1.1254673035844578e-06, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10899636149406433, + "step": 5465, + "valid_targets_mean": 5614.5, + "valid_targets_min": 3446 + }, + { + "epoch": 6.3312101910828025, + "grad_norm": 0.6948828112562729, + "learning_rate": 1.1064570420485076e-06, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11155211180448532, + "step": 5470, + "valid_targets_mean": 4630.0, + "valid_targets_min": 1005 + }, + { + "epoch": 6.337000579038795, + "grad_norm": 0.5438326454147281, + "learning_rate": 1.0876041339335974e-06, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07827803492546082, + "step": 5475, + "valid_targets_mean": 4669.6, + "valid_targets_min": 2054 + }, + { + "epoch": 6.342790966994789, + "grad_norm": 0.6097818884247744, + "learning_rate": 1.068908736254728e-06, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08751150965690613, + "step": 5480, + "valid_targets_mean": 3745.0, + "valid_targets_min": 1561 + }, + { + "epoch": 6.348581354950782, + "grad_norm": 0.601122129840999, + "learning_rate": 1.0503710047150783e-06, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10058531910181046, + "step": 5485, + "valid_targets_mean": 4284.0, + "valid_targets_min": 1184 + }, + { + "epoch": 6.354371742906775, + "grad_norm": 0.5355235715879467, + "learning_rate": 1.031991093704725e-06, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1052846908569336, + "step": 5490, + "valid_targets_mean": 6332.8, + "valid_targets_min": 1651 + }, + { + "epoch": 6.360162130862768, + "grad_norm": 0.5356930805063133, + "learning_rate": 1.0137691562993447e-06, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10232044756412506, + "step": 5495, + "valid_targets_mean": 6446.5, + "valid_targets_min": 2231 + }, + { + "epoch": 6.3659525188187605, + "grad_norm": 0.6234673466277105, + "learning_rate": 9.957053442589436e-07, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07552198320627213, + "step": 5500, + "valid_targets_mean": 3367.2, + "valid_targets_min": 467 + }, + { + "epoch": 6.371742906774754, + "grad_norm": 0.6370748608739049, + "learning_rate": 9.777998080265893e-07, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13317011296749115, + "step": 5505, + "valid_targets_mean": 7194.1, + "valid_targets_min": 1748 + }, + { + "epoch": 6.377533294730747, + "grad_norm": 0.6441957718117712, + "learning_rate": 9.600526967271696e-07, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10556194186210632, + "step": 5510, + "valid_targets_mean": 4078.5, + "valid_targets_min": 1697 + }, + { + "epoch": 6.38332368268674, + "grad_norm": 0.6850420900180021, + "learning_rate": 9.42464158166132e-07, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06766747683286667, + "step": 5515, + "valid_targets_mean": 2089.5, + "valid_targets_min": 1223 + }, + { + "epoch": 6.389114070642733, + "grad_norm": 0.5835185801774779, + "learning_rate": 9.250343388282701e-07, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09306862950325012, + "step": 5520, + "valid_targets_mean": 5404.1, + "valid_targets_min": 1829 + }, + { + "epoch": 6.3949044585987265, + "grad_norm": 0.5675457701788594, + "learning_rate": 9.077633838764987e-07, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09642751514911652, + "step": 5525, + "valid_targets_mean": 4583.1, + "valid_targets_min": 1596 + }, + { + "epoch": 6.400694846554719, + "grad_norm": 0.6202832830411663, + "learning_rate": 8.906514371506358e-07, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10527827590703964, + "step": 5530, + "valid_targets_mean": 4666.8, + "valid_targets_min": 2041 + }, + { + "epoch": 6.406485234510712, + "grad_norm": 0.6361782574350211, + "learning_rate": 8.736986411662185e-07, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1075083315372467, + "step": 5535, + "valid_targets_mean": 4272.1, + "valid_targets_min": 1538 + }, + { + "epoch": 6.412275622466705, + "grad_norm": 0.6947641383759208, + "learning_rate": 8.569051371133019e-07, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08818969130516052, + "step": 5540, + "valid_targets_mean": 2928.0, + "valid_targets_min": 1739 + }, + { + "epoch": 6.418066010422699, + "grad_norm": 0.6786378448146124, + "learning_rate": 8.402710648552958e-07, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09193718433380127, + "step": 5545, + "valid_targets_mean": 3254.5, + "valid_targets_min": 935 + }, + { + "epoch": 6.423856398378692, + "grad_norm": 0.67462549653229, + "learning_rate": 8.237965629277967e-07, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1017710268497467, + "step": 5550, + "valid_targets_mean": 3535.2, + "valid_targets_min": 2704 + }, + { + "epoch": 6.4296467863346844, + "grad_norm": 0.7166248619877422, + "learning_rate": 8.074817685374215e-07, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09396373480558395, + "step": 5555, + "valid_targets_mean": 2987.6, + "valid_targets_min": 1173 + }, + { + "epoch": 6.435437174290677, + "grad_norm": 0.6380381399423711, + "learning_rate": 7.913268175606869e-07, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0939275324344635, + "step": 5560, + "valid_targets_mean": 3700.6, + "valid_targets_min": 1670 + }, + { + "epoch": 6.44122756224667, + "grad_norm": 0.6878641353617551, + "learning_rate": 7.753318445428571e-07, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08689321577548981, + "step": 5565, + "valid_targets_mean": 3481.6, + "valid_targets_min": 1329 + }, + { + "epoch": 6.447017950202664, + "grad_norm": 0.675521361398003, + "learning_rate": 7.594969826968435e-07, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10225418955087662, + "step": 5570, + "valid_targets_mean": 3589.8, + "valid_targets_min": 2279 + }, + { + "epoch": 6.452808338158657, + "grad_norm": 0.5928946044089235, + "learning_rate": 7.43822363902067e-07, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09262774884700775, + "step": 5575, + "valid_targets_mean": 5060.4, + "valid_targets_min": 2222 + }, + { + "epoch": 6.45859872611465, + "grad_norm": 0.6509966309795416, + "learning_rate": 7.283081187033914e-07, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0970219075679779, + "step": 5580, + "valid_targets_mean": 3898.6, + "valid_targets_min": 1849 + }, + { + "epoch": 6.464389114070642, + "grad_norm": 0.6272129163596066, + "learning_rate": 7.129543763100155e-07, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08521132171154022, + "step": 5585, + "valid_targets_mean": 3698.9, + "valid_targets_min": 1951 + }, + { + "epoch": 6.470179502026636, + "grad_norm": 0.6416525439771972, + "learning_rate": 6.977612645944031e-07, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10487203299999237, + "step": 5590, + "valid_targets_mean": 4506.8, + "valid_targets_min": 1765 + }, + { + "epoch": 6.475969889982629, + "grad_norm": 0.6986484122660109, + "learning_rate": 6.827289100912171e-07, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09219237416982651, + "step": 5595, + "valid_targets_mean": 2991.6, + "valid_targets_min": 2087 + }, + { + "epoch": 6.481760277938622, + "grad_norm": 0.6558910975438434, + "learning_rate": 6.678574379962666e-07, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08751760423183441, + "step": 5600, + "valid_targets_mean": 3994.1, + "valid_targets_min": 1603 + }, + { + "epoch": 6.487550665894615, + "grad_norm": 0.646081026641617, + "learning_rate": 6.531469721654682e-07, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10096277296543121, + "step": 5605, + "valid_targets_mean": 4391.9, + "valid_targets_min": 2092 + }, + { + "epoch": 6.4933410538506084, + "grad_norm": 0.6444455652055436, + "learning_rate": 6.385976351138024e-07, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09697127342224121, + "step": 5610, + "valid_targets_mean": 3808.9, + "valid_targets_min": 1410 + }, + { + "epoch": 6.499131441806601, + "grad_norm": 0.6125684704713852, + "learning_rate": 6.242095480143051e-07, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09984603524208069, + "step": 5615, + "valid_targets_mean": 5078.5, + "valid_targets_min": 2110 + }, + { + "epoch": 6.504921829762594, + "grad_norm": 0.6864467278013624, + "learning_rate": 6.099828306970579e-07, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09133944660425186, + "step": 5620, + "valid_targets_mean": 3447.4, + "valid_targets_min": 2420 + }, + { + "epoch": 6.510712217718587, + "grad_norm": 0.6369169118299097, + "learning_rate": 5.959176016481816e-07, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08592967689037323, + "step": 5625, + "valid_targets_mean": 4371.1, + "valid_targets_min": 1876 + }, + { + "epoch": 6.51650260567458, + "grad_norm": 0.6711562549633678, + "learning_rate": 5.82013978008853e-07, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10728715360164642, + "step": 5630, + "valid_targets_mean": 3858.2, + "valid_targets_min": 2605 + }, + { + "epoch": 6.522292993630574, + "grad_norm": 0.7147202515551577, + "learning_rate": 5.682720755743342e-07, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08998127281665802, + "step": 5635, + "valid_targets_mean": 3383.9, + "valid_targets_min": 1177 + }, + { + "epoch": 6.528083381586566, + "grad_norm": 0.7321320344926183, + "learning_rate": 5.546920087930097e-07, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09720846265554428, + "step": 5640, + "valid_targets_mean": 3455.5, + "valid_targets_min": 1714 + }, + { + "epoch": 6.533873769542559, + "grad_norm": 0.6270437609775642, + "learning_rate": 5.412738907654147e-07, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10687099397182465, + "step": 5645, + "valid_targets_mean": 5547.5, + "valid_targets_min": 2033 + }, + { + "epoch": 6.539664157498552, + "grad_norm": 1.3739530314415442, + "learning_rate": 5.280178332433173e-07, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0967320129275322, + "step": 5650, + "valid_targets_mean": 3485.4, + "valid_targets_min": 1924 + }, + { + "epoch": 6.545454545454545, + "grad_norm": 0.7015965787431185, + "learning_rate": 5.149239466287736e-07, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08975489437580109, + "step": 5655, + "valid_targets_mean": 3080.2, + "valid_targets_min": 2098 + }, + { + "epoch": 6.551244933410539, + "grad_norm": 0.7320039568179678, + "learning_rate": 5.01992339973214e-07, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10681869834661484, + "step": 5660, + "valid_targets_mean": 3393.4, + "valid_targets_min": 1363 + }, + { + "epoch": 6.5570353213665316, + "grad_norm": 0.6658995441329661, + "learning_rate": 4.892231209765341e-07, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10230939090251923, + "step": 5665, + "valid_targets_mean": 4115.4, + "valid_targets_min": 1363 + }, + { + "epoch": 6.562825709322524, + "grad_norm": 0.6514909570196014, + "learning_rate": 4.7661639598618467e-07, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1146596372127533, + "step": 5670, + "valid_targets_mean": 4313.8, + "valid_targets_min": 2620 + }, + { + "epoch": 6.568616097278518, + "grad_norm": 0.7078908593122644, + "learning_rate": 4.641722699963147e-07, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0996047705411911, + "step": 5675, + "valid_targets_mean": 3425.1, + "valid_targets_min": 2316 + }, + { + "epoch": 6.574406485234511, + "grad_norm": 1.0795733147780582, + "learning_rate": 4.5189084664686525e-07, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09666218608617783, + "step": 5680, + "valid_targets_mean": 3744.0, + "valid_targets_min": 2022 + }, + { + "epoch": 6.580196873190504, + "grad_norm": 0.6618911031171433, + "learning_rate": 4.3977222822273013e-07, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0638245940208435, + "step": 5685, + "valid_targets_mean": 2445.6, + "valid_targets_min": 1294 + }, + { + "epoch": 6.585987261146497, + "grad_norm": 0.7074342788867226, + "learning_rate": 4.278165156528924e-07, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09848304837942123, + "step": 5690, + "valid_targets_mean": 3556.1, + "valid_targets_min": 1700 + }, + { + "epoch": 6.5917776491024895, + "grad_norm": 0.6959478862926624, + "learning_rate": 4.1602380850958247e-07, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08021435141563416, + "step": 5695, + "valid_targets_mean": 2714.0, + "valid_targets_min": 1586 + }, + { + "epoch": 6.597568037058483, + "grad_norm": 0.734440751148546, + "learning_rate": 4.043942050074612e-07, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09646306186914444, + "step": 5700, + "valid_targets_mean": 3266.4, + "valid_targets_min": 1358 + }, + { + "epoch": 6.603358425014476, + "grad_norm": 0.7175842729357217, + "learning_rate": 3.929278020027849e-07, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0929139107465744, + "step": 5705, + "valid_targets_mean": 3782.9, + "valid_targets_min": 2290 + }, + { + "epoch": 6.609148812970469, + "grad_norm": 0.7143653808913596, + "learning_rate": 3.816246949926106e-07, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1292996108531952, + "step": 5710, + "valid_targets_mean": 4904.8, + "valid_targets_min": 2189 + }, + { + "epoch": 6.614939200926462, + "grad_norm": 0.7001644859705362, + "learning_rate": 3.704849781139941e-07, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11987542361021042, + "step": 5715, + "valid_targets_mean": 3959.5, + "valid_targets_min": 2121 + }, + { + "epoch": 6.620729588882455, + "grad_norm": 0.8700422206648432, + "learning_rate": 3.595087441432132e-07, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09515506774187088, + "step": 5720, + "valid_targets_mean": 3689.4, + "valid_targets_min": 2011 + }, + { + "epoch": 6.626519976838448, + "grad_norm": 0.6443645086879763, + "learning_rate": 3.4869608449499047e-07, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10317341238260269, + "step": 5725, + "valid_targets_mean": 5082.1, + "valid_targets_min": 2674 + }, + { + "epoch": 6.632310364794441, + "grad_norm": 0.6934878123779786, + "learning_rate": 3.380470892217314e-07, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10448549687862396, + "step": 5730, + "valid_targets_mean": 3660.9, + "valid_targets_min": 2266 + }, + { + "epoch": 6.638100752750434, + "grad_norm": 0.6101536669987144, + "learning_rate": 3.275618470127739e-07, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08371835947036743, + "step": 5735, + "valid_targets_mean": 4285.5, + "valid_targets_min": 904 + }, + { + "epoch": 6.643891140706427, + "grad_norm": 0.6765235879602266, + "learning_rate": 3.172404451936517e-07, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11076696217060089, + "step": 5740, + "valid_targets_mean": 3999.4, + "valid_targets_min": 1194 + }, + { + "epoch": 6.649681528662421, + "grad_norm": 0.9097217500520384, + "learning_rate": 3.0708296972536746e-07, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08506985753774643, + "step": 5745, + "valid_targets_mean": 4011.8, + "valid_targets_min": 1658 + }, + { + "epoch": 6.6554719166184135, + "grad_norm": 0.7605155604289844, + "learning_rate": 2.970895052036782e-07, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0780138224363327, + "step": 5750, + "valid_targets_mean": 2728.8, + "valid_targets_min": 1423 + }, + { + "epoch": 6.661262304574406, + "grad_norm": 0.6563600298248051, + "learning_rate": 2.8726013485838033e-07, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10089323669672012, + "step": 5755, + "valid_targets_mean": 4504.4, + "valid_targets_min": 2320 + }, + { + "epoch": 6.667052692530399, + "grad_norm": 0.6679398668890479, + "learning_rate": 2.7759494055263237e-07, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07146115601062775, + "step": 5760, + "valid_targets_mean": 3272.5, + "valid_targets_min": 1811 + }, + { + "epoch": 6.672843080486393, + "grad_norm": 0.6216942456939679, + "learning_rate": 2.6809400278225983e-07, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09198446571826935, + "step": 5765, + "valid_targets_mean": 4425.9, + "valid_targets_min": 3026 + }, + { + "epoch": 6.678633468442386, + "grad_norm": 0.677098777582908, + "learning_rate": 2.5875740067509814e-07, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08467467874288559, + "step": 5770, + "valid_targets_mean": 2719.1, + "valid_targets_min": 644 + }, + { + "epoch": 6.684423856398379, + "grad_norm": 0.7057087481731708, + "learning_rate": 2.495852119903153e-07, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08754457533359528, + "step": 5775, + "valid_targets_mean": 3369.0, + "valid_targets_min": 1397 + }, + { + "epoch": 6.6902142443543715, + "grad_norm": 0.6663376534097115, + "learning_rate": 2.405775131177857e-07, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09364516288042068, + "step": 5780, + "valid_targets_mean": 4122.6, + "valid_targets_min": 1415 + }, + { + "epoch": 6.696004632310364, + "grad_norm": 0.7057387566685178, + "learning_rate": 2.317343790774329e-07, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09693238139152527, + "step": 5785, + "valid_targets_mean": 4136.6, + "valid_targets_min": 621 + }, + { + "epoch": 6.701795020266358, + "grad_norm": 0.6662639138513196, + "learning_rate": 2.2305588351862362e-07, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07178279757499695, + "step": 5790, + "valid_targets_mean": 3065.8, + "valid_targets_min": 1290 + }, + { + "epoch": 6.707585408222351, + "grad_norm": 0.812162044772317, + "learning_rate": 2.145420987195368e-07, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07713647931814194, + "step": 5795, + "valid_targets_mean": 2426.9, + "valid_targets_min": 857 + }, + { + "epoch": 6.713375796178344, + "grad_norm": 0.8006317550584056, + "learning_rate": 2.061930955865754e-07, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09362758696079254, + "step": 5800, + "valid_targets_mean": 2949.1, + "valid_targets_min": 720 + }, + { + "epoch": 6.719166184134337, + "grad_norm": 0.7153682567907866, + "learning_rate": 1.9800894365377355e-07, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10825473070144653, + "step": 5805, + "valid_targets_mean": 3579.1, + "valid_targets_min": 1801 + }, + { + "epoch": 6.72495657209033, + "grad_norm": 0.6870837669469271, + "learning_rate": 1.89989711082208e-07, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09038402140140533, + "step": 5810, + "valid_targets_mean": 3532.6, + "valid_targets_min": 1510 + }, + { + "epoch": 6.730746960046323, + "grad_norm": 0.7410978699692073, + "learning_rate": 1.8213546465944755e-07, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08246541023254395, + "step": 5815, + "valid_targets_mean": 2807.9, + "valid_targets_min": 987 + }, + { + "epoch": 6.736537348002316, + "grad_norm": 0.6712440479339922, + "learning_rate": 1.744462697989735e-07, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08393748104572296, + "step": 5820, + "valid_targets_mean": 3937.4, + "valid_targets_min": 996 + }, + { + "epoch": 6.742327735958309, + "grad_norm": 0.6680483028378558, + "learning_rate": 1.6692219053965786e-07, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07929886877536774, + "step": 5825, + "valid_targets_mean": 3954.5, + "valid_targets_min": 2001 + }, + { + "epoch": 6.748118123914303, + "grad_norm": 0.5841660991413182, + "learning_rate": 1.5956328954521928e-07, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07254894077777863, + "step": 5830, + "valid_targets_mean": 5059.6, + "valid_targets_min": 1767 + }, + { + "epoch": 6.7539085118702955, + "grad_norm": 0.5919303114874458, + "learning_rate": 1.5236962810369238e-07, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07777701318264008, + "step": 5835, + "valid_targets_mean": 4714.0, + "valid_targets_min": 1511 + }, + { + "epoch": 6.759698899826288, + "grad_norm": 0.7289685243699716, + "learning_rate": 1.4534126612693934e-07, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08982829749584198, + "step": 5840, + "valid_targets_mean": 2861.0, + "valid_targets_min": 974 + }, + { + "epoch": 6.765489287782281, + "grad_norm": 0.6489662405100123, + "learning_rate": 1.3847826215013016e-07, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08057229220867157, + "step": 5845, + "valid_targets_mean": 3792.6, + "valid_targets_min": 2339 + }, + { + "epoch": 6.771279675738274, + "grad_norm": 0.6709627866530247, + "learning_rate": 1.3178067333126766e-07, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0797836035490036, + "step": 5850, + "valid_targets_mean": 3333.1, + "valid_targets_min": 1249 + }, + { + "epoch": 6.777070063694268, + "grad_norm": 0.67605341835653, + "learning_rate": 1.2524855545070548e-07, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09331607818603516, + "step": 5855, + "valid_targets_mean": 3493.1, + "valid_targets_min": 1223 + }, + { + "epoch": 6.782860451650261, + "grad_norm": 0.755248924578272, + "learning_rate": 1.188819629106841e-07, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09816304594278336, + "step": 5860, + "valid_targets_mean": 3285.8, + "valid_targets_min": 1318 + }, + { + "epoch": 6.7886508396062535, + "grad_norm": 0.7842651252551903, + "learning_rate": 1.1268094873488233e-07, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08679020404815674, + "step": 5865, + "valid_targets_mean": 2070.5, + "valid_targets_min": 1002 + }, + { + "epoch": 6.794441227562246, + "grad_norm": 0.6986357267206671, + "learning_rate": 1.0664556456797315e-07, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08979077637195587, + "step": 5870, + "valid_targets_mean": 3328.1, + "valid_targets_min": 845 + }, + { + "epoch": 6.80023161551824, + "grad_norm": 0.65199188833835, + "learning_rate": 1.0077586067518186e-07, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1040390133857727, + "step": 5875, + "valid_targets_mean": 4657.4, + "valid_targets_min": 2747 + }, + { + "epoch": 6.806022003474233, + "grad_norm": 0.7163232433100037, + "learning_rate": 9.507188594189309e-08, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1029033362865448, + "step": 5880, + "valid_targets_mean": 4166.4, + "valid_targets_min": 1241 + }, + { + "epoch": 6.811812391430226, + "grad_norm": 0.7444738988192119, + "learning_rate": 8.953368787322226e-08, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10908888280391693, + "step": 5885, + "valid_targets_mean": 3567.1, + "valid_targets_min": 992 + }, + { + "epoch": 6.817602779386219, + "grad_norm": 0.6625460085007966, + "learning_rate": 8.416131259362025e-08, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09041760861873627, + "step": 5890, + "valid_targets_mean": 3343.5, + "valid_targets_min": 1211 + }, + { + "epoch": 6.823393167342212, + "grad_norm": 0.7809754980914095, + "learning_rate": 7.895480484650941e-08, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.103350929915905, + "step": 5895, + "valid_targets_mean": 3081.6, + "valid_targets_min": 1889 + }, + { + "epoch": 6.829183555298205, + "grad_norm": 0.7507747805313876, + "learning_rate": 7.391420799388372e-08, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08229044079780579, + "step": 5900, + "valid_targets_mean": 3233.9, + "valid_targets_min": 1624 + }, + { + "epoch": 6.834973943254198, + "grad_norm": 0.6766796999819782, + "learning_rate": 6.903956401597134e-08, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10043347626924515, + "step": 5905, + "valid_targets_mean": 3817.4, + "valid_targets_min": 2763 + }, + { + "epoch": 6.840764331210191, + "grad_norm": 0.7225663791930595, + "learning_rate": 6.433091351086829e-08, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09248000383377075, + "step": 5910, + "valid_targets_mean": 3037.0, + "valid_targets_min": 1928 + }, + { + "epoch": 6.846554719166184, + "grad_norm": 0.6427345921650035, + "learning_rate": 5.97882956942053e-08, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0936291515827179, + "step": 5915, + "valid_targets_mean": 4733.2, + "valid_targets_min": 3147 + }, + { + "epoch": 6.8523451071221775, + "grad_norm": 0.652378535416869, + "learning_rate": 5.541174839883256e-08, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08591797947883606, + "step": 5920, + "valid_targets_mean": 3348.4, + "valid_targets_min": 1844 + }, + { + "epoch": 6.85813549507817, + "grad_norm": 0.7084978224727022, + "learning_rate": 5.12013080744822e-08, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09425882995128632, + "step": 5925, + "valid_targets_mean": 4360.2, + "valid_targets_min": 2330 + }, + { + "epoch": 6.863925883034163, + "grad_norm": 0.7953115356519015, + "learning_rate": 4.7157009787486276e-08, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10636697709560394, + "step": 5930, + "valid_targets_mean": 3574.1, + "valid_targets_min": 900 + }, + { + "epoch": 6.869716270990156, + "grad_norm": 0.6314888078450656, + "learning_rate": 4.3278887220470355e-08, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10081933438777924, + "step": 5935, + "valid_targets_mean": 5092.8, + "valid_targets_min": 1713 + }, + { + "epoch": 6.87550665894615, + "grad_norm": 0.7190545537104681, + "learning_rate": 3.9566972672080427e-08, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10799182206392288, + "step": 5940, + "valid_targets_mean": 4057.1, + "valid_targets_min": 981 + }, + { + "epoch": 6.881297046902143, + "grad_norm": 0.6706026201273239, + "learning_rate": 3.60212970567142e-08, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11171825975179672, + "step": 5945, + "valid_targets_mean": 4127.9, + "valid_targets_min": 2098 + }, + { + "epoch": 6.8870874348581355, + "grad_norm": 0.7490324494092551, + "learning_rate": 3.2641889904256875e-08, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08943615853786469, + "step": 5950, + "valid_targets_mean": 3101.1, + "valid_targets_min": 1107 + }, + { + "epoch": 6.892877822814128, + "grad_norm": 0.6772633245975402, + "learning_rate": 2.942877935983912e-08, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09332503378391266, + "step": 5955, + "valid_targets_mean": 3718.0, + "valid_targets_min": 2273 + }, + { + "epoch": 6.898668210770122, + "grad_norm": 0.70928581545397, + "learning_rate": 2.6381992183610593e-08, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08233386278152466, + "step": 5960, + "valid_targets_mean": 3316.0, + "valid_targets_min": 1920 + }, + { + "epoch": 6.904458598726115, + "grad_norm": 1.4468656575646994, + "learning_rate": 2.3501553750497897e-08, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09502074867486954, + "step": 5965, + "valid_targets_mean": 4710.8, + "valid_targets_min": 1687 + }, + { + "epoch": 6.910248986682108, + "grad_norm": 0.7764638334594126, + "learning_rate": 2.0787488050015846e-08, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08005072921514511, + "step": 5970, + "valid_targets_mean": 3075.8, + "valid_targets_min": 1375 + }, + { + "epoch": 6.916039374638101, + "grad_norm": 0.7063658629510593, + "learning_rate": 1.8239817686049877e-08, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08526700735092163, + "step": 5975, + "valid_targets_mean": 3776.4, + "valid_targets_min": 906 + }, + { + "epoch": 6.921829762594093, + "grad_norm": 0.7338832979330177, + "learning_rate": 1.585856387667839e-08, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09271879494190216, + "step": 5980, + "valid_targets_mean": 3393.9, + "valid_targets_min": 1874 + }, + { + "epoch": 6.927620150550087, + "grad_norm": 0.6875599163112425, + "learning_rate": 1.3643746453990692e-08, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09227672219276428, + "step": 5985, + "valid_targets_mean": 3503.4, + "valid_targets_min": 1877 + }, + { + "epoch": 6.93341053850608, + "grad_norm": 0.6880943124412784, + "learning_rate": 1.1595383863922672e-08, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09128659963607788, + "step": 5990, + "valid_targets_mean": 3744.4, + "valid_targets_min": 1697 + }, + { + "epoch": 6.939200926462073, + "grad_norm": 0.7890699686552586, + "learning_rate": 9.713493166105814e-09, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10081125050783157, + "step": 5995, + "valid_targets_mean": 3015.1, + "valid_targets_min": 1679 + }, + { + "epoch": 6.944991314418066, + "grad_norm": 0.7467220344761751, + "learning_rate": 7.998090033720652e-09, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10079054534435272, + "step": 6000, + "valid_targets_mean": 3292.0, + "valid_targets_min": 1695 + }, + { + "epoch": 6.950781702374059, + "grad_norm": 0.7100018820654072, + "learning_rate": 6.4491887533701945e-09, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08281861245632172, + "step": 6005, + "valid_targets_mean": 3148.8, + "valid_targets_min": 2114 + }, + { + "epoch": 6.956572090330052, + "grad_norm": 0.7037414374916773, + "learning_rate": 5.066802224962253e-09, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07482528686523438, + "step": 6010, + "valid_targets_mean": 2852.0, + "valid_targets_min": 993 + }, + { + "epoch": 6.962362478286045, + "grad_norm": 0.7469951851474523, + "learning_rate": 3.850941961593968e-09, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08902069926261902, + "step": 6015, + "valid_targets_mean": 3059.8, + "valid_targets_min": 1275 + }, + { + "epoch": 6.968152866242038, + "grad_norm": 0.6463801645047006, + "learning_rate": 2.801618089465219e-09, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10158857703208923, + "step": 6020, + "valid_targets_mean": 4479.0, + "valid_targets_min": 2250 + }, + { + "epoch": 6.973943254198032, + "grad_norm": 0.7019589980202549, + "learning_rate": 1.918839347792023e-09, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11621302366256714, + "step": 6025, + "valid_targets_mean": 4952.2, + "valid_targets_min": 2892 + }, + { + "epoch": 6.979733642154025, + "grad_norm": 0.6735471715415253, + "learning_rate": 1.2026130887243803e-09, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0936989039182663, + "step": 6030, + "valid_targets_mean": 4026.4, + "valid_targets_min": 2935 + }, + { + "epoch": 6.985524030110017, + "grad_norm": 0.678477506458588, + "learning_rate": 6.529452773018641e-10, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08855093270540237, + "step": 6035, + "valid_targets_mean": 4085.2, + "valid_targets_min": 1817 + }, + { + "epoch": 6.99131441806601, + "grad_norm": 0.7191713008057828, + "learning_rate": 2.6984049138700785e-10, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10044793039560318, + "step": 6040, + "valid_targets_mean": 4050.4, + "valid_targets_min": 1727 + }, + { + "epoch": 6.997104806022003, + "grad_norm": 0.6412694764109674, + "learning_rate": 5.330192164310077e-11, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07490961253643036, + "step": 6045, + "valid_targets_mean": 3682.9, + "valid_targets_min": 1622 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15162084996700287, + "step": 6048, + "total_flos": 2.4439457607529267e+18, + "train_loss": 0.23659130134595135, + "train_runtime": 153851.6493, + "train_samples_per_second": 0.628, + "train_steps_per_second": 0.039, + "valid_targets_mean": 5459.5, + "valid_targets_min": 2092 + } + ], + "logging_steps": 5, + "max_steps": 6048, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.4439457607529267e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}