| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.996199941537562, |
| "eval_steps": 500, |
| "global_step": 639, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00467699503069278, |
| "grad_norm": 0.5310407876968384, |
| "learning_rate": 9.98435054773083e-06, |
| "loss": 15.7421, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00935399006138556, |
| "grad_norm": 0.6928378343582153, |
| "learning_rate": 9.96870109546166e-06, |
| "loss": 22.4837, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.014030985092078339, |
| "grad_norm": 0.5985817909240723, |
| "learning_rate": 9.953051643192489e-06, |
| "loss": 19.2438, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01870798012277112, |
| "grad_norm": 0.3373333215713501, |
| "learning_rate": 9.937402190923318e-06, |
| "loss": 17.8898, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0233849751534639, |
| "grad_norm": 0.41831186413764954, |
| "learning_rate": 9.921752738654147e-06, |
| "loss": 17.4461, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.028061970184156678, |
| "grad_norm": 0.3748932182788849, |
| "learning_rate": 9.906103286384977e-06, |
| "loss": 17.7855, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03273896521484946, |
| "grad_norm": 0.3773082494735718, |
| "learning_rate": 9.890453834115806e-06, |
| "loss": 16.9238, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03741596024554224, |
| "grad_norm": 0.3397878408432007, |
| "learning_rate": 9.874804381846637e-06, |
| "loss": 16.7176, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04209295527623502, |
| "grad_norm": 0.5136957168579102, |
| "learning_rate": 9.859154929577466e-06, |
| "loss": 15.4666, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0467699503069278, |
| "grad_norm": 0.3085887134075165, |
| "learning_rate": 9.843505477308296e-06, |
| "loss": 13.7508, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05144694533762058, |
| "grad_norm": 0.3942926228046417, |
| "learning_rate": 9.827856025039125e-06, |
| "loss": 17.3618, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.056123940368313356, |
| "grad_norm": 0.27711015939712524, |
| "learning_rate": 9.812206572769954e-06, |
| "loss": 16.2495, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06080093539900614, |
| "grad_norm": 0.27834147214889526, |
| "learning_rate": 9.796557120500783e-06, |
| "loss": 15.9543, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06547793042969892, |
| "grad_norm": 0.24677161872386932, |
| "learning_rate": 9.780907668231613e-06, |
| "loss": 15.6386, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0701549254603917, |
| "grad_norm": 0.36406269669532776, |
| "learning_rate": 9.765258215962442e-06, |
| "loss": 15.6255, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07483192049108447, |
| "grad_norm": 0.307948499917984, |
| "learning_rate": 9.749608763693271e-06, |
| "loss": 14.9189, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07950891552177726, |
| "grad_norm": 0.2980886697769165, |
| "learning_rate": 9.7339593114241e-06, |
| "loss": 15.0406, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.08418591055247004, |
| "grad_norm": 0.412708044052124, |
| "learning_rate": 9.71830985915493e-06, |
| "loss": 14.7356, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08886290558316282, |
| "grad_norm": 0.2903729975223541, |
| "learning_rate": 9.70266040688576e-06, |
| "loss": 14.8173, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0935399006138556, |
| "grad_norm": 0.2171318084001541, |
| "learning_rate": 9.687010954616589e-06, |
| "loss": 15.0565, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09821689564454837, |
| "grad_norm": 0.4166527986526489, |
| "learning_rate": 9.671361502347418e-06, |
| "loss": 14.1942, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.10289389067524116, |
| "grad_norm": 0.2564053237438202, |
| "learning_rate": 9.655712050078247e-06, |
| "loss": 14.3627, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.10757088570593394, |
| "grad_norm": 0.254341185092926, |
| "learning_rate": 9.640062597809078e-06, |
| "loss": 13.7315, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.11224788073662671, |
| "grad_norm": 0.24184982478618622, |
| "learning_rate": 9.624413145539908e-06, |
| "loss": 13.9785, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1169248757673195, |
| "grad_norm": 0.3118051588535309, |
| "learning_rate": 9.608763693270737e-06, |
| "loss": 15.1744, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.12160187079801228, |
| "grad_norm": 0.2545301020145416, |
| "learning_rate": 9.593114241001566e-06, |
| "loss": 15.9676, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.12627886582870507, |
| "grad_norm": 0.2265356183052063, |
| "learning_rate": 9.577464788732394e-06, |
| "loss": 14.8985, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.13095586085939784, |
| "grad_norm": 0.2141331285238266, |
| "learning_rate": 9.561815336463225e-06, |
| "loss": 13.6498, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1356328558900906, |
| "grad_norm": 0.27572301030158997, |
| "learning_rate": 9.546165884194054e-06, |
| "loss": 13.9124, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.1403098509207834, |
| "grad_norm": 0.1987282633781433, |
| "learning_rate": 9.530516431924883e-06, |
| "loss": 12.9095, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14498684595147618, |
| "grad_norm": 0.2444925159215927, |
| "learning_rate": 9.514866979655713e-06, |
| "loss": 13.365, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.14966384098216895, |
| "grad_norm": 0.4400818645954132, |
| "learning_rate": 9.499217527386542e-06, |
| "loss": 13.2832, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.15434083601286175, |
| "grad_norm": 0.2764039933681488, |
| "learning_rate": 9.483568075117371e-06, |
| "loss": 14.3228, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.15901783104355452, |
| "grad_norm": 0.21101799607276917, |
| "learning_rate": 9.4679186228482e-06, |
| "loss": 14.156, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.1636948260742473, |
| "grad_norm": 0.267008513212204, |
| "learning_rate": 9.45226917057903e-06, |
| "loss": 14.1084, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1683718211049401, |
| "grad_norm": 0.2759203016757965, |
| "learning_rate": 9.43661971830986e-06, |
| "loss": 13.5008, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.17304881613563286, |
| "grad_norm": 0.2793346643447876, |
| "learning_rate": 9.42097026604069e-06, |
| "loss": 14.2747, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.17772581116632563, |
| "grad_norm": 0.25120246410369873, |
| "learning_rate": 9.40532081377152e-06, |
| "loss": 13.4347, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1824028061970184, |
| "grad_norm": 0.1591794341802597, |
| "learning_rate": 9.389671361502349e-06, |
| "loss": 12.619, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1870798012277112, |
| "grad_norm": 0.2054363638162613, |
| "learning_rate": 9.374021909233178e-06, |
| "loss": 11.8876, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19175679625840397, |
| "grad_norm": 0.23818843066692352, |
| "learning_rate": 9.358372456964007e-06, |
| "loss": 13.4683, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.19643379128909674, |
| "grad_norm": 0.32269319891929626, |
| "learning_rate": 9.342723004694837e-06, |
| "loss": 12.5351, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.20111078631978954, |
| "grad_norm": 0.29193466901779175, |
| "learning_rate": 9.327073552425666e-06, |
| "loss": 11.917, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2057877813504823, |
| "grad_norm": 0.20844891667366028, |
| "learning_rate": 9.311424100156495e-06, |
| "loss": 12.0984, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.21046477638117508, |
| "grad_norm": 0.26920032501220703, |
| "learning_rate": 9.295774647887325e-06, |
| "loss": 14.1542, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.21514177141186788, |
| "grad_norm": 0.20874425768852234, |
| "learning_rate": 9.280125195618154e-06, |
| "loss": 13.9397, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.21981876644256065, |
| "grad_norm": 0.28703245520591736, |
| "learning_rate": 9.264475743348983e-06, |
| "loss": 12.7704, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.22449576147325342, |
| "grad_norm": 0.23402653634548187, |
| "learning_rate": 9.248826291079813e-06, |
| "loss": 12.8326, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.22917275650394622, |
| "grad_norm": 0.28065574169158936, |
| "learning_rate": 9.233176838810642e-06, |
| "loss": 11.4735, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.233849751534639, |
| "grad_norm": 0.21932877600193024, |
| "learning_rate": 9.217527386541471e-06, |
| "loss": 12.2491, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23852674656533177, |
| "grad_norm": 0.24466539919376373, |
| "learning_rate": 9.2018779342723e-06, |
| "loss": 12.3501, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.24320374159602456, |
| "grad_norm": 0.17424331605434418, |
| "learning_rate": 9.186228482003131e-06, |
| "loss": 12.6445, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.24788073662671734, |
| "grad_norm": 0.2298133671283722, |
| "learning_rate": 9.17057902973396e-06, |
| "loss": 12.5759, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.25255773165741013, |
| "grad_norm": 0.30562305450439453, |
| "learning_rate": 9.15492957746479e-06, |
| "loss": 13.4988, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "grad_norm": 0.21225547790527344, |
| "learning_rate": 9.13928012519562e-06, |
| "loss": 13.3909, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2619117217187957, |
| "grad_norm": 0.3120986223220825, |
| "learning_rate": 9.123630672926449e-06, |
| "loss": 13.8276, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2665887167494885, |
| "grad_norm": 0.18036110699176788, |
| "learning_rate": 9.107981220657278e-06, |
| "loss": 13.7724, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2712657117801812, |
| "grad_norm": 0.22987115383148193, |
| "learning_rate": 9.092331768388107e-06, |
| "loss": 12.2669, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.275942706810874, |
| "grad_norm": 0.23878921568393707, |
| "learning_rate": 9.076682316118937e-06, |
| "loss": 12.7097, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2806197018415668, |
| "grad_norm": 0.20319631695747375, |
| "learning_rate": 9.061032863849766e-06, |
| "loss": 12.2795, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.28529669687225956, |
| "grad_norm": 0.18609336018562317, |
| "learning_rate": 9.045383411580595e-06, |
| "loss": 11.2712, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.28997369190295236, |
| "grad_norm": 0.21320512890815735, |
| "learning_rate": 9.029733959311425e-06, |
| "loss": 11.6637, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.29465068693364516, |
| "grad_norm": 0.23330001533031464, |
| "learning_rate": 9.014084507042254e-06, |
| "loss": 12.9509, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2993276819643379, |
| "grad_norm": 0.21313583850860596, |
| "learning_rate": 8.998435054773083e-06, |
| "loss": 13.8547, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3040046769950307, |
| "grad_norm": 0.20739194750785828, |
| "learning_rate": 8.982785602503912e-06, |
| "loss": 12.735, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3086816720257235, |
| "grad_norm": 0.2453576922416687, |
| "learning_rate": 8.967136150234742e-06, |
| "loss": 12.7951, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.31335866705641624, |
| "grad_norm": 0.21135878562927246, |
| "learning_rate": 8.951486697965573e-06, |
| "loss": 13.7611, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.31803566208710904, |
| "grad_norm": 0.2257193773984909, |
| "learning_rate": 8.935837245696402e-06, |
| "loss": 11.3833, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.32271265711780184, |
| "grad_norm": 0.1934535950422287, |
| "learning_rate": 8.920187793427231e-06, |
| "loss": 11.4428, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3273896521484946, |
| "grad_norm": 0.19537678360939026, |
| "learning_rate": 8.90453834115806e-06, |
| "loss": 13.1129, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3320666471791874, |
| "grad_norm": 0.2596362233161926, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 11.9323, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3367436422098802, |
| "grad_norm": 0.28119221329689026, |
| "learning_rate": 8.87323943661972e-06, |
| "loss": 12.1397, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3414206372405729, |
| "grad_norm": 0.2443932145833969, |
| "learning_rate": 8.857589984350549e-06, |
| "loss": 11.1756, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3460976322712657, |
| "grad_norm": 0.23586861789226532, |
| "learning_rate": 8.841940532081378e-06, |
| "loss": 12.2808, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3507746273019585, |
| "grad_norm": 0.2984711229801178, |
| "learning_rate": 8.826291079812207e-06, |
| "loss": 11.8437, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.35545162233265126, |
| "grad_norm": 0.2404984086751938, |
| "learning_rate": 8.810641627543037e-06, |
| "loss": 11.6321, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.36012861736334406, |
| "grad_norm": 0.22745920717716217, |
| "learning_rate": 8.794992175273866e-06, |
| "loss": 12.969, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3648056123940368, |
| "grad_norm": 0.22989057004451752, |
| "learning_rate": 8.779342723004695e-06, |
| "loss": 12.1793, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3694826074247296, |
| "grad_norm": 0.22097162902355194, |
| "learning_rate": 8.763693270735524e-06, |
| "loss": 12.5693, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3741596024554224, |
| "grad_norm": 0.19985444843769073, |
| "learning_rate": 8.748043818466354e-06, |
| "loss": 13.3868, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.37883659748611515, |
| "grad_norm": 0.2339348942041397, |
| "learning_rate": 8.732394366197183e-06, |
| "loss": 11.5905, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.38351359251680794, |
| "grad_norm": 0.28241512179374695, |
| "learning_rate": 8.716744913928014e-06, |
| "loss": 12.6998, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.38819058754750074, |
| "grad_norm": 0.2848986089229584, |
| "learning_rate": 8.701095461658843e-06, |
| "loss": 11.3058, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3928675825781935, |
| "grad_norm": 0.2118872106075287, |
| "learning_rate": 8.685446009389673e-06, |
| "loss": 10.4664, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3975445776088863, |
| "grad_norm": 0.16718249022960663, |
| "learning_rate": 8.669796557120502e-06, |
| "loss": 13.2492, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4022215726395791, |
| "grad_norm": 0.2131660282611847, |
| "learning_rate": 8.65414710485133e-06, |
| "loss": 12.0166, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.4068985676702718, |
| "grad_norm": 0.2012370079755783, |
| "learning_rate": 8.63849765258216e-06, |
| "loss": 12.5326, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4115755627009646, |
| "grad_norm": 0.2684880793094635, |
| "learning_rate": 8.62284820031299e-06, |
| "loss": 12.8071, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4162525577316574, |
| "grad_norm": 0.2500629127025604, |
| "learning_rate": 8.60719874804382e-06, |
| "loss": 12.1628, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.42092955276235017, |
| "grad_norm": 0.18125677108764648, |
| "learning_rate": 8.591549295774648e-06, |
| "loss": 11.3137, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.42560654779304297, |
| "grad_norm": 0.1830630898475647, |
| "learning_rate": 8.575899843505478e-06, |
| "loss": 11.8507, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.43028354282373577, |
| "grad_norm": 0.1481466144323349, |
| "learning_rate": 8.560250391236307e-06, |
| "loss": 10.8795, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4349605378544285, |
| "grad_norm": 0.18768347799777985, |
| "learning_rate": 8.544600938967136e-06, |
| "loss": 11.2509, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4396375328851213, |
| "grad_norm": 0.22724182903766632, |
| "learning_rate": 8.528951486697966e-06, |
| "loss": 11.6564, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.4443145279158141, |
| "grad_norm": 0.1806531399488449, |
| "learning_rate": 8.513302034428795e-06, |
| "loss": 11.9111, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.44899152294650685, |
| "grad_norm": 0.2578674554824829, |
| "learning_rate": 8.497652582159626e-06, |
| "loss": 13.1609, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.45366851797719965, |
| "grad_norm": 0.21666157245635986, |
| "learning_rate": 8.482003129890455e-06, |
| "loss": 12.3285, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.45834551300789245, |
| "grad_norm": 0.2574619948863983, |
| "learning_rate": 8.466353677621285e-06, |
| "loss": 11.4998, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4630225080385852, |
| "grad_norm": 0.28588882088661194, |
| "learning_rate": 8.450704225352114e-06, |
| "loss": 11.0233, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.467699503069278, |
| "grad_norm": 0.28356659412384033, |
| "learning_rate": 8.435054773082943e-06, |
| "loss": 10.9355, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4723764980999708, |
| "grad_norm": 0.18748782575130463, |
| "learning_rate": 8.419405320813773e-06, |
| "loss": 13.5926, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.47705349313066353, |
| "grad_norm": 0.17172126471996307, |
| "learning_rate": 8.403755868544602e-06, |
| "loss": 11.4017, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.48173048816135633, |
| "grad_norm": 0.1956973671913147, |
| "learning_rate": 8.388106416275431e-06, |
| "loss": 12.1463, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.48640748319204913, |
| "grad_norm": 0.30823975801467896, |
| "learning_rate": 8.37245696400626e-06, |
| "loss": 10.2949, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.49108447822274187, |
| "grad_norm": 0.23158958554267883, |
| "learning_rate": 8.35680751173709e-06, |
| "loss": 11.2003, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.49576147325343467, |
| "grad_norm": 0.23977261781692505, |
| "learning_rate": 8.341158059467919e-06, |
| "loss": 11.5904, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5004384682841274, |
| "grad_norm": 0.17250728607177734, |
| "learning_rate": 8.325508607198748e-06, |
| "loss": 11.2648, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5051154633148203, |
| "grad_norm": 0.23300261795520782, |
| "learning_rate": 8.309859154929578e-06, |
| "loss": 11.9646, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.509792458345513, |
| "grad_norm": 0.2430488020181656, |
| "learning_rate": 8.294209702660407e-06, |
| "loss": 12.046, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5144694533762058, |
| "grad_norm": 0.18206799030303955, |
| "learning_rate": 8.278560250391236e-06, |
| "loss": 12.0767, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5191464484068986, |
| "grad_norm": 0.25876322388648987, |
| "learning_rate": 8.262910798122067e-06, |
| "loss": 11.7794, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5238234434375914, |
| "grad_norm": 0.28936639428138733, |
| "learning_rate": 8.247261345852897e-06, |
| "loss": 10.3819, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5285004384682841, |
| "grad_norm": 0.214036762714386, |
| "learning_rate": 8.231611893583726e-06, |
| "loss": 10.3209, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.533177433498977, |
| "grad_norm": 0.23764470219612122, |
| "learning_rate": 8.215962441314555e-06, |
| "loss": 10.8417, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5378544285296697, |
| "grad_norm": 0.2604602575302124, |
| "learning_rate": 8.200312989045383e-06, |
| "loss": 12.534, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5425314235603624, |
| "grad_norm": 0.24597330391407013, |
| "learning_rate": 8.184663536776214e-06, |
| "loss": 12.348, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5472084185910553, |
| "grad_norm": 0.2204928994178772, |
| "learning_rate": 8.169014084507043e-06, |
| "loss": 10.979, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.551885413621748, |
| "grad_norm": 0.15487593412399292, |
| "learning_rate": 8.153364632237872e-06, |
| "loss": 11.0756, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5565624086524408, |
| "grad_norm": 0.23864871263504028, |
| "learning_rate": 8.137715179968702e-06, |
| "loss": 11.66, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5612394036831336, |
| "grad_norm": 0.22024200856685638, |
| "learning_rate": 8.122065727699531e-06, |
| "loss": 10.7713, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5659163987138264, |
| "grad_norm": 0.19292014837265015, |
| "learning_rate": 8.10641627543036e-06, |
| "loss": 9.4704, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5705933937445191, |
| "grad_norm": 0.16765080392360687, |
| "learning_rate": 8.09076682316119e-06, |
| "loss": 10.7993, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.575270388775212, |
| "grad_norm": 0.26758840680122375, |
| "learning_rate": 8.075117370892019e-06, |
| "loss": 11.354, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5799473838059047, |
| "grad_norm": 0.25225985050201416, |
| "learning_rate": 8.059467918622848e-06, |
| "loss": 11.2162, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5846243788365975, |
| "grad_norm": 0.22062422335147858, |
| "learning_rate": 8.043818466353678e-06, |
| "loss": 9.9452, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5893013738672903, |
| "grad_norm": 0.2589726746082306, |
| "learning_rate": 8.028169014084509e-06, |
| "loss": 11.6098, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5939783688979831, |
| "grad_norm": 0.23492346704006195, |
| "learning_rate": 8.012519561815338e-06, |
| "loss": 10.6918, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5986553639286758, |
| "grad_norm": 0.29631978273391724, |
| "learning_rate": 7.996870109546167e-06, |
| "loss": 11.4451, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.6033323589593687, |
| "grad_norm": 0.195633202791214, |
| "learning_rate": 7.981220657276996e-06, |
| "loss": 11.3396, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6080093539900614, |
| "grad_norm": 0.14094115793704987, |
| "learning_rate": 7.965571205007824e-06, |
| "loss": 10.9388, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6126863490207541, |
| "grad_norm": 0.2307533323764801, |
| "learning_rate": 7.949921752738655e-06, |
| "loss": 12.2129, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.617363344051447, |
| "grad_norm": 0.2004641741514206, |
| "learning_rate": 7.934272300469484e-06, |
| "loss": 9.9139, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.6220403390821397, |
| "grad_norm": 0.22784000635147095, |
| "learning_rate": 7.918622848200314e-06, |
| "loss": 10.2306, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.6267173341128325, |
| "grad_norm": 0.21663011610507965, |
| "learning_rate": 7.902973395931143e-06, |
| "loss": 9.9467, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.6313943291435253, |
| "grad_norm": 0.18714800477027893, |
| "learning_rate": 7.887323943661972e-06, |
| "loss": 9.7232, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.6360713241742181, |
| "grad_norm": 0.23525570333003998, |
| "learning_rate": 7.871674491392802e-06, |
| "loss": 9.9539, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.6407483192049108, |
| "grad_norm": 0.22870206832885742, |
| "learning_rate": 7.856025039123631e-06, |
| "loss": 11.9964, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6454253142356037, |
| "grad_norm": 0.19730104506015778, |
| "learning_rate": 7.84037558685446e-06, |
| "loss": 10.8391, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6501023092662964, |
| "grad_norm": 0.1873929351568222, |
| "learning_rate": 7.82472613458529e-06, |
| "loss": 10.7179, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6547793042969892, |
| "grad_norm": 0.14801403880119324, |
| "learning_rate": 7.809076682316119e-06, |
| "loss": 10.9041, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.659456299327682, |
| "grad_norm": 0.21909023821353912, |
| "learning_rate": 7.79342723004695e-06, |
| "loss": 11.5497, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6641332943583748, |
| "grad_norm": 0.20469622313976288, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 11.0387, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6688102893890675, |
| "grad_norm": 0.20616918802261353, |
| "learning_rate": 7.762128325508608e-06, |
| "loss": 9.5392, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6734872844197604, |
| "grad_norm": 0.1846546232700348, |
| "learning_rate": 7.746478873239436e-06, |
| "loss": 11.5538, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6781642794504531, |
| "grad_norm": 0.17778314650058746, |
| "learning_rate": 7.730829420970265e-06, |
| "loss": 12.8435, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6828412744811458, |
| "grad_norm": 0.24238605797290802, |
| "learning_rate": 7.715179968701096e-06, |
| "loss": 9.4674, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6875182695118387, |
| "grad_norm": 0.20961545407772064, |
| "learning_rate": 7.699530516431926e-06, |
| "loss": 10.1325, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6921952645425314, |
| "grad_norm": 0.20476683974266052, |
| "learning_rate": 7.683881064162755e-06, |
| "loss": 11.1375, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6968722595732242, |
| "grad_norm": 0.22241833806037903, |
| "learning_rate": 7.668231611893584e-06, |
| "loss": 9.6296, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.701549254603917, |
| "grad_norm": 0.2302970439195633, |
| "learning_rate": 7.652582159624414e-06, |
| "loss": 10.8763, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7062262496346098, |
| "grad_norm": 0.20484097301959991, |
| "learning_rate": 7.636932707355243e-06, |
| "loss": 9.0306, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.7109032446653025, |
| "grad_norm": 0.20411114394664764, |
| "learning_rate": 7.621283255086073e-06, |
| "loss": 11.5865, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.7155802396959953, |
| "grad_norm": 0.37148869037628174, |
| "learning_rate": 7.6056338028169015e-06, |
| "loss": 10.4929, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.7202572347266881, |
| "grad_norm": 0.19864030182361603, |
| "learning_rate": 7.589984350547731e-06, |
| "loss": 10.4561, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.7249342297573809, |
| "grad_norm": 0.21187515556812286, |
| "learning_rate": 7.574334898278561e-06, |
| "loss": 9.6848, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.7296112247880736, |
| "grad_norm": 0.18564990162849426, |
| "learning_rate": 7.55868544600939e-06, |
| "loss": 11.2932, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.7342882198187665, |
| "grad_norm": 0.21274517476558685, |
| "learning_rate": 7.54303599374022e-06, |
| "loss": 10.2206, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.7389652148494592, |
| "grad_norm": 0.23622578382492065, |
| "learning_rate": 7.527386541471049e-06, |
| "loss": 9.4342, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.743642209880152, |
| "grad_norm": 0.21262332797050476, |
| "learning_rate": 7.511737089201878e-06, |
| "loss": 11.4181, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.7483192049108448, |
| "grad_norm": 0.22142890095710754, |
| "learning_rate": 7.496087636932708e-06, |
| "loss": 10.4912, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7529961999415375, |
| "grad_norm": 0.219626322388649, |
| "learning_rate": 7.480438184663538e-06, |
| "loss": 10.902, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7576731949722303, |
| "grad_norm": 0.19913645088672638, |
| "learning_rate": 7.464788732394367e-06, |
| "loss": 8.9078, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7623501900029231, |
| "grad_norm": 0.19409991800785065, |
| "learning_rate": 7.449139280125196e-06, |
| "loss": 10.7111, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7670271850336159, |
| "grad_norm": 0.20056220889091492, |
| "learning_rate": 7.433489827856026e-06, |
| "loss": 11.438, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7717041800643086, |
| "grad_norm": 0.19502754509449005, |
| "learning_rate": 7.417840375586856e-06, |
| "loss": 10.1837, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7763811750950015, |
| "grad_norm": 0.17272567749023438, |
| "learning_rate": 7.402190923317685e-06, |
| "loss": 10.7406, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7810581701256942, |
| "grad_norm": 0.19558610022068024, |
| "learning_rate": 7.386541471048514e-06, |
| "loss": 10.1322, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.785735165156387, |
| "grad_norm": 0.2161480039358139, |
| "learning_rate": 7.370892018779343e-06, |
| "loss": 9.7506, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7904121601870798, |
| "grad_norm": 0.25595343112945557, |
| "learning_rate": 7.355242566510172e-06, |
| "loss": 11.0059, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7950891552177726, |
| "grad_norm": 0.21218866109848022, |
| "learning_rate": 7.339593114241002e-06, |
| "loss": 11.2122, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7997661502484653, |
| "grad_norm": 0.1922176331281662, |
| "learning_rate": 7.3239436619718316e-06, |
| "loss": 11.0585, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.8044431452791582, |
| "grad_norm": 0.1726471334695816, |
| "learning_rate": 7.308294209702661e-06, |
| "loss": 11.3007, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.8091201403098509, |
| "grad_norm": 0.20865805447101593, |
| "learning_rate": 7.29264475743349e-06, |
| "loss": 12.5848, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.8137971353405437, |
| "grad_norm": 0.2097303569316864, |
| "learning_rate": 7.2769953051643195e-06, |
| "loss": 11.694, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.8184741303712365, |
| "grad_norm": 0.22343699634075165, |
| "learning_rate": 7.26134585289515e-06, |
| "loss": 9.9861, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.8231511254019293, |
| "grad_norm": 0.19908592104911804, |
| "learning_rate": 7.245696400625979e-06, |
| "loss": 10.7263, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.827828120432622, |
| "grad_norm": 0.2062506079673767, |
| "learning_rate": 7.230046948356808e-06, |
| "loss": 10.7234, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.8325051154633148, |
| "grad_norm": 0.23186688125133514, |
| "learning_rate": 7.2143974960876376e-06, |
| "loss": 10.7846, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.8371821104940076, |
| "grad_norm": 0.20528610050678253, |
| "learning_rate": 7.198748043818467e-06, |
| "loss": 10.6732, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.8418591055247003, |
| "grad_norm": 0.21028846502304077, |
| "learning_rate": 7.183098591549297e-06, |
| "loss": 9.5007, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8465361005553932, |
| "grad_norm": 0.1943686306476593, |
| "learning_rate": 7.167449139280126e-06, |
| "loss": 10.6163, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.8512130955860859, |
| "grad_norm": 0.15791501104831696, |
| "learning_rate": 7.151799687010955e-06, |
| "loss": 10.4564, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8558900906167787, |
| "grad_norm": 0.15603427588939667, |
| "learning_rate": 7.136150234741784e-06, |
| "loss": 11.4006, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8605670856474715, |
| "grad_norm": 0.1737872064113617, |
| "learning_rate": 7.120500782472613e-06, |
| "loss": 10.2583, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8652440806781643, |
| "grad_norm": 0.16742144525051117, |
| "learning_rate": 7.1048513302034435e-06, |
| "loss": 9.6543, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.869921075708857, |
| "grad_norm": 0.2204071581363678, |
| "learning_rate": 7.089201877934273e-06, |
| "loss": 10.6068, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8745980707395499, |
| "grad_norm": 0.17526549100875854, |
| "learning_rate": 7.073552425665102e-06, |
| "loss": 10.5927, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8792750657702426, |
| "grad_norm": 0.18857762217521667, |
| "learning_rate": 7.0579029733959315e-06, |
| "loss": 10.0686, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8839520608009354, |
| "grad_norm": 0.16617538034915924, |
| "learning_rate": 7.042253521126761e-06, |
| "loss": 11.0356, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8886290558316282, |
| "grad_norm": 0.20443867146968842, |
| "learning_rate": 7.026604068857591e-06, |
| "loss": 9.764, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.893306050862321, |
| "grad_norm": 0.16466206312179565, |
| "learning_rate": 7.01095461658842e-06, |
| "loss": 8.9783, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8979830458930137, |
| "grad_norm": 0.2051703780889511, |
| "learning_rate": 6.9953051643192495e-06, |
| "loss": 10.5345, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.9026600409237066, |
| "grad_norm": 0.19935429096221924, |
| "learning_rate": 6.979655712050079e-06, |
| "loss": 10.1047, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.9073370359543993, |
| "grad_norm": 0.14471961557865143, |
| "learning_rate": 6.964006259780907e-06, |
| "loss": 8.9315, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.912014030985092, |
| "grad_norm": 0.21026520431041718, |
| "learning_rate": 6.948356807511738e-06, |
| "loss": 11.0192, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.9166910260157849, |
| "grad_norm": 0.22124925255775452, |
| "learning_rate": 6.932707355242568e-06, |
| "loss": 10.7211, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.9213680210464776, |
| "grad_norm": 0.6166573166847229, |
| "learning_rate": 6.917057902973396e-06, |
| "loss": 10.1654, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.9260450160771704, |
| "grad_norm": 0.14892670512199402, |
| "learning_rate": 6.901408450704225e-06, |
| "loss": 9.6949, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.9307220111078632, |
| "grad_norm": 0.17058013379573822, |
| "learning_rate": 6.885758998435055e-06, |
| "loss": 9.9864, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.935399006138556, |
| "grad_norm": 0.19176752865314484, |
| "learning_rate": 6.870109546165885e-06, |
| "loss": 9.7219, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9400760011692487, |
| "grad_norm": 0.1923060268163681, |
| "learning_rate": 6.854460093896714e-06, |
| "loss": 9.0111, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.9447529961999416, |
| "grad_norm": 0.22771762311458588, |
| "learning_rate": 6.8388106416275434e-06, |
| "loss": 9.9277, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.9494299912306343, |
| "grad_norm": 0.21972382068634033, |
| "learning_rate": 6.823161189358373e-06, |
| "loss": 10.5451, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.9541069862613271, |
| "grad_norm": 0.32944294810295105, |
| "learning_rate": 6.807511737089203e-06, |
| "loss": 9.8053, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.9587839812920199, |
| "grad_norm": 0.1875985562801361, |
| "learning_rate": 6.791862284820032e-06, |
| "loss": 10.3256, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.9634609763227127, |
| "grad_norm": 0.17583012580871582, |
| "learning_rate": 6.7762128325508615e-06, |
| "loss": 10.4922, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9681379713534054, |
| "grad_norm": 0.22149552404880524, |
| "learning_rate": 6.760563380281691e-06, |
| "loss": 10.1547, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9728149663840983, |
| "grad_norm": 0.18506276607513428, |
| "learning_rate": 6.74491392801252e-06, |
| "loss": 10.5188, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.977491961414791, |
| "grad_norm": 0.21199573576450348, |
| "learning_rate": 6.72926447574335e-06, |
| "loss": 11.3258, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9821689564454837, |
| "grad_norm": 0.18747669458389282, |
| "learning_rate": 6.71361502347418e-06, |
| "loss": 10.251, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9868459514761766, |
| "grad_norm": 0.1887262761592865, |
| "learning_rate": 6.697965571205008e-06, |
| "loss": 9.2012, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9915229465068693, |
| "grad_norm": 0.16557927429676056, |
| "learning_rate": 6.682316118935837e-06, |
| "loss": 9.2171, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9961999415375621, |
| "grad_norm": 0.19340123236179352, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 9.4988, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.0046769950306929, |
| "grad_norm": 0.3001099228858948, |
| "learning_rate": 6.651017214397497e-06, |
| "loss": 11.8577, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.0093539900613855, |
| "grad_norm": 0.18085287511348724, |
| "learning_rate": 6.635367762128326e-06, |
| "loss": 10.4356, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.0140309850920783, |
| "grad_norm": 0.17791183292865753, |
| "learning_rate": 6.619718309859155e-06, |
| "loss": 10.3929, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.0187079801227712, |
| "grad_norm": 0.20649202167987823, |
| "learning_rate": 6.604068857589985e-06, |
| "loss": 9.342, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.0233849751534638, |
| "grad_norm": 0.2049955129623413, |
| "learning_rate": 6.588419405320814e-06, |
| "loss": 10.656, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.0280619701841567, |
| "grad_norm": 0.18064165115356445, |
| "learning_rate": 6.572769953051644e-06, |
| "loss": 10.1633, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.0327389652148495, |
| "grad_norm": 0.1652020812034607, |
| "learning_rate": 6.5571205007824735e-06, |
| "loss": 8.9937, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0374159602455422, |
| "grad_norm": 0.16658996045589447, |
| "learning_rate": 6.541471048513303e-06, |
| "loss": 11.0051, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.042092955276235, |
| "grad_norm": 0.1875378042459488, |
| "learning_rate": 6.525821596244132e-06, |
| "loss": 9.7089, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.0467699503069279, |
| "grad_norm": 0.19267050921916962, |
| "learning_rate": 6.510172143974961e-06, |
| "loss": 10.0252, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.0514469453376205, |
| "grad_norm": 0.2656681537628174, |
| "learning_rate": 6.4945226917057916e-06, |
| "loss": 9.7082, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.0561239403683134, |
| "grad_norm": 0.16058804094791412, |
| "learning_rate": 6.478873239436621e-06, |
| "loss": 9.6689, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.0608009353990062, |
| "grad_norm": 0.14145280420780182, |
| "learning_rate": 6.463223787167449e-06, |
| "loss": 8.6923, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.0654779304296988, |
| "grad_norm": 0.14217382669448853, |
| "learning_rate": 6.447574334898279e-06, |
| "loss": 10.4302, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.0701549254603917, |
| "grad_norm": 0.18387371301651, |
| "learning_rate": 6.431924882629108e-06, |
| "loss": 9.5514, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.0748319204910846, |
| "grad_norm": 0.15731996297836304, |
| "learning_rate": 6.416275430359938e-06, |
| "loss": 9.2854, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0795089155217772, |
| "grad_norm": 0.1794990450143814, |
| "learning_rate": 6.400625978090767e-06, |
| "loss": 11.0837, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.08418591055247, |
| "grad_norm": 0.19289837777614594, |
| "learning_rate": 6.384976525821597e-06, |
| "loss": 9.3129, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.088862905583163, |
| "grad_norm": 0.1858958899974823, |
| "learning_rate": 6.369327073552426e-06, |
| "loss": 10.7238, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.0935399006138555, |
| "grad_norm": 0.26388686895370483, |
| "learning_rate": 6.353677621283255e-06, |
| "loss": 9.2242, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.0982168956445484, |
| "grad_norm": 0.17551296949386597, |
| "learning_rate": 6.3380281690140855e-06, |
| "loss": 8.3665, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.1028938906752412, |
| "grad_norm": 0.20290863513946533, |
| "learning_rate": 6.322378716744915e-06, |
| "loss": 9.6916, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.1075708857059339, |
| "grad_norm": 0.11323179304599762, |
| "learning_rate": 6.306729264475744e-06, |
| "loss": 10.218, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.1122478807366267, |
| "grad_norm": 0.22893109917640686, |
| "learning_rate": 6.291079812206573e-06, |
| "loss": 10.3068, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.1169248757673196, |
| "grad_norm": 0.1943362057209015, |
| "learning_rate": 6.275430359937402e-06, |
| "loss": 9.738, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.1216018707980122, |
| "grad_norm": 0.22017931938171387, |
| "learning_rate": 6.259780907668233e-06, |
| "loss": 8.5765, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.126278865828705, |
| "grad_norm": 0.1584814190864563, |
| "learning_rate": 6.244131455399062e-06, |
| "loss": 11.0436, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.130955860859398, |
| "grad_norm": 0.182816743850708, |
| "learning_rate": 6.228482003129891e-06, |
| "loss": 11.1518, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.1356328558900906, |
| "grad_norm": 0.21375828981399536, |
| "learning_rate": 6.21283255086072e-06, |
| "loss": 10.0972, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.1403098509207834, |
| "grad_norm": 0.1926356703042984, |
| "learning_rate": 6.197183098591549e-06, |
| "loss": 9.0861, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.1449868459514763, |
| "grad_norm": 0.13788476586341858, |
| "learning_rate": 6.181533646322379e-06, |
| "loss": 9.1896, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.149663840982169, |
| "grad_norm": 0.24886344373226166, |
| "learning_rate": 6.165884194053209e-06, |
| "loss": 8.9126, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.1543408360128617, |
| "grad_norm": 0.21492387354373932, |
| "learning_rate": 6.150234741784038e-06, |
| "loss": 9.1809, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.1590178310435546, |
| "grad_norm": 0.20666466653347015, |
| "learning_rate": 6.134585289514867e-06, |
| "loss": 9.8609, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.1636948260742472, |
| "grad_norm": 0.12884530425071716, |
| "learning_rate": 6.118935837245697e-06, |
| "loss": 9.0015, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.16837182110494, |
| "grad_norm": 0.2109869420528412, |
| "learning_rate": 6.103286384976527e-06, |
| "loss": 8.4398, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.173048816135633, |
| "grad_norm": 0.1602170467376709, |
| "learning_rate": 6.087636932707356e-06, |
| "loss": 8.9123, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1777258111663256, |
| "grad_norm": 0.1901443898677826, |
| "learning_rate": 6.071987480438185e-06, |
| "loss": 9.3279, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.1824028061970184, |
| "grad_norm": 0.12106055021286011, |
| "learning_rate": 6.056338028169015e-06, |
| "loss": 8.8215, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.1870798012277113, |
| "grad_norm": 0.15600277483463287, |
| "learning_rate": 6.040688575899843e-06, |
| "loss": 9.5461, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.191756796258404, |
| "grad_norm": 0.211564302444458, |
| "learning_rate": 6.025039123630674e-06, |
| "loss": 9.9196, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.1964337912890968, |
| "grad_norm": 0.16480544209480286, |
| "learning_rate": 6.0093896713615026e-06, |
| "loss": 9.488, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.2011107863197896, |
| "grad_norm": 0.22194457054138184, |
| "learning_rate": 5.993740219092332e-06, |
| "loss": 10.415, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.2057877813504823, |
| "grad_norm": 0.27972927689552307, |
| "learning_rate": 5.978090766823161e-06, |
| "loss": 9.3022, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.2104647763811751, |
| "grad_norm": 0.23484700918197632, |
| "learning_rate": 5.9624413145539905e-06, |
| "loss": 8.218, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.215141771411868, |
| "grad_norm": 0.20119240880012512, |
| "learning_rate": 5.946791862284821e-06, |
| "loss": 8.204, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.2198187664425606, |
| "grad_norm": 0.19867953658103943, |
| "learning_rate": 5.93114241001565e-06, |
| "loss": 9.4491, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2244957614732535, |
| "grad_norm": 0.19878610968589783, |
| "learning_rate": 5.915492957746479e-06, |
| "loss": 10.683, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.2291727565039463, |
| "grad_norm": 0.18710929155349731, |
| "learning_rate": 5.8998435054773086e-06, |
| "loss": 10.2426, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.233849751534639, |
| "grad_norm": 0.1873483806848526, |
| "learning_rate": 5.884194053208139e-06, |
| "loss": 10.1553, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.2385267465653318, |
| "grad_norm": 0.26153287291526794, |
| "learning_rate": 5.868544600938968e-06, |
| "loss": 9.8046, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.2432037415960246, |
| "grad_norm": 0.17956022918224335, |
| "learning_rate": 5.852895148669797e-06, |
| "loss": 9.2137, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.2478807366267173, |
| "grad_norm": 0.15572352707386017, |
| "learning_rate": 5.837245696400627e-06, |
| "loss": 9.2382, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.2525577316574101, |
| "grad_norm": 0.16768573224544525, |
| "learning_rate": 5.821596244131456e-06, |
| "loss": 10.1462, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.257234726688103, |
| "grad_norm": 0.14606249332427979, |
| "learning_rate": 5.805946791862286e-06, |
| "loss": 9.6735, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.2619117217187956, |
| "grad_norm": 0.20985975861549377, |
| "learning_rate": 5.790297339593115e-06, |
| "loss": 10.9061, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.2665887167494885, |
| "grad_norm": 0.17635460197925568, |
| "learning_rate": 5.774647887323944e-06, |
| "loss": 9.1385, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.271265711780181, |
| "grad_norm": 0.19080878794193268, |
| "learning_rate": 5.758998435054773e-06, |
| "loss": 9.8189, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.275942706810874, |
| "grad_norm": 0.1511276364326477, |
| "learning_rate": 5.7433489827856025e-06, |
| "loss": 9.9191, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.2806197018415668, |
| "grad_norm": 0.2525511085987091, |
| "learning_rate": 5.727699530516433e-06, |
| "loss": 8.7398, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.2852966968722597, |
| "grad_norm": 0.18259669840335846, |
| "learning_rate": 5.712050078247262e-06, |
| "loss": 10.7875, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.2899736919029523, |
| "grad_norm": 0.2251911461353302, |
| "learning_rate": 5.696400625978091e-06, |
| "loss": 8.9997, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.2946506869336452, |
| "grad_norm": 0.17306119203567505, |
| "learning_rate": 5.6807511737089205e-06, |
| "loss": 10.0071, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.2993276819643378, |
| "grad_norm": 0.23585619032382965, |
| "learning_rate": 5.66510172143975e-06, |
| "loss": 9.5575, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.3040046769950306, |
| "grad_norm": 0.2100452035665512, |
| "learning_rate": 5.64945226917058e-06, |
| "loss": 9.6862, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.3086816720257235, |
| "grad_norm": 0.19781209528446198, |
| "learning_rate": 5.633802816901409e-06, |
| "loss": 9.6712, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.3133586670564164, |
| "grad_norm": 0.20990189909934998, |
| "learning_rate": 5.618153364632239e-06, |
| "loss": 9.1145, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.318035662087109, |
| "grad_norm": 0.14471188187599182, |
| "learning_rate": 5.602503912363068e-06, |
| "loss": 10.0124, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.3227126571178018, |
| "grad_norm": 0.181657612323761, |
| "learning_rate": 5.586854460093896e-06, |
| "loss": 8.5702, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.3273896521484945, |
| "grad_norm": 0.28895941376686096, |
| "learning_rate": 5.571205007824727e-06, |
| "loss": 8.7288, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.3320666471791873, |
| "grad_norm": 0.19658011198043823, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 10.2721, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.3367436422098802, |
| "grad_norm": 0.1778428554534912, |
| "learning_rate": 5.539906103286385e-06, |
| "loss": 8.6042, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.341420637240573, |
| "grad_norm": 0.1622474491596222, |
| "learning_rate": 5.5242566510172144e-06, |
| "loss": 9.0871, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.3460976322712657, |
| "grad_norm": 0.17768928408622742, |
| "learning_rate": 5.508607198748044e-06, |
| "loss": 9.1438, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.3507746273019585, |
| "grad_norm": 0.15472590923309326, |
| "learning_rate": 5.492957746478874e-06, |
| "loss": 8.1626, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.3554516223326512, |
| "grad_norm": 0.151944100856781, |
| "learning_rate": 5.477308294209703e-06, |
| "loss": 10.6628, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.360128617363344, |
| "grad_norm": 0.2412179410457611, |
| "learning_rate": 5.4616588419405325e-06, |
| "loss": 10.0811, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3648056123940369, |
| "grad_norm": 0.1254899650812149, |
| "learning_rate": 5.446009389671362e-06, |
| "loss": 8.7967, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.3694826074247297, |
| "grad_norm": 0.1940433233976364, |
| "learning_rate": 5.430359937402191e-06, |
| "loss": 10.7896, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.3741596024554223, |
| "grad_norm": 0.23099660873413086, |
| "learning_rate": 5.414710485133021e-06, |
| "loss": 10.3398, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.3788365974861152, |
| "grad_norm": 0.14648781716823578, |
| "learning_rate": 5.3990610328638506e-06, |
| "loss": 9.3573, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.3835135925168078, |
| "grad_norm": 0.18853303790092468, |
| "learning_rate": 5.38341158059468e-06, |
| "loss": 9.8656, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.3881905875475007, |
| "grad_norm": 0.20366129279136658, |
| "learning_rate": 5.367762128325509e-06, |
| "loss": 10.2061, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.3928675825781935, |
| "grad_norm": 0.18720601499080658, |
| "learning_rate": 5.352112676056338e-06, |
| "loss": 8.4737, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.3975445776088864, |
| "grad_norm": 0.1396239697933197, |
| "learning_rate": 5.336463223787169e-06, |
| "loss": 9.3009, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.402221572639579, |
| "grad_norm": 0.19741852581501007, |
| "learning_rate": 5.320813771517997e-06, |
| "loss": 9.7318, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.4068985676702719, |
| "grad_norm": 0.1550920307636261, |
| "learning_rate": 5.305164319248826e-06, |
| "loss": 9.0948, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.4115755627009645, |
| "grad_norm": 0.20845593512058258, |
| "learning_rate": 5.289514866979656e-06, |
| "loss": 8.555, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.4162525577316574, |
| "grad_norm": 0.15616929531097412, |
| "learning_rate": 5.273865414710485e-06, |
| "loss": 9.293, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.4209295527623502, |
| "grad_norm": 0.18581336736679077, |
| "learning_rate": 5.258215962441315e-06, |
| "loss": 8.6798, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.425606547793043, |
| "grad_norm": 0.14762163162231445, |
| "learning_rate": 5.2425665101721445e-06, |
| "loss": 7.7574, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.4302835428237357, |
| "grad_norm": 0.11617639660835266, |
| "learning_rate": 5.226917057902974e-06, |
| "loss": 9.9937, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.4349605378544286, |
| "grad_norm": 0.12888303399085999, |
| "learning_rate": 5.211267605633803e-06, |
| "loss": 9.5393, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.4396375328851212, |
| "grad_norm": 0.14450183510780334, |
| "learning_rate": 5.195618153364632e-06, |
| "loss": 10.9441, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.444314527915814, |
| "grad_norm": 0.20856888592243195, |
| "learning_rate": 5.1799687010954625e-06, |
| "loss": 9.6833, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.448991522946507, |
| "grad_norm": 0.23422713577747345, |
| "learning_rate": 5.164319248826292e-06, |
| "loss": 9.2532, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.4536685179771998, |
| "grad_norm": 0.19145800173282623, |
| "learning_rate": 5.148669796557121e-06, |
| "loss": 9.7285, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4583455130078924, |
| "grad_norm": 0.1990247666835785, |
| "learning_rate": 5.1330203442879505e-06, |
| "loss": 7.6512, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.4630225080385852, |
| "grad_norm": 0.17829596996307373, |
| "learning_rate": 5.117370892018779e-06, |
| "loss": 9.5529, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.4676995030692779, |
| "grad_norm": 0.162981778383255, |
| "learning_rate": 5.10172143974961e-06, |
| "loss": 10.0274, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.4723764980999707, |
| "grad_norm": 0.17965111136436462, |
| "learning_rate": 5.086071987480438e-06, |
| "loss": 9.2513, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.4770534931306636, |
| "grad_norm": 0.28804492950439453, |
| "learning_rate": 5.070422535211268e-06, |
| "loss": 10.0194, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.4817304881613564, |
| "grad_norm": 0.1571478545665741, |
| "learning_rate": 5.054773082942097e-06, |
| "loss": 10.0889, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.486407483192049, |
| "grad_norm": 0.2101372927427292, |
| "learning_rate": 5.039123630672926e-06, |
| "loss": 8.6775, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.491084478222742, |
| "grad_norm": 0.20323887467384338, |
| "learning_rate": 5.0234741784037565e-06, |
| "loss": 9.8082, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.4957614732534346, |
| "grad_norm": 0.16192995011806488, |
| "learning_rate": 5.007824726134586e-06, |
| "loss": 8.0025, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.5004384682841274, |
| "grad_norm": 0.16440463066101074, |
| "learning_rate": 4.992175273865415e-06, |
| "loss": 9.579, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.5051154633148203, |
| "grad_norm": 0.19055482745170593, |
| "learning_rate": 4.976525821596244e-06, |
| "loss": 8.7398, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.5097924583455131, |
| "grad_norm": 0.17318573594093323, |
| "learning_rate": 4.960876369327074e-06, |
| "loss": 9.7488, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.5144694533762058, |
| "grad_norm": 0.24867770075798035, |
| "learning_rate": 4.945226917057903e-06, |
| "loss": 10.5706, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.5191464484068986, |
| "grad_norm": 0.1796032041311264, |
| "learning_rate": 4.929577464788733e-06, |
| "loss": 9.4351, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.5238234434375912, |
| "grad_norm": 0.21675661206245422, |
| "learning_rate": 4.9139280125195624e-06, |
| "loss": 10.6771, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.528500438468284, |
| "grad_norm": 0.17892418801784515, |
| "learning_rate": 4.898278560250392e-06, |
| "loss": 7.6976, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.533177433498977, |
| "grad_norm": 0.16854748129844666, |
| "learning_rate": 4.882629107981221e-06, |
| "loss": 9.0202, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.5378544285296698, |
| "grad_norm": 0.20898739993572235, |
| "learning_rate": 4.86697965571205e-06, |
| "loss": 9.3772, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.5425314235603624, |
| "grad_norm": 0.2980878949165344, |
| "learning_rate": 4.85133020344288e-06, |
| "loss": 10.5012, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.5472084185910553, |
| "grad_norm": 0.12076615542173386, |
| "learning_rate": 4.835680751173709e-06, |
| "loss": 10.1389, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.551885413621748, |
| "grad_norm": 0.1814320981502533, |
| "learning_rate": 4.820031298904539e-06, |
| "loss": 8.4015, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.5565624086524408, |
| "grad_norm": 0.16422027349472046, |
| "learning_rate": 4.8043818466353684e-06, |
| "loss": 8.4772, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.5612394036831336, |
| "grad_norm": 0.12222316116094589, |
| "learning_rate": 4.788732394366197e-06, |
| "loss": 8.7358, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.5659163987138265, |
| "grad_norm": 0.20471377670764923, |
| "learning_rate": 4.773082942097027e-06, |
| "loss": 8.9805, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.5705933937445191, |
| "grad_norm": 0.1602873057126999, |
| "learning_rate": 4.757433489827856e-06, |
| "loss": 7.7731, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.575270388775212, |
| "grad_norm": 0.1620335578918457, |
| "learning_rate": 4.741784037558686e-06, |
| "loss": 8.5971, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.5799473838059046, |
| "grad_norm": 0.14822766184806824, |
| "learning_rate": 4.726134585289515e-06, |
| "loss": 8.1521, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.5846243788365975, |
| "grad_norm": 0.16832107305526733, |
| "learning_rate": 4.710485133020345e-06, |
| "loss": 9.0838, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.5893013738672903, |
| "grad_norm": 0.1385219246149063, |
| "learning_rate": 4.694835680751174e-06, |
| "loss": 7.4367, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.5939783688979832, |
| "grad_norm": 0.13664643466472626, |
| "learning_rate": 4.679186228482004e-06, |
| "loss": 8.5027, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.5986553639286758, |
| "grad_norm": 0.18891537189483643, |
| "learning_rate": 4.663536776212833e-06, |
| "loss": 8.6301, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.6033323589593687, |
| "grad_norm": 0.19962970912456512, |
| "learning_rate": 4.647887323943662e-06, |
| "loss": 10.6293, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.6080093539900613, |
| "grad_norm": 0.18747878074645996, |
| "learning_rate": 4.632237871674492e-06, |
| "loss": 10.0322, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.6126863490207541, |
| "grad_norm": 0.3010605573654175, |
| "learning_rate": 4.616588419405321e-06, |
| "loss": 9.1209, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.617363344051447, |
| "grad_norm": 0.11245454847812653, |
| "learning_rate": 4.60093896713615e-06, |
| "loss": 8.0594, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.6220403390821398, |
| "grad_norm": 0.20886649191379547, |
| "learning_rate": 4.58528951486698e-06, |
| "loss": 9.1715, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.6267173341128325, |
| "grad_norm": 0.14630508422851562, |
| "learning_rate": 4.56964006259781e-06, |
| "loss": 8.7735, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.6313943291435253, |
| "grad_norm": 0.21093368530273438, |
| "learning_rate": 4.553990610328639e-06, |
| "loss": 8.2183, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.636071324174218, |
| "grad_norm": 0.22136329114437103, |
| "learning_rate": 4.538341158059468e-06, |
| "loss": 9.067, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.6407483192049108, |
| "grad_norm": 0.15906454622745514, |
| "learning_rate": 4.522691705790298e-06, |
| "loss": 9.3209, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6454253142356037, |
| "grad_norm": 0.2312268763780594, |
| "learning_rate": 4.507042253521127e-06, |
| "loss": 9.2316, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.6501023092662965, |
| "grad_norm": 0.24528440833091736, |
| "learning_rate": 4.491392801251956e-06, |
| "loss": 9.0482, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.6547793042969892, |
| "grad_norm": 0.19777342677116394, |
| "learning_rate": 4.475743348982786e-06, |
| "loss": 10.1556, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.659456299327682, |
| "grad_norm": 0.2033587247133255, |
| "learning_rate": 4.460093896713616e-06, |
| "loss": 8.9973, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.6641332943583746, |
| "grad_norm": 0.16927585005760193, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 9.5144, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.6688102893890675, |
| "grad_norm": 0.16959340870380402, |
| "learning_rate": 4.428794992175274e-06, |
| "loss": 9.5447, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.6734872844197604, |
| "grad_norm": 0.18593505024909973, |
| "learning_rate": 4.413145539906104e-06, |
| "loss": 9.6471, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.6781642794504532, |
| "grad_norm": 0.16945506632328033, |
| "learning_rate": 4.397496087636933e-06, |
| "loss": 8.5418, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.6828412744811458, |
| "grad_norm": 0.16277293860912323, |
| "learning_rate": 4.381846635367762e-06, |
| "loss": 9.2884, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.6875182695118387, |
| "grad_norm": 0.2155790776014328, |
| "learning_rate": 4.3661971830985915e-06, |
| "loss": 9.4547, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6921952645425313, |
| "grad_norm": 0.19257700443267822, |
| "learning_rate": 4.350547730829422e-06, |
| "loss": 8.7859, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.6968722595732242, |
| "grad_norm": 0.21113352477550507, |
| "learning_rate": 4.334898278560251e-06, |
| "loss": 9.3654, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.701549254603917, |
| "grad_norm": 0.17781415581703186, |
| "learning_rate": 4.31924882629108e-06, |
| "loss": 9.5482, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.70622624963461, |
| "grad_norm": 0.14610658586025238, |
| "learning_rate": 4.30359937402191e-06, |
| "loss": 9.2182, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.7109032446653025, |
| "grad_norm": 0.19297371804714203, |
| "learning_rate": 4.287949921752739e-06, |
| "loss": 8.5858, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.7155802396959952, |
| "grad_norm": 0.16764657199382782, |
| "learning_rate": 4.272300469483568e-06, |
| "loss": 8.6679, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.720257234726688, |
| "grad_norm": 0.1740255355834961, |
| "learning_rate": 4.2566510172143975e-06, |
| "loss": 8.3984, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.7249342297573809, |
| "grad_norm": 0.2171589732170105, |
| "learning_rate": 4.241001564945228e-06, |
| "loss": 8.6767, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.7296112247880737, |
| "grad_norm": 0.15334008634090424, |
| "learning_rate": 4.225352112676057e-06, |
| "loss": 9.0357, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.7342882198187666, |
| "grad_norm": 0.1901715248823166, |
| "learning_rate": 4.209702660406886e-06, |
| "loss": 9.1397, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7389652148494592, |
| "grad_norm": 0.14479465782642365, |
| "learning_rate": 4.194053208137716e-06, |
| "loss": 8.0689, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.7436422098801518, |
| "grad_norm": 0.13776177167892456, |
| "learning_rate": 4.178403755868545e-06, |
| "loss": 8.2216, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.7483192049108447, |
| "grad_norm": 0.13980716466903687, |
| "learning_rate": 4.162754303599374e-06, |
| "loss": 10.2694, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.7529961999415375, |
| "grad_norm": 0.15243536233901978, |
| "learning_rate": 4.1471048513302035e-06, |
| "loss": 8.4832, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.7576731949722304, |
| "grad_norm": 0.1408737152814865, |
| "learning_rate": 4.131455399061034e-06, |
| "loss": 10.5995, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.7623501900029233, |
| "grad_norm": 0.16743288934230804, |
| "learning_rate": 4.115805946791863e-06, |
| "loss": 9.0306, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.7670271850336159, |
| "grad_norm": 0.13096289336681366, |
| "learning_rate": 4.100156494522691e-06, |
| "loss": 8.799, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.7717041800643085, |
| "grad_norm": 0.18536189198493958, |
| "learning_rate": 4.0845070422535216e-06, |
| "loss": 8.6714, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.7763811750950014, |
| "grad_norm": 0.21224500238895416, |
| "learning_rate": 4.068857589984351e-06, |
| "loss": 8.8822, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.7810581701256942, |
| "grad_norm": 0.15303047001361847, |
| "learning_rate": 4.05320813771518e-06, |
| "loss": 8.8666, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.785735165156387, |
| "grad_norm": 0.14419591426849365, |
| "learning_rate": 4.0375586854460095e-06, |
| "loss": 8.916, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.79041216018708, |
| "grad_norm": 0.1363951712846756, |
| "learning_rate": 4.021909233176839e-06, |
| "loss": 8.3857, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.7950891552177726, |
| "grad_norm": 0.20621058344841003, |
| "learning_rate": 4.006259780907669e-06, |
| "loss": 10.1237, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.7997661502484652, |
| "grad_norm": 0.21105414628982544, |
| "learning_rate": 3.990610328638498e-06, |
| "loss": 9.5554, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.804443145279158, |
| "grad_norm": 0.21915097534656525, |
| "learning_rate": 3.9749608763693276e-06, |
| "loss": 7.717, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.809120140309851, |
| "grad_norm": 0.17555522918701172, |
| "learning_rate": 3.959311424100157e-06, |
| "loss": 9.1899, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.8137971353405438, |
| "grad_norm": 0.1890765279531479, |
| "learning_rate": 3.943661971830986e-06, |
| "loss": 8.0672, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.8184741303712366, |
| "grad_norm": 0.16451717913150787, |
| "learning_rate": 3.9280125195618155e-06, |
| "loss": 8.8205, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.8231511254019293, |
| "grad_norm": 0.16023708879947662, |
| "learning_rate": 3.912363067292645e-06, |
| "loss": 9.319, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.8278281204326219, |
| "grad_norm": 0.15548115968704224, |
| "learning_rate": 3.896713615023475e-06, |
| "loss": 8.2246, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8325051154633147, |
| "grad_norm": 0.21226494014263153, |
| "learning_rate": 3.881064162754304e-06, |
| "loss": 9.135, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.8371821104940076, |
| "grad_norm": 0.14461496472358704, |
| "learning_rate": 3.865414710485133e-06, |
| "loss": 8.962, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.8418591055247004, |
| "grad_norm": 0.20766492187976837, |
| "learning_rate": 3.849765258215963e-06, |
| "loss": 8.8991, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.8465361005553933, |
| "grad_norm": 0.20327630639076233, |
| "learning_rate": 3.834115805946792e-06, |
| "loss": 9.1291, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.851213095586086, |
| "grad_norm": 0.23052388429641724, |
| "learning_rate": 3.8184663536776215e-06, |
| "loss": 8.3602, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.8558900906167786, |
| "grad_norm": 0.16140541434288025, |
| "learning_rate": 3.8028169014084508e-06, |
| "loss": 9.3176, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.8605670856474714, |
| "grad_norm": 0.17049185931682587, |
| "learning_rate": 3.7871674491392805e-06, |
| "loss": 8.6602, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.8652440806781643, |
| "grad_norm": 0.11496849358081818, |
| "learning_rate": 3.77151799687011e-06, |
| "loss": 10.3293, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.8699210757088571, |
| "grad_norm": 0.1907191127538681, |
| "learning_rate": 3.755868544600939e-06, |
| "loss": 8.4035, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.87459807073955, |
| "grad_norm": 0.16409359872341156, |
| "learning_rate": 3.740219092331769e-06, |
| "loss": 8.9062, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8792750657702426, |
| "grad_norm": 0.15642918646335602, |
| "learning_rate": 3.724569640062598e-06, |
| "loss": 8.8751, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.8839520608009352, |
| "grad_norm": 0.1641726940870285, |
| "learning_rate": 3.708920187793428e-06, |
| "loss": 8.3851, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.888629055831628, |
| "grad_norm": 0.15342937409877777, |
| "learning_rate": 3.693270735524257e-06, |
| "loss": 9.3965, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.893306050862321, |
| "grad_norm": 0.15916384756565094, |
| "learning_rate": 3.677621283255086e-06, |
| "loss": 8.7446, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.8979830458930138, |
| "grad_norm": 0.21401815116405487, |
| "learning_rate": 3.6619718309859158e-06, |
| "loss": 8.8994, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.9026600409237067, |
| "grad_norm": 0.19148550927639008, |
| "learning_rate": 3.646322378716745e-06, |
| "loss": 8.5996, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.9073370359543993, |
| "grad_norm": 0.1755845844745636, |
| "learning_rate": 3.630672926447575e-06, |
| "loss": 8.7611, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.912014030985092, |
| "grad_norm": 0.17193089425563812, |
| "learning_rate": 3.615023474178404e-06, |
| "loss": 8.9488, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.9166910260157848, |
| "grad_norm": 0.17173364758491516, |
| "learning_rate": 3.5993740219092334e-06, |
| "loss": 8.0517, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.9213680210464776, |
| "grad_norm": 0.22657723724842072, |
| "learning_rate": 3.583724569640063e-06, |
| "loss": 8.7361, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.9260450160771705, |
| "grad_norm": 0.21941417455673218, |
| "learning_rate": 3.568075117370892e-06, |
| "loss": 9.2343, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.9307220111078633, |
| "grad_norm": 0.18514755368232727, |
| "learning_rate": 3.5524256651017218e-06, |
| "loss": 8.2767, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.935399006138556, |
| "grad_norm": 0.13066066801548004, |
| "learning_rate": 3.536776212832551e-06, |
| "loss": 8.7371, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.9400760011692486, |
| "grad_norm": 0.16903606057167053, |
| "learning_rate": 3.5211267605633804e-06, |
| "loss": 9.3067, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.9447529961999415, |
| "grad_norm": 0.14286428689956665, |
| "learning_rate": 3.50547730829421e-06, |
| "loss": 7.8586, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.9494299912306343, |
| "grad_norm": 0.1969095915555954, |
| "learning_rate": 3.4898278560250394e-06, |
| "loss": 9.6053, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.9541069862613272, |
| "grad_norm": 0.1750202775001526, |
| "learning_rate": 3.474178403755869e-06, |
| "loss": 9.0714, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.95878398129202, |
| "grad_norm": 0.21293002367019653, |
| "learning_rate": 3.458528951486698e-06, |
| "loss": 9.8726, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.9634609763227127, |
| "grad_norm": 0.1672164648771286, |
| "learning_rate": 3.4428794992175273e-06, |
| "loss": 9.5275, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.9681379713534053, |
| "grad_norm": 0.17561869323253632, |
| "learning_rate": 3.427230046948357e-06, |
| "loss": 7.2097, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.9728149663840981, |
| "grad_norm": 0.16326965391635895, |
| "learning_rate": 3.4115805946791864e-06, |
| "loss": 9.3302, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.977491961414791, |
| "grad_norm": 0.15163388848304749, |
| "learning_rate": 3.395931142410016e-06, |
| "loss": 9.1933, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.9821689564454839, |
| "grad_norm": 0.16277414560317993, |
| "learning_rate": 3.3802816901408454e-06, |
| "loss": 8.3196, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.9868459514761767, |
| "grad_norm": 0.18385657668113708, |
| "learning_rate": 3.364632237871675e-06, |
| "loss": 8.1472, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.9915229465068693, |
| "grad_norm": 0.1768423169851303, |
| "learning_rate": 3.348982785602504e-06, |
| "loss": 8.3639, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.996199941537562, |
| "grad_norm": 0.2325451821088791, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 9.265, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.004676995030693, |
| "grad_norm": 0.22825832664966583, |
| "learning_rate": 3.317683881064163e-06, |
| "loss": 10.0732, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.0093539900613857, |
| "grad_norm": 0.16034899652004242, |
| "learning_rate": 3.3020344287949924e-06, |
| "loss": 7.7232, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.014030985092078, |
| "grad_norm": 0.1737372726202011, |
| "learning_rate": 3.286384976525822e-06, |
| "loss": 8.0928, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.018707980122771, |
| "grad_norm": 0.20644846558570862, |
| "learning_rate": 3.2707355242566514e-06, |
| "loss": 8.6956, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.023384975153464, |
| "grad_norm": 0.3140431344509125, |
| "learning_rate": 3.2550860719874807e-06, |
| "loss": 9.0442, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.0280619701841567, |
| "grad_norm": 0.2457619458436966, |
| "learning_rate": 3.2394366197183104e-06, |
| "loss": 8.6256, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.0327389652148495, |
| "grad_norm": 0.2014688104391098, |
| "learning_rate": 3.2237871674491393e-06, |
| "loss": 9.7276, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.0374159602455424, |
| "grad_norm": 0.1970800757408142, |
| "learning_rate": 3.208137715179969e-06, |
| "loss": 8.19, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.0420929552762352, |
| "grad_norm": 0.12662629783153534, |
| "learning_rate": 3.1924882629107983e-06, |
| "loss": 9.235, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.0467699503069277, |
| "grad_norm": 0.15353932976722717, |
| "learning_rate": 3.1768388106416277e-06, |
| "loss": 8.8255, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.0514469453376205, |
| "grad_norm": 0.2180812507867813, |
| "learning_rate": 3.1611893583724574e-06, |
| "loss": 9.1142, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.0561239403683134, |
| "grad_norm": 0.18303510546684265, |
| "learning_rate": 3.1455399061032867e-06, |
| "loss": 10.1061, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.060800935399006, |
| "grad_norm": 0.15254124999046326, |
| "learning_rate": 3.1298904538341164e-06, |
| "loss": 8.5431, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.065477930429699, |
| "grad_norm": 0.16063688695430756, |
| "learning_rate": 3.1142410015649453e-06, |
| "loss": 8.4382, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.0701549254603915, |
| "grad_norm": 0.20583708584308624, |
| "learning_rate": 3.0985915492957746e-06, |
| "loss": 8.1778, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.0748319204910843, |
| "grad_norm": 0.11699045449495316, |
| "learning_rate": 3.0829420970266043e-06, |
| "loss": 7.8459, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.079508915521777, |
| "grad_norm": 0.1605014204978943, |
| "learning_rate": 3.0672926447574336e-06, |
| "loss": 8.4224, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.08418591055247, |
| "grad_norm": 0.14405608177185059, |
| "learning_rate": 3.0516431924882634e-06, |
| "loss": 8.3442, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.088862905583163, |
| "grad_norm": 0.17145852744579315, |
| "learning_rate": 3.0359937402190927e-06, |
| "loss": 8.7685, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.0935399006138558, |
| "grad_norm": 0.14711640775203705, |
| "learning_rate": 3.0203442879499216e-06, |
| "loss": 7.3568, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.098216895644548, |
| "grad_norm": 0.13734185695648193, |
| "learning_rate": 3.0046948356807513e-06, |
| "loss": 8.4425, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.102893890675241, |
| "grad_norm": 0.1571117639541626, |
| "learning_rate": 2.9890453834115806e-06, |
| "loss": 7.6952, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.107570885705934, |
| "grad_norm": 0.15319029986858368, |
| "learning_rate": 2.9733959311424103e-06, |
| "loss": 7.9937, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.1122478807366267, |
| "grad_norm": 0.18363691866397858, |
| "learning_rate": 2.9577464788732396e-06, |
| "loss": 8.4406, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.1169248757673196, |
| "grad_norm": 0.1433074176311493, |
| "learning_rate": 2.9420970266040694e-06, |
| "loss": 9.7219, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.1216018707980124, |
| "grad_norm": 0.14841365814208984, |
| "learning_rate": 2.9264475743348987e-06, |
| "loss": 8.2459, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.126278865828705, |
| "grad_norm": 0.18753403425216675, |
| "learning_rate": 2.910798122065728e-06, |
| "loss": 8.7057, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.1309558608593977, |
| "grad_norm": 0.1748085618019104, |
| "learning_rate": 2.8951486697965577e-06, |
| "loss": 8.5651, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.1356328558900906, |
| "grad_norm": 0.17874014377593994, |
| "learning_rate": 2.8794992175273866e-06, |
| "loss": 8.5838, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.1403098509207834, |
| "grad_norm": 0.16495150327682495, |
| "learning_rate": 2.8638497652582163e-06, |
| "loss": 9.8249, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.1449868459514763, |
| "grad_norm": 0.12347421795129776, |
| "learning_rate": 2.8482003129890456e-06, |
| "loss": 7.1875, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.149663840982169, |
| "grad_norm": 0.1617746353149414, |
| "learning_rate": 2.832550860719875e-06, |
| "loss": 7.7209, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.154340836012862, |
| "grad_norm": 0.160769984126091, |
| "learning_rate": 2.8169014084507046e-06, |
| "loss": 7.7851, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.1590178310435544, |
| "grad_norm": 0.14725424349308014, |
| "learning_rate": 2.801251956181534e-06, |
| "loss": 7.8194, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.1636948260742472, |
| "grad_norm": 0.11912764608860016, |
| "learning_rate": 2.7856025039123637e-06, |
| "loss": 7.7984, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.16837182110494, |
| "grad_norm": 0.17748208343982697, |
| "learning_rate": 2.7699530516431926e-06, |
| "loss": 8.0672, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.173048816135633, |
| "grad_norm": 0.1708259880542755, |
| "learning_rate": 2.754303599374022e-06, |
| "loss": 9.2099, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.177725811166326, |
| "grad_norm": 0.15187622606754303, |
| "learning_rate": 2.7386541471048516e-06, |
| "loss": 8.3165, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.182402806197018, |
| "grad_norm": 0.18263490498065948, |
| "learning_rate": 2.723004694835681e-06, |
| "loss": 9.9331, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.187079801227711, |
| "grad_norm": 0.12427602708339691, |
| "learning_rate": 2.7073552425665106e-06, |
| "loss": 8.5229, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.191756796258404, |
| "grad_norm": 0.13961510360240936, |
| "learning_rate": 2.69170579029734e-06, |
| "loss": 8.3661, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.1964337912890968, |
| "grad_norm": 0.14999401569366455, |
| "learning_rate": 2.676056338028169e-06, |
| "loss": 7.2095, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.2011107863197896, |
| "grad_norm": 0.14472222328186035, |
| "learning_rate": 2.6604068857589986e-06, |
| "loss": 8.6861, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.2057877813504825, |
| "grad_norm": 0.14089444279670715, |
| "learning_rate": 2.644757433489828e-06, |
| "loss": 7.8008, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.210464776381175, |
| "grad_norm": 0.13669133186340332, |
| "learning_rate": 2.6291079812206576e-06, |
| "loss": 8.5063, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.2151417714118677, |
| "grad_norm": 0.1669352799654007, |
| "learning_rate": 2.613458528951487e-06, |
| "loss": 8.7907, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.2198187664425606, |
| "grad_norm": 0.15821270644664764, |
| "learning_rate": 2.597809076682316e-06, |
| "loss": 8.0308, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.2244957614732535, |
| "grad_norm": 0.21483926475048065, |
| "learning_rate": 2.582159624413146e-06, |
| "loss": 8.502, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.2291727565039463, |
| "grad_norm": 0.18459928035736084, |
| "learning_rate": 2.5665101721439752e-06, |
| "loss": 9.5206, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.233849751534639, |
| "grad_norm": 0.1487099826335907, |
| "learning_rate": 2.550860719874805e-06, |
| "loss": 6.9168, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.2385267465653316, |
| "grad_norm": 0.2513448894023895, |
| "learning_rate": 2.535211267605634e-06, |
| "loss": 9.3783, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.2432037415960244, |
| "grad_norm": 0.1873185932636261, |
| "learning_rate": 2.519561815336463e-06, |
| "loss": 8.2886, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.2478807366267173, |
| "grad_norm": 0.19832056760787964, |
| "learning_rate": 2.503912363067293e-06, |
| "loss": 8.1959, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.25255773165741, |
| "grad_norm": 0.20701546967029572, |
| "learning_rate": 2.488262910798122e-06, |
| "loss": 8.1702, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.257234726688103, |
| "grad_norm": 0.12690390646457672, |
| "learning_rate": 2.4726134585289515e-06, |
| "loss": 8.7747, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.261911721718796, |
| "grad_norm": 0.1636572629213333, |
| "learning_rate": 2.4569640062597812e-06, |
| "loss": 7.8555, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.2665887167494887, |
| "grad_norm": 0.12632915377616882, |
| "learning_rate": 2.4413145539906105e-06, |
| "loss": 7.9758, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.271265711780181, |
| "grad_norm": 0.16761943697929382, |
| "learning_rate": 2.42566510172144e-06, |
| "loss": 8.0032, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.275942706810874, |
| "grad_norm": 0.15796944499015808, |
| "learning_rate": 2.4100156494522696e-06, |
| "loss": 8.8154, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.280619701841567, |
| "grad_norm": 0.16528886556625366, |
| "learning_rate": 2.3943661971830984e-06, |
| "loss": 7.3999, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.2852966968722597, |
| "grad_norm": 0.14766015112400055, |
| "learning_rate": 2.378716744913928e-06, |
| "loss": 7.8343, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.2899736919029525, |
| "grad_norm": 0.12624794244766235, |
| "learning_rate": 2.3630672926447575e-06, |
| "loss": 8.0017, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.294650686933645, |
| "grad_norm": 0.16594719886779785, |
| "learning_rate": 2.347417840375587e-06, |
| "loss": 7.7649, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.299327681964338, |
| "grad_norm": 0.1574728637933731, |
| "learning_rate": 2.3317683881064165e-06, |
| "loss": 9.2884, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.3040046769950306, |
| "grad_norm": 0.1298084557056427, |
| "learning_rate": 2.316118935837246e-06, |
| "loss": 8.4339, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.3086816720257235, |
| "grad_norm": 0.15643304586410522, |
| "learning_rate": 2.300469483568075e-06, |
| "loss": 8.0997, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.3133586670564164, |
| "grad_norm": 0.13263966143131256, |
| "learning_rate": 2.284820031298905e-06, |
| "loss": 8.109, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.318035662087109, |
| "grad_norm": 0.21980319917201996, |
| "learning_rate": 2.269170579029734e-06, |
| "loss": 8.2741, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.322712657117802, |
| "grad_norm": 0.13680629432201385, |
| "learning_rate": 2.2535211267605635e-06, |
| "loss": 8.5315, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.3273896521484945, |
| "grad_norm": 0.1529272496700287, |
| "learning_rate": 2.237871674491393e-06, |
| "loss": 8.0531, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.3320666471791873, |
| "grad_norm": 0.174594908952713, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 7.7507, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.33674364220988, |
| "grad_norm": 0.17085200548171997, |
| "learning_rate": 2.206572769953052e-06, |
| "loss": 7.1328, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.341420637240573, |
| "grad_norm": 0.14975635707378387, |
| "learning_rate": 2.190923317683881e-06, |
| "loss": 9.8064, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.346097632271266, |
| "grad_norm": 0.15309952199459076, |
| "learning_rate": 2.175273865414711e-06, |
| "loss": 8.6898, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.3507746273019583, |
| "grad_norm": 0.13084295392036438, |
| "learning_rate": 2.15962441314554e-06, |
| "loss": 8.1384, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.355451622332651, |
| "grad_norm": 0.16496095061302185, |
| "learning_rate": 2.1439749608763695e-06, |
| "loss": 8.9057, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.360128617363344, |
| "grad_norm": 0.157500758767128, |
| "learning_rate": 2.1283255086071988e-06, |
| "loss": 7.967, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.364805612394037, |
| "grad_norm": 0.1988188475370407, |
| "learning_rate": 2.1126760563380285e-06, |
| "loss": 7.4129, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.3694826074247297, |
| "grad_norm": 0.21104207634925842, |
| "learning_rate": 2.097026604068858e-06, |
| "loss": 7.5442, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.3741596024554226, |
| "grad_norm": 0.20285457372665405, |
| "learning_rate": 2.081377151799687e-06, |
| "loss": 7.3523, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.378836597486115, |
| "grad_norm": 0.24479469656944275, |
| "learning_rate": 2.065727699530517e-06, |
| "loss": 7.8577, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.383513592516808, |
| "grad_norm": 0.150054469704628, |
| "learning_rate": 2.0500782472613457e-06, |
| "loss": 8.2585, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.3881905875475007, |
| "grad_norm": 0.12602077424526215, |
| "learning_rate": 2.0344287949921754e-06, |
| "loss": 7.7554, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.3928675825781935, |
| "grad_norm": 0.18626457452774048, |
| "learning_rate": 2.0187793427230047e-06, |
| "loss": 7.5464, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.3975445776088864, |
| "grad_norm": 0.20931190252304077, |
| "learning_rate": 2.0031298904538345e-06, |
| "loss": 7.5159, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.4022215726395793, |
| "grad_norm": 0.2555796802043915, |
| "learning_rate": 1.9874804381846638e-06, |
| "loss": 7.5645, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.4068985676702717, |
| "grad_norm": 0.17398537695407867, |
| "learning_rate": 1.971830985915493e-06, |
| "loss": 8.211, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.4115755627009645, |
| "grad_norm": 0.19993047416210175, |
| "learning_rate": 1.9561815336463224e-06, |
| "loss": 8.3602, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.4162525577316574, |
| "grad_norm": 0.15980151295661926, |
| "learning_rate": 1.940532081377152e-06, |
| "loss": 7.6245, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.4209295527623502, |
| "grad_norm": 0.16947968304157257, |
| "learning_rate": 1.9248826291079814e-06, |
| "loss": 8.2847, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.425606547793043, |
| "grad_norm": 0.1670764982700348, |
| "learning_rate": 1.9092331768388107e-06, |
| "loss": 8.4169, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.430283542823736, |
| "grad_norm": 0.17053499817848206, |
| "learning_rate": 1.8935837245696402e-06, |
| "loss": 8.8886, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.4349605378544283, |
| "grad_norm": 0.16047680377960205, |
| "learning_rate": 1.8779342723004696e-06, |
| "loss": 7.9574, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.439637532885121, |
| "grad_norm": 0.2619805932044983, |
| "learning_rate": 1.862284820031299e-06, |
| "loss": 8.203, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.444314527915814, |
| "grad_norm": 0.2122809886932373, |
| "learning_rate": 1.8466353677621286e-06, |
| "loss": 7.8094, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.448991522946507, |
| "grad_norm": 0.15507692098617554, |
| "learning_rate": 1.8309859154929579e-06, |
| "loss": 7.7085, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.4536685179771998, |
| "grad_norm": 0.1406126171350479, |
| "learning_rate": 1.8153364632237874e-06, |
| "loss": 8.483, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.4583455130078926, |
| "grad_norm": 0.19436419010162354, |
| "learning_rate": 1.7996870109546167e-06, |
| "loss": 8.271, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.463022508038585, |
| "grad_norm": 0.17198602855205536, |
| "learning_rate": 1.784037558685446e-06, |
| "loss": 8.3665, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.467699503069278, |
| "grad_norm": 0.28165027499198914, |
| "learning_rate": 1.7683881064162755e-06, |
| "loss": 7.8636, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.4723764980999707, |
| "grad_norm": 0.2032092958688736, |
| "learning_rate": 1.752738654147105e-06, |
| "loss": 7.5732, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.4770534931306636, |
| "grad_norm": 0.13977749645709991, |
| "learning_rate": 1.7370892018779346e-06, |
| "loss": 7.2479, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.4817304881613564, |
| "grad_norm": 0.13071084022521973, |
| "learning_rate": 1.7214397496087637e-06, |
| "loss": 7.008, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.4864074831920493, |
| "grad_norm": 0.15741536021232605, |
| "learning_rate": 1.7057902973395932e-06, |
| "loss": 8.0612, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.4910844782227417, |
| "grad_norm": 0.16548508405685425, |
| "learning_rate": 1.6901408450704227e-06, |
| "loss": 8.0312, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.4957614732534346, |
| "grad_norm": 0.16299135982990265, |
| "learning_rate": 1.674491392801252e-06, |
| "loss": 8.8502, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.5004384682841274, |
| "grad_norm": 0.159685879945755, |
| "learning_rate": 1.6588419405320815e-06, |
| "loss": 9.5205, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.5051154633148203, |
| "grad_norm": 0.1804819405078888, |
| "learning_rate": 1.643192488262911e-06, |
| "loss": 7.683, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.509792458345513, |
| "grad_norm": 0.16809211671352386, |
| "learning_rate": 1.6275430359937403e-06, |
| "loss": 8.6418, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.514469453376206, |
| "grad_norm": 0.17984607815742493, |
| "learning_rate": 1.6118935837245697e-06, |
| "loss": 7.68, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.5191464484068984, |
| "grad_norm": 0.17649582028388977, |
| "learning_rate": 1.5962441314553992e-06, |
| "loss": 8.1753, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.5238234434375912, |
| "grad_norm": 0.16467247903347015, |
| "learning_rate": 1.5805946791862287e-06, |
| "loss": 7.6117, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.528500438468284, |
| "grad_norm": 0.17968781292438507, |
| "learning_rate": 1.5649452269170582e-06, |
| "loss": 8.549, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.533177433498977, |
| "grad_norm": 0.15423156321048737, |
| "learning_rate": 1.5492957746478873e-06, |
| "loss": 8.7104, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.53785442852967, |
| "grad_norm": 0.14077003300189972, |
| "learning_rate": 1.5336463223787168e-06, |
| "loss": 8.934, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.542531423560362, |
| "grad_norm": 0.16637051105499268, |
| "learning_rate": 1.5179968701095463e-06, |
| "loss": 7.4252, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.5472084185910555, |
| "grad_norm": 0.1724003106355667, |
| "learning_rate": 1.5023474178403756e-06, |
| "loss": 7.9955, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.551885413621748, |
| "grad_norm": 0.19609539210796356, |
| "learning_rate": 1.4866979655712052e-06, |
| "loss": 8.3348, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.5565624086524408, |
| "grad_norm": 0.12707825005054474, |
| "learning_rate": 1.4710485133020347e-06, |
| "loss": 8.0848, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.5612394036831336, |
| "grad_norm": 0.2031966894865036, |
| "learning_rate": 1.455399061032864e-06, |
| "loss": 9.8729, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.5659163987138265, |
| "grad_norm": 0.18515604734420776, |
| "learning_rate": 1.4397496087636933e-06, |
| "loss": 7.8293, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.5705933937445193, |
| "grad_norm": 0.15621398389339447, |
| "learning_rate": 1.4241001564945228e-06, |
| "loss": 9.3478, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.5752703887752117, |
| "grad_norm": 0.22210869193077087, |
| "learning_rate": 1.4084507042253523e-06, |
| "loss": 8.1303, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.5799473838059046, |
| "grad_norm": 0.27393949031829834, |
| "learning_rate": 1.3928012519561818e-06, |
| "loss": 8.4729, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.5846243788365975, |
| "grad_norm": 0.13042934238910675, |
| "learning_rate": 1.377151799687011e-06, |
| "loss": 8.2335, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.5893013738672903, |
| "grad_norm": 0.207389697432518, |
| "learning_rate": 1.3615023474178405e-06, |
| "loss": 8.0168, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.593978368897983, |
| "grad_norm": 0.14343053102493286, |
| "learning_rate": 1.34585289514867e-06, |
| "loss": 7.9552, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.5986553639286756, |
| "grad_norm": 0.1722148060798645, |
| "learning_rate": 1.3302034428794993e-06, |
| "loss": 7.6877, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.603332358959369, |
| "grad_norm": 0.18076814711093903, |
| "learning_rate": 1.3145539906103288e-06, |
| "loss": 8.0741, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.6080093539900613, |
| "grad_norm": 0.14633478224277496, |
| "learning_rate": 1.298904538341158e-06, |
| "loss": 7.3683, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.612686349020754, |
| "grad_norm": 0.14783795177936554, |
| "learning_rate": 1.2832550860719876e-06, |
| "loss": 7.8992, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.617363344051447, |
| "grad_norm": 0.15360093116760254, |
| "learning_rate": 1.267605633802817e-06, |
| "loss": 8.8425, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.62204033908214, |
| "grad_norm": 0.1691809445619583, |
| "learning_rate": 1.2519561815336464e-06, |
| "loss": 8.669, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.6267173341128327, |
| "grad_norm": 0.16426807641983032, |
| "learning_rate": 1.2363067292644757e-06, |
| "loss": 9.1265, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.631394329143525, |
| "grad_norm": 0.1331864446401596, |
| "learning_rate": 1.2206572769953053e-06, |
| "loss": 7.4892, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.636071324174218, |
| "grad_norm": 0.1330748349428177, |
| "learning_rate": 1.2050078247261348e-06, |
| "loss": 8.6181, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.640748319204911, |
| "grad_norm": 0.14942462742328644, |
| "learning_rate": 1.189358372456964e-06, |
| "loss": 7.8301, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.6454253142356037, |
| "grad_norm": 0.16964685916900635, |
| "learning_rate": 1.1737089201877936e-06, |
| "loss": 7.1293, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.6501023092662965, |
| "grad_norm": 0.1727379858493805, |
| "learning_rate": 1.158059467918623e-06, |
| "loss": 7.1773, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.654779304296989, |
| "grad_norm": 0.14950168132781982, |
| "learning_rate": 1.1424100156494524e-06, |
| "loss": 7.5172, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.6594562993276822, |
| "grad_norm": 0.16068300604820251, |
| "learning_rate": 1.1267605633802817e-06, |
| "loss": 8.7739, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.6641332943583746, |
| "grad_norm": 0.18006567656993866, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 8.2067, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.6688102893890675, |
| "grad_norm": 0.19861166179180145, |
| "learning_rate": 1.0954616588419406e-06, |
| "loss": 7.1208, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.6734872844197604, |
| "grad_norm": 0.13374726474285126, |
| "learning_rate": 1.07981220657277e-06, |
| "loss": 7.8631, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.678164279450453, |
| "grad_norm": 0.17814220488071442, |
| "learning_rate": 1.0641627543035994e-06, |
| "loss": 7.4765, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.682841274481146, |
| "grad_norm": 0.22474409639835358, |
| "learning_rate": 1.048513302034429e-06, |
| "loss": 7.0754, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.6875182695118385, |
| "grad_norm": 0.16655339300632477, |
| "learning_rate": 1.0328638497652584e-06, |
| "loss": 7.2505, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.6921952645425313, |
| "grad_norm": 0.172933891415596, |
| "learning_rate": 1.0172143974960877e-06, |
| "loss": 8.0832, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.696872259573224, |
| "grad_norm": 0.14097332954406738, |
| "learning_rate": 1.0015649452269172e-06, |
| "loss": 8.0197, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.701549254603917, |
| "grad_norm": 0.1363203376531601, |
| "learning_rate": 9.859154929577465e-07, |
| "loss": 7.2466, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.70622624963461, |
| "grad_norm": 0.17508287727832794, |
| "learning_rate": 9.70266040688576e-07, |
| "loss": 9.3567, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.7109032446653023, |
| "grad_norm": 0.169004425406456, |
| "learning_rate": 9.546165884194054e-07, |
| "loss": 8.0132, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.715580239695995, |
| "grad_norm": 0.14103683829307556, |
| "learning_rate": 9.389671361502348e-07, |
| "loss": 9.0192, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.720257234726688, |
| "grad_norm": 0.197422057390213, |
| "learning_rate": 9.233176838810643e-07, |
| "loss": 7.2779, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.724934229757381, |
| "grad_norm": 0.1950581669807434, |
| "learning_rate": 9.076682316118937e-07, |
| "loss": 7.6089, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.7296112247880737, |
| "grad_norm": 0.23691439628601074, |
| "learning_rate": 8.92018779342723e-07, |
| "loss": 7.9658, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.7342882198187666, |
| "grad_norm": 0.2558799684047699, |
| "learning_rate": 8.763693270735525e-07, |
| "loss": 8.4791, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.7389652148494594, |
| "grad_norm": 0.17010214924812317, |
| "learning_rate": 8.607198748043818e-07, |
| "loss": 8.4854, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.743642209880152, |
| "grad_norm": 0.13403132557868958, |
| "learning_rate": 8.450704225352114e-07, |
| "loss": 6.7996, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.7483192049108447, |
| "grad_norm": 0.14201347529888153, |
| "learning_rate": 8.294209702660408e-07, |
| "loss": 8.3937, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.7529961999415375, |
| "grad_norm": 0.28258565068244934, |
| "learning_rate": 8.137715179968702e-07, |
| "loss": 9.3709, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.7576731949722304, |
| "grad_norm": 0.17337313294410706, |
| "learning_rate": 7.981220657276996e-07, |
| "loss": 7.6751, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.7623501900029233, |
| "grad_norm": 0.1940070241689682, |
| "learning_rate": 7.824726134585291e-07, |
| "loss": 7.3925, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.7670271850336157, |
| "grad_norm": 0.14429809153079987, |
| "learning_rate": 7.668231611893584e-07, |
| "loss": 7.4402, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.7717041800643085, |
| "grad_norm": 0.17765949666500092, |
| "learning_rate": 7.511737089201878e-07, |
| "loss": 7.6954, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.7763811750950014, |
| "grad_norm": 0.15836399793624878, |
| "learning_rate": 7.355242566510173e-07, |
| "loss": 7.6582, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.7810581701256942, |
| "grad_norm": 0.14881688356399536, |
| "learning_rate": 7.198748043818466e-07, |
| "loss": 8.7099, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.785735165156387, |
| "grad_norm": 0.21536029875278473, |
| "learning_rate": 7.042253521126762e-07, |
| "loss": 8.3015, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.79041216018708, |
| "grad_norm": 0.14025098085403442, |
| "learning_rate": 6.885758998435055e-07, |
| "loss": 8.7512, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.795089155217773, |
| "grad_norm": 0.13290052115917206, |
| "learning_rate": 6.72926447574335e-07, |
| "loss": 7.5792, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.799766150248465, |
| "grad_norm": 0.3149656057357788, |
| "learning_rate": 6.572769953051644e-07, |
| "loss": 8.4982, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.804443145279158, |
| "grad_norm": 0.16543497145175934, |
| "learning_rate": 6.416275430359938e-07, |
| "loss": 8.3097, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.809120140309851, |
| "grad_norm": 0.17708784341812134, |
| "learning_rate": 6.259780907668232e-07, |
| "loss": 8.8047, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.8137971353405438, |
| "grad_norm": 0.14560888707637787, |
| "learning_rate": 6.103286384976526e-07, |
| "loss": 8.0925, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.8184741303712366, |
| "grad_norm": 0.1902446448802948, |
| "learning_rate": 5.94679186228482e-07, |
| "loss": 7.7629, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.823151125401929, |
| "grad_norm": 0.1473388820886612, |
| "learning_rate": 5.790297339593115e-07, |
| "loss": 8.1077, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.827828120432622, |
| "grad_norm": 0.16258402168750763, |
| "learning_rate": 5.633802816901409e-07, |
| "loss": 7.8582, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.8325051154633147, |
| "grad_norm": 0.1769980639219284, |
| "learning_rate": 5.477308294209703e-07, |
| "loss": 7.1382, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.8371821104940076, |
| "grad_norm": 0.1444021314382553, |
| "learning_rate": 5.320813771517997e-07, |
| "loss": 7.1383, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.8418591055247004, |
| "grad_norm": 0.21616753935813904, |
| "learning_rate": 5.164319248826292e-07, |
| "loss": 7.605, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.8465361005553933, |
| "grad_norm": 0.20384635031223297, |
| "learning_rate": 5.007824726134586e-07, |
| "loss": 8.8131, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.851213095586086, |
| "grad_norm": 0.1579245626926422, |
| "learning_rate": 4.85133020344288e-07, |
| "loss": 7.8475, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.8558900906167786, |
| "grad_norm": 0.20689930021762848, |
| "learning_rate": 4.694835680751174e-07, |
| "loss": 7.1765, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.8605670856474714, |
| "grad_norm": 0.1589430868625641, |
| "learning_rate": 4.5383411580594685e-07, |
| "loss": 8.8879, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.8652440806781643, |
| "grad_norm": 0.16509409248828888, |
| "learning_rate": 4.3818466353677626e-07, |
| "loss": 7.6589, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.869921075708857, |
| "grad_norm": 0.1439896821975708, |
| "learning_rate": 4.225352112676057e-07, |
| "loss": 8.0296, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.87459807073955, |
| "grad_norm": 0.19501394033432007, |
| "learning_rate": 4.068857589984351e-07, |
| "loss": 8.5349, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.8792750657702424, |
| "grad_norm": 0.18828211724758148, |
| "learning_rate": 3.9123630672926455e-07, |
| "loss": 7.3593, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.8839520608009352, |
| "grad_norm": 0.15072734653949738, |
| "learning_rate": 3.755868544600939e-07, |
| "loss": 8.2512, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.888629055831628, |
| "grad_norm": 0.1598856896162033, |
| "learning_rate": 3.599374021909233e-07, |
| "loss": 7.3246, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.893306050862321, |
| "grad_norm": 0.15382905304431915, |
| "learning_rate": 3.4428794992175273e-07, |
| "loss": 7.674, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.897983045893014, |
| "grad_norm": 0.13851745426654816, |
| "learning_rate": 3.286384976525822e-07, |
| "loss": 7.6664, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.9026600409237067, |
| "grad_norm": 0.12572415173053741, |
| "learning_rate": 3.129890453834116e-07, |
| "loss": 8.6634, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.9073370359543995, |
| "grad_norm": 0.181121364235878, |
| "learning_rate": 2.97339593114241e-07, |
| "loss": 7.5301, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.912014030985092, |
| "grad_norm": 0.18877944350242615, |
| "learning_rate": 2.8169014084507043e-07, |
| "loss": 7.4353, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.916691026015785, |
| "grad_norm": 0.1800297349691391, |
| "learning_rate": 2.6604068857589984e-07, |
| "loss": 8.0261, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.9213680210464776, |
| "grad_norm": 0.1459706425666809, |
| "learning_rate": 2.503912363067293e-07, |
| "loss": 8.5898, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.9260450160771705, |
| "grad_norm": 0.19272330403327942, |
| "learning_rate": 2.347417840375587e-07, |
| "loss": 7.5655, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.9307220111078633, |
| "grad_norm": 0.13995127379894257, |
| "learning_rate": 2.1909233176838813e-07, |
| "loss": 8.807, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.9353990061385558, |
| "grad_norm": 0.19578878581523895, |
| "learning_rate": 2.0344287949921754e-07, |
| "loss": 8.2283, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.9400760011692486, |
| "grad_norm": 0.18744409084320068, |
| "learning_rate": 1.8779342723004696e-07, |
| "loss": 7.8586, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.9447529961999415, |
| "grad_norm": 0.18906202912330627, |
| "learning_rate": 1.7214397496087637e-07, |
| "loss": 8.9175, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.9494299912306343, |
| "grad_norm": 0.2817856967449188, |
| "learning_rate": 1.564945226917058e-07, |
| "loss": 7.8464, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.954106986261327, |
| "grad_norm": 0.1482636034488678, |
| "learning_rate": 1.4084507042253522e-07, |
| "loss": 8.0478, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.95878398129202, |
| "grad_norm": 0.1729574054479599, |
| "learning_rate": 1.2519561815336465e-07, |
| "loss": 8.3514, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.963460976322713, |
| "grad_norm": 0.23052264750003815, |
| "learning_rate": 1.0954616588419407e-07, |
| "loss": 8.397, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.9681379713534053, |
| "grad_norm": 0.16747911274433136, |
| "learning_rate": 9.389671361502348e-08, |
| "loss": 8.0443, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.972814966384098, |
| "grad_norm": 0.14860796928405762, |
| "learning_rate": 7.82472613458529e-08, |
| "loss": 7.317, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.977491961414791, |
| "grad_norm": 0.13674141466617584, |
| "learning_rate": 6.259780907668233e-08, |
| "loss": 7.6038, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.982168956445484, |
| "grad_norm": 0.163039892911911, |
| "learning_rate": 4.694835680751174e-08, |
| "loss": 7.4459, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.9868459514761767, |
| "grad_norm": 0.1598978042602539, |
| "learning_rate": 3.1298904538341164e-08, |
| "loss": 9.9334, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.991522946506869, |
| "grad_norm": 0.16937094926834106, |
| "learning_rate": 1.5649452269170582e-08, |
| "loss": 7.9632, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.996199941537562, |
| "grad_norm": 0.11614558100700378, |
| "learning_rate": 0.0, |
| "loss": 7.3632, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.996199941537562, |
| "step": 639, |
| "total_flos": 2.8450474856619704e+18, |
| "train_loss": 9.811768141524146, |
| "train_runtime": 60574.6064, |
| "train_samples_per_second": 1.355, |
| "train_steps_per_second": 0.011 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 639, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.8450474856619704e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|