| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 31250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.06700329924693645, |
| "learning_rate": 1.9968e-05, |
| "loss": 0.1152, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.04710424954214583, |
| "learning_rate": 1.9936000000000004e-05, |
| "loss": 0.0038, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.04258497062208284, |
| "learning_rate": 1.9904e-05, |
| "loss": 0.0027, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.046287169575826764, |
| "learning_rate": 1.9872000000000002e-05, |
| "loss": 0.0022, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.02914093414933542, |
| "learning_rate": 1.9840000000000003e-05, |
| "loss": 0.0018, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.027612185075684643, |
| "learning_rate": 1.9808e-05, |
| "loss": 0.0015, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.027340652289474687, |
| "learning_rate": 1.9776000000000002e-05, |
| "loss": 0.0013, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.03207204814138842, |
| "learning_rate": 1.9744e-05, |
| "loss": 0.0011, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.022192312524934657, |
| "learning_rate": 1.9712000000000004e-05, |
| "loss": 0.001, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.02528324253889177, |
| "learning_rate": 1.968e-05, |
| "loss": 0.0009, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.02450599947369508, |
| "learning_rate": 1.9648000000000002e-05, |
| "loss": 0.0008, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.021875653202537572, |
| "learning_rate": 1.9616000000000003e-05, |
| "loss": 0.0008, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.024365675733290123, |
| "learning_rate": 1.9584e-05, |
| "loss": 0.0007, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.020088699678960917, |
| "learning_rate": 1.9552000000000002e-05, |
| "loss": 0.0005, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.023939461961356913, |
| "learning_rate": 1.9520000000000003e-05, |
| "loss": 0.0005, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.016654961004293048, |
| "learning_rate": 1.9488000000000004e-05, |
| "loss": 0.0005, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.01947896740438186, |
| "learning_rate": 1.9456e-05, |
| "loss": 0.0005, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.01632254770986144, |
| "learning_rate": 1.9424e-05, |
| "loss": 0.0004, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.01768857594358966, |
| "learning_rate": 1.9392000000000003e-05, |
| "loss": 0.0004, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.016646831355600935, |
| "learning_rate": 1.936e-05, |
| "loss": 0.0003, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.013828212178231426, |
| "learning_rate": 1.9328000000000002e-05, |
| "loss": 0.0003, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.014125730332492632, |
| "learning_rate": 1.9296000000000003e-05, |
| "loss": 0.0003, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.014130896060127419, |
| "learning_rate": 1.9264e-05, |
| "loss": 0.0003, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.01326246196506426, |
| "learning_rate": 1.9232e-05, |
| "loss": 0.0003, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.012809291124835588, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.0002, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.009475817533782105, |
| "learning_rate": 1.9168000000000004e-05, |
| "loss": 0.0002, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.01342329167018312, |
| "learning_rate": 1.9136e-05, |
| "loss": 0.0002, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.010803873420981821, |
| "learning_rate": 1.9104000000000002e-05, |
| "loss": 0.0002, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.008662937868043983, |
| "learning_rate": 1.9072000000000003e-05, |
| "loss": 0.0002, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.009370210631900725, |
| "learning_rate": 1.904e-05, |
| "loss": 0.0002, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.014623062482376369, |
| "learning_rate": 1.9008e-05, |
| "loss": 0.0002, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.011257550251173976, |
| "learning_rate": 1.8976000000000003e-05, |
| "loss": 0.0001, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.007251658800690561, |
| "learning_rate": 1.8944000000000004e-05, |
| "loss": 0.0001, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.010009328096570643, |
| "learning_rate": 1.8912e-05, |
| "loss": 0.0002, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.010324858035729835, |
| "learning_rate": 1.8880000000000002e-05, |
| "loss": 0.0001, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.012200136441846834, |
| "learning_rate": 1.8848000000000003e-05, |
| "loss": 0.0001, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.011050524306703934, |
| "learning_rate": 1.8816e-05, |
| "loss": 0.0001, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.009424921961091424, |
| "learning_rate": 1.8784000000000002e-05, |
| "loss": 0.0001, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.00893922456004691, |
| "learning_rate": 1.8752000000000003e-05, |
| "loss": 0.0001, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.010239948440932777, |
| "learning_rate": 1.8720000000000004e-05, |
| "loss": 0.0001, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.00976946104257329, |
| "learning_rate": 1.8688e-05, |
| "loss": 0.0001, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.009651690086449066, |
| "learning_rate": 1.8656000000000002e-05, |
| "loss": 0.0001, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.05693047764323227, |
| "learning_rate": 1.8624000000000003e-05, |
| "loss": 0.0733, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.011338530961932, |
| "learning_rate": 1.8592e-05, |
| "loss": 0.0004, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.005269926068818781, |
| "learning_rate": 1.8560000000000002e-05, |
| "loss": 0.0001, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.007667043664845632, |
| "learning_rate": 1.8528000000000003e-05, |
| "loss": 0.0001, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.004397910133064048, |
| "learning_rate": 1.8496e-05, |
| "loss": 0.0001, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.005138608389724877, |
| "learning_rate": 1.8464e-05, |
| "loss": 0.0, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.013032095144063073, |
| "learning_rate": 1.8432000000000002e-05, |
| "loss": 0.0, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.0030479607356790257, |
| "learning_rate": 1.8400000000000003e-05, |
| "loss": 0.0, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.002160576995308326, |
| "learning_rate": 1.8368e-05, |
| "loss": 0.0, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.002576068592921048, |
| "learning_rate": 1.8336000000000002e-05, |
| "loss": 0.0, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.004527008786251422, |
| "learning_rate": 1.8304000000000003e-05, |
| "loss": 0.0, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.002401493354271683, |
| "learning_rate": 1.8272e-05, |
| "loss": 0.0, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.0016418474174344473, |
| "learning_rate": 1.824e-05, |
| "loss": 0.0, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.0029580974745545933, |
| "learning_rate": 1.8208000000000003e-05, |
| "loss": 0.0, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.0012505689661283781, |
| "learning_rate": 1.8176000000000004e-05, |
| "loss": 0.0, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.002206210406840656, |
| "learning_rate": 1.8144e-05, |
| "loss": 0.0, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.0006240208662934369, |
| "learning_rate": 1.8112000000000002e-05, |
| "loss": 0.0, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.003067833646212094, |
| "learning_rate": 1.8080000000000003e-05, |
| "loss": 0.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.001555266902384, |
| "learning_rate": 1.8048e-05, |
| "loss": 0.0, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.005684931150991733, |
| "learning_rate": 1.8016e-05, |
| "loss": 0.0, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 0.00036054174942539585, |
| "learning_rate": 1.7984000000000003e-05, |
| "loss": 0.0, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.0023891310529782278, |
| "learning_rate": 1.7952e-05, |
| "loss": 0.0, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.0017593637539934622, |
| "learning_rate": 1.792e-05, |
| "loss": 0.0, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.00988261541456867, |
| "learning_rate": 1.7888000000000002e-05, |
| "loss": 0.0001, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 0.008606785310498308, |
| "learning_rate": 1.7856000000000003e-05, |
| "loss": 0.0001, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 0.009666636774150597, |
| "learning_rate": 1.7824e-05, |
| "loss": 0.0001, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 0.0034236747560986843, |
| "learning_rate": 1.7792000000000002e-05, |
| "loss": 0.0001, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.018616762957628402, |
| "learning_rate": 1.7760000000000003e-05, |
| "loss": 0.0094, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.009926468045818765, |
| "learning_rate": 1.7728e-05, |
| "loss": 0.0002, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 0.007468057768454313, |
| "learning_rate": 1.7696e-05, |
| "loss": 0.0001, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 0.008415666101481943, |
| "learning_rate": 1.7664000000000002e-05, |
| "loss": 0.0001, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.012134121535343333, |
| "learning_rate": 1.7632000000000003e-05, |
| "loss": 0.0001, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.0040802073628923375, |
| "learning_rate": 1.76e-05, |
| "loss": 0.0, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.0012922320234944806, |
| "learning_rate": 1.7568000000000002e-05, |
| "loss": 0.0, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 0.002679388648395559, |
| "learning_rate": 1.7536000000000003e-05, |
| "loss": 0.0, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 0.0008935097752906122, |
| "learning_rate": 1.7504e-05, |
| "loss": 0.0, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.002432102143415828, |
| "learning_rate": 1.7472e-05, |
| "loss": 0.0, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.0026062712960827376, |
| "learning_rate": 1.7440000000000002e-05, |
| "loss": 0.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 0.0012617909961480731, |
| "learning_rate": 1.7408e-05, |
| "loss": 0.0, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 0.005995550159991727, |
| "learning_rate": 1.7376e-05, |
| "loss": 0.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 0.0022878669701388655, |
| "learning_rate": 1.7344000000000002e-05, |
| "loss": 0.0001, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 0.00472466199586374, |
| "learning_rate": 1.7312000000000003e-05, |
| "loss": 0.0001, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.004622604014149787, |
| "learning_rate": 1.728e-05, |
| "loss": 0.0, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 0.006288850357467127, |
| "learning_rate": 1.7248e-05, |
| "loss": 0.0, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 0.0059456589687283045, |
| "learning_rate": 1.7216000000000003e-05, |
| "loss": 0.0, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 0.004299973129725888, |
| "learning_rate": 1.7184e-05, |
| "loss": 0.0, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 0.006384727380231639, |
| "learning_rate": 1.7152e-05, |
| "loss": 0.0001, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.006961166462441037, |
| "learning_rate": 1.7120000000000002e-05, |
| "loss": 0.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 0.002706931118082421, |
| "learning_rate": 1.7088000000000003e-05, |
| "loss": 0.0, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 0.0044763406305933845, |
| "learning_rate": 1.7056e-05, |
| "loss": 0.0, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 0.0018676112837315154, |
| "learning_rate": 1.7024e-05, |
| "loss": 0.0, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 0.0035498650855882784, |
| "learning_rate": 1.6992000000000003e-05, |
| "loss": 0.0, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.0032614453273975457, |
| "learning_rate": 1.696e-05, |
| "loss": 0.0, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 0.004704682912696666, |
| "learning_rate": 1.6928e-05, |
| "loss": 0.0, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 0.003944103313663087, |
| "learning_rate": 1.6896000000000002e-05, |
| "loss": 0.0, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 0.001028864743221988, |
| "learning_rate": 1.6864e-05, |
| "loss": 0.0, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.008781206361834713, |
| "learning_rate": 1.6832e-05, |
| "loss": 0.0001, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.006941071455141092, |
| "learning_rate": 1.6800000000000002e-05, |
| "loss": 0.0001, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 0.0035317612009535013, |
| "learning_rate": 1.6768000000000003e-05, |
| "loss": 0.0001, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 0.007130755050925452, |
| "learning_rate": 1.6736e-05, |
| "loss": 0.0001, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 0.009056134474244146, |
| "learning_rate": 1.6704e-05, |
| "loss": 0.0001, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 0.009975608465029769, |
| "learning_rate": 1.6672000000000002e-05, |
| "loss": 0.0001, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.00752603996543754, |
| "learning_rate": 1.664e-05, |
| "loss": 0.0001, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 0.005644442586376258, |
| "learning_rate": 1.6608e-05, |
| "loss": 0.0001, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 0.005278241124697153, |
| "learning_rate": 1.6576000000000002e-05, |
| "loss": 0.0001, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 0.004435698738768064, |
| "learning_rate": 1.6544000000000003e-05, |
| "loss": 0.0001, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 0.006404322123967892, |
| "learning_rate": 1.6512e-05, |
| "loss": 0.0, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.005399249694087203, |
| "learning_rate": 1.648e-05, |
| "loss": 0.0001, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 0.008390970222631198, |
| "learning_rate": 1.6448000000000002e-05, |
| "loss": 0.0, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 0.0049263709067859165, |
| "learning_rate": 1.6416e-05, |
| "loss": 0.0001, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 0.0023635146513281257, |
| "learning_rate": 1.6384e-05, |
| "loss": 0.0, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 0.005111500857791294, |
| "learning_rate": 1.6352000000000002e-05, |
| "loss": 0.0, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.00844717374529172, |
| "learning_rate": 1.632e-05, |
| "loss": 0.0, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.0036198931176737673, |
| "learning_rate": 1.6288e-05, |
| "loss": 0.0, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 0.0048152236821402804, |
| "learning_rate": 1.6256e-05, |
| "loss": 0.0, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 0.0008869093396117195, |
| "learning_rate": 1.6224000000000003e-05, |
| "loss": 0.0, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 0.0025329131777146337, |
| "learning_rate": 1.6192e-05, |
| "loss": 0.0, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.0037397513843206794, |
| "learning_rate": 1.616e-05, |
| "loss": 0.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 0.006049615714075747, |
| "learning_rate": 1.6128000000000002e-05, |
| "loss": 0.0, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 0.0017716844046744302, |
| "learning_rate": 1.6096e-05, |
| "loss": 0.0, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 0.009285851148933216, |
| "learning_rate": 1.6064e-05, |
| "loss": 0.0, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 0.0004888867999470166, |
| "learning_rate": 1.6032e-05, |
| "loss": 0.0, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.006431946143017072, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.0, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 0.0023743769950413467, |
| "learning_rate": 1.5968e-05, |
| "loss": 0.0001, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 0.009089590999873571, |
| "learning_rate": 1.5936e-05, |
| "loss": 0.0001, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 0.006365387805287794, |
| "learning_rate": 1.5904000000000002e-05, |
| "loss": 0.0, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 0.004108736082112842, |
| "learning_rate": 1.5872e-05, |
| "loss": 0.0, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 0.004691840217453286, |
| "learning_rate": 1.584e-05, |
| "loss": 0.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 0.004716746413071922, |
| "learning_rate": 1.5808000000000002e-05, |
| "loss": 0.0, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 0.008418820460181144, |
| "learning_rate": 1.5776e-05, |
| "loss": 0.0001, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 0.001730413649374287, |
| "learning_rate": 1.5744e-05, |
| "loss": 0.0001, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 0.08084393210770029, |
| "learning_rate": 1.5712e-05, |
| "loss": 0.0001, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.0091071714838611, |
| "learning_rate": 1.5680000000000002e-05, |
| "loss": 0.0002, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 0.011937686894892474, |
| "learning_rate": 1.5648e-05, |
| "loss": 0.0007, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.007773091042443193, |
| "learning_rate": 1.5616e-05, |
| "loss": 0.0002, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 0.0059540468380226055, |
| "learning_rate": 1.5584000000000002e-05, |
| "loss": 0.0001, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 0.004014490897874684, |
| "learning_rate": 1.5552e-05, |
| "loss": 0.0001, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 0.006875418357679233, |
| "learning_rate": 1.552e-05, |
| "loss": 0.0001, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.007989346292625824, |
| "learning_rate": 1.5488e-05, |
| "loss": 0.0001, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 0.006473879081312047, |
| "learning_rate": 1.5456000000000002e-05, |
| "loss": 0.0, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 0.001344406505995323, |
| "learning_rate": 1.5424e-05, |
| "loss": 0.0, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 0.0013279502116751898, |
| "learning_rate": 1.5392e-05, |
| "loss": 0.0, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.0025834903526500636, |
| "learning_rate": 1.5360000000000002e-05, |
| "loss": 0.0001, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 0.003613767461251546, |
| "learning_rate": 1.5328e-05, |
| "loss": 0.0, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 0.007693189752853583, |
| "learning_rate": 1.5296e-05, |
| "loss": 0.0, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 0.0039145709733582204, |
| "learning_rate": 1.5264e-05, |
| "loss": 0.0, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 0.0024948404702111915, |
| "learning_rate": 1.5232000000000003e-05, |
| "loss": 0.0001, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.005963883109858903, |
| "learning_rate": 1.5200000000000002e-05, |
| "loss": 0.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 0.005203832761613304, |
| "learning_rate": 1.5168000000000001e-05, |
| "loss": 0.0, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 0.003019086304287391, |
| "learning_rate": 1.5136000000000002e-05, |
| "loss": 0.0, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.002330648219270858, |
| "learning_rate": 1.5104000000000001e-05, |
| "loss": 0.0, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 0.005547183836451343, |
| "learning_rate": 1.5072000000000002e-05, |
| "loss": 0.0, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.007732333230021918, |
| "learning_rate": 1.5040000000000002e-05, |
| "loss": 0.0001, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 0.0028876572710758606, |
| "learning_rate": 1.5008000000000001e-05, |
| "loss": 0.0, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.00504986025914165, |
| "learning_rate": 1.4976000000000002e-05, |
| "loss": 0.0, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 0.004979440608394857, |
| "learning_rate": 1.4944000000000001e-05, |
| "loss": 0.0, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 0.004136333653101882, |
| "learning_rate": 1.4912000000000002e-05, |
| "loss": 0.0, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 0.00870644207668528, |
| "learning_rate": 1.4880000000000002e-05, |
| "loss": 0.0001, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.004798469088496501, |
| "learning_rate": 1.4848e-05, |
| "loss": 0.0001, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 0.0070315691108956335, |
| "learning_rate": 1.4816000000000002e-05, |
| "loss": 0.0001, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 0.0025286695712524415, |
| "learning_rate": 1.4784000000000001e-05, |
| "loss": 0.0001, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 0.004842817224651438, |
| "learning_rate": 1.4752000000000002e-05, |
| "loss": 0.0001, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.007477324219913048, |
| "learning_rate": 1.4720000000000001e-05, |
| "loss": 0.0, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 0.007776510130998169, |
| "learning_rate": 1.4688000000000002e-05, |
| "loss": 0.0001, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 0.004357492973329834, |
| "learning_rate": 1.4656000000000002e-05, |
| "loss": 0.0, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 0.0032312366379610435, |
| "learning_rate": 1.4624000000000001e-05, |
| "loss": 0.0, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.0014641972549175538, |
| "learning_rate": 1.4592000000000002e-05, |
| "loss": 0.0, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.002893495767324172, |
| "learning_rate": 1.4560000000000001e-05, |
| "loss": 0.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 0.003053109756022501, |
| "learning_rate": 1.4528000000000002e-05, |
| "loss": 0.0, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 0.0045809013298590984, |
| "learning_rate": 1.4496000000000001e-05, |
| "loss": 0.0001, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.0028170962722070987, |
| "learning_rate": 1.4464e-05, |
| "loss": 0.0001, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 0.0016398323178968255, |
| "learning_rate": 1.4432000000000002e-05, |
| "loss": 0.0, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.0015106826900520753, |
| "learning_rate": 1.4400000000000001e-05, |
| "loss": 0.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 0.0016227105812749837, |
| "learning_rate": 1.4368000000000002e-05, |
| "loss": 0.0, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.002216048526258624, |
| "learning_rate": 1.4336000000000001e-05, |
| "loss": 0.0, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 0.0028826250412742134, |
| "learning_rate": 1.4304e-05, |
| "loss": 0.0, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 0.0031560966093997045, |
| "learning_rate": 1.4272000000000002e-05, |
| "loss": 0.0, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 0.005840061376547936, |
| "learning_rate": 1.4240000000000001e-05, |
| "loss": 0.0, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.0033866397581423604, |
| "learning_rate": 1.4208000000000002e-05, |
| "loss": 0.0, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 0.002624744023065921, |
| "learning_rate": 1.4176000000000001e-05, |
| "loss": 0.0, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 0.005004706105838203, |
| "learning_rate": 1.4144000000000002e-05, |
| "loss": 0.0, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 0.006743938270662594, |
| "learning_rate": 1.4112000000000001e-05, |
| "loss": 0.0001, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.009483278180315352, |
| "learning_rate": 1.408e-05, |
| "loss": 0.0001, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 0.0007610189598327726, |
| "learning_rate": 1.4048000000000002e-05, |
| "loss": 0.0001, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 0.004900031620646641, |
| "learning_rate": 1.4016000000000001e-05, |
| "loss": 0.0001, |
| "step": 9350 |
| }, |
| { |
| "epoch": 3.008, |
| "grad_norm": 0.0040788821051548334, |
| "learning_rate": 1.3984000000000002e-05, |
| "loss": 0.0, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.024, |
| "grad_norm": 0.006604571524243836, |
| "learning_rate": 1.3952000000000001e-05, |
| "loss": 0.0001, |
| "step": 9450 |
| }, |
| { |
| "epoch": 3.04, |
| "grad_norm": 0.004390543355203382, |
| "learning_rate": 1.392e-05, |
| "loss": 0.0001, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.056, |
| "grad_norm": 0.006092729798201787, |
| "learning_rate": 1.3888000000000002e-05, |
| "loss": 0.0001, |
| "step": 9550 |
| }, |
| { |
| "epoch": 3.072, |
| "grad_norm": 0.0024120775231851973, |
| "learning_rate": 1.3856e-05, |
| "loss": 0.0001, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.088, |
| "grad_norm": 0.0023121633006781624, |
| "learning_rate": 1.3824000000000002e-05, |
| "loss": 0.0, |
| "step": 9650 |
| }, |
| { |
| "epoch": 3.104, |
| "grad_norm": 0.004009670235882467, |
| "learning_rate": 1.3792000000000001e-05, |
| "loss": 0.0, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 0.0029967749699539876, |
| "learning_rate": 1.376e-05, |
| "loss": 0.0, |
| "step": 9750 |
| }, |
| { |
| "epoch": 3.136, |
| "grad_norm": 0.0011939540142138618, |
| "learning_rate": 1.3728000000000001e-05, |
| "loss": 0.0, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.152, |
| "grad_norm": 0.0018367777306534149, |
| "learning_rate": 1.3696e-05, |
| "loss": 0.0, |
| "step": 9850 |
| }, |
| { |
| "epoch": 3.168, |
| "grad_norm": 0.005363794390571419, |
| "learning_rate": 1.3664000000000002e-05, |
| "loss": 0.0, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.184, |
| "grad_norm": 0.008476719613859292, |
| "learning_rate": 1.3632000000000001e-05, |
| "loss": 0.0, |
| "step": 9950 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.004822397551004876, |
| "learning_rate": 1.3600000000000002e-05, |
| "loss": 0.0001, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.216, |
| "grad_norm": 0.005770066249937767, |
| "learning_rate": 1.3568000000000001e-05, |
| "loss": 0.0001, |
| "step": 10050 |
| }, |
| { |
| "epoch": 3.232, |
| "grad_norm": 0.005910492274029805, |
| "learning_rate": 1.3536e-05, |
| "loss": 0.0001, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.248, |
| "grad_norm": 0.003960172672416223, |
| "learning_rate": 1.3504000000000001e-05, |
| "loss": 0.0001, |
| "step": 10150 |
| }, |
| { |
| "epoch": 3.2640000000000002, |
| "grad_norm": 0.001980537126146146, |
| "learning_rate": 1.3472e-05, |
| "loss": 0.0001, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.2800000000000002, |
| "grad_norm": 0.0036278941646021327, |
| "learning_rate": 1.3440000000000002e-05, |
| "loss": 0.0, |
| "step": 10250 |
| }, |
| { |
| "epoch": 3.296, |
| "grad_norm": 0.0018966529117990538, |
| "learning_rate": 1.3408000000000001e-05, |
| "loss": 0.0, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.312, |
| "grad_norm": 0.005134519388485188, |
| "learning_rate": 1.3376e-05, |
| "loss": 0.0, |
| "step": 10350 |
| }, |
| { |
| "epoch": 3.328, |
| "grad_norm": 0.0038905098172508363, |
| "learning_rate": 1.3344000000000001e-05, |
| "loss": 0.0, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.344, |
| "grad_norm": 0.0031574628583158175, |
| "learning_rate": 1.3312e-05, |
| "loss": 0.0001, |
| "step": 10450 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 0.002999972961544216, |
| "learning_rate": 1.3280000000000002e-05, |
| "loss": 0.0, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.376, |
| "grad_norm": 0.0033055234024522047, |
| "learning_rate": 1.3248000000000001e-05, |
| "loss": 0.0, |
| "step": 10550 |
| }, |
| { |
| "epoch": 3.392, |
| "grad_norm": 0.00401214391851968, |
| "learning_rate": 1.3216000000000002e-05, |
| "loss": 0.0, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.408, |
| "grad_norm": 0.00021936232095850022, |
| "learning_rate": 1.3184000000000001e-05, |
| "loss": 0.0, |
| "step": 10650 |
| }, |
| { |
| "epoch": 3.424, |
| "grad_norm": 0.000917802895080481, |
| "learning_rate": 1.3152e-05, |
| "loss": 0.0, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.44, |
| "grad_norm": 0.00214730949334728, |
| "learning_rate": 1.3120000000000001e-05, |
| "loss": 0.0, |
| "step": 10750 |
| }, |
| { |
| "epoch": 3.456, |
| "grad_norm": 0.0030398923791812287, |
| "learning_rate": 1.3088e-05, |
| "loss": 0.0, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.472, |
| "grad_norm": 0.0022242808251567757, |
| "learning_rate": 1.3056000000000002e-05, |
| "loss": 0.0, |
| "step": 10850 |
| }, |
| { |
| "epoch": 3.488, |
| "grad_norm": 0.0008940846133161214, |
| "learning_rate": 1.3024000000000001e-05, |
| "loss": 0.0, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.504, |
| "grad_norm": 0.0052346185053533355, |
| "learning_rate": 1.2992e-05, |
| "loss": 0.0, |
| "step": 10950 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 0.002586167307537178, |
| "learning_rate": 1.2960000000000001e-05, |
| "loss": 0.0, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.536, |
| "grad_norm": 0.014366158301715531, |
| "learning_rate": 1.2928e-05, |
| "loss": 0.0, |
| "step": 11050 |
| }, |
| { |
| "epoch": 3.552, |
| "grad_norm": 0.005454338786955534, |
| "learning_rate": 1.2896000000000002e-05, |
| "loss": 0.0001, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.568, |
| "grad_norm": 0.0021260355729629562, |
| "learning_rate": 1.2864e-05, |
| "loss": 0.0, |
| "step": 11150 |
| }, |
| { |
| "epoch": 3.584, |
| "grad_norm": 0.0035810871167441354, |
| "learning_rate": 1.2832e-05, |
| "loss": 0.0, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.002774145084319564, |
| "learning_rate": 1.2800000000000001e-05, |
| "loss": 0.0, |
| "step": 11250 |
| }, |
| { |
| "epoch": 3.616, |
| "grad_norm": 0.0017297614850600386, |
| "learning_rate": 1.2768e-05, |
| "loss": 0.0, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.632, |
| "grad_norm": 0.005428288582783459, |
| "learning_rate": 1.2736000000000001e-05, |
| "loss": 0.0, |
| "step": 11350 |
| }, |
| { |
| "epoch": 3.648, |
| "grad_norm": 0.0026717447275682845, |
| "learning_rate": 1.2704e-05, |
| "loss": 0.0, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.664, |
| "grad_norm": 0.00408219616036445, |
| "learning_rate": 1.2672000000000002e-05, |
| "loss": 0.0, |
| "step": 11450 |
| }, |
| { |
| "epoch": 3.68, |
| "grad_norm": 0.0021619661272979113, |
| "learning_rate": 1.2640000000000001e-05, |
| "loss": 0.0, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.6959999999999997, |
| "grad_norm": 0.0035456991848178644, |
| "learning_rate": 1.2608e-05, |
| "loss": 0.0001, |
| "step": 11550 |
| }, |
| { |
| "epoch": 3.7119999999999997, |
| "grad_norm": 0.004459203173437518, |
| "learning_rate": 1.2576000000000001e-05, |
| "loss": 0.0, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7279999999999998, |
| "grad_norm": 0.005408717563076554, |
| "learning_rate": 1.2544e-05, |
| "loss": 0.0, |
| "step": 11650 |
| }, |
| { |
| "epoch": 3.7439999999999998, |
| "grad_norm": 0.0035494391855729153, |
| "learning_rate": 1.2512000000000002e-05, |
| "loss": 0.0, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 0.0020159908061204174, |
| "learning_rate": 1.248e-05, |
| "loss": 0.0, |
| "step": 11750 |
| }, |
| { |
| "epoch": 3.776, |
| "grad_norm": 0.003331910844435852, |
| "learning_rate": 1.2448e-05, |
| "loss": 0.0, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.792, |
| "grad_norm": 0.0012062460066491493, |
| "learning_rate": 1.2416000000000001e-05, |
| "loss": 0.0, |
| "step": 11850 |
| }, |
| { |
| "epoch": 3.808, |
| "grad_norm": 0.0027807416627710986, |
| "learning_rate": 1.2384e-05, |
| "loss": 0.0, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.824, |
| "grad_norm": 0.001871484410386379, |
| "learning_rate": 1.2352000000000001e-05, |
| "loss": 0.0, |
| "step": 11950 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 0.0012550393993999455, |
| "learning_rate": 1.232e-05, |
| "loss": 0.0, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.856, |
| "grad_norm": 0.0035320592711352403, |
| "learning_rate": 1.2288e-05, |
| "loss": 0.0, |
| "step": 12050 |
| }, |
| { |
| "epoch": 3.872, |
| "grad_norm": 0.0017416538958658072, |
| "learning_rate": 1.2256000000000001e-05, |
| "loss": 0.0, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.888, |
| "grad_norm": 0.0008299586122563334, |
| "learning_rate": 1.2224e-05, |
| "loss": 0.0, |
| "step": 12150 |
| }, |
| { |
| "epoch": 3.904, |
| "grad_norm": 0.002838479964551143, |
| "learning_rate": 1.2192000000000001e-05, |
| "loss": 0.0, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 0.002082164082108635, |
| "learning_rate": 1.216e-05, |
| "loss": 0.0, |
| "step": 12250 |
| }, |
| { |
| "epoch": 3.936, |
| "grad_norm": 0.0037577523462426405, |
| "learning_rate": 1.2128000000000001e-05, |
| "loss": 0.0, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.952, |
| "grad_norm": 0.0026759700344170553, |
| "learning_rate": 1.2096e-05, |
| "loss": 0.0, |
| "step": 12350 |
| }, |
| { |
| "epoch": 3.968, |
| "grad_norm": 0.003510314103134683, |
| "learning_rate": 1.2064e-05, |
| "loss": 0.0001, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.984, |
| "grad_norm": 0.0053526146673045175, |
| "learning_rate": 1.2032000000000001e-05, |
| "loss": 0.0001, |
| "step": 12450 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.0012346099755580994, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0001, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.016, |
| "grad_norm": 0.0018089321919392111, |
| "learning_rate": 1.1968000000000001e-05, |
| "loss": 0.0, |
| "step": 12550 |
| }, |
| { |
| "epoch": 4.032, |
| "grad_norm": 0.001913026176722158, |
| "learning_rate": 1.1936e-05, |
| "loss": 0.0, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.048, |
| "grad_norm": 0.003324750552626263, |
| "learning_rate": 1.1904e-05, |
| "loss": 0.0, |
| "step": 12650 |
| }, |
| { |
| "epoch": 4.064, |
| "grad_norm": 0.00024141692305707922, |
| "learning_rate": 1.1872000000000001e-05, |
| "loss": 0.0, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 0.00360992104204235, |
| "learning_rate": 1.184e-05, |
| "loss": 0.0, |
| "step": 12750 |
| }, |
| { |
| "epoch": 4.096, |
| "grad_norm": 0.005440945540162379, |
| "learning_rate": 1.1808000000000001e-05, |
| "loss": 0.0, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.112, |
| "grad_norm": 0.0012483986833069704, |
| "learning_rate": 1.1776e-05, |
| "loss": 0.0, |
| "step": 12850 |
| }, |
| { |
| "epoch": 4.128, |
| "grad_norm": 0.0012047606022239964, |
| "learning_rate": 1.1744000000000001e-05, |
| "loss": 0.0, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.144, |
| "grad_norm": 0.0012935697233135471, |
| "learning_rate": 1.1712e-05, |
| "loss": 0.0, |
| "step": 12950 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 0.0003751557511675381, |
| "learning_rate": 1.168e-05, |
| "loss": 0.0, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.176, |
| "grad_norm": 0.0007975727216523395, |
| "learning_rate": 1.1648000000000001e-05, |
| "loss": 0.0, |
| "step": 13050 |
| }, |
| { |
| "epoch": 4.192, |
| "grad_norm": 0.0024788380663340725, |
| "learning_rate": 1.1616e-05, |
| "loss": 0.0, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.208, |
| "grad_norm": 0.002267500273223812, |
| "learning_rate": 1.1584000000000001e-05, |
| "loss": 0.0, |
| "step": 13150 |
| }, |
| { |
| "epoch": 4.224, |
| "grad_norm": 0.00046404415579022056, |
| "learning_rate": 1.1552e-05, |
| "loss": 0.0, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.24, |
| "grad_norm": 0.0003794275042058772, |
| "learning_rate": 1.152e-05, |
| "loss": 0.0, |
| "step": 13250 |
| }, |
| { |
| "epoch": 4.256, |
| "grad_norm": 5.7509067974309376e-05, |
| "learning_rate": 1.1488e-05, |
| "loss": 0.0, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.272, |
| "grad_norm": 0.0010141779101323779, |
| "learning_rate": 1.1456e-05, |
| "loss": 0.0, |
| "step": 13350 |
| }, |
| { |
| "epoch": 4.288, |
| "grad_norm": 0.000488791600842277, |
| "learning_rate": 1.1424000000000001e-05, |
| "loss": 0.0, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.304, |
| "grad_norm": 0.009154124210044096, |
| "learning_rate": 1.1392e-05, |
| "loss": 0.0, |
| "step": 13450 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 0.0014804520943499787, |
| "learning_rate": 1.136e-05, |
| "loss": 0.0, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.336, |
| "grad_norm": 0.003087388225389283, |
| "learning_rate": 1.1328e-05, |
| "loss": 0.0, |
| "step": 13550 |
| }, |
| { |
| "epoch": 4.352, |
| "grad_norm": 0.0012502357899471038, |
| "learning_rate": 1.1296e-05, |
| "loss": 0.0, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.368, |
| "grad_norm": 0.004994197578764371, |
| "learning_rate": 1.1264000000000001e-05, |
| "loss": 0.0, |
| "step": 13650 |
| }, |
| { |
| "epoch": 4.384, |
| "grad_norm": 0.00236250892486376, |
| "learning_rate": 1.1232e-05, |
| "loss": 0.0, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.0009338977271527734, |
| "learning_rate": 1.1200000000000001e-05, |
| "loss": 0.0, |
| "step": 13750 |
| }, |
| { |
| "epoch": 4.416, |
| "grad_norm": 0.0017632337662450276, |
| "learning_rate": 1.1168e-05, |
| "loss": 0.0, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.432, |
| "grad_norm": 0.004662567943032515, |
| "learning_rate": 1.1136e-05, |
| "loss": 0.0, |
| "step": 13850 |
| }, |
| { |
| "epoch": 4.448, |
| "grad_norm": 0.0007880804713907978, |
| "learning_rate": 1.1104e-05, |
| "loss": 0.0, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.464, |
| "grad_norm": 0.004039315206219138, |
| "learning_rate": 1.1072e-05, |
| "loss": 0.0, |
| "step": 13950 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 0.0025060993362850705, |
| "learning_rate": 1.1040000000000001e-05, |
| "loss": 0.0, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.496, |
| "grad_norm": 0.0021604759353097895, |
| "learning_rate": 1.1008e-05, |
| "loss": 0.0, |
| "step": 14050 |
| }, |
| { |
| "epoch": 4.5120000000000005, |
| "grad_norm": 0.002867990975950256, |
| "learning_rate": 1.0976e-05, |
| "loss": 0.0, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.5280000000000005, |
| "grad_norm": 0.00401193895429708, |
| "learning_rate": 1.0944e-05, |
| "loss": 0.0001, |
| "step": 14150 |
| }, |
| { |
| "epoch": 4.5440000000000005, |
| "grad_norm": 0.00573613040417129, |
| "learning_rate": 1.0912e-05, |
| "loss": 0.0, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.5600000000000005, |
| "grad_norm": 0.0030535103264292297, |
| "learning_rate": 1.0880000000000001e-05, |
| "loss": 0.0, |
| "step": 14250 |
| }, |
| { |
| "epoch": 4.576, |
| "grad_norm": 0.0013740615247333295, |
| "learning_rate": 1.0848e-05, |
| "loss": 0.0, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.592, |
| "grad_norm": 0.0013075234756195337, |
| "learning_rate": 1.0816e-05, |
| "loss": 0.0, |
| "step": 14350 |
| }, |
| { |
| "epoch": 4.608, |
| "grad_norm": 0.003144544935729042, |
| "learning_rate": 1.0784e-05, |
| "loss": 0.0, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.624, |
| "grad_norm": 0.0004717265126905979, |
| "learning_rate": 1.0752e-05, |
| "loss": 0.0, |
| "step": 14450 |
| }, |
| { |
| "epoch": 4.64, |
| "grad_norm": 0.0006579410910577381, |
| "learning_rate": 1.072e-05, |
| "loss": 0.0, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.656, |
| "grad_norm": 0.0011156066953653578, |
| "learning_rate": 1.0688e-05, |
| "loss": 0.0, |
| "step": 14550 |
| }, |
| { |
| "epoch": 4.672, |
| "grad_norm": 0.0005837718126418017, |
| "learning_rate": 1.0656000000000003e-05, |
| "loss": 0.0, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.688, |
| "grad_norm": 0.0002716341341737398, |
| "learning_rate": 1.0624e-05, |
| "loss": 0.0, |
| "step": 14650 |
| }, |
| { |
| "epoch": 4.704, |
| "grad_norm": 0.0006885004604574123, |
| "learning_rate": 1.0592e-05, |
| "loss": 0.0, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 0.0001909594048696283, |
| "learning_rate": 1.056e-05, |
| "loss": 0.0, |
| "step": 14750 |
| }, |
| { |
| "epoch": 4.736, |
| "grad_norm": 0.0005826950037546315, |
| "learning_rate": 1.0528e-05, |
| "loss": 0.0, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.752, |
| "grad_norm": 9.968768791997796e-05, |
| "learning_rate": 1.0496000000000003e-05, |
| "loss": 0.0, |
| "step": 14850 |
| }, |
| { |
| "epoch": 4.768, |
| "grad_norm": 3.934391483544783e-05, |
| "learning_rate": 1.0464e-05, |
| "loss": 0.0, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.784, |
| "grad_norm": 0.0006812436780773194, |
| "learning_rate": 1.0432e-05, |
| "loss": 0.0, |
| "step": 14950 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.0023170184859336246, |
| "learning_rate": 1.04e-05, |
| "loss": 0.0, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.816, |
| "grad_norm": 0.0022018970646360205, |
| "learning_rate": 1.0368e-05, |
| "loss": 0.0, |
| "step": 15050 |
| }, |
| { |
| "epoch": 4.832, |
| "grad_norm": 0.000783685265043126, |
| "learning_rate": 1.0336000000000002e-05, |
| "loss": 0.0, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.848, |
| "grad_norm": 0.0038512547713090923, |
| "learning_rate": 1.0304e-05, |
| "loss": 0.0, |
| "step": 15150 |
| }, |
| { |
| "epoch": 4.864, |
| "grad_norm": 0.002436496154884748, |
| "learning_rate": 1.0272e-05, |
| "loss": 0.0, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 0.00219326761780263, |
| "learning_rate": 1.024e-05, |
| "loss": 0.0, |
| "step": 15250 |
| }, |
| { |
| "epoch": 4.896, |
| "grad_norm": 0.0011281003341395849, |
| "learning_rate": 1.0208e-05, |
| "loss": 0.0, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.912, |
| "grad_norm": 0.003253789912933492, |
| "learning_rate": 1.0176000000000002e-05, |
| "loss": 0.0, |
| "step": 15350 |
| }, |
| { |
| "epoch": 4.928, |
| "grad_norm": 0.003571745357413305, |
| "learning_rate": 1.0144e-05, |
| "loss": 0.0, |
| "step": 15400 |
| }, |
| { |
| "epoch": 4.944, |
| "grad_norm": 0.003456593668540485, |
| "learning_rate": 1.0112000000000002e-05, |
| "loss": 0.0064, |
| "step": 15450 |
| }, |
| { |
| "epoch": 4.96, |
| "grad_norm": 0.0007709261431954543, |
| "learning_rate": 1.008e-05, |
| "loss": 0.0, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.976, |
| "grad_norm": 0.0016190183412224056, |
| "learning_rate": 1.0048e-05, |
| "loss": 0.0, |
| "step": 15550 |
| }, |
| { |
| "epoch": 4.992, |
| "grad_norm": 0.0004234418224673852, |
| "learning_rate": 1.0016000000000002e-05, |
| "loss": 0.0, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.008, |
| "grad_norm": 0.00026016080219425926, |
| "learning_rate": 9.984e-06, |
| "loss": 0.0, |
| "step": 15650 |
| }, |
| { |
| "epoch": 5.024, |
| "grad_norm": 0.00011395553690679996, |
| "learning_rate": 9.952e-06, |
| "loss": 0.0, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.04, |
| "grad_norm": 0.0007346519053602048, |
| "learning_rate": 9.920000000000002e-06, |
| "loss": 0.0, |
| "step": 15750 |
| }, |
| { |
| "epoch": 5.056, |
| "grad_norm": 0.000826212364328148, |
| "learning_rate": 9.888000000000001e-06, |
| "loss": 0.0, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.072, |
| "grad_norm": 3.820115501457758e-05, |
| "learning_rate": 9.856000000000002e-06, |
| "loss": 0.0, |
| "step": 15850 |
| }, |
| { |
| "epoch": 5.088, |
| "grad_norm": 0.000369496148175044, |
| "learning_rate": 9.824000000000001e-06, |
| "loss": 0.0, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.104, |
| "grad_norm": 3.493119944946329e-05, |
| "learning_rate": 9.792e-06, |
| "loss": 0.0, |
| "step": 15950 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 0.0011101639738022185, |
| "learning_rate": 9.760000000000001e-06, |
| "loss": 0.0, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.136, |
| "grad_norm": 0.00027278241311842, |
| "learning_rate": 9.728e-06, |
| "loss": 0.0, |
| "step": 16050 |
| }, |
| { |
| "epoch": 5.152, |
| "grad_norm": 7.425121803341847e-05, |
| "learning_rate": 9.696000000000002e-06, |
| "loss": 0.0, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.168, |
| "grad_norm": 2.913913515572778e-05, |
| "learning_rate": 9.664000000000001e-06, |
| "loss": 0.0, |
| "step": 16150 |
| }, |
| { |
| "epoch": 5.184, |
| "grad_norm": 0.00022998811212515617, |
| "learning_rate": 9.632e-06, |
| "loss": 0.0, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.0001390860734604394, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.0, |
| "step": 16250 |
| }, |
| { |
| "epoch": 5.216, |
| "grad_norm": 0.00046520681578371507, |
| "learning_rate": 9.568e-06, |
| "loss": 0.0, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.232, |
| "grad_norm": 1.703864278625447e-05, |
| "learning_rate": 9.536000000000002e-06, |
| "loss": 0.0, |
| "step": 16350 |
| }, |
| { |
| "epoch": 5.248, |
| "grad_norm": 0.0003273884641160331, |
| "learning_rate": 9.504e-06, |
| "loss": 0.0, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.264, |
| "grad_norm": 1.3852768295605306e-05, |
| "learning_rate": 9.472000000000002e-06, |
| "loss": 0.0, |
| "step": 16450 |
| }, |
| { |
| "epoch": 5.28, |
| "grad_norm": 0.0008515611050291017, |
| "learning_rate": 9.440000000000001e-06, |
| "loss": 0.0, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.296, |
| "grad_norm": 0.0008534837876489844, |
| "learning_rate": 9.408e-06, |
| "loss": 0.0, |
| "step": 16550 |
| }, |
| { |
| "epoch": 5.312, |
| "grad_norm": 0.0006724273990838339, |
| "learning_rate": 9.376000000000001e-06, |
| "loss": 0.0, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.328, |
| "grad_norm": 0.0007236852854461475, |
| "learning_rate": 9.344e-06, |
| "loss": 0.0, |
| "step": 16650 |
| }, |
| { |
| "epoch": 5.344, |
| "grad_norm": 0.00010455353593376917, |
| "learning_rate": 9.312000000000002e-06, |
| "loss": 0.0, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.36, |
| "grad_norm": 0.0007310347123221347, |
| "learning_rate": 9.280000000000001e-06, |
| "loss": 0.0, |
| "step": 16750 |
| }, |
| { |
| "epoch": 5.376, |
| "grad_norm": 0.0005001089317212744, |
| "learning_rate": 9.248e-06, |
| "loss": 0.0, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.392, |
| "grad_norm": 0.001043623838616486, |
| "learning_rate": 9.216000000000001e-06, |
| "loss": 0.0, |
| "step": 16850 |
| }, |
| { |
| "epoch": 5.408, |
| "grad_norm": 0.0007706737604305622, |
| "learning_rate": 9.184e-06, |
| "loss": 0.0, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.424, |
| "grad_norm": 9.706295875901001e-06, |
| "learning_rate": 9.152000000000001e-06, |
| "loss": 0.0, |
| "step": 16950 |
| }, |
| { |
| "epoch": 5.44, |
| "grad_norm": 0.0008492197216223447, |
| "learning_rate": 9.12e-06, |
| "loss": 0.0, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.456, |
| "grad_norm": 0.0013652762132398285, |
| "learning_rate": 9.088000000000002e-06, |
| "loss": 0.0, |
| "step": 17050 |
| }, |
| { |
| "epoch": 5.4719999999999995, |
| "grad_norm": 0.0008027400483996306, |
| "learning_rate": 9.056000000000001e-06, |
| "loss": 0.0, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.4879999999999995, |
| "grad_norm": 1.0452267647832807e-05, |
| "learning_rate": 9.024e-06, |
| "loss": 0.0, |
| "step": 17150 |
| }, |
| { |
| "epoch": 5.504, |
| "grad_norm": 6.906557178289084e-06, |
| "learning_rate": 8.992000000000001e-06, |
| "loss": 0.0, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.52, |
| "grad_norm": 0.0005904441392333963, |
| "learning_rate": 8.96e-06, |
| "loss": 0.0, |
| "step": 17250 |
| }, |
| { |
| "epoch": 5.536, |
| "grad_norm": 0.0010790578585745268, |
| "learning_rate": 8.928000000000002e-06, |
| "loss": 0.0, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.552, |
| "grad_norm": 0.00035965364826304414, |
| "learning_rate": 8.896000000000001e-06, |
| "loss": 0.0, |
| "step": 17350 |
| }, |
| { |
| "epoch": 5.568, |
| "grad_norm": 0.00045245800981555623, |
| "learning_rate": 8.864e-06, |
| "loss": 0.0, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.584, |
| "grad_norm": 0.00010256823791726092, |
| "learning_rate": 8.832000000000001e-06, |
| "loss": 0.0, |
| "step": 17450 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 8.117090788676675e-06, |
| "learning_rate": 8.8e-06, |
| "loss": 0.0, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.616, |
| "grad_norm": 0.0007363027867556159, |
| "learning_rate": 8.768000000000001e-06, |
| "loss": 0.0, |
| "step": 17550 |
| }, |
| { |
| "epoch": 5.632, |
| "grad_norm": 0.0005243403229001632, |
| "learning_rate": 8.736e-06, |
| "loss": 0.0, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.648, |
| "grad_norm": 0.00040569564594367943, |
| "learning_rate": 8.704e-06, |
| "loss": 0.0, |
| "step": 17650 |
| }, |
| { |
| "epoch": 5.664, |
| "grad_norm": 0.00042046445593646643, |
| "learning_rate": 8.672000000000001e-06, |
| "loss": 0.0, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.68, |
| "grad_norm": 0.0004739053906413846, |
| "learning_rate": 8.64e-06, |
| "loss": 0.0, |
| "step": 17750 |
| }, |
| { |
| "epoch": 5.696, |
| "grad_norm": 5.641085857115355e-05, |
| "learning_rate": 8.608000000000001e-06, |
| "loss": 0.0, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.712, |
| "grad_norm": 0.000717217643766636, |
| "learning_rate": 8.576e-06, |
| "loss": 0.0, |
| "step": 17850 |
| }, |
| { |
| "epoch": 5.728, |
| "grad_norm": 0.00035495184416538993, |
| "learning_rate": 8.544000000000002e-06, |
| "loss": 0.0, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.744, |
| "grad_norm": 0.0007510280987038669, |
| "learning_rate": 8.512e-06, |
| "loss": 0.0, |
| "step": 17950 |
| }, |
| { |
| "epoch": 5.76, |
| "grad_norm": 0.00019448124680507923, |
| "learning_rate": 8.48e-06, |
| "loss": 0.0, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.776, |
| "grad_norm": 0.000741237981085058, |
| "learning_rate": 8.448000000000001e-06, |
| "loss": 0.0, |
| "step": 18050 |
| }, |
| { |
| "epoch": 5.792, |
| "grad_norm": 0.0012189065717582211, |
| "learning_rate": 8.416e-06, |
| "loss": 0.0, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.808, |
| "grad_norm": 0.00013649905846145337, |
| "learning_rate": 8.384000000000001e-06, |
| "loss": 0.0, |
| "step": 18150 |
| }, |
| { |
| "epoch": 5.824, |
| "grad_norm": 0.0012586456173907475, |
| "learning_rate": 8.352e-06, |
| "loss": 0.0, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.84, |
| "grad_norm": 0.00046421865350467135, |
| "learning_rate": 8.32e-06, |
| "loss": 0.0, |
| "step": 18250 |
| }, |
| { |
| "epoch": 5.856, |
| "grad_norm": 0.0008588455951536167, |
| "learning_rate": 8.288000000000001e-06, |
| "loss": 0.0, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.872, |
| "grad_norm": 0.0007684313974727155, |
| "learning_rate": 8.256e-06, |
| "loss": 0.0, |
| "step": 18350 |
| }, |
| { |
| "epoch": 5.888, |
| "grad_norm": 0.0007039589600819322, |
| "learning_rate": 8.224000000000001e-06, |
| "loss": 0.0, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.904, |
| "grad_norm": 3.51837159957191e-05, |
| "learning_rate": 8.192e-06, |
| "loss": 0.0, |
| "step": 18450 |
| }, |
| { |
| "epoch": 5.92, |
| "grad_norm": 0.0008249995746808375, |
| "learning_rate": 8.16e-06, |
| "loss": 0.0, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.936, |
| "grad_norm": 0.0005751905756043754, |
| "learning_rate": 8.128e-06, |
| "loss": 0.0, |
| "step": 18550 |
| }, |
| { |
| "epoch": 5.952, |
| "grad_norm": 0.0006014537857756254, |
| "learning_rate": 8.096e-06, |
| "loss": 0.0, |
| "step": 18600 |
| }, |
| { |
| "epoch": 5.968, |
| "grad_norm": 0.0004182340166299973, |
| "learning_rate": 8.064000000000001e-06, |
| "loss": 0.0, |
| "step": 18650 |
| }, |
| { |
| "epoch": 5.984, |
| "grad_norm": 7.328929449883947e-06, |
| "learning_rate": 8.032e-06, |
| "loss": 0.0, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.0005532029221570249, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0, |
| "step": 18750 |
| }, |
| { |
| "epoch": 6.016, |
| "grad_norm": 0.0004575747788336891, |
| "learning_rate": 7.968e-06, |
| "loss": 0.0, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.032, |
| "grad_norm": 0.001869666267334979, |
| "learning_rate": 7.936e-06, |
| "loss": 0.0, |
| "step": 18850 |
| }, |
| { |
| "epoch": 6.048, |
| "grad_norm": 0.00022141063379263522, |
| "learning_rate": 7.904000000000001e-06, |
| "loss": 0.0, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.064, |
| "grad_norm": 0.0002755551414857357, |
| "learning_rate": 7.872e-06, |
| "loss": 0.0, |
| "step": 18950 |
| }, |
| { |
| "epoch": 6.08, |
| "grad_norm": 0.0001262094453801626, |
| "learning_rate": 7.840000000000001e-06, |
| "loss": 0.0, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.096, |
| "grad_norm": 0.00021903586619369134, |
| "learning_rate": 7.808e-06, |
| "loss": 0.0, |
| "step": 19050 |
| }, |
| { |
| "epoch": 6.112, |
| "grad_norm": 0.0006519057730708936, |
| "learning_rate": 7.776e-06, |
| "loss": 0.0, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.128, |
| "grad_norm": 0.000695502894667579, |
| "learning_rate": 7.744e-06, |
| "loss": 0.0, |
| "step": 19150 |
| }, |
| { |
| "epoch": 6.144, |
| "grad_norm": 0.0006224173048035789, |
| "learning_rate": 7.712e-06, |
| "loss": 0.0, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.16, |
| "grad_norm": 3.4614518032103047e-06, |
| "learning_rate": 7.680000000000001e-06, |
| "loss": 0.0, |
| "step": 19250 |
| }, |
| { |
| "epoch": 6.176, |
| "grad_norm": 0.00033523878449984705, |
| "learning_rate": 7.648e-06, |
| "loss": 0.0, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.192, |
| "grad_norm": 0.0008220658302305507, |
| "learning_rate": 7.616000000000001e-06, |
| "loss": 0.0, |
| "step": 19350 |
| }, |
| { |
| "epoch": 6.208, |
| "grad_norm": 0.0003363971581878067, |
| "learning_rate": 7.5840000000000006e-06, |
| "loss": 0.0, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.224, |
| "grad_norm": 0.0006817803137925653, |
| "learning_rate": 7.552000000000001e-06, |
| "loss": 0.0, |
| "step": 19450 |
| }, |
| { |
| "epoch": 6.24, |
| "grad_norm": 0.0011120061826881474, |
| "learning_rate": 7.520000000000001e-06, |
| "loss": 0.0, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.256, |
| "grad_norm": 1.966465122456936e-06, |
| "learning_rate": 7.488000000000001e-06, |
| "loss": 0.0, |
| "step": 19550 |
| }, |
| { |
| "epoch": 6.272, |
| "grad_norm": 0.00026856259334759303, |
| "learning_rate": 7.456000000000001e-06, |
| "loss": 0.0, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.288, |
| "grad_norm": 0.00044370451229992384, |
| "learning_rate": 7.424e-06, |
| "loss": 0.0, |
| "step": 19650 |
| }, |
| { |
| "epoch": 6.304, |
| "grad_norm": 0.00028155440140211114, |
| "learning_rate": 7.3920000000000005e-06, |
| "loss": 0.0, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.32, |
| "grad_norm": 0.0005292253112573046, |
| "learning_rate": 7.360000000000001e-06, |
| "loss": 0.0, |
| "step": 19750 |
| }, |
| { |
| "epoch": 6.336, |
| "grad_norm": 0.0007954187703791572, |
| "learning_rate": 7.328000000000001e-06, |
| "loss": 0.0, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.352, |
| "grad_norm": 0.00027606186196444586, |
| "learning_rate": 7.296000000000001e-06, |
| "loss": 0.0, |
| "step": 19850 |
| }, |
| { |
| "epoch": 6.368, |
| "grad_norm": 6.270867123019646e-06, |
| "learning_rate": 7.264000000000001e-06, |
| "loss": 0.0, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.384, |
| "grad_norm": 0.00022346505397508303, |
| "learning_rate": 7.232e-06, |
| "loss": 0.0, |
| "step": 19950 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 0.0005133763871789368, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 0.0, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.416, |
| "grad_norm": 0.0012385305880268965, |
| "learning_rate": 7.168000000000001e-06, |
| "loss": 0.0, |
| "step": 20050 |
| }, |
| { |
| "epoch": 6.432, |
| "grad_norm": 1.8737486868066673e-06, |
| "learning_rate": 7.136000000000001e-06, |
| "loss": 0.0, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.448, |
| "grad_norm": 2.0181046177978524e-06, |
| "learning_rate": 7.104000000000001e-06, |
| "loss": 0.0, |
| "step": 20150 |
| }, |
| { |
| "epoch": 6.464, |
| "grad_norm": 0.000845228083001162, |
| "learning_rate": 7.072000000000001e-06, |
| "loss": 0.0, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.48, |
| "grad_norm": 0.00010442413166408255, |
| "learning_rate": 7.04e-06, |
| "loss": 0.0, |
| "step": 20250 |
| }, |
| { |
| "epoch": 6.496, |
| "grad_norm": 0.0008259383918680861, |
| "learning_rate": 7.0080000000000005e-06, |
| "loss": 0.0, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.5120000000000005, |
| "grad_norm": 1.9801867703023435e-06, |
| "learning_rate": 6.976000000000001e-06, |
| "loss": 0.0, |
| "step": 20350 |
| }, |
| { |
| "epoch": 6.5280000000000005, |
| "grad_norm": 0.0003420242897073274, |
| "learning_rate": 6.944000000000001e-06, |
| "loss": 0.0, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.5440000000000005, |
| "grad_norm": 0.000927320381200099, |
| "learning_rate": 6.912000000000001e-06, |
| "loss": 0.0, |
| "step": 20450 |
| }, |
| { |
| "epoch": 6.5600000000000005, |
| "grad_norm": 0.0005350790156068339, |
| "learning_rate": 6.88e-06, |
| "loss": 0.0, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.576, |
| "grad_norm": 0.0003440149318502077, |
| "learning_rate": 6.848e-06, |
| "loss": 0.0, |
| "step": 20550 |
| }, |
| { |
| "epoch": 6.592, |
| "grad_norm": 0.0007668699288568762, |
| "learning_rate": 6.8160000000000005e-06, |
| "loss": 0.0, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.608, |
| "grad_norm": 0.00046621701082447185, |
| "learning_rate": 6.784000000000001e-06, |
| "loss": 0.0, |
| "step": 20650 |
| }, |
| { |
| "epoch": 6.624, |
| "grad_norm": 5.3773575242483376e-05, |
| "learning_rate": 6.752000000000001e-06, |
| "loss": 0.0, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.64, |
| "grad_norm": 8.350580982910908e-05, |
| "learning_rate": 6.720000000000001e-06, |
| "loss": 0.0, |
| "step": 20750 |
| }, |
| { |
| "epoch": 6.656, |
| "grad_norm": 0.0002395916406284074, |
| "learning_rate": 6.688e-06, |
| "loss": 0.0, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.672, |
| "grad_norm": 0.0006504634991768935, |
| "learning_rate": 6.656e-06, |
| "loss": 0.0, |
| "step": 20850 |
| }, |
| { |
| "epoch": 6.688, |
| "grad_norm": 0.0006017391748290424, |
| "learning_rate": 6.6240000000000004e-06, |
| "loss": 0.0, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.704, |
| "grad_norm": 0.0002806311098905263, |
| "learning_rate": 6.592000000000001e-06, |
| "loss": 0.0, |
| "step": 20950 |
| }, |
| { |
| "epoch": 6.72, |
| "grad_norm": 0.0008733269204726745, |
| "learning_rate": 6.560000000000001e-06, |
| "loss": 0.0, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.736, |
| "grad_norm": 0.0005751125071293194, |
| "learning_rate": 6.528000000000001e-06, |
| "loss": 0.0, |
| "step": 21050 |
| }, |
| { |
| "epoch": 6.752, |
| "grad_norm": 0.006290860724228949, |
| "learning_rate": 6.496e-06, |
| "loss": 0.0, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.768, |
| "grad_norm": 0.0040169282359627054, |
| "learning_rate": 6.464e-06, |
| "loss": 0.0, |
| "step": 21150 |
| }, |
| { |
| "epoch": 6.784, |
| "grad_norm": 0.001080348541164438, |
| "learning_rate": 6.432e-06, |
| "loss": 0.0, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.003811186265590068, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.0, |
| "step": 21250 |
| }, |
| { |
| "epoch": 6.816, |
| "grad_norm": 0.00016534484442220018, |
| "learning_rate": 6.368000000000001e-06, |
| "loss": 0.0, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.832, |
| "grad_norm": 0.0001613165255933953, |
| "learning_rate": 6.336000000000001e-06, |
| "loss": 0.0, |
| "step": 21350 |
| }, |
| { |
| "epoch": 6.848, |
| "grad_norm": 0.000679046061777582, |
| "learning_rate": 6.304e-06, |
| "loss": 0.0, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.864, |
| "grad_norm": 0.0010514425379430034, |
| "learning_rate": 6.272e-06, |
| "loss": 0.0, |
| "step": 21450 |
| }, |
| { |
| "epoch": 6.88, |
| "grad_norm": 0.0035533924147301845, |
| "learning_rate": 6.24e-06, |
| "loss": 0.0, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.896, |
| "grad_norm": 0.0010389500345308828, |
| "learning_rate": 6.2080000000000005e-06, |
| "loss": 0.0, |
| "step": 21550 |
| }, |
| { |
| "epoch": 6.912, |
| "grad_norm": 0.0006256094370156619, |
| "learning_rate": 6.176000000000001e-06, |
| "loss": 0.0, |
| "step": 21600 |
| }, |
| { |
| "epoch": 6.928, |
| "grad_norm": 0.00014983574866582935, |
| "learning_rate": 6.144e-06, |
| "loss": 0.0, |
| "step": 21650 |
| }, |
| { |
| "epoch": 6.944, |
| "grad_norm": 7.81575212969094e-06, |
| "learning_rate": 6.112e-06, |
| "loss": 0.0, |
| "step": 21700 |
| }, |
| { |
| "epoch": 6.96, |
| "grad_norm": 0.00010497171085101809, |
| "learning_rate": 6.08e-06, |
| "loss": 0.0, |
| "step": 21750 |
| }, |
| { |
| "epoch": 6.976, |
| "grad_norm": 0.00026859080304041767, |
| "learning_rate": 6.048e-06, |
| "loss": 0.0, |
| "step": 21800 |
| }, |
| { |
| "epoch": 6.992, |
| "grad_norm": 0.0007656211816805965, |
| "learning_rate": 6.0160000000000005e-06, |
| "loss": 0.0, |
| "step": 21850 |
| }, |
| { |
| "epoch": 7.008, |
| "grad_norm": 0.00016866370577004142, |
| "learning_rate": 5.984000000000001e-06, |
| "loss": 0.0, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.024, |
| "grad_norm": 0.000841725008729615, |
| "learning_rate": 5.952e-06, |
| "loss": 0.0, |
| "step": 21950 |
| }, |
| { |
| "epoch": 7.04, |
| "grad_norm": 0.001403596803582104, |
| "learning_rate": 5.92e-06, |
| "loss": 0.0, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.056, |
| "grad_norm": 0.0007519805421921276, |
| "learning_rate": 5.888e-06, |
| "loss": 0.0, |
| "step": 22050 |
| }, |
| { |
| "epoch": 7.072, |
| "grad_norm": 3.008559634025441e-05, |
| "learning_rate": 5.856e-06, |
| "loss": 0.0, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.088, |
| "grad_norm": 0.0006102491301472853, |
| "learning_rate": 5.8240000000000005e-06, |
| "loss": 0.0, |
| "step": 22150 |
| }, |
| { |
| "epoch": 7.104, |
| "grad_norm": 0.000600562913095834, |
| "learning_rate": 5.792000000000001e-06, |
| "loss": 0.0, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.12, |
| "grad_norm": 0.0004817236180086057, |
| "learning_rate": 5.76e-06, |
| "loss": 0.0, |
| "step": 22250 |
| }, |
| { |
| "epoch": 7.136, |
| "grad_norm": 0.000686443266077338, |
| "learning_rate": 5.728e-06, |
| "loss": 0.0, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.152, |
| "grad_norm": 0.001062635129484694, |
| "learning_rate": 5.696e-06, |
| "loss": 0.0, |
| "step": 22350 |
| }, |
| { |
| "epoch": 7.168, |
| "grad_norm": 0.000404941651506133, |
| "learning_rate": 5.664e-06, |
| "loss": 0.0, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.184, |
| "grad_norm": 0.000816725754364363, |
| "learning_rate": 5.6320000000000005e-06, |
| "loss": 0.0, |
| "step": 22450 |
| }, |
| { |
| "epoch": 7.2, |
| "grad_norm": 0.00037021587973858533, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.0, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.216, |
| "grad_norm": 4.088127671811198e-06, |
| "learning_rate": 5.568e-06, |
| "loss": 0.0, |
| "step": 22550 |
| }, |
| { |
| "epoch": 7.232, |
| "grad_norm": 0.00013304569592629414, |
| "learning_rate": 5.536e-06, |
| "loss": 0.0, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.248, |
| "grad_norm": 0.00011693788654007106, |
| "learning_rate": 5.504e-06, |
| "loss": 0.0, |
| "step": 22650 |
| }, |
| { |
| "epoch": 7.264, |
| "grad_norm": 0.00039873529911523876, |
| "learning_rate": 5.472e-06, |
| "loss": 0.0, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.28, |
| "grad_norm": 0.0005861985910699136, |
| "learning_rate": 5.4400000000000004e-06, |
| "loss": 0.0, |
| "step": 22750 |
| }, |
| { |
| "epoch": 7.296, |
| "grad_norm": 0.0001408305739675062, |
| "learning_rate": 5.408e-06, |
| "loss": 0.0, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.312, |
| "grad_norm": 0.0010182175879277604, |
| "learning_rate": 5.376e-06, |
| "loss": 0.0, |
| "step": 22850 |
| }, |
| { |
| "epoch": 7.328, |
| "grad_norm": 0.00032526688809646643, |
| "learning_rate": 5.344e-06, |
| "loss": 0.0, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.344, |
| "grad_norm": 0.00017128057733419155, |
| "learning_rate": 5.312e-06, |
| "loss": 0.0, |
| "step": 22950 |
| }, |
| { |
| "epoch": 7.36, |
| "grad_norm": 2.4948106999283475e-06, |
| "learning_rate": 5.28e-06, |
| "loss": 0.0, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.376, |
| "grad_norm": 0.000593923369887912, |
| "learning_rate": 5.248000000000001e-06, |
| "loss": 0.0, |
| "step": 23050 |
| }, |
| { |
| "epoch": 7.392, |
| "grad_norm": 0.0011459595030973938, |
| "learning_rate": 5.216e-06, |
| "loss": 0.0, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.408, |
| "grad_norm": 0.00026313583806989295, |
| "learning_rate": 5.184e-06, |
| "loss": 0.0, |
| "step": 23150 |
| }, |
| { |
| "epoch": 7.424, |
| "grad_norm": 0.00039830820671334343, |
| "learning_rate": 5.152e-06, |
| "loss": 0.0, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.44, |
| "grad_norm": 0.0006286453125070797, |
| "learning_rate": 5.12e-06, |
| "loss": 0.0, |
| "step": 23250 |
| }, |
| { |
| "epoch": 7.456, |
| "grad_norm": 0.0006427562567428625, |
| "learning_rate": 5.088000000000001e-06, |
| "loss": 0.0, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.4719999999999995, |
| "grad_norm": 0.00015946006697521752, |
| "learning_rate": 5.056000000000001e-06, |
| "loss": 0.0, |
| "step": 23350 |
| }, |
| { |
| "epoch": 7.4879999999999995, |
| "grad_norm": 0.0004289380452787707, |
| "learning_rate": 5.024e-06, |
| "loss": 0.0, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.504, |
| "grad_norm": 0.000861813993896526, |
| "learning_rate": 4.992e-06, |
| "loss": 0.0, |
| "step": 23450 |
| }, |
| { |
| "epoch": 7.52, |
| "grad_norm": 0.0005979597824620298, |
| "learning_rate": 4.960000000000001e-06, |
| "loss": 0.0, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.536, |
| "grad_norm": 3.6426689561550785e-06, |
| "learning_rate": 4.928000000000001e-06, |
| "loss": 0.0, |
| "step": 23550 |
| }, |
| { |
| "epoch": 7.552, |
| "grad_norm": 0.0004956499352349563, |
| "learning_rate": 4.896e-06, |
| "loss": 0.0, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.568, |
| "grad_norm": 2.025048998589199e-05, |
| "learning_rate": 4.864e-06, |
| "loss": 0.0, |
| "step": 23650 |
| }, |
| { |
| "epoch": 7.584, |
| "grad_norm": 0.0003160837457656607, |
| "learning_rate": 4.8320000000000005e-06, |
| "loss": 0.0, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6, |
| "grad_norm": 0.0006549989193442926, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.0, |
| "step": 23750 |
| }, |
| { |
| "epoch": 7.616, |
| "grad_norm": 0.00022446916196028902, |
| "learning_rate": 4.768000000000001e-06, |
| "loss": 0.0, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.632, |
| "grad_norm": 0.00018036750821287214, |
| "learning_rate": 4.736000000000001e-06, |
| "loss": 0.0, |
| "step": 23850 |
| }, |
| { |
| "epoch": 7.648, |
| "grad_norm": 0.0004573385403025739, |
| "learning_rate": 4.704e-06, |
| "loss": 0.0, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.664, |
| "grad_norm": 0.00019710767122580646, |
| "learning_rate": 4.672e-06, |
| "loss": 0.0, |
| "step": 23950 |
| }, |
| { |
| "epoch": 7.68, |
| "grad_norm": 1.5084117814095418e-06, |
| "learning_rate": 4.6400000000000005e-06, |
| "loss": 0.0, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.696, |
| "grad_norm": 0.0006600246803147537, |
| "learning_rate": 4.608000000000001e-06, |
| "loss": 0.0, |
| "step": 24050 |
| }, |
| { |
| "epoch": 7.712, |
| "grad_norm": 0.0004764085088233622, |
| "learning_rate": 4.576000000000001e-06, |
| "loss": 0.0, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.728, |
| "grad_norm": 0.00044471308815751895, |
| "learning_rate": 4.544000000000001e-06, |
| "loss": 0.0, |
| "step": 24150 |
| }, |
| { |
| "epoch": 7.744, |
| "grad_norm": 0.0001657220726173927, |
| "learning_rate": 4.512e-06, |
| "loss": 0.0, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.76, |
| "grad_norm": 0.00021805960530738044, |
| "learning_rate": 4.48e-06, |
| "loss": 0.0, |
| "step": 24250 |
| }, |
| { |
| "epoch": 7.776, |
| "grad_norm": 0.0006216156458408036, |
| "learning_rate": 4.4480000000000004e-06, |
| "loss": 0.0, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.792, |
| "grad_norm": 1.279911660415784e-06, |
| "learning_rate": 4.416000000000001e-06, |
| "loss": 0.0, |
| "step": 24350 |
| }, |
| { |
| "epoch": 7.808, |
| "grad_norm": 0.0001357939729174033, |
| "learning_rate": 4.384000000000001e-06, |
| "loss": 0.0, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.824, |
| "grad_norm": 0.0005642799025411847, |
| "learning_rate": 4.352e-06, |
| "loss": 0.0, |
| "step": 24450 |
| }, |
| { |
| "epoch": 7.84, |
| "grad_norm": 0.00018091340913126554, |
| "learning_rate": 4.32e-06, |
| "loss": 0.0, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.856, |
| "grad_norm": 0.0007611652586088176, |
| "learning_rate": 4.288e-06, |
| "loss": 0.0, |
| "step": 24550 |
| }, |
| { |
| "epoch": 7.872, |
| "grad_norm": 0.0008024893561623562, |
| "learning_rate": 4.256e-06, |
| "loss": 0.0, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.888, |
| "grad_norm": 1.2498287700551542e-06, |
| "learning_rate": 4.2240000000000006e-06, |
| "loss": 0.0, |
| "step": 24650 |
| }, |
| { |
| "epoch": 7.904, |
| "grad_norm": 1.5673886902933284e-06, |
| "learning_rate": 4.192000000000001e-06, |
| "loss": 0.0, |
| "step": 24700 |
| }, |
| { |
| "epoch": 7.92, |
| "grad_norm": 0.0006167687246851426, |
| "learning_rate": 4.16e-06, |
| "loss": 0.0, |
| "step": 24750 |
| }, |
| { |
| "epoch": 7.936, |
| "grad_norm": 1.2371482089162035e-06, |
| "learning_rate": 4.128e-06, |
| "loss": 0.0, |
| "step": 24800 |
| }, |
| { |
| "epoch": 7.952, |
| "grad_norm": 0.00020817390844313262, |
| "learning_rate": 4.096e-06, |
| "loss": 0.0, |
| "step": 24850 |
| }, |
| { |
| "epoch": 7.968, |
| "grad_norm": 0.000763205279133764, |
| "learning_rate": 4.064e-06, |
| "loss": 0.0, |
| "step": 24900 |
| }, |
| { |
| "epoch": 7.984, |
| "grad_norm": 0.0010338995161218454, |
| "learning_rate": 4.0320000000000005e-06, |
| "loss": 0.0, |
| "step": 24950 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.0005393700329852327, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.016, |
| "grad_norm": 1.5599262561285773e-06, |
| "learning_rate": 3.968e-06, |
| "loss": 0.0, |
| "step": 25050 |
| }, |
| { |
| "epoch": 8.032, |
| "grad_norm": 0.00012475265397112023, |
| "learning_rate": 3.936e-06, |
| "loss": 0.0, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.048, |
| "grad_norm": 0.0004963011431024235, |
| "learning_rate": 3.904e-06, |
| "loss": 0.0, |
| "step": 25150 |
| }, |
| { |
| "epoch": 8.064, |
| "grad_norm": 2.1454502270095894e-05, |
| "learning_rate": 3.872e-06, |
| "loss": 0.0, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.08, |
| "grad_norm": 7.98369110830542e-05, |
| "learning_rate": 3.8400000000000005e-06, |
| "loss": 0.0, |
| "step": 25250 |
| }, |
| { |
| "epoch": 8.096, |
| "grad_norm": 0.0001569271601524967, |
| "learning_rate": 3.8080000000000006e-06, |
| "loss": 0.0, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.112, |
| "grad_norm": 3.448395868399914e-06, |
| "learning_rate": 3.7760000000000004e-06, |
| "loss": 0.0, |
| "step": 25350 |
| }, |
| { |
| "epoch": 8.128, |
| "grad_norm": 0.00021317804687393338, |
| "learning_rate": 3.7440000000000005e-06, |
| "loss": 0.0, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.144, |
| "grad_norm": 0.002043492139335667, |
| "learning_rate": 3.712e-06, |
| "loss": 0.0, |
| "step": 25450 |
| }, |
| { |
| "epoch": 8.16, |
| "grad_norm": 0.0007697758477107576, |
| "learning_rate": 3.6800000000000003e-06, |
| "loss": 0.0, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.176, |
| "grad_norm": 0.00042121951583966874, |
| "learning_rate": 3.6480000000000005e-06, |
| "loss": 0.0, |
| "step": 25550 |
| }, |
| { |
| "epoch": 8.192, |
| "grad_norm": 0.0010275855739816497, |
| "learning_rate": 3.616e-06, |
| "loss": 0.0, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.208, |
| "grad_norm": 0.0004073857184752387, |
| "learning_rate": 3.5840000000000003e-06, |
| "loss": 0.0, |
| "step": 25650 |
| }, |
| { |
| "epoch": 8.224, |
| "grad_norm": 2.6395644739553403e-05, |
| "learning_rate": 3.5520000000000005e-06, |
| "loss": 0.0, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.24, |
| "grad_norm": 8.350450297173982e-05, |
| "learning_rate": 3.52e-06, |
| "loss": 0.0, |
| "step": 25750 |
| }, |
| { |
| "epoch": 8.256, |
| "grad_norm": 0.0005017647122814175, |
| "learning_rate": 3.4880000000000003e-06, |
| "loss": 0.0, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.272, |
| "grad_norm": 0.0002957195671807615, |
| "learning_rate": 3.4560000000000005e-06, |
| "loss": 0.0, |
| "step": 25850 |
| }, |
| { |
| "epoch": 8.288, |
| "grad_norm": 0.0007477102897762827, |
| "learning_rate": 3.424e-06, |
| "loss": 0.0, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.304, |
| "grad_norm": 0.00021069394537475973, |
| "learning_rate": 3.3920000000000003e-06, |
| "loss": 0.0, |
| "step": 25950 |
| }, |
| { |
| "epoch": 8.32, |
| "grad_norm": 0.0007237320727261912, |
| "learning_rate": 3.3600000000000004e-06, |
| "loss": 0.0, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.336, |
| "grad_norm": 0.000516590777468066, |
| "learning_rate": 3.328e-06, |
| "loss": 0.0, |
| "step": 26050 |
| }, |
| { |
| "epoch": 8.352, |
| "grad_norm": 0.0005010114184220883, |
| "learning_rate": 3.2960000000000003e-06, |
| "loss": 0.0, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.368, |
| "grad_norm": 5.28964766080006e-06, |
| "learning_rate": 3.2640000000000004e-06, |
| "loss": 0.0, |
| "step": 26150 |
| }, |
| { |
| "epoch": 8.384, |
| "grad_norm": 0.0005157406666104107, |
| "learning_rate": 3.232e-06, |
| "loss": 0.0, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 0.0006507550123747543, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.0, |
| "step": 26250 |
| }, |
| { |
| "epoch": 8.416, |
| "grad_norm": 0.0007155645493115397, |
| "learning_rate": 3.1680000000000004e-06, |
| "loss": 0.0, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.432, |
| "grad_norm": 0.00035348868456337083, |
| "learning_rate": 3.136e-06, |
| "loss": 0.0, |
| "step": 26350 |
| }, |
| { |
| "epoch": 8.448, |
| "grad_norm": 0.0005827653476283665, |
| "learning_rate": 3.1040000000000003e-06, |
| "loss": 0.0, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.464, |
| "grad_norm": 5.392560798091041e-05, |
| "learning_rate": 3.072e-06, |
| "loss": 0.0, |
| "step": 26450 |
| }, |
| { |
| "epoch": 8.48, |
| "grad_norm": 0.0003822684090234525, |
| "learning_rate": 3.04e-06, |
| "loss": 0.0, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.496, |
| "grad_norm": 0.00047049084334724653, |
| "learning_rate": 3.0080000000000003e-06, |
| "loss": 0.0, |
| "step": 26550 |
| }, |
| { |
| "epoch": 8.512, |
| "grad_norm": 0.0004919742737354103, |
| "learning_rate": 2.976e-06, |
| "loss": 0.0, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.528, |
| "grad_norm": 0.0002140207608021743, |
| "learning_rate": 2.944e-06, |
| "loss": 0.0, |
| "step": 26650 |
| }, |
| { |
| "epoch": 8.544, |
| "grad_norm": 3.5859741698396775e-05, |
| "learning_rate": 2.9120000000000002e-06, |
| "loss": 0.0, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.56, |
| "grad_norm": 2.8239349881814044e-05, |
| "learning_rate": 2.88e-06, |
| "loss": 0.0, |
| "step": 26750 |
| }, |
| { |
| "epoch": 8.576, |
| "grad_norm": 3.6122702859208877e-06, |
| "learning_rate": 2.848e-06, |
| "loss": 0.0, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.592, |
| "grad_norm": 0.0026791316330236677, |
| "learning_rate": 2.8160000000000002e-06, |
| "loss": 0.0, |
| "step": 26850 |
| }, |
| { |
| "epoch": 8.608, |
| "grad_norm": 0.00020946549940804708, |
| "learning_rate": 2.784e-06, |
| "loss": 0.0, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.624, |
| "grad_norm": 0.0003340978536385738, |
| "learning_rate": 2.752e-06, |
| "loss": 0.0, |
| "step": 26950 |
| }, |
| { |
| "epoch": 8.64, |
| "grad_norm": 0.0006912123574857489, |
| "learning_rate": 2.7200000000000002e-06, |
| "loss": 0.0, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.656, |
| "grad_norm": 1.1918299463922867e-05, |
| "learning_rate": 2.688e-06, |
| "loss": 0.0, |
| "step": 27050 |
| }, |
| { |
| "epoch": 8.672, |
| "grad_norm": 0.0005004993104376737, |
| "learning_rate": 2.656e-06, |
| "loss": 0.0, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.688, |
| "grad_norm": 0.0006647739241711589, |
| "learning_rate": 2.6240000000000006e-06, |
| "loss": 0.0, |
| "step": 27150 |
| }, |
| { |
| "epoch": 8.704, |
| "grad_norm": 0.0005349888973660498, |
| "learning_rate": 2.592e-06, |
| "loss": 0.0, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.72, |
| "grad_norm": 0.0001071714356645616, |
| "learning_rate": 2.56e-06, |
| "loss": 0.0, |
| "step": 27250 |
| }, |
| { |
| "epoch": 8.736, |
| "grad_norm": 0.0004991064010764234, |
| "learning_rate": 2.5280000000000006e-06, |
| "loss": 0.0, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.752, |
| "grad_norm": 0.00022779807704196374, |
| "learning_rate": 2.496e-06, |
| "loss": 0.0, |
| "step": 27350 |
| }, |
| { |
| "epoch": 8.768, |
| "grad_norm": 0.0012819212872404967, |
| "learning_rate": 2.4640000000000005e-06, |
| "loss": 0.0, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.784, |
| "grad_norm": 2.4458772638304895e-06, |
| "learning_rate": 2.432e-06, |
| "loss": 0.0, |
| "step": 27450 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 4.978203463715246e-05, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.0, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.816, |
| "grad_norm": 0.0006102147175976708, |
| "learning_rate": 2.3680000000000005e-06, |
| "loss": 0.0, |
| "step": 27550 |
| }, |
| { |
| "epoch": 8.832, |
| "grad_norm": 0.00054039008837191, |
| "learning_rate": 2.336e-06, |
| "loss": 0.0, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.848, |
| "grad_norm": 0.00044893481288572814, |
| "learning_rate": 2.3040000000000003e-06, |
| "loss": 0.0, |
| "step": 27650 |
| }, |
| { |
| "epoch": 8.864, |
| "grad_norm": 0.00046239574733305844, |
| "learning_rate": 2.2720000000000004e-06, |
| "loss": 0.0, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.88, |
| "grad_norm": 0.00037309598762979125, |
| "learning_rate": 2.24e-06, |
| "loss": 0.0, |
| "step": 27750 |
| }, |
| { |
| "epoch": 8.896, |
| "grad_norm": 0.0004490570977280375, |
| "learning_rate": 2.2080000000000003e-06, |
| "loss": 0.0, |
| "step": 27800 |
| }, |
| { |
| "epoch": 8.912, |
| "grad_norm": 5.971637937791403e-06, |
| "learning_rate": 2.176e-06, |
| "loss": 0.0, |
| "step": 27850 |
| }, |
| { |
| "epoch": 8.928, |
| "grad_norm": 0.00019607249904638697, |
| "learning_rate": 2.144e-06, |
| "loss": 0.0, |
| "step": 27900 |
| }, |
| { |
| "epoch": 8.943999999999999, |
| "grad_norm": 2.173728894982944e-05, |
| "learning_rate": 2.1120000000000003e-06, |
| "loss": 0.0, |
| "step": 27950 |
| }, |
| { |
| "epoch": 8.96, |
| "grad_norm": 0.0005814522831536409, |
| "learning_rate": 2.08e-06, |
| "loss": 0.0, |
| "step": 28000 |
| }, |
| { |
| "epoch": 8.975999999999999, |
| "grad_norm": 1.1979971851172046e-05, |
| "learning_rate": 2.048e-06, |
| "loss": 0.0, |
| "step": 28050 |
| }, |
| { |
| "epoch": 8.992, |
| "grad_norm": 0.001145707081894591, |
| "learning_rate": 2.0160000000000003e-06, |
| "loss": 0.0, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.008, |
| "grad_norm": 0.00015980546781926578, |
| "learning_rate": 1.984e-06, |
| "loss": 0.0, |
| "step": 28150 |
| }, |
| { |
| "epoch": 9.024, |
| "grad_norm": 0.00038132672038477974, |
| "learning_rate": 1.952e-06, |
| "loss": 0.0, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.04, |
| "grad_norm": 0.00029638386517636414, |
| "learning_rate": 1.9200000000000003e-06, |
| "loss": 0.0, |
| "step": 28250 |
| }, |
| { |
| "epoch": 9.056, |
| "grad_norm": 9.84668351885671e-05, |
| "learning_rate": 1.8880000000000002e-06, |
| "loss": 0.0, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.072, |
| "grad_norm": 0.0006899543451120302, |
| "learning_rate": 1.856e-06, |
| "loss": 0.0, |
| "step": 28350 |
| }, |
| { |
| "epoch": 9.088, |
| "grad_norm": 0.00023321439846025538, |
| "learning_rate": 1.8240000000000002e-06, |
| "loss": 0.0, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.104, |
| "grad_norm": 1.2255045251781342e-06, |
| "learning_rate": 1.7920000000000002e-06, |
| "loss": 0.0, |
| "step": 28450 |
| }, |
| { |
| "epoch": 9.12, |
| "grad_norm": 0.000224750950400635, |
| "learning_rate": 1.76e-06, |
| "loss": 0.0, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.136, |
| "grad_norm": 1.504019715073742e-06, |
| "learning_rate": 1.7280000000000002e-06, |
| "loss": 0.0, |
| "step": 28550 |
| }, |
| { |
| "epoch": 9.152, |
| "grad_norm": 0.0004469699493177947, |
| "learning_rate": 1.6960000000000002e-06, |
| "loss": 0.0, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.168, |
| "grad_norm": 0.0008992359796643962, |
| "learning_rate": 1.664e-06, |
| "loss": 0.0, |
| "step": 28650 |
| }, |
| { |
| "epoch": 9.184, |
| "grad_norm": 1.1807481020292317e-06, |
| "learning_rate": 1.6320000000000002e-06, |
| "loss": 0.0, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.2, |
| "grad_norm": 0.0006720244284168616, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.0, |
| "step": 28750 |
| }, |
| { |
| "epoch": 9.216, |
| "grad_norm": 1.954402796502112e-06, |
| "learning_rate": 1.568e-06, |
| "loss": 0.0, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.232, |
| "grad_norm": 0.00045854257866828515, |
| "learning_rate": 1.536e-06, |
| "loss": 0.0, |
| "step": 28850 |
| }, |
| { |
| "epoch": 9.248, |
| "grad_norm": 1.2499022623047775e-06, |
| "learning_rate": 1.5040000000000001e-06, |
| "loss": 0.0, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.264, |
| "grad_norm": 3.5069474526942418e-06, |
| "learning_rate": 1.472e-06, |
| "loss": 0.0, |
| "step": 28950 |
| }, |
| { |
| "epoch": 9.28, |
| "grad_norm": 0.0004939352589701349, |
| "learning_rate": 1.44e-06, |
| "loss": 0.0, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.296, |
| "grad_norm": 0.0006403164759103728, |
| "learning_rate": 1.4080000000000001e-06, |
| "loss": 0.0, |
| "step": 29050 |
| }, |
| { |
| "epoch": 9.312, |
| "grad_norm": 0.0004906591431802971, |
| "learning_rate": 1.376e-06, |
| "loss": 0.0, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.328, |
| "grad_norm": 0.0006355382829934992, |
| "learning_rate": 1.344e-06, |
| "loss": 0.0, |
| "step": 29150 |
| }, |
| { |
| "epoch": 9.344, |
| "grad_norm": 0.0008888097102175601, |
| "learning_rate": 1.3120000000000003e-06, |
| "loss": 0.0, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.36, |
| "grad_norm": 2.2844333657135604e-06, |
| "learning_rate": 1.28e-06, |
| "loss": 0.0, |
| "step": 29250 |
| }, |
| { |
| "epoch": 9.376, |
| "grad_norm": 0.0007355380038243548, |
| "learning_rate": 1.248e-06, |
| "loss": 0.0, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.392, |
| "grad_norm": 0.00016113117251488604, |
| "learning_rate": 1.216e-06, |
| "loss": 0.0, |
| "step": 29350 |
| }, |
| { |
| "epoch": 9.408, |
| "grad_norm": 6.144866567676681e-05, |
| "learning_rate": 1.1840000000000002e-06, |
| "loss": 0.0, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.424, |
| "grad_norm": 1.280262092788024e-06, |
| "learning_rate": 1.1520000000000002e-06, |
| "loss": 0.0, |
| "step": 29450 |
| }, |
| { |
| "epoch": 9.44, |
| "grad_norm": 0.00023403691930978503, |
| "learning_rate": 1.12e-06, |
| "loss": 0.0, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.456, |
| "grad_norm": 1.0651580903925167e-05, |
| "learning_rate": 1.088e-06, |
| "loss": 0.0, |
| "step": 29550 |
| }, |
| { |
| "epoch": 9.472, |
| "grad_norm": 0.0004401165605022274, |
| "learning_rate": 1.0560000000000001e-06, |
| "loss": 0.0, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.488, |
| "grad_norm": 0.0007374198247804132, |
| "learning_rate": 1.024e-06, |
| "loss": 0.0, |
| "step": 29650 |
| }, |
| { |
| "epoch": 9.504, |
| "grad_norm": 0.00033734901422662343, |
| "learning_rate": 9.92e-07, |
| "loss": 0.0, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.52, |
| "grad_norm": 0.0009391331921544294, |
| "learning_rate": 9.600000000000001e-07, |
| "loss": 0.0, |
| "step": 29750 |
| }, |
| { |
| "epoch": 9.536, |
| "grad_norm": 0.0012792312844443715, |
| "learning_rate": 9.28e-07, |
| "loss": 0.0, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.552, |
| "grad_norm": 0.000600425656969412, |
| "learning_rate": 8.960000000000001e-07, |
| "loss": 0.0, |
| "step": 29850 |
| }, |
| { |
| "epoch": 9.568, |
| "grad_norm": 0.0005356183437125068, |
| "learning_rate": 8.640000000000001e-07, |
| "loss": 0.0, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.584, |
| "grad_norm": 1.169450332702385e-06, |
| "learning_rate": 8.32e-07, |
| "loss": 0.0, |
| "step": 29950 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 1.603757236150262e-06, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 0.0, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.616, |
| "grad_norm": 1.3789867557409609e-06, |
| "learning_rate": 7.68e-07, |
| "loss": 0.0, |
| "step": 30050 |
| }, |
| { |
| "epoch": 9.632, |
| "grad_norm": 0.00022794675409267614, |
| "learning_rate": 7.36e-07, |
| "loss": 0.0, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.648, |
| "grad_norm": 4.063516211647283e-06, |
| "learning_rate": 7.040000000000001e-07, |
| "loss": 0.0, |
| "step": 30150 |
| }, |
| { |
| "epoch": 9.664, |
| "grad_norm": 0.00020120298713090787, |
| "learning_rate": 6.72e-07, |
| "loss": 0.0, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.68, |
| "grad_norm": 0.0009682823219180043, |
| "learning_rate": 6.4e-07, |
| "loss": 0.0, |
| "step": 30250 |
| }, |
| { |
| "epoch": 9.696, |
| "grad_norm": 1.653173775427169e-06, |
| "learning_rate": 6.08e-07, |
| "loss": 0.0, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.712, |
| "grad_norm": 0.0010948577216183535, |
| "learning_rate": 5.760000000000001e-07, |
| "loss": 0.0, |
| "step": 30350 |
| }, |
| { |
| "epoch": 9.728, |
| "grad_norm": 1.579158744151985e-06, |
| "learning_rate": 5.44e-07, |
| "loss": 0.0, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.744, |
| "grad_norm": 0.0005095525509816707, |
| "learning_rate": 5.12e-07, |
| "loss": 0.0, |
| "step": 30450 |
| }, |
| { |
| "epoch": 9.76, |
| "grad_norm": 5.6971938558435215e-05, |
| "learning_rate": 4.800000000000001e-07, |
| "loss": 0.0, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.776, |
| "grad_norm": 0.0003876663750785411, |
| "learning_rate": 4.4800000000000004e-07, |
| "loss": 0.0, |
| "step": 30550 |
| }, |
| { |
| "epoch": 9.792, |
| "grad_norm": 0.0004980878635902623, |
| "learning_rate": 4.16e-07, |
| "loss": 0.0, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.808, |
| "grad_norm": 1.1188634027454468e-06, |
| "learning_rate": 3.84e-07, |
| "loss": 0.0, |
| "step": 30650 |
| }, |
| { |
| "epoch": 9.824, |
| "grad_norm": 0.000647302404932787, |
| "learning_rate": 3.5200000000000003e-07, |
| "loss": 0.0, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.84, |
| "grad_norm": 0.00032616120339746537, |
| "learning_rate": 3.2e-07, |
| "loss": 0.0, |
| "step": 30750 |
| }, |
| { |
| "epoch": 9.856, |
| "grad_norm": 0.0008723718200811839, |
| "learning_rate": 2.8800000000000004e-07, |
| "loss": 0.0, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.872, |
| "grad_norm": 0.000471858358002294, |
| "learning_rate": 2.56e-07, |
| "loss": 0.0, |
| "step": 30850 |
| }, |
| { |
| "epoch": 9.888, |
| "grad_norm": 1.3544616120540576e-06, |
| "learning_rate": 2.2400000000000002e-07, |
| "loss": 0.0, |
| "step": 30900 |
| }, |
| { |
| "epoch": 9.904, |
| "grad_norm": 8.617191444341816e-05, |
| "learning_rate": 1.92e-07, |
| "loss": 0.0, |
| "step": 30950 |
| }, |
| { |
| "epoch": 9.92, |
| "grad_norm": 0.0004407695022914176, |
| "learning_rate": 1.6e-07, |
| "loss": 0.0, |
| "step": 31000 |
| }, |
| { |
| "epoch": 9.936, |
| "grad_norm": 0.00020840921158746508, |
| "learning_rate": 1.28e-07, |
| "loss": 0.0, |
| "step": 31050 |
| }, |
| { |
| "epoch": 9.952, |
| "grad_norm": 0.000421999513896553, |
| "learning_rate": 9.6e-08, |
| "loss": 0.0, |
| "step": 31100 |
| }, |
| { |
| "epoch": 9.968, |
| "grad_norm": 1.128787548845069e-06, |
| "learning_rate": 6.4e-08, |
| "loss": 0.0, |
| "step": 31150 |
| }, |
| { |
| "epoch": 9.984, |
| "grad_norm": 7.0422523529939706e-06, |
| "learning_rate": 3.2e-08, |
| "loss": 0.0, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.438006921888056e-06, |
| "learning_rate": 0.0, |
| "loss": 0.0, |
| "step": 31250 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 31250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.285894220342886e+16, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|