| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984532159196, |
| "eval_steps": 500, |
| "global_step": 122000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 1.3454886674880981, |
| "learning_rate": 9.991815957246561e-06, |
| "loss": 5.7653, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.9953828454017639, |
| "learning_rate": 9.983631914493123e-06, |
| "loss": 5.6192, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.7844876646995544, |
| "learning_rate": 9.975447871739683e-06, |
| "loss": 5.4452, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.567190945148468, |
| "learning_rate": 9.967263828986244e-06, |
| "loss": 5.3183, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.5304737687110901, |
| "learning_rate": 9.959079786232804e-06, |
| "loss": 5.2156, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.3840586841106415, |
| "learning_rate": 9.950895743479366e-06, |
| "loss": 5.1203, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.34571415185928345, |
| "learning_rate": 9.942711700725926e-06, |
| "loss": 5.0732, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.32429179549217224, |
| "learning_rate": 9.934527657972486e-06, |
| "loss": 5.0458, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.4024583101272583, |
| "learning_rate": 9.926343615219047e-06, |
| "loss": 5.0157, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.3507966101169586, |
| "learning_rate": 9.918159572465607e-06, |
| "loss": 4.9921, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.37788429856300354, |
| "learning_rate": 9.909975529712169e-06, |
| "loss": 4.9803, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.3670382797718048, |
| "learning_rate": 9.901791486958729e-06, |
| "loss": 4.9651, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.2876070737838745, |
| "learning_rate": 9.89360744420529e-06, |
| "loss": 4.9585, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.3649226725101471, |
| "learning_rate": 9.88542340145185e-06, |
| "loss": 4.9491, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.39525964856147766, |
| "learning_rate": 9.87723935869841e-06, |
| "loss": 4.9416, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.30799126625061035, |
| "learning_rate": 9.86905531594497e-06, |
| "loss": 4.9318, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.2949569523334503, |
| "learning_rate": 9.860871273191532e-06, |
| "loss": 4.9201, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.3036958575248718, |
| "learning_rate": 9.852687230438092e-06, |
| "loss": 4.9212, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.7450295686721802, |
| "learning_rate": 9.844503187684653e-06, |
| "loss": 4.9166, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.439261794090271, |
| "learning_rate": 9.836319144931213e-06, |
| "loss": 4.9028, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.28415539860725403, |
| "learning_rate": 9.828135102177775e-06, |
| "loss": 4.8989, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.3490685522556305, |
| "learning_rate": 9.819951059424335e-06, |
| "loss": 4.9008, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.4549264907836914, |
| "learning_rate": 9.811767016670895e-06, |
| "loss": 4.8968, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.3350650668144226, |
| "learning_rate": 9.803582973917457e-06, |
| "loss": 4.8886, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.41439351439476013, |
| "learning_rate": 9.795398931164018e-06, |
| "loss": 4.8869, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.47745582461357117, |
| "learning_rate": 9.787214888410578e-06, |
| "loss": 4.8864, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.4192076027393341, |
| "learning_rate": 9.779030845657138e-06, |
| "loss": 4.886, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.38134631514549255, |
| "learning_rate": 9.7708468029037e-06, |
| "loss": 4.8803, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.0664669275283813, |
| "learning_rate": 9.76266276015026e-06, |
| "loss": 4.8764, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.6459155678749084, |
| "learning_rate": 9.75447871739682e-06, |
| "loss": 4.8721, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.4217374622821808, |
| "learning_rate": 9.746294674643381e-06, |
| "loss": 4.8758, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.4077221155166626, |
| "learning_rate": 9.738110631889943e-06, |
| "loss": 4.8694, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.793872058391571, |
| "learning_rate": 9.729926589136503e-06, |
| "loss": 4.872, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.6410694718360901, |
| "learning_rate": 9.721742546383063e-06, |
| "loss": 4.8697, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.9800311923027039, |
| "learning_rate": 9.713558503629624e-06, |
| "loss": 4.86, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.999591052532196, |
| "learning_rate": 9.705374460876184e-06, |
| "loss": 4.8601, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.2334632873535156, |
| "learning_rate": 9.697190418122744e-06, |
| "loss": 4.8603, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.6848945617675781, |
| "learning_rate": 9.689006375369305e-06, |
| "loss": 4.8537, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.4569337069988251, |
| "learning_rate": 9.680822332615866e-06, |
| "loss": 4.859, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.2655045986175537, |
| "learning_rate": 9.672638289862427e-06, |
| "loss": 4.8556, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.2269976139068604, |
| "learning_rate": 9.664454247108987e-06, |
| "loss": 4.8569, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.5749172568321228, |
| "learning_rate": 9.656270204355547e-06, |
| "loss": 4.8549, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.5450171232223511, |
| "learning_rate": 9.64808616160211e-06, |
| "loss": 4.853, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6002165675163269, |
| "learning_rate": 9.63990211884867e-06, |
| "loss": 4.8495, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.4352850914001465, |
| "learning_rate": 9.63171807609523e-06, |
| "loss": 4.8471, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.8037896752357483, |
| "learning_rate": 9.623534033341792e-06, |
| "loss": 4.841, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.7141408324241638, |
| "learning_rate": 9.615349990588352e-06, |
| "loss": 4.8433, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.3830125331878662, |
| "learning_rate": 9.607165947834912e-06, |
| "loss": 4.84, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.2149487733840942, |
| "learning_rate": 9.598981905081473e-06, |
| "loss": 4.8414, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.5489853620529175, |
| "learning_rate": 9.590797862328034e-06, |
| "loss": 4.8425, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.0431714057922363, |
| "learning_rate": 9.582613819574595e-06, |
| "loss": 4.8369, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.2853955030441284, |
| "learning_rate": 9.574429776821155e-06, |
| "loss": 4.8411, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.6190789937973022, |
| "learning_rate": 9.566245734067715e-06, |
| "loss": 4.8381, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.9078882336616516, |
| "learning_rate": 9.558061691314277e-06, |
| "loss": 4.8355, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6225169897079468, |
| "learning_rate": 9.549877648560838e-06, |
| "loss": 4.8382, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.5269715189933777, |
| "learning_rate": 9.541693605807398e-06, |
| "loss": 4.8368, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.5707417130470276, |
| "learning_rate": 9.533509563053958e-06, |
| "loss": 4.8331, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.0135433673858643, |
| "learning_rate": 9.525325520300518e-06, |
| "loss": 4.8372, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.9796397686004639, |
| "learning_rate": 9.517141477547079e-06, |
| "loss": 4.8301, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6885799765586853, |
| "learning_rate": 9.50895743479364e-06, |
| "loss": 4.8309, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.8593583703041077, |
| "learning_rate": 9.5007733920402e-06, |
| "loss": 4.829, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.0259130001068115, |
| "learning_rate": 9.492589349286761e-06, |
| "loss": 4.8298, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.5682145357131958, |
| "learning_rate": 9.484405306533321e-06, |
| "loss": 4.8283, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.9293478727340698, |
| "learning_rate": 9.476221263779882e-06, |
| "loss": 4.8321, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.0222758054733276, |
| "learning_rate": 9.468037221026444e-06, |
| "loss": 4.8286, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.748030960559845, |
| "learning_rate": 9.459853178273004e-06, |
| "loss": 4.8277, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.7026931047439575, |
| "learning_rate": 9.451669135519564e-06, |
| "loss": 4.8227, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.8596793413162231, |
| "learning_rate": 9.443485092766126e-06, |
| "loss": 4.8296, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.9839978218078613, |
| "learning_rate": 9.435301050012686e-06, |
| "loss": 4.8298, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.7572035789489746, |
| "learning_rate": 9.427117007259247e-06, |
| "loss": 4.8285, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.3615273237228394, |
| "learning_rate": 9.418932964505807e-06, |
| "loss": 4.8263, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.8525121808052063, |
| "learning_rate": 9.410748921752369e-06, |
| "loss": 4.8233, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6726747751235962, |
| "learning_rate": 9.402564878998929e-06, |
| "loss": 4.8232, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.404517412185669, |
| "learning_rate": 9.39438083624549e-06, |
| "loss": 4.8222, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.3438527584075928, |
| "learning_rate": 9.38619679349205e-06, |
| "loss": 4.8275, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.9558647871017456, |
| "learning_rate": 9.378012750738612e-06, |
| "loss": 4.8188, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.5687648057937622, |
| "learning_rate": 9.369828707985172e-06, |
| "loss": 4.8203, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.528586387634277, |
| "learning_rate": 9.361644665231732e-06, |
| "loss": 4.8237, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.5821335315704346, |
| "learning_rate": 9.353460622478292e-06, |
| "loss": 4.8213, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.3139538764953613, |
| "learning_rate": 9.345276579724853e-06, |
| "loss": 4.8241, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6411435008049011, |
| "learning_rate": 9.337092536971415e-06, |
| "loss": 4.8176, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.011660575866699, |
| "learning_rate": 9.328908494217975e-06, |
| "loss": 4.8142, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.9855501055717468, |
| "learning_rate": 9.320724451464535e-06, |
| "loss": 4.816, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.8996455073356628, |
| "learning_rate": 9.312540408711095e-06, |
| "loss": 4.8162, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.75440514087677, |
| "learning_rate": 9.304356365957656e-06, |
| "loss": 4.8174, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.5616024136543274, |
| "learning_rate": 9.296172323204216e-06, |
| "loss": 4.8137, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.5236667394638062, |
| "learning_rate": 9.287988280450778e-06, |
| "loss": 4.8156, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.869793176651001, |
| "learning_rate": 9.279804237697338e-06, |
| "loss": 4.8201, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.2847354412078857, |
| "learning_rate": 9.271620194943898e-06, |
| "loss": 4.816, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.1544415950775146, |
| "learning_rate": 9.26343615219046e-06, |
| "loss": 4.8174, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6653382182121277, |
| "learning_rate": 9.25525210943702e-06, |
| "loss": 4.8136, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.0501242876052856, |
| "learning_rate": 9.247068066683581e-06, |
| "loss": 4.8104, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.6880828142166138, |
| "learning_rate": 9.238884023930141e-06, |
| "loss": 4.8093, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.9488196969032288, |
| "learning_rate": 9.230699981176703e-06, |
| "loss": 4.8121, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 4.403717041015625, |
| "learning_rate": 9.222515938423263e-06, |
| "loss": 4.8113, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2665512561798096, |
| "learning_rate": 9.214331895669824e-06, |
| "loss": 4.8092, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.7057957053184509, |
| "learning_rate": 9.206147852916384e-06, |
| "loss": 4.8108, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.9034390449523926, |
| "learning_rate": 9.197963810162946e-06, |
| "loss": 4.808, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.9307584762573242, |
| "learning_rate": 9.189779767409506e-06, |
| "loss": 4.809, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.995647668838501, |
| "learning_rate": 9.181595724656066e-06, |
| "loss": 4.8079, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2378621101379395, |
| "learning_rate": 9.173411681902627e-06, |
| "loss": 4.81, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.424851417541504, |
| "learning_rate": 9.165227639149189e-06, |
| "loss": 4.8045, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.8927842378616333, |
| "learning_rate": 9.157043596395749e-06, |
| "loss": 4.8085, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 3.9594876766204834, |
| "learning_rate": 9.14885955364231e-06, |
| "loss": 4.808, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.6992530822753906, |
| "learning_rate": 9.14067551088887e-06, |
| "loss": 4.8099, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.1311434507369995, |
| "learning_rate": 9.13249146813543e-06, |
| "loss": 4.8071, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.8243028521537781, |
| "learning_rate": 9.12430742538199e-06, |
| "loss": 4.8056, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7261260747909546, |
| "learning_rate": 9.11612338262855e-06, |
| "loss": 4.8076, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 4.485065460205078, |
| "learning_rate": 9.107939339875112e-06, |
| "loss": 4.8072, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.044236660003662, |
| "learning_rate": 9.099755297121673e-06, |
| "loss": 4.8076, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.155515670776367, |
| "learning_rate": 9.091571254368233e-06, |
| "loss": 4.8061, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.4560600519180298, |
| "learning_rate": 9.083387211614795e-06, |
| "loss": 4.8079, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.577777624130249, |
| "learning_rate": 9.075203168861355e-06, |
| "loss": 4.8058, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.1256022453308105, |
| "learning_rate": 9.067019126107915e-06, |
| "loss": 4.8047, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.9217951893806458, |
| "learning_rate": 9.058835083354476e-06, |
| "loss": 4.8047, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.4003558158874512, |
| "learning_rate": 9.050651040601038e-06, |
| "loss": 4.8006, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.1220279932022095, |
| "learning_rate": 9.042466997847598e-06, |
| "loss": 4.8061, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 3.371317148208618, |
| "learning_rate": 9.034282955094158e-06, |
| "loss": 4.8034, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.5556405782699585, |
| "learning_rate": 9.026098912340718e-06, |
| "loss": 4.8022, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.9803473949432373, |
| "learning_rate": 9.01791486958728e-06, |
| "loss": 4.8029, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.7116793990135193, |
| "learning_rate": 9.00973082683384e-06, |
| "loss": 4.8047, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.8312737941741943, |
| "learning_rate": 9.0015467840804e-06, |
| "loss": 4.8004, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.492680072784424, |
| "learning_rate": 8.993362741326963e-06, |
| "loss": 4.8031, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.0807607173919678, |
| "learning_rate": 8.985178698573523e-06, |
| "loss": 4.7997, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 4.5318403244018555, |
| "learning_rate": 8.976994655820083e-06, |
| "loss": 4.8052, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 6.294131278991699, |
| "learning_rate": 8.968810613066644e-06, |
| "loss": 4.8025, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.2838228940963745, |
| "learning_rate": 8.960626570313204e-06, |
| "loss": 4.8014, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 3.4684486389160156, |
| "learning_rate": 8.952442527559764e-06, |
| "loss": 4.7991, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.9761227965354919, |
| "learning_rate": 8.944258484806324e-06, |
| "loss": 4.8013, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.355940341949463, |
| "learning_rate": 8.936074442052886e-06, |
| "loss": 4.8014, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 3.334829568862915, |
| "learning_rate": 8.927890399299447e-06, |
| "loss": 4.7994, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 4.577433109283447, |
| "learning_rate": 8.919706356546007e-06, |
| "loss": 4.7931, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 4.9149322509765625, |
| "learning_rate": 8.911522313792567e-06, |
| "loss": 4.8025, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.5174356698989868, |
| "learning_rate": 8.903338271039129e-06, |
| "loss": 4.7982, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 4.420125961303711, |
| "learning_rate": 8.89515422828569e-06, |
| "loss": 4.7976, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 4.246557712554932, |
| "learning_rate": 8.88697018553225e-06, |
| "loss": 4.7954, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.887624979019165, |
| "learning_rate": 8.87878614277881e-06, |
| "loss": 4.7956, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.2699388265609741, |
| "learning_rate": 8.870602100025372e-06, |
| "loss": 4.7993, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.5432891845703125, |
| "learning_rate": 8.862418057271932e-06, |
| "loss": 4.8017, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 3.0773308277130127, |
| "learning_rate": 8.854234014518492e-06, |
| "loss": 4.8002, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.0699973106384277, |
| "learning_rate": 8.846049971765053e-06, |
| "loss": 4.7958, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.9238589406013489, |
| "learning_rate": 8.837865929011615e-06, |
| "loss": 4.7907, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.0784467458724976, |
| "learning_rate": 8.829681886258175e-06, |
| "loss": 4.797, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.1584768295288086, |
| "learning_rate": 8.821497843504735e-06, |
| "loss": 4.7947, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.0812736749649048, |
| "learning_rate": 8.813313800751297e-06, |
| "loss": 4.7894, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.7061692476272583, |
| "learning_rate": 8.805129757997857e-06, |
| "loss": 4.7946, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.771735429763794, |
| "learning_rate": 8.796945715244418e-06, |
| "loss": 4.7973, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.1708245277404785, |
| "learning_rate": 8.788761672490978e-06, |
| "loss": 4.7953, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.4777073860168457, |
| "learning_rate": 8.780577629737538e-06, |
| "loss": 4.7917, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.5953549146652222, |
| "learning_rate": 8.772393586984098e-06, |
| "loss": 4.7917, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.7896368503570557, |
| "learning_rate": 8.76420954423066e-06, |
| "loss": 4.7923, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.7050272822380066, |
| "learning_rate": 8.75602550147722e-06, |
| "loss": 4.7977, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 4.6306023597717285, |
| "learning_rate": 8.747841458723781e-06, |
| "loss": 4.7911, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.3503997325897217, |
| "learning_rate": 8.739657415970341e-06, |
| "loss": 4.7895, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 4.242427825927734, |
| "learning_rate": 8.731473373216901e-06, |
| "loss": 4.7902, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.8283806443214417, |
| "learning_rate": 8.723289330463463e-06, |
| "loss": 4.7938, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.901630401611328, |
| "learning_rate": 8.715105287710024e-06, |
| "loss": 4.7936, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 6.563976764678955, |
| "learning_rate": 8.706921244956584e-06, |
| "loss": 4.7907, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.2243263721466064, |
| "learning_rate": 8.698737202203144e-06, |
| "loss": 4.7852, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.0529608726501465, |
| "learning_rate": 8.690553159449706e-06, |
| "loss": 4.7916, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.0728704929351807, |
| "learning_rate": 8.682369116696266e-06, |
| "loss": 4.7947, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.8949060440063477, |
| "learning_rate": 8.674185073942827e-06, |
| "loss": 4.7935, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.9687060713768005, |
| "learning_rate": 8.666001031189387e-06, |
| "loss": 4.7939, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.091792345046997, |
| "learning_rate": 8.657816988435949e-06, |
| "loss": 4.7941, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.7492835521697998, |
| "learning_rate": 8.64963294568251e-06, |
| "loss": 4.7897, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.856666088104248, |
| "learning_rate": 8.64144890292907e-06, |
| "loss": 4.7917, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.1373181343078613, |
| "learning_rate": 8.633264860175631e-06, |
| "loss": 4.7875, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 3.3695366382598877, |
| "learning_rate": 8.625080817422192e-06, |
| "loss": 4.7935, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 3.0197083950042725, |
| "learning_rate": 8.616896774668752e-06, |
| "loss": 4.7896, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.717109441757202, |
| "learning_rate": 8.608712731915312e-06, |
| "loss": 4.7898, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.4040961265563965, |
| "learning_rate": 8.600528689161873e-06, |
| "loss": 4.7919, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.917181968688965, |
| "learning_rate": 8.592344646408434e-06, |
| "loss": 4.7904, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.2932788133621216, |
| "learning_rate": 8.584160603654995e-06, |
| "loss": 4.7856, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 4.480923175811768, |
| "learning_rate": 8.575976560901555e-06, |
| "loss": 4.7952, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.211406946182251, |
| "learning_rate": 8.567792518148115e-06, |
| "loss": 4.7903, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.0489588975906372, |
| "learning_rate": 8.559608475394676e-06, |
| "loss": 4.7941, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 3.5961625576019287, |
| "learning_rate": 8.551424432641236e-06, |
| "loss": 4.7845, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.3053170442581177, |
| "learning_rate": 8.543240389887798e-06, |
| "loss": 4.7919, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.7930333614349365, |
| "learning_rate": 8.535056347134358e-06, |
| "loss": 4.7893, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.9989149570465088, |
| "learning_rate": 8.526872304380918e-06, |
| "loss": 4.7879, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.412938594818115, |
| "learning_rate": 8.518688261627479e-06, |
| "loss": 4.7876, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.9795100688934326, |
| "learning_rate": 8.51050421887404e-06, |
| "loss": 4.7895, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8868602514266968, |
| "learning_rate": 8.5023201761206e-06, |
| "loss": 4.7895, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.9831721186637878, |
| "learning_rate": 8.494136133367161e-06, |
| "loss": 4.7874, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 3.5411503314971924, |
| "learning_rate": 8.485952090613721e-06, |
| "loss": 4.786, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.617466926574707, |
| "learning_rate": 8.477768047860283e-06, |
| "loss": 4.7832, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.1326940059661865, |
| "learning_rate": 8.469584005106844e-06, |
| "loss": 4.789, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.020888328552246, |
| "learning_rate": 8.461399962353404e-06, |
| "loss": 4.7867, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.366783618927002, |
| "learning_rate": 8.453215919599966e-06, |
| "loss": 4.7871, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.739832878112793, |
| "learning_rate": 8.445031876846526e-06, |
| "loss": 4.7829, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.9564040899276733, |
| "learning_rate": 8.436847834093086e-06, |
| "loss": 4.7901, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.063528537750244, |
| "learning_rate": 8.428663791339647e-06, |
| "loss": 4.7839, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.9199309349060059, |
| "learning_rate": 8.420479748586209e-06, |
| "loss": 4.7919, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.8330548405647278, |
| "learning_rate": 8.412295705832769e-06, |
| "loss": 4.7877, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.459280490875244, |
| "learning_rate": 8.404111663079329e-06, |
| "loss": 4.7854, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 5.8329572677612305, |
| "learning_rate": 8.39592762032589e-06, |
| "loss": 4.7864, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.6338814496994019, |
| "learning_rate": 8.38774357757245e-06, |
| "loss": 4.7855, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.8343069553375244, |
| "learning_rate": 8.37955953481901e-06, |
| "loss": 4.786, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.5812463760375977, |
| "learning_rate": 8.37137549206557e-06, |
| "loss": 4.784, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.3143149614334106, |
| "learning_rate": 8.363191449312132e-06, |
| "loss": 4.7761, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.27919340133667, |
| "learning_rate": 8.355007406558692e-06, |
| "loss": 4.7841, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.111786365509033, |
| "learning_rate": 8.346823363805253e-06, |
| "loss": 4.7902, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.4459553956985474, |
| "learning_rate": 8.338639321051813e-06, |
| "loss": 4.7827, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.489434242248535, |
| "learning_rate": 8.330455278298375e-06, |
| "loss": 4.7804, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.8438489437103271, |
| "learning_rate": 8.322271235544935e-06, |
| "loss": 4.7826, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.8289409875869751, |
| "learning_rate": 8.314087192791495e-06, |
| "loss": 4.788, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.2698094844818115, |
| "learning_rate": 8.305903150038056e-06, |
| "loss": 4.785, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 6.737819194793701, |
| "learning_rate": 8.297719107284618e-06, |
| "loss": 4.7879, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.1006556749343872, |
| "learning_rate": 8.289535064531178e-06, |
| "loss": 4.7861, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.757918119430542, |
| "learning_rate": 8.281351021777738e-06, |
| "loss": 4.7838, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.443129539489746, |
| "learning_rate": 8.273166979024298e-06, |
| "loss": 4.7815, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.2434520721435547, |
| "learning_rate": 8.26498293627086e-06, |
| "loss": 4.7837, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.9488204717636108, |
| "learning_rate": 8.25679889351742e-06, |
| "loss": 4.7826, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.4644496440887451, |
| "learning_rate": 8.248614850763981e-06, |
| "loss": 4.7805, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.1131641864776611, |
| "learning_rate": 8.240430808010543e-06, |
| "loss": 4.7848, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.9792927503585815, |
| "learning_rate": 8.232246765257103e-06, |
| "loss": 4.7844, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 6.909112453460693, |
| "learning_rate": 8.224062722503663e-06, |
| "loss": 4.7791, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.1648467779159546, |
| "learning_rate": 8.215878679750224e-06, |
| "loss": 4.7834, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.4722325801849365, |
| "learning_rate": 8.207694636996784e-06, |
| "loss": 4.7775, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.8322229385375977, |
| "learning_rate": 8.199510594243344e-06, |
| "loss": 4.7848, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.4244656562805176, |
| "learning_rate": 8.191326551489906e-06, |
| "loss": 4.7784, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 3.1128671169281006, |
| "learning_rate": 8.183142508736466e-06, |
| "loss": 4.7819, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.6106808185577393, |
| "learning_rate": 8.174958465983027e-06, |
| "loss": 4.7791, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.204963207244873, |
| "learning_rate": 8.166774423229587e-06, |
| "loss": 4.7841, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.9447894096374512, |
| "learning_rate": 8.158590380476147e-06, |
| "loss": 4.7776, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.2476049661636353, |
| "learning_rate": 8.15040633772271e-06, |
| "loss": 4.7759, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.7892690896987915, |
| "learning_rate": 8.14222229496927e-06, |
| "loss": 4.7797, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.874403476715088, |
| "learning_rate": 8.13403825221583e-06, |
| "loss": 4.7807, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.8661632537841797, |
| "learning_rate": 8.12585420946239e-06, |
| "loss": 4.7772, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.8099445104598999, |
| "learning_rate": 8.117670166708952e-06, |
| "loss": 4.7814, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.0212429761886597, |
| "learning_rate": 8.109486123955512e-06, |
| "loss": 4.7802, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.0928311347961426, |
| "learning_rate": 8.101302081202072e-06, |
| "loss": 4.777, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.8933788537979126, |
| "learning_rate": 8.093118038448633e-06, |
| "loss": 4.7832, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.9154900908470154, |
| "learning_rate": 8.084933995695195e-06, |
| "loss": 4.7854, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.510463714599609, |
| "learning_rate": 8.076749952941755e-06, |
| "loss": 4.7869, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.9989612102508545, |
| "learning_rate": 8.068565910188315e-06, |
| "loss": 4.785, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.1799613237380981, |
| "learning_rate": 8.060381867434877e-06, |
| "loss": 4.7845, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.8453476428985596, |
| "learning_rate": 8.052197824681437e-06, |
| "loss": 4.7849, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.3734474182128906, |
| "learning_rate": 8.044013781927998e-06, |
| "loss": 4.7799, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.692543983459473, |
| "learning_rate": 8.035829739174558e-06, |
| "loss": 4.7787, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 4.369629859924316, |
| "learning_rate": 8.027645696421118e-06, |
| "loss": 4.779, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.2958143949508667, |
| "learning_rate": 8.01946165366768e-06, |
| "loss": 4.7801, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.838066041469574, |
| "learning_rate": 8.01127761091424e-06, |
| "loss": 4.78, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.3854634761810303, |
| "learning_rate": 8.0030935681608e-06, |
| "loss": 4.7797, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.1053544282913208, |
| "learning_rate": 7.994909525407361e-06, |
| "loss": 4.7816, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.4308538436889648, |
| "learning_rate": 7.986725482653921e-06, |
| "loss": 4.7819, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.392876148223877, |
| "learning_rate": 7.978541439900482e-06, |
| "loss": 4.7782, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.5023071765899658, |
| "learning_rate": 7.970357397147044e-06, |
| "loss": 4.7828, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.7376088500022888, |
| "learning_rate": 7.962173354393604e-06, |
| "loss": 4.7816, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.311122417449951, |
| "learning_rate": 7.953989311640164e-06, |
| "loss": 4.7819, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.5772916078567505, |
| "learning_rate": 7.945805268886724e-06, |
| "loss": 4.7779, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.9813963174819946, |
| "learning_rate": 7.937621226133286e-06, |
| "loss": 4.781, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.7451574802398682, |
| "learning_rate": 7.929437183379847e-06, |
| "loss": 4.7817, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.064371943473816, |
| "learning_rate": 7.921253140626407e-06, |
| "loss": 4.7794, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.4085423946380615, |
| "learning_rate": 7.913069097872967e-06, |
| "loss": 4.7838, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.9586715698242188, |
| "learning_rate": 7.904885055119529e-06, |
| "loss": 4.7799, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.1608307361602783, |
| "learning_rate": 7.89670101236609e-06, |
| "loss": 4.7803, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.8091859221458435, |
| "learning_rate": 7.88851696961265e-06, |
| "loss": 4.784, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.7846326231956482, |
| "learning_rate": 7.880332926859212e-06, |
| "loss": 4.778, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.0327844619750977, |
| "learning_rate": 7.872148884105772e-06, |
| "loss": 4.7825, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.0779097080230713, |
| "learning_rate": 7.863964841352332e-06, |
| "loss": 4.7765, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.2201029062271118, |
| "learning_rate": 7.855780798598892e-06, |
| "loss": 4.776, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.4344494342803955, |
| "learning_rate": 7.847596755845454e-06, |
| "loss": 4.7781, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.114273190498352, |
| "learning_rate": 7.839412713092015e-06, |
| "loss": 4.7764, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.8567458391189575, |
| "learning_rate": 7.831228670338575e-06, |
| "loss": 4.7831, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 4.325285911560059, |
| "learning_rate": 7.823044627585135e-06, |
| "loss": 4.7811, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.2828031778335571, |
| "learning_rate": 7.814860584831695e-06, |
| "loss": 4.7795, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.048180341720581, |
| "learning_rate": 7.806676542078256e-06, |
| "loss": 4.7782, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.9398120045661926, |
| "learning_rate": 7.798492499324818e-06, |
| "loss": 4.778, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.8492949604988098, |
| "learning_rate": 7.790308456571378e-06, |
| "loss": 4.776, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.9857730865478516, |
| "learning_rate": 7.782124413817938e-06, |
| "loss": 4.7741, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.0787758827209473, |
| "learning_rate": 7.773940371064498e-06, |
| "loss": 4.7738, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.2202094793319702, |
| "learning_rate": 7.765756328311059e-06, |
| "loss": 4.777, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.543772220611572, |
| "learning_rate": 7.75757228555762e-06, |
| "loss": 4.778, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.9749574065208435, |
| "learning_rate": 7.749388242804181e-06, |
| "loss": 4.7761, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.2425750494003296, |
| "learning_rate": 7.741204200050741e-06, |
| "loss": 4.7799, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.1919734477996826, |
| "learning_rate": 7.733020157297301e-06, |
| "loss": 4.7781, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.6858105659484863, |
| "learning_rate": 7.724836114543863e-06, |
| "loss": 4.7761, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.082306146621704, |
| "learning_rate": 7.716652071790424e-06, |
| "loss": 4.7727, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.4394657611846924, |
| "learning_rate": 7.708468029036984e-06, |
| "loss": 4.7773, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 3.90745210647583, |
| "learning_rate": 7.700283986283546e-06, |
| "loss": 4.7771, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.1074409484863281, |
| "learning_rate": 7.692099943530106e-06, |
| "loss": 4.7813, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.775972306728363, |
| "learning_rate": 7.683915900776666e-06, |
| "loss": 4.7793, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.8545662760734558, |
| "learning_rate": 7.675731858023227e-06, |
| "loss": 4.7745, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.9553655385971069, |
| "learning_rate": 7.667547815269789e-06, |
| "loss": 4.7741, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.9458415508270264, |
| "learning_rate": 7.659363772516349e-06, |
| "loss": 4.7757, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.3447215557098389, |
| "learning_rate": 7.65117972976291e-06, |
| "loss": 4.7721, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.8614388108253479, |
| "learning_rate": 7.64299568700947e-06, |
| "loss": 4.7728, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.083814263343811, |
| "learning_rate": 7.63481164425603e-06, |
| "loss": 4.7714, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.9633229374885559, |
| "learning_rate": 7.626627601502591e-06, |
| "loss": 4.7736, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.1467323303222656, |
| "learning_rate": 7.618443558749151e-06, |
| "loss": 4.7771, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.2095165252685547, |
| "learning_rate": 7.610259515995713e-06, |
| "loss": 4.7712, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.9931008815765381, |
| "learning_rate": 7.602075473242273e-06, |
| "loss": 4.7776, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 12.287310600280762, |
| "learning_rate": 7.593891430488834e-06, |
| "loss": 4.7797, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 3.100107192993164, |
| "learning_rate": 7.585707387735394e-06, |
| "loss": 4.7759, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.249577045440674, |
| "learning_rate": 7.577523344981955e-06, |
| "loss": 4.7708, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 3.1430749893188477, |
| "learning_rate": 7.569339302228515e-06, |
| "loss": 4.7742, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.9823663830757141, |
| "learning_rate": 7.5611552594750755e-06, |
| "loss": 4.7739, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 4.1920166015625, |
| "learning_rate": 7.552971216721637e-06, |
| "loss": 4.778, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.579256534576416, |
| "learning_rate": 7.544787173968198e-06, |
| "loss": 4.7784, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.4403995275497437, |
| "learning_rate": 7.536603131214758e-06, |
| "loss": 4.7701, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 3.48757266998291, |
| "learning_rate": 7.528419088461318e-06, |
| "loss": 4.7705, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 4.121363162994385, |
| "learning_rate": 7.52023504570788e-06, |
| "loss": 4.7704, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.439936876296997, |
| "learning_rate": 7.5120510029544405e-06, |
| "loss": 4.7711, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.546036720275879, |
| "learning_rate": 7.503866960201001e-06, |
| "loss": 4.7691, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.736770153045654, |
| "learning_rate": 7.495682917447561e-06, |
| "loss": 4.7802, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.7358049154281616, |
| "learning_rate": 7.487498874694122e-06, |
| "loss": 4.771, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.9707129001617432, |
| "learning_rate": 7.479314831940682e-06, |
| "loss": 4.7691, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.6308770179748535, |
| "learning_rate": 7.471130789187243e-06, |
| "loss": 4.7706, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.7046618461608887, |
| "learning_rate": 7.462946746433804e-06, |
| "loss": 4.7767, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.627031922340393, |
| "learning_rate": 7.454762703680365e-06, |
| "loss": 4.7743, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.203337550163269, |
| "learning_rate": 7.446578660926925e-06, |
| "loss": 4.7764, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.2097506523132324, |
| "learning_rate": 7.4383946181734854e-06, |
| "loss": 4.7711, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.1853944063186646, |
| "learning_rate": 7.430210575420047e-06, |
| "loss": 4.7699, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.8438019752502441, |
| "learning_rate": 7.422026532666608e-06, |
| "loss": 4.7732, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.5300862789154053, |
| "learning_rate": 7.413842489913168e-06, |
| "loss": 4.7747, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.4141496419906616, |
| "learning_rate": 7.405658447159728e-06, |
| "loss": 4.7711, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.673567533493042, |
| "learning_rate": 7.397474404406289e-06, |
| "loss": 4.777, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.1357908248901367, |
| "learning_rate": 7.38929036165285e-06, |
| "loss": 4.7723, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 4.316195964813232, |
| "learning_rate": 7.381106318899411e-06, |
| "loss": 4.7678, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.7443231344223022, |
| "learning_rate": 7.372922276145971e-06, |
| "loss": 4.7707, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.7221803665161133, |
| "learning_rate": 7.364738233392532e-06, |
| "loss": 4.7708, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.8770394921302795, |
| "learning_rate": 7.356554190639092e-06, |
| "loss": 4.7721, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.9764662384986877, |
| "learning_rate": 7.348370147885653e-06, |
| "loss": 4.7756, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 3.6692917346954346, |
| "learning_rate": 7.3401861051322146e-06, |
| "loss": 4.7773, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.8451597690582275, |
| "learning_rate": 7.332002062378775e-06, |
| "loss": 4.7742, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.9092561602592468, |
| "learning_rate": 7.323818019625335e-06, |
| "loss": 4.7666, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.9218481779098511, |
| "learning_rate": 7.315633976871895e-06, |
| "loss": 4.7717, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.8461436033248901, |
| "learning_rate": 7.3074499341184565e-06, |
| "loss": 4.7752, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.879395067691803, |
| "learning_rate": 7.299265891365017e-06, |
| "loss": 4.7736, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.9056565165519714, |
| "learning_rate": 7.291081848611578e-06, |
| "loss": 4.7671, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.0318729877471924, |
| "learning_rate": 7.282897805858138e-06, |
| "loss": 4.7672, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.0115817785263062, |
| "learning_rate": 7.274713763104699e-06, |
| "loss": 4.7782, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.3754358291625977, |
| "learning_rate": 7.2665297203512595e-06, |
| "loss": 4.7711, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.6245019435882568, |
| "learning_rate": 7.25834567759782e-06, |
| "loss": 4.7716, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.5871042013168335, |
| "learning_rate": 7.250161634844382e-06, |
| "loss": 4.7689, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.6681900024414062, |
| "learning_rate": 7.241977592090942e-06, |
| "loss": 4.7734, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.2739856243133545, |
| "learning_rate": 7.233793549337502e-06, |
| "loss": 4.7746, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.8256975412368774, |
| "learning_rate": 7.2256095065840625e-06, |
| "loss": 4.7683, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.5301151275634766, |
| "learning_rate": 7.217425463830624e-06, |
| "loss": 4.7715, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9835234880447388, |
| "learning_rate": 7.209241421077185e-06, |
| "loss": 4.7675, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.1152873039245605, |
| "learning_rate": 7.201057378323745e-06, |
| "loss": 4.767, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 4.025820732116699, |
| "learning_rate": 7.192873335570305e-06, |
| "loss": 4.7739, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 4.386086463928223, |
| "learning_rate": 7.184689292816866e-06, |
| "loss": 4.7716, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 3.9003477096557617, |
| "learning_rate": 7.176505250063427e-06, |
| "loss": 4.7691, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.3681395053863525, |
| "learning_rate": 7.168321207309987e-06, |
| "loss": 4.7747, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 12.061843872070312, |
| "learning_rate": 7.160137164556549e-06, |
| "loss": 4.7714, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.0302884578704834, |
| "learning_rate": 7.151953121803109e-06, |
| "loss": 4.7711, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.5776879787445068, |
| "learning_rate": 7.1437690790496694e-06, |
| "loss": 4.7682, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.52390718460083, |
| "learning_rate": 7.13558503629623e-06, |
| "loss": 4.7712, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.443223237991333, |
| "learning_rate": 7.127400993542791e-06, |
| "loss": 4.7696, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.4894499778747559, |
| "learning_rate": 7.119216950789352e-06, |
| "loss": 4.7739, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.0729478597640991, |
| "learning_rate": 7.111032908035912e-06, |
| "loss": 4.7728, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.0306514501571655, |
| "learning_rate": 7.1028488652824725e-06, |
| "loss": 4.7739, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.0036280155181885, |
| "learning_rate": 7.094664822529034e-06, |
| "loss": 4.7714, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.9095188975334167, |
| "learning_rate": 7.086480779775594e-06, |
| "loss": 4.7647, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.0899231433868408, |
| "learning_rate": 7.078296737022154e-06, |
| "loss": 4.7637, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 5.961045742034912, |
| "learning_rate": 7.070112694268716e-06, |
| "loss": 4.7722, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.9270179867744446, |
| "learning_rate": 7.061928651515276e-06, |
| "loss": 4.7682, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.7026699781417847, |
| "learning_rate": 7.053744608761837e-06, |
| "loss": 4.7697, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.5568283796310425, |
| "learning_rate": 7.045560566008397e-06, |
| "loss": 4.7695, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 4.148453712463379, |
| "learning_rate": 7.037376523254959e-06, |
| "loss": 4.7667, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.0565524101257324, |
| "learning_rate": 7.029192480501519e-06, |
| "loss": 4.7728, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.3457672595977783, |
| "learning_rate": 7.021008437748079e-06, |
| "loss": 4.7719, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.2144137620925903, |
| "learning_rate": 7.01282439499464e-06, |
| "loss": 4.7714, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.7697350978851318, |
| "learning_rate": 7.004640352241201e-06, |
| "loss": 4.7712, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.9844958782196045, |
| "learning_rate": 6.996456309487761e-06, |
| "loss": 4.7723, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 3.750422716140747, |
| "learning_rate": 6.988272266734321e-06, |
| "loss": 4.7717, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.6416289806365967, |
| "learning_rate": 6.980088223980883e-06, |
| "loss": 4.7696, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.745283842086792, |
| "learning_rate": 6.9719041812274435e-06, |
| "loss": 4.7681, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.1269015073776245, |
| "learning_rate": 6.963720138474004e-06, |
| "loss": 4.7659, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 4.63193941116333, |
| "learning_rate": 6.955536095720564e-06, |
| "loss": 4.7676, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.586788296699524, |
| "learning_rate": 6.947352052967126e-06, |
| "loss": 4.7729, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.2502408027648926, |
| "learning_rate": 6.939168010213686e-06, |
| "loss": 4.7718, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.620895504951477, |
| "learning_rate": 6.9309839674602465e-06, |
| "loss": 4.7703, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.9904155731201172, |
| "learning_rate": 6.922799924706807e-06, |
| "loss": 4.77, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9440988302230835, |
| "learning_rate": 6.914615881953368e-06, |
| "loss": 4.7688, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.9490699768066406, |
| "learning_rate": 6.906431839199928e-06, |
| "loss": 4.7718, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.7235488891601562, |
| "learning_rate": 6.898247796446489e-06, |
| "loss": 4.7685, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.076762318611145, |
| "learning_rate": 6.89006375369305e-06, |
| "loss": 4.7708, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.8281159400939941, |
| "learning_rate": 6.881879710939611e-06, |
| "loss": 4.7697, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.1257106065750122, |
| "learning_rate": 6.873695668186171e-06, |
| "loss": 4.7727, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 4.7161760330200195, |
| "learning_rate": 6.865511625432731e-06, |
| "loss": 4.7695, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.1901695728302, |
| "learning_rate": 6.857327582679293e-06, |
| "loss": 4.7712, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.988418459892273, |
| "learning_rate": 6.8491435399258535e-06, |
| "loss": 4.7691, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.4924893379211426, |
| "learning_rate": 6.840959497172414e-06, |
| "loss": 4.7705, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.154937744140625, |
| "learning_rate": 6.832775454418974e-06, |
| "loss": 4.7652, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.1525272130966187, |
| "learning_rate": 6.824591411665535e-06, |
| "loss": 4.7705, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.9818894863128662, |
| "learning_rate": 6.816407368912095e-06, |
| "loss": 4.767, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 4.012678146362305, |
| "learning_rate": 6.8082233261586565e-06, |
| "loss": 4.7657, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 3.9307897090911865, |
| "learning_rate": 6.800039283405218e-06, |
| "loss": 4.7629, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.6684534549713135, |
| "learning_rate": 6.791855240651778e-06, |
| "loss": 4.7695, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 3.2453012466430664, |
| "learning_rate": 6.783671197898338e-06, |
| "loss": 4.7733, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.1828510761260986, |
| "learning_rate": 6.775487155144898e-06, |
| "loss": 4.7696, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.2635380029678345, |
| "learning_rate": 6.76730311239146e-06, |
| "loss": 4.7726, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.8589321970939636, |
| "learning_rate": 6.759119069638021e-06, |
| "loss": 4.768, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.4459155797958374, |
| "learning_rate": 6.750935026884581e-06, |
| "loss": 4.7664, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 3.3713083267211914, |
| "learning_rate": 6.742750984131141e-06, |
| "loss": 4.7684, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 3.4334757328033447, |
| "learning_rate": 6.734566941377702e-06, |
| "loss": 4.767, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.072906255722046, |
| "learning_rate": 6.726382898624263e-06, |
| "loss": 4.7623, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.8414703607559204, |
| "learning_rate": 6.718198855870824e-06, |
| "loss": 4.7657, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.423050045967102, |
| "learning_rate": 6.710014813117385e-06, |
| "loss": 4.7684, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.1521214246749878, |
| "learning_rate": 6.701830770363945e-06, |
| "loss": 4.7682, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.2841377258300781, |
| "learning_rate": 6.693646727610505e-06, |
| "loss": 4.7661, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.815016508102417, |
| "learning_rate": 6.6854626848570656e-06, |
| "loss": 4.7724, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.9150479435920715, |
| "learning_rate": 6.6772786421036275e-06, |
| "loss": 4.7667, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.2387914657592773, |
| "learning_rate": 6.669094599350188e-06, |
| "loss": 4.7695, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 3.5044503211975098, |
| "learning_rate": 6.660910556596748e-06, |
| "loss": 4.7671, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.1371254920959473, |
| "learning_rate": 6.652726513843308e-06, |
| "loss": 4.7652, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.3063691854476929, |
| "learning_rate": 6.6445424710898694e-06, |
| "loss": 4.7674, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.7822604179382324, |
| "learning_rate": 6.6363584283364306e-06, |
| "loss": 4.7692, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.9057841300964355, |
| "learning_rate": 6.628174385582991e-06, |
| "loss": 4.7676, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.9072014093399048, |
| "learning_rate": 6.619990342829552e-06, |
| "loss": 4.7625, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.9912985563278198, |
| "learning_rate": 6.611806300076112e-06, |
| "loss": 4.7709, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.278571605682373, |
| "learning_rate": 6.6036222573226725e-06, |
| "loss": 4.7725, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.2209765911102295, |
| "learning_rate": 6.595438214569233e-06, |
| "loss": 4.7693, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.4683972597122192, |
| "learning_rate": 6.587254171815795e-06, |
| "loss": 4.7715, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.1982457637786865, |
| "learning_rate": 6.579070129062355e-06, |
| "loss": 4.7629, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 3.916114568710327, |
| "learning_rate": 6.570886086308915e-06, |
| "loss": 4.7628, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 4.219468116760254, |
| "learning_rate": 6.5627020435554755e-06, |
| "loss": 4.766, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.3876959085464478, |
| "learning_rate": 6.5545180008020375e-06, |
| "loss": 4.7689, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 4.21665620803833, |
| "learning_rate": 6.546333958048598e-06, |
| "loss": 4.767, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.8205769658088684, |
| "learning_rate": 6.538149915295158e-06, |
| "loss": 4.7673, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.474163293838501, |
| "learning_rate": 6.529965872541719e-06, |
| "loss": 4.7662, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.4474871158599854, |
| "learning_rate": 6.521781829788279e-06, |
| "loss": 4.7672, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.422839879989624, |
| "learning_rate": 6.51359778703484e-06, |
| "loss": 4.7639, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.002898097038269, |
| "learning_rate": 6.5054137442814e-06, |
| "loss": 4.7707, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.21475350856781, |
| "learning_rate": 6.497229701527962e-06, |
| "loss": 4.7643, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.273319959640503, |
| "learning_rate": 6.489045658774522e-06, |
| "loss": 4.764, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.963934063911438, |
| "learning_rate": 6.480861616021082e-06, |
| "loss": 4.7675, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.1018524169921875, |
| "learning_rate": 6.472677573267643e-06, |
| "loss": 4.7722, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.1742087602615356, |
| "learning_rate": 6.464493530514205e-06, |
| "loss": 4.7697, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.3663380146026611, |
| "learning_rate": 6.456309487760765e-06, |
| "loss": 4.7654, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.2402204275131226, |
| "learning_rate": 6.448125445007325e-06, |
| "loss": 4.7657, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.8193938732147217, |
| "learning_rate": 6.439941402253886e-06, |
| "loss": 4.7631, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.9568284749984741, |
| "learning_rate": 6.4317573595004465e-06, |
| "loss": 4.7687, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.9550923109054565, |
| "learning_rate": 6.423573316747007e-06, |
| "loss": 4.766, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.496541976928711, |
| "learning_rate": 6.415389273993567e-06, |
| "loss": 4.7674, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 7.160891532897949, |
| "learning_rate": 6.407205231240129e-06, |
| "loss": 4.7667, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.1409087181091309, |
| "learning_rate": 6.399021188486689e-06, |
| "loss": 4.7657, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.3777464628219604, |
| "learning_rate": 6.3908371457332496e-06, |
| "loss": 4.7657, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.171425223350525, |
| "learning_rate": 6.38265310297981e-06, |
| "loss": 4.7645, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.4748115539550781, |
| "learning_rate": 6.374469060226372e-06, |
| "loss": 4.7622, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 4.880084991455078, |
| "learning_rate": 6.366285017472932e-06, |
| "loss": 4.766, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.557969331741333, |
| "learning_rate": 6.358100974719492e-06, |
| "loss": 4.7688, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.078839063644409, |
| "learning_rate": 6.3499169319660534e-06, |
| "loss": 4.7652, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.024000644683838, |
| "learning_rate": 6.341732889212614e-06, |
| "loss": 4.767, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.7246519327163696, |
| "learning_rate": 6.333548846459174e-06, |
| "loss": 4.7631, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.9058144688606262, |
| "learning_rate": 6.325364803705735e-06, |
| "loss": 4.7641, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.1066261529922485, |
| "learning_rate": 6.317180760952296e-06, |
| "loss": 4.7657, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.048341989517212, |
| "learning_rate": 6.3089967181988565e-06, |
| "loss": 4.7573, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.8883166313171387, |
| "learning_rate": 6.300812675445417e-06, |
| "loss": 4.7681, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 3.5398635864257812, |
| "learning_rate": 6.292628632691977e-06, |
| "loss": 4.7622, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 4.646157741546631, |
| "learning_rate": 6.284444589938539e-06, |
| "loss": 4.7675, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.9889692068099976, |
| "learning_rate": 6.276260547185099e-06, |
| "loss": 4.7616, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8759746551513672, |
| "learning_rate": 6.2680765044316595e-06, |
| "loss": 4.7674, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.6846829652786255, |
| "learning_rate": 6.259892461678221e-06, |
| "loss": 4.762, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.9014168381690979, |
| "learning_rate": 6.251708418924781e-06, |
| "loss": 4.7671, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.2552223205566406, |
| "learning_rate": 6.243524376171341e-06, |
| "loss": 4.7665, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.994378566741943, |
| "learning_rate": 6.235340333417902e-06, |
| "loss": 4.7679, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.071326732635498, |
| "learning_rate": 6.227156290664463e-06, |
| "loss": 4.7621, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.366959810256958, |
| "learning_rate": 6.218972247911024e-06, |
| "loss": 4.7714, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.2230727672576904, |
| "learning_rate": 6.210788205157584e-06, |
| "loss": 4.7671, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 3.3939905166625977, |
| "learning_rate": 6.202604162404144e-06, |
| "loss": 4.7652, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.4360277652740479, |
| "learning_rate": 6.194420119650706e-06, |
| "loss": 4.7613, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.0407369136810303, |
| "learning_rate": 6.186236076897266e-06, |
| "loss": 4.7704, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.9247026443481445, |
| "learning_rate": 6.178052034143827e-06, |
| "loss": 4.7611, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.4636540412902832, |
| "learning_rate": 6.169867991390388e-06, |
| "loss": 4.7658, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.443965196609497, |
| "learning_rate": 6.161683948636948e-06, |
| "loss": 4.7674, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.4305676221847534, |
| "learning_rate": 6.153499905883509e-06, |
| "loss": 4.7653, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.9233379364013672, |
| "learning_rate": 6.1453158631300694e-06, |
| "loss": 4.7685, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.9005165100097656, |
| "learning_rate": 6.1371318203766305e-06, |
| "loss": 4.7605, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.1827316284179688, |
| "learning_rate": 6.128947777623191e-06, |
| "loss": 4.7639, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.330802083015442, |
| "learning_rate": 6.120763734869751e-06, |
| "loss": 4.765, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.0128445625305176, |
| "learning_rate": 6.112579692116311e-06, |
| "loss": 4.7704, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 3.6746978759765625, |
| "learning_rate": 6.104395649362873e-06, |
| "loss": 4.7645, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.546904444694519, |
| "learning_rate": 6.0962116066094336e-06, |
| "loss": 4.764, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 3.3686161041259766, |
| "learning_rate": 6.088027563855994e-06, |
| "loss": 4.7679, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.7452073097229004, |
| "learning_rate": 6.079843521102555e-06, |
| "loss": 4.7597, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.0575987100601196, |
| "learning_rate": 6.071659478349115e-06, |
| "loss": 4.7656, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 5.366837501525879, |
| "learning_rate": 6.063475435595676e-06, |
| "loss": 4.761, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.977851927280426, |
| "learning_rate": 6.055291392842237e-06, |
| "loss": 4.7644, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.891844749450684, |
| "learning_rate": 6.047107350088798e-06, |
| "loss": 4.7681, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.2509273290634155, |
| "learning_rate": 6.038923307335358e-06, |
| "loss": 4.7587, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.095798373222351, |
| "learning_rate": 6.030739264581918e-06, |
| "loss": 4.7617, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.093984842300415, |
| "learning_rate": 6.0225552218284785e-06, |
| "loss": 4.7629, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.8967887759208679, |
| "learning_rate": 6.0143711790750405e-06, |
| "loss": 4.7623, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.8838722705841064, |
| "learning_rate": 6.006187136321601e-06, |
| "loss": 4.7658, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.5173430442810059, |
| "learning_rate": 5.998003093568161e-06, |
| "loss": 4.7631, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.8584578037261963, |
| "learning_rate": 5.989819050814722e-06, |
| "loss": 4.7597, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.672813892364502, |
| "learning_rate": 5.981635008061283e-06, |
| "loss": 4.7614, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.2858943939208984, |
| "learning_rate": 5.9734509653078435e-06, |
| "loss": 4.7585, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0808370113372803, |
| "learning_rate": 5.965266922554404e-06, |
| "loss": 4.7652, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.6658849716186523, |
| "learning_rate": 5.957082879800965e-06, |
| "loss": 4.7622, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.312129259109497, |
| "learning_rate": 5.948898837047525e-06, |
| "loss": 4.7643, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.109014868736267, |
| "learning_rate": 5.940714794294085e-06, |
| "loss": 4.7635, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.059754729270935, |
| "learning_rate": 5.932530751540646e-06, |
| "loss": 4.7635, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.4219303131103516, |
| "learning_rate": 5.924346708787208e-06, |
| "loss": 4.7644, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 4.515329360961914, |
| "learning_rate": 5.916162666033768e-06, |
| "loss": 4.764, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.7879600524902344, |
| "learning_rate": 5.907978623280328e-06, |
| "loss": 4.7629, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.4872534275054932, |
| "learning_rate": 5.899794580526889e-06, |
| "loss": 4.7627, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.7036062479019165, |
| "learning_rate": 5.89161053777345e-06, |
| "loss": 4.7602, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.1334501504898071, |
| "learning_rate": 5.883426495020011e-06, |
| "loss": 4.7649, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.8255513906478882, |
| "learning_rate": 5.875242452266571e-06, |
| "loss": 4.7662, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.9870157241821289, |
| "learning_rate": 5.867058409513132e-06, |
| "loss": 4.7613, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.7012158632278442, |
| "learning_rate": 5.858874366759692e-06, |
| "loss": 4.7609, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.9148879051208496, |
| "learning_rate": 5.850690324006253e-06, |
| "loss": 4.7601, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.1383265256881714, |
| "learning_rate": 5.842506281252813e-06, |
| "loss": 4.7583, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.8608391284942627, |
| "learning_rate": 5.834322238499375e-06, |
| "loss": 4.7639, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.019380807876587, |
| "learning_rate": 5.826138195745935e-06, |
| "loss": 4.7638, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.069756031036377, |
| "learning_rate": 5.817954152992495e-06, |
| "loss": 4.7691, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.2474431991577148, |
| "learning_rate": 5.809770110239057e-06, |
| "loss": 4.7668, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.1330208778381348, |
| "learning_rate": 5.8015860674856176e-06, |
| "loss": 4.7647, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.4908925294876099, |
| "learning_rate": 5.793402024732178e-06, |
| "loss": 4.759, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.1022288799285889, |
| "learning_rate": 5.785217981978738e-06, |
| "loss": 4.7583, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.2714922428131104, |
| "learning_rate": 5.777033939225299e-06, |
| "loss": 4.7619, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.3751145601272583, |
| "learning_rate": 5.7688498964718595e-06, |
| "loss": 4.7607, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.9955260753631592, |
| "learning_rate": 5.76066585371842e-06, |
| "loss": 4.7612, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.48429274559021, |
| "learning_rate": 5.752481810964981e-06, |
| "loss": 4.7631, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 3.2120578289031982, |
| "learning_rate": 5.744297768211542e-06, |
| "loss": 4.7652, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.1913766860961914, |
| "learning_rate": 5.736113725458102e-06, |
| "loss": 4.7651, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 3.3173420429229736, |
| "learning_rate": 5.7279296827046625e-06, |
| "loss": 4.7583, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.184640407562256, |
| "learning_rate": 5.7197456399512245e-06, |
| "loss": 4.7674, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.406989574432373, |
| "learning_rate": 5.711561597197785e-06, |
| "loss": 4.7624, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.420721530914307, |
| "learning_rate": 5.703377554444345e-06, |
| "loss": 4.7584, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.6438698768615723, |
| "learning_rate": 5.695193511690905e-06, |
| "loss": 4.759, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.0008363723754883, |
| "learning_rate": 5.687009468937466e-06, |
| "loss": 4.7622, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.9954501986503601, |
| "learning_rate": 5.678825426184027e-06, |
| "loss": 4.7599, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.2024335861206055, |
| "learning_rate": 5.670641383430587e-06, |
| "loss": 4.757, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.8690518140792847, |
| "learning_rate": 5.662457340677148e-06, |
| "loss": 4.7638, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.789288520812988, |
| "learning_rate": 5.654273297923709e-06, |
| "loss": 4.7651, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.9161509275436401, |
| "learning_rate": 5.646089255170269e-06, |
| "loss": 4.762, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.1791253089904785, |
| "learning_rate": 5.63790521241683e-06, |
| "loss": 4.7629, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 3.780832529067993, |
| "learning_rate": 5.629721169663392e-06, |
| "loss": 4.7553, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.1403292417526245, |
| "learning_rate": 5.621537126909952e-06, |
| "loss": 4.7615, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.2787580490112305, |
| "learning_rate": 5.613353084156512e-06, |
| "loss": 4.7623, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.774376153945923, |
| "learning_rate": 5.6051690414030724e-06, |
| "loss": 4.7626, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 3.4365766048431396, |
| "learning_rate": 5.5969849986496336e-06, |
| "loss": 4.7573, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.0744109153747559, |
| "learning_rate": 5.588800955896194e-06, |
| "loss": 4.7621, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.9712745547294617, |
| "learning_rate": 5.580616913142755e-06, |
| "loss": 4.7605, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.24153470993042, |
| "learning_rate": 5.572432870389315e-06, |
| "loss": 4.7654, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.9112986326217651, |
| "learning_rate": 5.564248827635876e-06, |
| "loss": 4.7666, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.2796552181243896, |
| "learning_rate": 5.556064784882437e-06, |
| "loss": 4.7594, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.3988897800445557, |
| "learning_rate": 5.547880742128997e-06, |
| "loss": 4.7596, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.2597512006759644, |
| "learning_rate": 5.539696699375559e-06, |
| "loss": 4.7648, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.735841989517212, |
| "learning_rate": 5.531512656622119e-06, |
| "loss": 4.766, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.2517529726028442, |
| "learning_rate": 5.523328613868679e-06, |
| "loss": 4.7585, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.7145378589630127, |
| "learning_rate": 5.51514457111524e-06, |
| "loss": 4.7575, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.1680995225906372, |
| "learning_rate": 5.506960528361801e-06, |
| "loss": 4.7611, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 3.5684611797332764, |
| "learning_rate": 5.498776485608361e-06, |
| "loss": 4.7579, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.0898152589797974, |
| "learning_rate": 5.490592442854922e-06, |
| "loss": 4.7587, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.0716261863708496, |
| "learning_rate": 5.482408400101482e-06, |
| "loss": 4.7605, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.373514413833618, |
| "learning_rate": 5.4742243573480435e-06, |
| "loss": 4.7626, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 3.0533201694488525, |
| "learning_rate": 5.466040314594604e-06, |
| "loss": 4.7601, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.9586790800094604, |
| "learning_rate": 5.457856271841164e-06, |
| "loss": 4.7607, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.8159968852996826, |
| "learning_rate": 5.449672229087726e-06, |
| "loss": 4.7613, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.0811960697174072, |
| "learning_rate": 5.441488186334286e-06, |
| "loss": 4.7613, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.0583518743515015, |
| "learning_rate": 5.4333041435808465e-06, |
| "loss": 4.7558, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.993607223033905, |
| "learning_rate": 5.425120100827407e-06, |
| "loss": 4.7577, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.3715596199035645, |
| "learning_rate": 5.416936058073968e-06, |
| "loss": 4.7565, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.3586755990982056, |
| "learning_rate": 5.408752015320529e-06, |
| "loss": 4.7627, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.5538294315338135, |
| "learning_rate": 5.400567972567089e-06, |
| "loss": 4.7616, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.3117858171463013, |
| "learning_rate": 5.3923839298136495e-06, |
| "loss": 4.7615, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.4825961589813232, |
| "learning_rate": 5.384199887060211e-06, |
| "loss": 4.7673, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 3.6540427207946777, |
| "learning_rate": 5.376015844306771e-06, |
| "loss": 4.7583, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.217731237411499, |
| "learning_rate": 5.367831801553331e-06, |
| "loss": 4.7647, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.3690531253814697, |
| "learning_rate": 5.359647758799893e-06, |
| "loss": 4.7607, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.986236572265625, |
| "learning_rate": 5.351463716046453e-06, |
| "loss": 4.7649, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 4.169172763824463, |
| "learning_rate": 5.343279673293014e-06, |
| "loss": 4.7609, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 3.941506862640381, |
| "learning_rate": 5.335095630539574e-06, |
| "loss": 4.7588, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.151626467704773, |
| "learning_rate": 5.326911587786135e-06, |
| "loss": 4.7626, |
| "step": 57100 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.1332154273986816, |
| "learning_rate": 5.318727545032696e-06, |
| "loss": 4.7638, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.0343974828720093, |
| "learning_rate": 5.3105435022792564e-06, |
| "loss": 4.7591, |
| "step": 57300 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.0772465467453003, |
| "learning_rate": 5.302359459525817e-06, |
| "loss": 4.7594, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 5.96359920501709, |
| "learning_rate": 5.294175416772378e-06, |
| "loss": 4.7597, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.451434850692749, |
| "learning_rate": 5.285991374018938e-06, |
| "loss": 4.7622, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 3.3634424209594727, |
| "learning_rate": 5.277807331265498e-06, |
| "loss": 4.7639, |
| "step": 57700 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.3753291368484497, |
| "learning_rate": 5.2696232885120595e-06, |
| "loss": 4.762, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.0764108896255493, |
| "learning_rate": 5.261439245758621e-06, |
| "loss": 4.7565, |
| "step": 57900 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.7822853326797485, |
| "learning_rate": 5.253255203005181e-06, |
| "loss": 4.76, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.0505635738372803, |
| "learning_rate": 5.245071160251741e-06, |
| "loss": 4.7563, |
| "step": 58100 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.2111964225769043, |
| "learning_rate": 5.236887117498303e-06, |
| "loss": 4.7613, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.0773766040802002, |
| "learning_rate": 5.228703074744863e-06, |
| "loss": 4.7645, |
| "step": 58300 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.387395977973938, |
| "learning_rate": 5.220519031991424e-06, |
| "loss": 4.7581, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.5407155752182007, |
| "learning_rate": 5.212334989237984e-06, |
| "loss": 4.7599, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.9045255184173584, |
| "learning_rate": 5.204150946484545e-06, |
| "loss": 4.7632, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.1140589714050293, |
| "learning_rate": 5.195966903731105e-06, |
| "loss": 4.758, |
| "step": 58700 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 3.0775272846221924, |
| "learning_rate": 5.1877828609776655e-06, |
| "loss": 4.7591, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.5358976125717163, |
| "learning_rate": 5.179598818224227e-06, |
| "loss": 4.7613, |
| "step": 58900 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.506425380706787, |
| "learning_rate": 5.171414775470788e-06, |
| "loss": 4.7584, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.562016248703003, |
| "learning_rate": 5.163230732717348e-06, |
| "loss": 4.7603, |
| "step": 59100 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 3.894599437713623, |
| "learning_rate": 5.155046689963908e-06, |
| "loss": 4.7582, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.7475073337554932, |
| "learning_rate": 5.14686264721047e-06, |
| "loss": 4.7552, |
| "step": 59300 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.168311357498169, |
| "learning_rate": 5.1386786044570305e-06, |
| "loss": 4.7546, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.3866901397705078, |
| "learning_rate": 5.130494561703591e-06, |
| "loss": 4.7595, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.5141569375991821, |
| "learning_rate": 5.122310518950151e-06, |
| "loss": 4.7594, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.22174072265625, |
| "learning_rate": 5.114126476196712e-06, |
| "loss": 4.7591, |
| "step": 59700 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.8501654863357544, |
| "learning_rate": 5.1059424334432724e-06, |
| "loss": 4.7566, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.8938846588134766, |
| "learning_rate": 5.0977583906898336e-06, |
| "loss": 4.7601, |
| "step": 59900 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 3.8000481128692627, |
| "learning_rate": 5.089574347936394e-06, |
| "loss": 4.7645, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.3883254528045654, |
| "learning_rate": 5.081390305182955e-06, |
| "loss": 4.759, |
| "step": 60100 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.5231989622116089, |
| "learning_rate": 5.073206262429515e-06, |
| "loss": 4.7591, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.584988832473755, |
| "learning_rate": 5.0650222196760755e-06, |
| "loss": 4.7614, |
| "step": 60300 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.4782294034957886, |
| "learning_rate": 5.0568381769226374e-06, |
| "loss": 4.7577, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.2520744800567627, |
| "learning_rate": 5.048654134169198e-06, |
| "loss": 4.7576, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.1555761098861694, |
| "learning_rate": 5.040470091415758e-06, |
| "loss": 4.7613, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.0871673822402954, |
| "learning_rate": 5.032286048662318e-06, |
| "loss": 4.7534, |
| "step": 60700 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.9198378324508667, |
| "learning_rate": 5.024102005908879e-06, |
| "loss": 4.7599, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.8254669904708862, |
| "learning_rate": 5.01591796315544e-06, |
| "loss": 4.7577, |
| "step": 60900 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.592374563217163, |
| "learning_rate": 5.007733920402001e-06, |
| "loss": 4.7596, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4294451475143433, |
| "learning_rate": 4.999549877648561e-06, |
| "loss": 4.7587, |
| "step": 61100 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.3959410190582275, |
| "learning_rate": 4.991365834895122e-06, |
| "loss": 4.7587, |
| "step": 61200 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.02007257938385, |
| "learning_rate": 4.983181792141682e-06, |
| "loss": 4.7591, |
| "step": 61300 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.9636856317520142, |
| "learning_rate": 4.9749977493882435e-06, |
| "loss": 4.7586, |
| "step": 61400 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.2003675699234009, |
| "learning_rate": 4.966813706634804e-06, |
| "loss": 4.7578, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.2069259881973267, |
| "learning_rate": 4.958629663881365e-06, |
| "loss": 4.7594, |
| "step": 61600 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.160438895225525, |
| "learning_rate": 4.950445621127925e-06, |
| "loss": 4.7537, |
| "step": 61700 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.4509081840515137, |
| "learning_rate": 4.942261578374486e-06, |
| "loss": 4.7566, |
| "step": 61800 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.4008055925369263, |
| "learning_rate": 4.9340775356210465e-06, |
| "loss": 4.7596, |
| "step": 61900 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.445732593536377, |
| "learning_rate": 4.925893492867608e-06, |
| "loss": 4.7585, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.155773162841797, |
| "learning_rate": 4.917709450114168e-06, |
| "loss": 4.7594, |
| "step": 62100 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.3300435543060303, |
| "learning_rate": 4.909525407360728e-06, |
| "loss": 4.7599, |
| "step": 62200 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.1474816799163818, |
| "learning_rate": 4.901341364607289e-06, |
| "loss": 4.7575, |
| "step": 62300 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.1090396642684937, |
| "learning_rate": 4.8931573218538495e-06, |
| "loss": 4.7556, |
| "step": 62400 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.4400147199630737, |
| "learning_rate": 4.884973279100411e-06, |
| "loss": 4.7543, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.534568190574646, |
| "learning_rate": 4.876789236346971e-06, |
| "loss": 4.7542, |
| "step": 62600 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.1177995204925537, |
| "learning_rate": 4.868605193593532e-06, |
| "loss": 4.7557, |
| "step": 62700 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.2250655889511108, |
| "learning_rate": 4.860421150840092e-06, |
| "loss": 4.7588, |
| "step": 62800 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.2332854270935059, |
| "learning_rate": 4.852237108086653e-06, |
| "loss": 4.7623, |
| "step": 62900 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.0365347862243652, |
| "learning_rate": 4.844053065333214e-06, |
| "loss": 4.7591, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 3.2265894412994385, |
| "learning_rate": 4.835869022579775e-06, |
| "loss": 4.7589, |
| "step": 63100 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.374605417251587, |
| "learning_rate": 4.827684979826335e-06, |
| "loss": 4.7574, |
| "step": 63200 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.239890694618225, |
| "learning_rate": 4.819500937072895e-06, |
| "loss": 4.7622, |
| "step": 63300 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 4.042061805725098, |
| "learning_rate": 4.8113168943194564e-06, |
| "loss": 4.7591, |
| "step": 63400 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.166978597640991, |
| "learning_rate": 4.803132851566017e-06, |
| "loss": 4.7594, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.0814965963363647, |
| "learning_rate": 4.794948808812578e-06, |
| "loss": 4.759, |
| "step": 63600 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.7993803024291992, |
| "learning_rate": 4.786764766059138e-06, |
| "loss": 4.7574, |
| "step": 63700 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.1397624015808105, |
| "learning_rate": 4.778580723305699e-06, |
| "loss": 4.7568, |
| "step": 63800 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.801677942276001, |
| "learning_rate": 4.7703966805522595e-06, |
| "loss": 4.7561, |
| "step": 63900 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.5364161729812622, |
| "learning_rate": 4.762212637798821e-06, |
| "loss": 4.7584, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.0341291427612305, |
| "learning_rate": 4.754028595045381e-06, |
| "loss": 4.7581, |
| "step": 64100 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.0642578601837158, |
| "learning_rate": 4.745844552291942e-06, |
| "loss": 4.7566, |
| "step": 64200 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.6422146558761597, |
| "learning_rate": 4.737660509538502e-06, |
| "loss": 4.7583, |
| "step": 64300 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8048427104949951, |
| "learning_rate": 4.7294764667850625e-06, |
| "loss": 4.7575, |
| "step": 64400 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5397706031799316, |
| "learning_rate": 4.721292424031624e-06, |
| "loss": 4.7603, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.1673585176467896, |
| "learning_rate": 4.713108381278184e-06, |
| "loss": 4.7603, |
| "step": 64600 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.130509376525879, |
| "learning_rate": 4.704924338524745e-06, |
| "loss": 4.7582, |
| "step": 64700 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8829139471054077, |
| "learning_rate": 4.696740295771305e-06, |
| "loss": 4.7559, |
| "step": 64800 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.014657974243164, |
| "learning_rate": 4.688556253017866e-06, |
| "loss": 4.7585, |
| "step": 64900 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.3047457933425903, |
| "learning_rate": 4.680372210264427e-06, |
| "loss": 4.7582, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 5.515758514404297, |
| "learning_rate": 4.672188167510988e-06, |
| "loss": 4.7581, |
| "step": 65100 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.061341643333435, |
| "learning_rate": 4.664004124757548e-06, |
| "loss": 4.7537, |
| "step": 65200 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.1620299816131592, |
| "learning_rate": 4.655820082004109e-06, |
| "loss": 4.7589, |
| "step": 65300 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.9839982390403748, |
| "learning_rate": 4.647636039250669e-06, |
| "loss": 4.7602, |
| "step": 65400 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.1345555782318115, |
| "learning_rate": 4.6394519964972305e-06, |
| "loss": 4.7595, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.2723256349563599, |
| "learning_rate": 4.631267953743791e-06, |
| "loss": 4.7631, |
| "step": 65600 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.2903733253479004, |
| "learning_rate": 4.623083910990351e-06, |
| "loss": 4.7572, |
| "step": 65700 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.849879503250122, |
| "learning_rate": 4.614899868236912e-06, |
| "loss": 4.7536, |
| "step": 65800 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.9856821298599243, |
| "learning_rate": 4.6067158254834724e-06, |
| "loss": 4.7566, |
| "step": 65900 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.0516011714935303, |
| "learning_rate": 4.5985317827300335e-06, |
| "loss": 4.7577, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.0833971500396729, |
| "learning_rate": 4.590347739976594e-06, |
| "loss": 4.7553, |
| "step": 66100 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 3.236478805541992, |
| "learning_rate": 4.582163697223155e-06, |
| "loss": 4.7524, |
| "step": 66200 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 4.561278820037842, |
| "learning_rate": 4.573979654469715e-06, |
| "loss": 4.7598, |
| "step": 66300 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.0862793922424316, |
| "learning_rate": 4.565795611716276e-06, |
| "loss": 4.756, |
| "step": 66400 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.248744249343872, |
| "learning_rate": 4.5576115689628366e-06, |
| "loss": 4.7579, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.2721776962280273, |
| "learning_rate": 4.549427526209398e-06, |
| "loss": 4.7549, |
| "step": 66600 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6902192831039429, |
| "learning_rate": 4.541243483455958e-06, |
| "loss": 4.7605, |
| "step": 66700 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.721341609954834, |
| "learning_rate": 4.533059440702518e-06, |
| "loss": 4.7619, |
| "step": 66800 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.5549167394638062, |
| "learning_rate": 4.524875397949079e-06, |
| "loss": 4.7581, |
| "step": 66900 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 4.686578273773193, |
| "learning_rate": 4.51669135519564e-06, |
| "loss": 4.757, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.2544666528701782, |
| "learning_rate": 4.508507312442201e-06, |
| "loss": 4.7614, |
| "step": 67100 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.3165531158447266, |
| "learning_rate": 4.500323269688761e-06, |
| "loss": 4.7549, |
| "step": 67200 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.4608168601989746, |
| "learning_rate": 4.492139226935322e-06, |
| "loss": 4.7553, |
| "step": 67300 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.003299593925476, |
| "learning_rate": 4.483955184181882e-06, |
| "loss": 4.7638, |
| "step": 67400 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.496551752090454, |
| "learning_rate": 4.4757711414284435e-06, |
| "loss": 4.759, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.3934059143066406, |
| "learning_rate": 4.467587098675004e-06, |
| "loss": 4.7566, |
| "step": 67600 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.459867238998413, |
| "learning_rate": 4.459403055921565e-06, |
| "loss": 4.7567, |
| "step": 67700 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.2848294973373413, |
| "learning_rate": 4.451219013168125e-06, |
| "loss": 4.7562, |
| "step": 67800 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5182512998580933, |
| "learning_rate": 4.443034970414685e-06, |
| "loss": 4.7595, |
| "step": 67900 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.2391725778579712, |
| "learning_rate": 4.4348509276612465e-06, |
| "loss": 4.7538, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 3.008521318435669, |
| "learning_rate": 4.426666884907807e-06, |
| "loss": 4.7568, |
| "step": 68100 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.6599717140197754, |
| "learning_rate": 4.418482842154368e-06, |
| "loss": 4.7598, |
| "step": 68200 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.2164254188537598, |
| "learning_rate": 4.410298799400928e-06, |
| "loss": 4.7545, |
| "step": 68300 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 3.473665237426758, |
| "learning_rate": 4.402114756647489e-06, |
| "loss": 4.7601, |
| "step": 68400 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.9182640314102173, |
| "learning_rate": 4.3939307138940495e-06, |
| "loss": 4.7559, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.2187399864196777, |
| "learning_rate": 4.385746671140611e-06, |
| "loss": 4.7611, |
| "step": 68600 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.2415308952331543, |
| "learning_rate": 4.377562628387171e-06, |
| "loss": 4.7572, |
| "step": 68700 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.0853921175003052, |
| "learning_rate": 4.369378585633732e-06, |
| "loss": 4.7522, |
| "step": 68800 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.0470669269561768, |
| "learning_rate": 4.361194542880292e-06, |
| "loss": 4.7567, |
| "step": 68900 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5501480102539062, |
| "learning_rate": 4.353010500126853e-06, |
| "loss": 4.7548, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.0756503343582153, |
| "learning_rate": 4.344826457373414e-06, |
| "loss": 4.7556, |
| "step": 69100 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.0396485328674316, |
| "learning_rate": 4.336642414619974e-06, |
| "loss": 4.756, |
| "step": 69200 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.5130740404129028, |
| "learning_rate": 4.328458371866535e-06, |
| "loss": 4.7538, |
| "step": 69300 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.2191152572631836, |
| "learning_rate": 4.320274329113095e-06, |
| "loss": 4.7594, |
| "step": 69400 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.1031177043914795, |
| "learning_rate": 4.3120902863596564e-06, |
| "loss": 4.7585, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 3.345165967941284, |
| "learning_rate": 4.303906243606217e-06, |
| "loss": 4.7601, |
| "step": 69600 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.058370590209961, |
| "learning_rate": 4.295722200852778e-06, |
| "loss": 4.7575, |
| "step": 69700 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.364247441291809, |
| "learning_rate": 4.287538158099338e-06, |
| "loss": 4.7592, |
| "step": 69800 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.520071029663086, |
| "learning_rate": 4.279354115345899e-06, |
| "loss": 4.7597, |
| "step": 69900 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.4218943119049072, |
| "learning_rate": 4.2711700725924595e-06, |
| "loss": 4.7525, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.9582276344299316, |
| "learning_rate": 4.2629860298390206e-06, |
| "loss": 4.7611, |
| "step": 70100 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.1016061305999756, |
| "learning_rate": 4.254801987085581e-06, |
| "loss": 4.7557, |
| "step": 70200 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.1501710414886475, |
| "learning_rate": 4.246617944332141e-06, |
| "loss": 4.7582, |
| "step": 70300 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.3157947063446045, |
| "learning_rate": 4.238433901578702e-06, |
| "loss": 4.7574, |
| "step": 70400 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.0421010255813599, |
| "learning_rate": 4.2302498588252625e-06, |
| "loss": 4.7583, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.3601773977279663, |
| "learning_rate": 4.222065816071824e-06, |
| "loss": 4.7567, |
| "step": 70600 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.1386362314224243, |
| "learning_rate": 4.213881773318384e-06, |
| "loss": 4.7535, |
| "step": 70700 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.3439152240753174, |
| "learning_rate": 4.205697730564945e-06, |
| "loss": 4.7595, |
| "step": 70800 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.9923715591430664, |
| "learning_rate": 4.197513687811505e-06, |
| "loss": 4.7561, |
| "step": 70900 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.0728856325149536, |
| "learning_rate": 4.189329645058066e-06, |
| "loss": 4.7528, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5504320859909058, |
| "learning_rate": 4.181145602304627e-06, |
| "loss": 4.7555, |
| "step": 71100 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 3.476879358291626, |
| "learning_rate": 4.172961559551188e-06, |
| "loss": 4.7571, |
| "step": 71200 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.4305684566497803, |
| "learning_rate": 4.164777516797748e-06, |
| "loss": 4.7501, |
| "step": 71300 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.2255500555038452, |
| "learning_rate": 4.156593474044308e-06, |
| "loss": 4.7591, |
| "step": 71400 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.292752742767334, |
| "learning_rate": 4.148409431290869e-06, |
| "loss": 4.7576, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.9670140743255615, |
| "learning_rate": 4.14022538853743e-06, |
| "loss": 4.7605, |
| "step": 71600 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.035198450088501, |
| "learning_rate": 4.132041345783991e-06, |
| "loss": 4.7564, |
| "step": 71700 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.918428659439087, |
| "learning_rate": 4.123857303030551e-06, |
| "loss": 4.7538, |
| "step": 71800 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 4.245315074920654, |
| "learning_rate": 4.115673260277112e-06, |
| "loss": 4.7585, |
| "step": 71900 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 3.4246652126312256, |
| "learning_rate": 4.107489217523672e-06, |
| "loss": 4.7507, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.2266836166381836, |
| "learning_rate": 4.0993051747702335e-06, |
| "loss": 4.7602, |
| "step": 72100 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.9559603929519653, |
| "learning_rate": 4.091121132016794e-06, |
| "loss": 4.7555, |
| "step": 72200 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.2520241737365723, |
| "learning_rate": 4.082937089263355e-06, |
| "loss": 4.7551, |
| "step": 72300 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.5236260890960693, |
| "learning_rate": 4.074753046509915e-06, |
| "loss": 4.7552, |
| "step": 72400 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.0424671173095703, |
| "learning_rate": 4.066569003756476e-06, |
| "loss": 4.7548, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.1566205024719238, |
| "learning_rate": 4.0583849610030366e-06, |
| "loss": 4.7522, |
| "step": 72600 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.9585150480270386, |
| "learning_rate": 4.050200918249597e-06, |
| "loss": 4.7609, |
| "step": 72700 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 6.500349044799805, |
| "learning_rate": 4.042016875496158e-06, |
| "loss": 4.7562, |
| "step": 72800 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.1571673154830933, |
| "learning_rate": 4.033832832742718e-06, |
| "loss": 4.7571, |
| "step": 72900 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.7180365324020386, |
| "learning_rate": 4.025648789989279e-06, |
| "loss": 4.7614, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.5343599319458008, |
| "learning_rate": 4.01746474723584e-06, |
| "loss": 4.7588, |
| "step": 73100 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.5855658054351807, |
| "learning_rate": 4.009280704482401e-06, |
| "loss": 4.7603, |
| "step": 73200 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.0107240676879883, |
| "learning_rate": 4.001096661728961e-06, |
| "loss": 4.7576, |
| "step": 73300 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.6505345106124878, |
| "learning_rate": 3.992912618975522e-06, |
| "loss": 4.7562, |
| "step": 73400 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.6212563514709473, |
| "learning_rate": 3.984728576222082e-06, |
| "loss": 4.7516, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.5305055379867554, |
| "learning_rate": 3.9765445334686435e-06, |
| "loss": 4.7533, |
| "step": 73600 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.195974826812744, |
| "learning_rate": 3.968360490715204e-06, |
| "loss": 4.7509, |
| "step": 73700 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.088273286819458, |
| "learning_rate": 3.960176447961764e-06, |
| "loss": 4.7559, |
| "step": 73800 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.045375108718872, |
| "learning_rate": 3.951992405208325e-06, |
| "loss": 4.7516, |
| "step": 73900 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.5279570817947388, |
| "learning_rate": 3.943808362454885e-06, |
| "loss": 4.7566, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.987199306488037, |
| "learning_rate": 3.9356243197014465e-06, |
| "loss": 4.7531, |
| "step": 74100 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.7594095468521118, |
| "learning_rate": 3.927440276948007e-06, |
| "loss": 4.7568, |
| "step": 74200 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9795931577682495, |
| "learning_rate": 3.919256234194568e-06, |
| "loss": 4.7606, |
| "step": 74300 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.383016586303711, |
| "learning_rate": 3.911072191441128e-06, |
| "loss": 4.7559, |
| "step": 74400 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4739179611206055, |
| "learning_rate": 3.902888148687689e-06, |
| "loss": 4.7553, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.1515618562698364, |
| "learning_rate": 3.89470410593425e-06, |
| "loss": 4.7608, |
| "step": 74600 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4380580186843872, |
| "learning_rate": 3.886520063180811e-06, |
| "loss": 4.7578, |
| "step": 74700 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.187768578529358, |
| "learning_rate": 3.878336020427371e-06, |
| "loss": 4.7533, |
| "step": 74800 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 4.768670082092285, |
| "learning_rate": 3.870151977673931e-06, |
| "loss": 4.7573, |
| "step": 74900 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.2797937393188477, |
| "learning_rate": 3.861967934920492e-06, |
| "loss": 4.7546, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.6686596870422363, |
| "learning_rate": 3.8537838921670525e-06, |
| "loss": 4.7562, |
| "step": 75100 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.777021646499634, |
| "learning_rate": 3.845599849413614e-06, |
| "loss": 4.7578, |
| "step": 75200 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4774482250213623, |
| "learning_rate": 3.837415806660174e-06, |
| "loss": 4.7553, |
| "step": 75300 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.154783010482788, |
| "learning_rate": 3.829231763906735e-06, |
| "loss": 4.7536, |
| "step": 75400 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.1363816261291504, |
| "learning_rate": 3.821047721153295e-06, |
| "loss": 4.7553, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.314833402633667, |
| "learning_rate": 3.8128636783998564e-06, |
| "loss": 4.754, |
| "step": 75600 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.0026655197143555, |
| "learning_rate": 3.804679635646417e-06, |
| "loss": 4.7564, |
| "step": 75700 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.530662775039673, |
| "learning_rate": 3.7964955928929774e-06, |
| "loss": 4.7621, |
| "step": 75800 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.7200578451156616, |
| "learning_rate": 3.7883115501395385e-06, |
| "loss": 4.7534, |
| "step": 75900 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.1230708360671997, |
| "learning_rate": 3.7801275073860988e-06, |
| "loss": 4.7573, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.1518919467926025, |
| "learning_rate": 3.77194346463266e-06, |
| "loss": 4.756, |
| "step": 76100 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 4.6440582275390625, |
| "learning_rate": 3.76375942187922e-06, |
| "loss": 4.7538, |
| "step": 76200 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.2866283655166626, |
| "learning_rate": 3.755575379125781e-06, |
| "loss": 4.7572, |
| "step": 76300 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.0763148069381714, |
| "learning_rate": 3.7473913363723415e-06, |
| "loss": 4.7568, |
| "step": 76400 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.0883269309997559, |
| "learning_rate": 3.739207293618902e-06, |
| "loss": 4.7562, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.50298011302948, |
| "learning_rate": 3.7310232508654625e-06, |
| "loss": 4.7512, |
| "step": 76600 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.144468069076538, |
| "learning_rate": 3.7228392081120236e-06, |
| "loss": 4.7571, |
| "step": 76700 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.2953712940216064, |
| "learning_rate": 3.7146551653585843e-06, |
| "loss": 4.7502, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 3.047788143157959, |
| "learning_rate": 3.7064711226051445e-06, |
| "loss": 4.7561, |
| "step": 76900 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.7507997751235962, |
| "learning_rate": 3.6982870798517057e-06, |
| "loss": 4.7547, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.189469814300537, |
| "learning_rate": 3.690103037098266e-06, |
| "loss": 4.7554, |
| "step": 77100 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.2107117176055908, |
| "learning_rate": 3.681918994344827e-06, |
| "loss": 4.7544, |
| "step": 77200 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.0349069833755493, |
| "learning_rate": 3.6737349515913873e-06, |
| "loss": 4.7603, |
| "step": 77300 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.1641030311584473, |
| "learning_rate": 3.665550908837948e-06, |
| "loss": 4.7566, |
| "step": 77400 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.1426669359207153, |
| "learning_rate": 3.6573668660845087e-06, |
| "loss": 4.7535, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.1960771083831787, |
| "learning_rate": 3.6491828233310694e-06, |
| "loss": 4.7559, |
| "step": 77600 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1451727151870728, |
| "learning_rate": 3.6409987805776296e-06, |
| "loss": 4.7591, |
| "step": 77700 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.3886533975601196, |
| "learning_rate": 3.6328147378241908e-06, |
| "loss": 4.7553, |
| "step": 77800 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1393433809280396, |
| "learning_rate": 3.6246306950707514e-06, |
| "loss": 4.7581, |
| "step": 77900 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.2284306287765503, |
| "learning_rate": 3.616446652317312e-06, |
| "loss": 4.7566, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.0621302127838135, |
| "learning_rate": 3.608262609563873e-06, |
| "loss": 4.7532, |
| "step": 78100 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1183658838272095, |
| "learning_rate": 3.600078566810433e-06, |
| "loss": 4.7583, |
| "step": 78200 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1688071489334106, |
| "learning_rate": 3.591894524056994e-06, |
| "loss": 4.7542, |
| "step": 78300 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 3.1448984146118164, |
| "learning_rate": 3.5837104813035545e-06, |
| "loss": 4.7592, |
| "step": 78400 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1687928438186646, |
| "learning_rate": 3.5755264385501156e-06, |
| "loss": 4.7553, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1805286407470703, |
| "learning_rate": 3.567342395796676e-06, |
| "loss": 4.7544, |
| "step": 78600 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.4032955169677734, |
| "learning_rate": 3.5591583530432365e-06, |
| "loss": 4.7569, |
| "step": 78700 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 3.784090757369995, |
| "learning_rate": 3.550974310289797e-06, |
| "loss": 4.7565, |
| "step": 78800 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.2469580173492432, |
| "learning_rate": 3.542790267536358e-06, |
| "loss": 4.7485, |
| "step": 78900 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.6100597381591797, |
| "learning_rate": 3.5346062247829186e-06, |
| "loss": 4.7561, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4072147607803345, |
| "learning_rate": 3.5264221820294793e-06, |
| "loss": 4.7556, |
| "step": 79100 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.2158293724060059, |
| "learning_rate": 3.51823813927604e-06, |
| "loss": 4.7503, |
| "step": 79200 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4874674081802368, |
| "learning_rate": 3.5100540965226003e-06, |
| "loss": 4.7547, |
| "step": 79300 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.221482515335083, |
| "learning_rate": 3.5018700537691614e-06, |
| "loss": 4.7559, |
| "step": 79400 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.1589709520339966, |
| "learning_rate": 3.4936860110157216e-06, |
| "loss": 4.7545, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.2871575355529785, |
| "learning_rate": 3.4855019682622828e-06, |
| "loss": 4.7522, |
| "step": 79600 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.7240387201309204, |
| "learning_rate": 3.477317925508843e-06, |
| "loss": 4.7566, |
| "step": 79700 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 6.059484004974365, |
| "learning_rate": 3.4691338827554037e-06, |
| "loss": 4.7566, |
| "step": 79800 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.1639505624771118, |
| "learning_rate": 3.4609498400019644e-06, |
| "loss": 4.7521, |
| "step": 79900 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.1786649227142334, |
| "learning_rate": 3.452765797248525e-06, |
| "loss": 4.7475, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.275763988494873, |
| "learning_rate": 3.4445817544950862e-06, |
| "loss": 4.7512, |
| "step": 80100 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.3286848068237305, |
| "learning_rate": 3.4363977117416465e-06, |
| "loss": 4.7573, |
| "step": 80200 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.2990089654922485, |
| "learning_rate": 3.428213668988207e-06, |
| "loss": 4.752, |
| "step": 80300 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.1798534393310547, |
| "learning_rate": 3.4200296262347674e-06, |
| "loss": 4.7566, |
| "step": 80400 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.029482841491699, |
| "learning_rate": 3.4118455834813286e-06, |
| "loss": 4.7528, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.3646825551986694, |
| "learning_rate": 3.403661540727889e-06, |
| "loss": 4.7567, |
| "step": 80600 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.1107501983642578, |
| "learning_rate": 3.39547749797445e-06, |
| "loss": 4.7518, |
| "step": 80700 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.7238624095916748, |
| "learning_rate": 3.38729345522101e-06, |
| "loss": 4.7535, |
| "step": 80800 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.2147496938705444, |
| "learning_rate": 3.379109412467571e-06, |
| "loss": 4.7529, |
| "step": 80900 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.3453848361968994, |
| "learning_rate": 3.3709253697141316e-06, |
| "loss": 4.7549, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.3312301635742188, |
| "learning_rate": 3.3627413269606923e-06, |
| "loss": 4.7533, |
| "step": 81100 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.0629857778549194, |
| "learning_rate": 3.3545572842072534e-06, |
| "loss": 4.754, |
| "step": 81200 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.145863652229309, |
| "learning_rate": 3.3463732414538136e-06, |
| "loss": 4.7561, |
| "step": 81300 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.1477543115615845, |
| "learning_rate": 3.3381891987003743e-06, |
| "loss": 4.7546, |
| "step": 81400 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.111903190612793, |
| "learning_rate": 3.330005155946935e-06, |
| "loss": 4.7523, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.4378135204315186, |
| "learning_rate": 3.3218211131934957e-06, |
| "loss": 4.7524, |
| "step": 81600 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.0920718908309937, |
| "learning_rate": 3.313637070440056e-06, |
| "loss": 4.7579, |
| "step": 81700 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.142166018486023, |
| "learning_rate": 3.305453027686617e-06, |
| "loss": 4.751, |
| "step": 81800 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.497532844543457, |
| "learning_rate": 3.2972689849331774e-06, |
| "loss": 4.7587, |
| "step": 81900 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.1811760663986206, |
| "learning_rate": 3.2890849421797385e-06, |
| "loss": 4.7565, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.4381294250488281, |
| "learning_rate": 3.2809008994262987e-06, |
| "loss": 4.7523, |
| "step": 82100 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.1105141639709473, |
| "learning_rate": 3.2727168566728594e-06, |
| "loss": 4.755, |
| "step": 82200 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.0709772109985352, |
| "learning_rate": 3.2645328139194206e-06, |
| "loss": 4.7518, |
| "step": 82300 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.3575836420059204, |
| "learning_rate": 3.256348771165981e-06, |
| "loss": 4.7538, |
| "step": 82400 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.1453856229782104, |
| "learning_rate": 3.2481647284125415e-06, |
| "loss": 4.7567, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.2590690851211548, |
| "learning_rate": 3.239980685659102e-06, |
| "loss": 4.7556, |
| "step": 82600 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.3445689678192139, |
| "learning_rate": 3.231796642905663e-06, |
| "loss": 4.7529, |
| "step": 82700 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.1240034103393555, |
| "learning_rate": 3.223612600152223e-06, |
| "loss": 4.7595, |
| "step": 82800 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.2913769483566284, |
| "learning_rate": 3.2154285573987843e-06, |
| "loss": 4.7523, |
| "step": 82900 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.2136105298995972, |
| "learning_rate": 3.2072445146453445e-06, |
| "loss": 4.7548, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.0630725622177124, |
| "learning_rate": 3.1990604718919057e-06, |
| "loss": 4.7551, |
| "step": 83100 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.495082139968872, |
| "learning_rate": 3.190876429138466e-06, |
| "loss": 4.7553, |
| "step": 83200 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.9895689487457275, |
| "learning_rate": 3.1826923863850266e-06, |
| "loss": 4.759, |
| "step": 83300 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.4668093919754028, |
| "learning_rate": 3.1745083436315877e-06, |
| "loss": 4.7561, |
| "step": 83400 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5256825685501099, |
| "learning_rate": 3.166324300878148e-06, |
| "loss": 4.7573, |
| "step": 83500 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 3.0631277561187744, |
| "learning_rate": 3.158140258124709e-06, |
| "loss": 4.7543, |
| "step": 83600 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.171787977218628, |
| "learning_rate": 3.1499562153712694e-06, |
| "loss": 4.7508, |
| "step": 83700 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 7.035879611968994, |
| "learning_rate": 3.14177217261783e-06, |
| "loss": 4.7519, |
| "step": 83800 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.3109359741210938, |
| "learning_rate": 3.1335881298643903e-06, |
| "loss": 4.7573, |
| "step": 83900 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.3658266067504883, |
| "learning_rate": 3.1254040871109514e-06, |
| "loss": 4.754, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6524670124053955, |
| "learning_rate": 3.1172200443575117e-06, |
| "loss": 4.7549, |
| "step": 84100 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.797340989112854, |
| "learning_rate": 3.109036001604073e-06, |
| "loss": 4.7499, |
| "step": 84200 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 3.3878042697906494, |
| "learning_rate": 3.100851958850633e-06, |
| "loss": 4.7518, |
| "step": 84300 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.5656503438949585, |
| "learning_rate": 3.0926679160971938e-06, |
| "loss": 4.7588, |
| "step": 84400 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.4081205129623413, |
| "learning_rate": 3.084483873343755e-06, |
| "loss": 4.7587, |
| "step": 84500 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.011707305908203, |
| "learning_rate": 3.076299830590315e-06, |
| "loss": 4.7525, |
| "step": 84600 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.1103359460830688, |
| "learning_rate": 3.0681157878368763e-06, |
| "loss": 4.7547, |
| "step": 84700 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.331764578819275, |
| "learning_rate": 3.0599317450834365e-06, |
| "loss": 4.7544, |
| "step": 84800 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.1731749773025513, |
| "learning_rate": 3.0517477023299972e-06, |
| "loss": 4.7527, |
| "step": 84900 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.4476029872894287, |
| "learning_rate": 3.043563659576558e-06, |
| "loss": 4.7529, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.6501026153564453, |
| "learning_rate": 3.0353796168231186e-06, |
| "loss": 4.7495, |
| "step": 85100 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.4330114126205444, |
| "learning_rate": 3.027195574069679e-06, |
| "loss": 4.7551, |
| "step": 85200 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.794797420501709, |
| "learning_rate": 3.01901153131624e-06, |
| "loss": 4.7554, |
| "step": 85300 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.1396609544754028, |
| "learning_rate": 3.0108274885628003e-06, |
| "loss": 4.7521, |
| "step": 85400 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.5291541814804077, |
| "learning_rate": 3.0026434458093614e-06, |
| "loss": 4.7538, |
| "step": 85500 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.9390245676040649, |
| "learning_rate": 2.9944594030559216e-06, |
| "loss": 4.7499, |
| "step": 85600 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 4.141879558563232, |
| "learning_rate": 2.9862753603024823e-06, |
| "loss": 4.7587, |
| "step": 85700 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.151954412460327, |
| "learning_rate": 2.9780913175490434e-06, |
| "loss": 4.7525, |
| "step": 85800 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.340173363685608, |
| "learning_rate": 2.9699072747956037e-06, |
| "loss": 4.7519, |
| "step": 85900 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9204574823379517, |
| "learning_rate": 2.9617232320421644e-06, |
| "loss": 4.7583, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.391839623451233, |
| "learning_rate": 2.953539189288725e-06, |
| "loss": 4.751, |
| "step": 86100 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.4064441919326782, |
| "learning_rate": 2.9453551465352858e-06, |
| "loss": 4.7506, |
| "step": 86200 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.2319107055664062, |
| "learning_rate": 2.937171103781846e-06, |
| "loss": 4.7551, |
| "step": 86300 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.515320301055908, |
| "learning_rate": 2.928987061028407e-06, |
| "loss": 4.7517, |
| "step": 86400 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.4007177352905273, |
| "learning_rate": 2.9208030182749674e-06, |
| "loss": 4.7513, |
| "step": 86500 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.4867286682128906, |
| "learning_rate": 2.9126189755215285e-06, |
| "loss": 4.7549, |
| "step": 86600 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.2570160627365112, |
| "learning_rate": 2.904434932768089e-06, |
| "loss": 4.753, |
| "step": 86700 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.847069025039673, |
| "learning_rate": 2.8962508900146495e-06, |
| "loss": 4.7555, |
| "step": 86800 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.0997235774993896, |
| "learning_rate": 2.8880668472612106e-06, |
| "loss": 4.7532, |
| "step": 86900 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.8394368886947632, |
| "learning_rate": 2.879882804507771e-06, |
| "loss": 4.7504, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 3.6865549087524414, |
| "learning_rate": 2.871698761754332e-06, |
| "loss": 4.7567, |
| "step": 87100 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 3.022850275039673, |
| "learning_rate": 2.8635147190008923e-06, |
| "loss": 4.7509, |
| "step": 87200 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.7531808614730835, |
| "learning_rate": 2.855330676247453e-06, |
| "loss": 4.7527, |
| "step": 87300 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.0469372272491455, |
| "learning_rate": 2.8471466334940136e-06, |
| "loss": 4.7564, |
| "step": 87400 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4322601556777954, |
| "learning_rate": 2.8389625907405743e-06, |
| "loss": 4.7552, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.2034333944320679, |
| "learning_rate": 2.8307785479871346e-06, |
| "loss": 4.7555, |
| "step": 87600 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.0759299993515015, |
| "learning_rate": 2.8225945052336957e-06, |
| "loss": 4.7508, |
| "step": 87700 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.1701573133468628, |
| "learning_rate": 2.814410462480256e-06, |
| "loss": 4.7535, |
| "step": 87800 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4818124771118164, |
| "learning_rate": 2.8062264197268167e-06, |
| "loss": 4.7528, |
| "step": 87900 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.362298846244812, |
| "learning_rate": 2.7980423769733778e-06, |
| "loss": 4.7488, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.9951609969139099, |
| "learning_rate": 2.789858334219938e-06, |
| "loss": 4.7509, |
| "step": 88100 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.2555766105651855, |
| "learning_rate": 2.781674291466499e-06, |
| "loss": 4.7559, |
| "step": 88200 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.8623309135437012, |
| "learning_rate": 2.7734902487130594e-06, |
| "loss": 4.7489, |
| "step": 88300 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.2883721590042114, |
| "learning_rate": 2.76530620595962e-06, |
| "loss": 4.751, |
| "step": 88400 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.1867636442184448, |
| "learning_rate": 2.757122163206181e-06, |
| "loss": 4.7524, |
| "step": 88500 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.4036273956298828, |
| "learning_rate": 2.7489381204527415e-06, |
| "loss": 4.755, |
| "step": 88600 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.2148162126541138, |
| "learning_rate": 2.7407540776993018e-06, |
| "loss": 4.7582, |
| "step": 88700 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 2.2214956283569336, |
| "learning_rate": 2.732570034945863e-06, |
| "loss": 4.7543, |
| "step": 88800 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.103264331817627, |
| "learning_rate": 2.724385992192423e-06, |
| "loss": 4.7468, |
| "step": 88900 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.3318493366241455, |
| "learning_rate": 2.7162019494389843e-06, |
| "loss": 4.7547, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.7869521379470825, |
| "learning_rate": 2.708017906685545e-06, |
| "loss": 4.7528, |
| "step": 89100 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.0730737447738647, |
| "learning_rate": 2.6998338639321052e-06, |
| "loss": 4.7554, |
| "step": 89200 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.677322268486023, |
| "learning_rate": 2.6916498211786663e-06, |
| "loss": 4.7574, |
| "step": 89300 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.7166889905929565, |
| "learning_rate": 2.6834657784252266e-06, |
| "loss": 4.7563, |
| "step": 89400 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.3023245334625244, |
| "learning_rate": 2.6752817356717877e-06, |
| "loss": 4.7569, |
| "step": 89500 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.2815351486206055, |
| "learning_rate": 2.667097692918348e-06, |
| "loss": 4.7568, |
| "step": 89600 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.1161749362945557, |
| "learning_rate": 2.6589136501649087e-06, |
| "loss": 4.7536, |
| "step": 89700 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.4548900127410889, |
| "learning_rate": 2.650729607411469e-06, |
| "loss": 4.7566, |
| "step": 89800 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 8.324539184570312, |
| "learning_rate": 2.64254556465803e-06, |
| "loss": 4.7539, |
| "step": 89900 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.0228288173675537, |
| "learning_rate": 2.6343615219045903e-06, |
| "loss": 4.7514, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.1695142984390259, |
| "learning_rate": 2.6261774791511514e-06, |
| "loss": 4.7519, |
| "step": 90100 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.6865144968032837, |
| "learning_rate": 2.617993436397712e-06, |
| "loss": 4.7557, |
| "step": 90200 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.9601481556892395, |
| "learning_rate": 2.6098093936442724e-06, |
| "loss": 4.7518, |
| "step": 90300 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.0379222631454468, |
| "learning_rate": 2.6016253508908335e-06, |
| "loss": 4.7521, |
| "step": 90400 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.6704763174057007, |
| "learning_rate": 2.5934413081373938e-06, |
| "loss": 4.7526, |
| "step": 90500 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.0544642210006714, |
| "learning_rate": 2.585257265383955e-06, |
| "loss": 4.7529, |
| "step": 90600 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.2152049541473389, |
| "learning_rate": 2.577073222630515e-06, |
| "loss": 4.7557, |
| "step": 90700 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.1299751996994019, |
| "learning_rate": 2.568889179877076e-06, |
| "loss": 4.7552, |
| "step": 90800 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.3130440711975098, |
| "learning_rate": 2.5607051371236365e-06, |
| "loss": 4.7512, |
| "step": 90900 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.1738765239715576, |
| "learning_rate": 2.5525210943701972e-06, |
| "loss": 4.7478, |
| "step": 91000 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.3825798034667969, |
| "learning_rate": 2.5443370516167575e-06, |
| "loss": 4.7545, |
| "step": 91100 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.2850853204727173, |
| "learning_rate": 2.5361530088633186e-06, |
| "loss": 4.7546, |
| "step": 91200 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.215085506439209, |
| "learning_rate": 2.5279689661098793e-06, |
| "loss": 4.7488, |
| "step": 91300 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.4124336242675781, |
| "learning_rate": 2.5197849233564396e-06, |
| "loss": 4.7441, |
| "step": 91400 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.5708861351013184, |
| "learning_rate": 2.5116008806030007e-06, |
| "loss": 4.7553, |
| "step": 91500 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.9249249696731567, |
| "learning_rate": 2.503416837849561e-06, |
| "loss": 4.7565, |
| "step": 91600 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.1398611068725586, |
| "learning_rate": 2.4952327950961216e-06, |
| "loss": 4.7513, |
| "step": 91700 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.037564516067505, |
| "learning_rate": 2.4870487523426827e-06, |
| "loss": 4.7517, |
| "step": 91800 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.4297902584075928, |
| "learning_rate": 2.478864709589243e-06, |
| "loss": 4.7494, |
| "step": 91900 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.369734764099121, |
| "learning_rate": 2.4706806668358037e-06, |
| "loss": 4.7511, |
| "step": 92000 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.1665796041488647, |
| "learning_rate": 2.4624966240823644e-06, |
| "loss": 4.7576, |
| "step": 92100 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.085404396057129, |
| "learning_rate": 2.454312581328925e-06, |
| "loss": 4.7535, |
| "step": 92200 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 5.764316082000732, |
| "learning_rate": 2.4461285385754858e-06, |
| "loss": 4.7497, |
| "step": 92300 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.3492110967636108, |
| "learning_rate": 2.4379444958220465e-06, |
| "loss": 4.7558, |
| "step": 92400 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.0760524272918701, |
| "learning_rate": 2.429760453068607e-06, |
| "loss": 4.7525, |
| "step": 92500 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.2596811056137085, |
| "learning_rate": 2.4215764103151674e-06, |
| "loss": 4.7532, |
| "step": 92600 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.0836505889892578, |
| "learning_rate": 2.413392367561728e-06, |
| "loss": 4.7506, |
| "step": 92700 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.759760618209839, |
| "learning_rate": 2.405208324808289e-06, |
| "loss": 4.7493, |
| "step": 92800 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.3454488515853882, |
| "learning_rate": 2.39702428205485e-06, |
| "loss": 4.7539, |
| "step": 92900 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.2812906503677368, |
| "learning_rate": 2.3888402393014106e-06, |
| "loss": 4.7509, |
| "step": 93000 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.247383952140808, |
| "learning_rate": 2.380656196547971e-06, |
| "loss": 4.7493, |
| "step": 93100 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.803625226020813, |
| "learning_rate": 2.3724721537945316e-06, |
| "loss": 4.7527, |
| "step": 93200 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.4045066833496094, |
| "learning_rate": 2.3642881110410922e-06, |
| "loss": 4.7578, |
| "step": 93300 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.0578811168670654, |
| "learning_rate": 2.356104068287653e-06, |
| "loss": 4.7539, |
| "step": 93400 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.907444477081299, |
| "learning_rate": 2.3479200255342136e-06, |
| "loss": 4.7534, |
| "step": 93500 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3155359029769897, |
| "learning_rate": 2.3397359827807743e-06, |
| "loss": 4.7543, |
| "step": 93600 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.2676302194595337, |
| "learning_rate": 2.331551940027335e-06, |
| "loss": 4.7501, |
| "step": 93700 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.1166573762893677, |
| "learning_rate": 2.3233678972738953e-06, |
| "loss": 4.7518, |
| "step": 93800 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3180181980133057, |
| "learning_rate": 2.315183854520456e-06, |
| "loss": 4.7548, |
| "step": 93900 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.867478132247925, |
| "learning_rate": 2.306999811767017e-06, |
| "loss": 4.7551, |
| "step": 94000 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3548469543457031, |
| "learning_rate": 2.2988157690135778e-06, |
| "loss": 4.7524, |
| "step": 94100 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 3.94809627532959, |
| "learning_rate": 2.290631726260138e-06, |
| "loss": 4.7516, |
| "step": 94200 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.0845712423324585, |
| "learning_rate": 2.2824476835066987e-06, |
| "loss": 4.7549, |
| "step": 94300 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.9430265426635742, |
| "learning_rate": 2.2742636407532594e-06, |
| "loss": 4.7552, |
| "step": 94400 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.1491626501083374, |
| "learning_rate": 2.26607959799982e-06, |
| "loss": 4.7533, |
| "step": 94500 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.323564887046814, |
| "learning_rate": 2.257895555246381e-06, |
| "loss": 4.7502, |
| "step": 94600 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.2415287494659424, |
| "learning_rate": 2.2497115124929415e-06, |
| "loss": 4.754, |
| "step": 94700 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.1996134519577026, |
| "learning_rate": 2.241527469739502e-06, |
| "loss": 4.7475, |
| "step": 94800 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.3265007734298706, |
| "learning_rate": 2.2333434269860624e-06, |
| "loss": 4.7504, |
| "step": 94900 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.0656216144561768, |
| "learning_rate": 2.225159384232623e-06, |
| "loss": 4.7536, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.7077275514602661, |
| "learning_rate": 2.2169753414791843e-06, |
| "loss": 4.7559, |
| "step": 95100 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.9614852070808411, |
| "learning_rate": 2.208791298725745e-06, |
| "loss": 4.753, |
| "step": 95200 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.010793685913086, |
| "learning_rate": 2.2006072559723056e-06, |
| "loss": 4.7531, |
| "step": 95300 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.7269645929336548, |
| "learning_rate": 2.192423213218866e-06, |
| "loss": 4.7525, |
| "step": 95400 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.1839239597320557, |
| "learning_rate": 2.1842391704654266e-06, |
| "loss": 4.7516, |
| "step": 95500 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.0646226406097412, |
| "learning_rate": 2.1760551277119873e-06, |
| "loss": 4.7489, |
| "step": 95600 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.2255668640136719, |
| "learning_rate": 2.167871084958548e-06, |
| "loss": 4.7525, |
| "step": 95700 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.5146337747573853, |
| "learning_rate": 2.1596870422051087e-06, |
| "loss": 4.7524, |
| "step": 95800 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.578728437423706, |
| "learning_rate": 2.1515029994516693e-06, |
| "loss": 4.7537, |
| "step": 95900 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.3910084962844849, |
| "learning_rate": 2.14331895669823e-06, |
| "loss": 4.7557, |
| "step": 96000 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6304432153701782, |
| "learning_rate": 2.1351349139447903e-06, |
| "loss": 4.7509, |
| "step": 96100 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6290279626846313, |
| "learning_rate": 2.1269508711913514e-06, |
| "loss": 4.7499, |
| "step": 96200 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.312935471534729, |
| "learning_rate": 2.118766828437912e-06, |
| "loss": 4.7512, |
| "step": 96300 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.8677687644958496, |
| "learning_rate": 2.110582785684473e-06, |
| "loss": 4.7507, |
| "step": 96400 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.544320583343506, |
| "learning_rate": 2.1023987429310335e-06, |
| "loss": 4.7503, |
| "step": 96500 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.5052340030670166, |
| "learning_rate": 2.0942147001775938e-06, |
| "loss": 4.7543, |
| "step": 96600 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.0886638164520264, |
| "learning_rate": 2.0860306574241544e-06, |
| "loss": 4.7513, |
| "step": 96700 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.1290991306304932, |
| "learning_rate": 2.077846614670715e-06, |
| "loss": 4.7486, |
| "step": 96800 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.968976616859436, |
| "learning_rate": 2.069662571917276e-06, |
| "loss": 4.7549, |
| "step": 96900 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.1029621362686157, |
| "learning_rate": 2.0614785291638365e-06, |
| "loss": 4.7545, |
| "step": 97000 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5654712915420532, |
| "learning_rate": 2.053294486410397e-06, |
| "loss": 4.7516, |
| "step": 97100 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.3423889875411987, |
| "learning_rate": 2.045110443656958e-06, |
| "loss": 4.7529, |
| "step": 97200 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.2194217443466187, |
| "learning_rate": 2.0369264009035186e-06, |
| "loss": 4.7503, |
| "step": 97300 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0679503679275513, |
| "learning_rate": 2.0287423581500793e-06, |
| "loss": 4.7515, |
| "step": 97400 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.2756659984588623, |
| "learning_rate": 2.02055831539664e-06, |
| "loss": 4.752, |
| "step": 97500 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.315800428390503, |
| "learning_rate": 2.0123742726432007e-06, |
| "loss": 4.7554, |
| "step": 97600 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.2954620122909546, |
| "learning_rate": 2.0041902298897614e-06, |
| "loss": 4.7525, |
| "step": 97700 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.1520215272903442, |
| "learning_rate": 1.9960061871363216e-06, |
| "loss": 4.7548, |
| "step": 97800 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.7471413612365723, |
| "learning_rate": 1.9878221443828823e-06, |
| "loss": 4.7549, |
| "step": 97900 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0936230421066284, |
| "learning_rate": 1.979638101629443e-06, |
| "loss": 4.753, |
| "step": 98000 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.110677719116211, |
| "learning_rate": 1.9714540588760037e-06, |
| "loss": 4.7524, |
| "step": 98100 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.094068169593811, |
| "learning_rate": 1.9632700161225644e-06, |
| "loss": 4.7508, |
| "step": 98200 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.3435810804367065, |
| "learning_rate": 1.955085973369125e-06, |
| "loss": 4.7504, |
| "step": 98300 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.7671642303466797, |
| "learning_rate": 1.9469019306156858e-06, |
| "loss": 4.7504, |
| "step": 98400 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.0996792316436768, |
| "learning_rate": 1.9387178878622464e-06, |
| "loss": 4.7468, |
| "step": 98500 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.256888508796692, |
| "learning_rate": 1.930533845108807e-06, |
| "loss": 4.7501, |
| "step": 98600 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4650033712387085, |
| "learning_rate": 1.922349802355368e-06, |
| "loss": 4.7493, |
| "step": 98700 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.3852843046188354, |
| "learning_rate": 1.9141657596019285e-06, |
| "loss": 4.7553, |
| "step": 98800 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.2050074338912964, |
| "learning_rate": 1.905981716848489e-06, |
| "loss": 4.7481, |
| "step": 98900 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.955382823944092, |
| "learning_rate": 1.8977976740950497e-06, |
| "loss": 4.7498, |
| "step": 99000 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.6441978216171265, |
| "learning_rate": 1.8896136313416102e-06, |
| "loss": 4.7498, |
| "step": 99100 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.577948808670044, |
| "learning_rate": 1.8814295885881709e-06, |
| "loss": 4.7552, |
| "step": 99200 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.3677524328231812, |
| "learning_rate": 1.8732455458347315e-06, |
| "loss": 4.7555, |
| "step": 99300 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4369767904281616, |
| "learning_rate": 1.8650615030812922e-06, |
| "loss": 4.7499, |
| "step": 99400 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.5186824798583984, |
| "learning_rate": 1.8568774603278531e-06, |
| "loss": 4.7539, |
| "step": 99500 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.2422914505004883, |
| "learning_rate": 1.8486934175744136e-06, |
| "loss": 4.7557, |
| "step": 99600 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.3044426441192627, |
| "learning_rate": 1.8405093748209743e-06, |
| "loss": 4.7525, |
| "step": 99700 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 3.3080742359161377, |
| "learning_rate": 1.832325332067535e-06, |
| "loss": 4.7509, |
| "step": 99800 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.1785410642623901, |
| "learning_rate": 1.8241412893140955e-06, |
| "loss": 4.7519, |
| "step": 99900 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.4587723016738892, |
| "learning_rate": 1.8159572465606562e-06, |
| "loss": 4.7484, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 3.0926313400268555, |
| "learning_rate": 1.8077732038072169e-06, |
| "loss": 4.7562, |
| "step": 100100 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.2200719118118286, |
| "learning_rate": 1.7995891610537775e-06, |
| "loss": 4.7584, |
| "step": 100200 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.3386414051055908, |
| "learning_rate": 1.791405118300338e-06, |
| "loss": 4.7458, |
| "step": 100300 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.484997034072876, |
| "learning_rate": 1.7832210755468987e-06, |
| "loss": 4.7449, |
| "step": 100400 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.519181489944458, |
| "learning_rate": 1.7750370327934594e-06, |
| "loss": 4.7529, |
| "step": 100500 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.090131998062134, |
| "learning_rate": 1.7668529900400199e-06, |
| "loss": 4.756, |
| "step": 100600 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.105173110961914, |
| "learning_rate": 1.7586689472865808e-06, |
| "loss": 4.754, |
| "step": 100700 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.1731809377670288, |
| "learning_rate": 1.7504849045331415e-06, |
| "loss": 4.7607, |
| "step": 100800 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.1397889852523804, |
| "learning_rate": 1.7423008617797022e-06, |
| "loss": 4.7506, |
| "step": 100900 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.1067900657653809, |
| "learning_rate": 1.7341168190262629e-06, |
| "loss": 4.7521, |
| "step": 101000 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.0581586360931396, |
| "learning_rate": 1.7259327762728233e-06, |
| "loss": 4.7531, |
| "step": 101100 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.9792087078094482, |
| "learning_rate": 1.717748733519384e-06, |
| "loss": 4.7541, |
| "step": 101200 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.133318305015564, |
| "learning_rate": 1.7095646907659447e-06, |
| "loss": 4.7554, |
| "step": 101300 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.241073489189148, |
| "learning_rate": 1.7013806480125052e-06, |
| "loss": 4.7493, |
| "step": 101400 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.2004437446594238, |
| "learning_rate": 1.6931966052590659e-06, |
| "loss": 4.7515, |
| "step": 101500 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.545440912246704, |
| "learning_rate": 1.6850125625056266e-06, |
| "loss": 4.7498, |
| "step": 101600 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.2501575946807861, |
| "learning_rate": 1.6768285197521873e-06, |
| "loss": 4.7457, |
| "step": 101700 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.2254008054733276, |
| "learning_rate": 1.6686444769987482e-06, |
| "loss": 4.754, |
| "step": 101800 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.8597551584243774, |
| "learning_rate": 1.6604604342453086e-06, |
| "loss": 4.7545, |
| "step": 101900 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5887017250061035, |
| "learning_rate": 1.6522763914918693e-06, |
| "loss": 4.7491, |
| "step": 102000 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3773962259292603, |
| "learning_rate": 1.64409234873843e-06, |
| "loss": 4.753, |
| "step": 102100 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.1974895000457764, |
| "learning_rate": 1.6359083059849907e-06, |
| "loss": 4.7563, |
| "step": 102200 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3141651153564453, |
| "learning_rate": 1.6277242632315512e-06, |
| "loss": 4.7483, |
| "step": 102300 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.256546974182129, |
| "learning_rate": 1.6195402204781119e-06, |
| "loss": 4.747, |
| "step": 102400 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.344313859939575, |
| "learning_rate": 1.6113561777246726e-06, |
| "loss": 4.7458, |
| "step": 102500 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.533346176147461, |
| "learning_rate": 1.603172134971233e-06, |
| "loss": 4.7488, |
| "step": 102600 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.1802254915237427, |
| "learning_rate": 1.5949880922177937e-06, |
| "loss": 4.7503, |
| "step": 102700 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.1822803020477295, |
| "learning_rate": 1.5868040494643544e-06, |
| "loss": 4.7526, |
| "step": 102800 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3468492031097412, |
| "learning_rate": 1.5786200067109153e-06, |
| "loss": 4.7511, |
| "step": 102900 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 4.000704765319824, |
| "learning_rate": 1.570435963957476e-06, |
| "loss": 4.754, |
| "step": 103000 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.139367699623108, |
| "learning_rate": 1.5622519212040365e-06, |
| "loss": 4.7488, |
| "step": 103100 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 4.506742000579834, |
| "learning_rate": 1.5540678784505972e-06, |
| "loss": 4.7518, |
| "step": 103200 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.9105794429779053, |
| "learning_rate": 1.5458838356971579e-06, |
| "loss": 4.751, |
| "step": 103300 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.203366994857788, |
| "learning_rate": 1.5376997929437184e-06, |
| "loss": 4.7505, |
| "step": 103400 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.2069025039672852, |
| "learning_rate": 1.529515750190279e-06, |
| "loss": 4.7502, |
| "step": 103500 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.0046311616897583, |
| "learning_rate": 1.5213317074368397e-06, |
| "loss": 4.7522, |
| "step": 103600 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.022199869155884, |
| "learning_rate": 1.5131476646834004e-06, |
| "loss": 4.7543, |
| "step": 103700 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.0690085887908936, |
| "learning_rate": 1.504963621929961e-06, |
| "loss": 4.7497, |
| "step": 103800 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.2978872060775757, |
| "learning_rate": 1.4967795791765216e-06, |
| "loss": 4.7511, |
| "step": 103900 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.110472321510315, |
| "learning_rate": 1.4885955364230825e-06, |
| "loss": 4.7504, |
| "step": 104000 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.129612684249878, |
| "learning_rate": 1.4804114936696432e-06, |
| "loss": 4.7538, |
| "step": 104100 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.1347908973693848, |
| "learning_rate": 1.4722274509162037e-06, |
| "loss": 4.7535, |
| "step": 104200 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.1420745849609375, |
| "learning_rate": 1.4640434081627644e-06, |
| "loss": 4.7491, |
| "step": 104300 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.80501127243042, |
| "learning_rate": 1.455859365409325e-06, |
| "loss": 4.7507, |
| "step": 104400 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.399776816368103, |
| "learning_rate": 1.4476753226558857e-06, |
| "loss": 4.7523, |
| "step": 104500 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.2114007472991943, |
| "learning_rate": 1.4394912799024462e-06, |
| "loss": 4.7522, |
| "step": 104600 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.078600525856018, |
| "learning_rate": 1.431307237149007e-06, |
| "loss": 4.7535, |
| "step": 104700 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.3322091102600098, |
| "learning_rate": 1.4231231943955676e-06, |
| "loss": 4.7511, |
| "step": 104800 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.2436057329177856, |
| "learning_rate": 1.4149391516421283e-06, |
| "loss": 4.7501, |
| "step": 104900 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.163930058479309, |
| "learning_rate": 1.4067551088886888e-06, |
| "loss": 4.7533, |
| "step": 105000 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.1139936447143555, |
| "learning_rate": 1.3985710661352497e-06, |
| "loss": 4.7474, |
| "step": 105100 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.704499363899231, |
| "learning_rate": 1.3903870233818104e-06, |
| "loss": 4.7524, |
| "step": 105200 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.2708555459976196, |
| "learning_rate": 1.382202980628371e-06, |
| "loss": 4.7558, |
| "step": 105300 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.6546807289123535, |
| "learning_rate": 1.3740189378749315e-06, |
| "loss": 4.7514, |
| "step": 105400 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.196606159210205, |
| "learning_rate": 1.3658348951214922e-06, |
| "loss": 4.7479, |
| "step": 105500 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.2983286380767822, |
| "learning_rate": 1.357650852368053e-06, |
| "loss": 4.7532, |
| "step": 105600 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.0857946872711182, |
| "learning_rate": 1.3494668096146136e-06, |
| "loss": 4.7531, |
| "step": 105700 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.606785535812378, |
| "learning_rate": 1.341282766861174e-06, |
| "loss": 4.7506, |
| "step": 105800 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.9557284116744995, |
| "learning_rate": 1.3330987241077348e-06, |
| "loss": 4.7553, |
| "step": 105900 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.19726824760437, |
| "learning_rate": 1.3249146813542955e-06, |
| "loss": 4.7524, |
| "step": 106000 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.0980935096740723, |
| "learning_rate": 1.316730638600856e-06, |
| "loss": 4.7515, |
| "step": 106100 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.3451943397521973, |
| "learning_rate": 1.3085465958474168e-06, |
| "loss": 4.7547, |
| "step": 106200 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.2886918783187866, |
| "learning_rate": 1.3003625530939775e-06, |
| "loss": 4.7505, |
| "step": 106300 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.1479195356369019, |
| "learning_rate": 1.2921785103405382e-06, |
| "loss": 4.7538, |
| "step": 106400 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.7975718975067139, |
| "learning_rate": 1.283994467587099e-06, |
| "loss": 4.7472, |
| "step": 106500 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.536818027496338, |
| "learning_rate": 1.2758104248336594e-06, |
| "loss": 4.7498, |
| "step": 106600 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.1835073232650757, |
| "learning_rate": 1.26762638208022e-06, |
| "loss": 4.751, |
| "step": 106700 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.418748378753662, |
| "learning_rate": 1.2594423393267808e-06, |
| "loss": 4.7506, |
| "step": 106800 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.7083241939544678, |
| "learning_rate": 1.2512582965733413e-06, |
| "loss": 4.757, |
| "step": 106900 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.9533259868621826, |
| "learning_rate": 1.243074253819902e-06, |
| "loss": 4.7491, |
| "step": 107000 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.549060344696045, |
| "learning_rate": 1.2348902110664629e-06, |
| "loss": 4.7502, |
| "step": 107100 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.22414231300354, |
| "learning_rate": 1.2267061683130233e-06, |
| "loss": 4.7512, |
| "step": 107200 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.5019216537475586, |
| "learning_rate": 1.218522125559584e-06, |
| "loss": 4.7551, |
| "step": 107300 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4612125158309937, |
| "learning_rate": 1.2103380828061447e-06, |
| "loss": 4.7517, |
| "step": 107400 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.41428542137146, |
| "learning_rate": 1.2021540400527052e-06, |
| "loss": 4.7522, |
| "step": 107500 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.1158181428909302, |
| "learning_rate": 1.193969997299266e-06, |
| "loss": 4.7534, |
| "step": 107600 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4456719160079956, |
| "learning_rate": 1.1857859545458268e-06, |
| "loss": 4.7515, |
| "step": 107700 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.0484957695007324, |
| "learning_rate": 1.1776019117923873e-06, |
| "loss": 4.7475, |
| "step": 107800 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.0839197635650635, |
| "learning_rate": 1.169417869038948e-06, |
| "loss": 4.7474, |
| "step": 107900 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4242494106292725, |
| "learning_rate": 1.1612338262855086e-06, |
| "loss": 4.7516, |
| "step": 108000 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.1142181158065796, |
| "learning_rate": 1.1530497835320691e-06, |
| "loss": 4.7513, |
| "step": 108100 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.2992855310440063, |
| "learning_rate": 1.14486574077863e-06, |
| "loss": 4.7497, |
| "step": 108200 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.1050403118133545, |
| "learning_rate": 1.1366816980251905e-06, |
| "loss": 4.7478, |
| "step": 108300 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.6111624240875244, |
| "learning_rate": 1.1284976552717512e-06, |
| "loss": 4.7521, |
| "step": 108400 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.6379482746124268, |
| "learning_rate": 1.1203136125183119e-06, |
| "loss": 4.7456, |
| "step": 108500 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.8351396322250366, |
| "learning_rate": 1.1121295697648726e-06, |
| "loss": 4.7518, |
| "step": 108600 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.1721076965332031, |
| "learning_rate": 1.1039455270114333e-06, |
| "loss": 4.7557, |
| "step": 108700 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.4993492364883423, |
| "learning_rate": 1.095761484257994e-06, |
| "loss": 4.7519, |
| "step": 108800 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.1917214393615723, |
| "learning_rate": 1.0875774415045544e-06, |
| "loss": 4.748, |
| "step": 108900 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.0404828786849976, |
| "learning_rate": 1.0793933987511151e-06, |
| "loss": 4.7565, |
| "step": 109000 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.5994240045547485, |
| "learning_rate": 1.0712093559976758e-06, |
| "loss": 4.7499, |
| "step": 109100 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.197583556175232, |
| "learning_rate": 1.0630253132442365e-06, |
| "loss": 4.7537, |
| "step": 109200 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.6032483577728271, |
| "learning_rate": 1.0548412704907972e-06, |
| "loss": 4.7542, |
| "step": 109300 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.39584481716156, |
| "learning_rate": 1.0466572277373579e-06, |
| "loss": 4.7493, |
| "step": 109400 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.410801649093628, |
| "learning_rate": 1.0384731849839184e-06, |
| "loss": 4.7559, |
| "step": 109500 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.246910810470581, |
| "learning_rate": 1.030289142230479e-06, |
| "loss": 4.751, |
| "step": 109600 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 4.328908920288086, |
| "learning_rate": 1.0221050994770397e-06, |
| "loss": 4.7496, |
| "step": 109700 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.5280972719192505, |
| "learning_rate": 1.0139210567236004e-06, |
| "loss": 4.7539, |
| "step": 109800 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.1216630935668945, |
| "learning_rate": 1.0057370139701611e-06, |
| "loss": 4.7523, |
| "step": 109900 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.4128057956695557, |
| "learning_rate": 9.975529712167218e-07, |
| "loss": 4.7492, |
| "step": 110000 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.2564375400543213, |
| "learning_rate": 9.893689284632823e-07, |
| "loss": 4.754, |
| "step": 110100 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.3144404888153076, |
| "learning_rate": 9.81184885709843e-07, |
| "loss": 4.7473, |
| "step": 110200 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.9776962399482727, |
| "learning_rate": 9.730008429564037e-07, |
| "loss": 4.7479, |
| "step": 110300 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.479701519012451, |
| "learning_rate": 9.648168002029644e-07, |
| "loss": 4.7514, |
| "step": 110400 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 3.217172145843506, |
| "learning_rate": 9.56632757449525e-07, |
| "loss": 4.7535, |
| "step": 110500 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.103346824645996, |
| "learning_rate": 9.484487146960856e-07, |
| "loss": 4.7474, |
| "step": 110600 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.1965771913528442, |
| "learning_rate": 9.402646719426463e-07, |
| "loss": 4.7475, |
| "step": 110700 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.2940635681152344, |
| "learning_rate": 9.320806291892069e-07, |
| "loss": 4.7484, |
| "step": 110800 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.06132972240448, |
| "learning_rate": 9.238965864357677e-07, |
| "loss": 4.7547, |
| "step": 110900 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.8715641498565674, |
| "learning_rate": 9.157125436823283e-07, |
| "loss": 4.749, |
| "step": 111000 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.1907116174697876, |
| "learning_rate": 9.07528500928889e-07, |
| "loss": 4.7539, |
| "step": 111100 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.5867308378219604, |
| "learning_rate": 8.993444581754496e-07, |
| "loss": 4.7562, |
| "step": 111200 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.2849870920181274, |
| "learning_rate": 8.911604154220103e-07, |
| "loss": 4.7512, |
| "step": 111300 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.3407094478607178, |
| "learning_rate": 8.829763726685708e-07, |
| "loss": 4.7543, |
| "step": 111400 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.0691750049591064, |
| "learning_rate": 8.747923299151316e-07, |
| "loss": 4.7451, |
| "step": 111500 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.0635693073272705, |
| "learning_rate": 8.666082871616922e-07, |
| "loss": 4.7521, |
| "step": 111600 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.5273666381835938, |
| "learning_rate": 8.584242444082529e-07, |
| "loss": 4.7551, |
| "step": 111700 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.7429158687591553, |
| "learning_rate": 8.502402016548135e-07, |
| "loss": 4.7544, |
| "step": 111800 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.0581636428833008, |
| "learning_rate": 8.420561589013741e-07, |
| "loss": 4.7532, |
| "step": 111900 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.3187443017959595, |
| "learning_rate": 8.338721161479348e-07, |
| "loss": 4.7551, |
| "step": 112000 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.2842453718185425, |
| "learning_rate": 8.256880733944956e-07, |
| "loss": 4.7482, |
| "step": 112100 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.264115571975708, |
| "learning_rate": 8.175040306410561e-07, |
| "loss": 4.7475, |
| "step": 112200 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.338619589805603, |
| "learning_rate": 8.093199878876168e-07, |
| "loss": 4.7465, |
| "step": 112300 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.2081668376922607, |
| "learning_rate": 8.011359451341774e-07, |
| "loss": 4.75, |
| "step": 112400 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.475716471672058, |
| "learning_rate": 7.92951902380738e-07, |
| "loss": 4.7475, |
| "step": 112500 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.1391123533248901, |
| "learning_rate": 7.847678596272988e-07, |
| "loss": 4.7529, |
| "step": 112600 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.5139949321746826, |
| "learning_rate": 7.765838168738595e-07, |
| "loss": 4.7457, |
| "step": 112700 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.250877857208252, |
| "learning_rate": 7.683997741204201e-07, |
| "loss": 4.7512, |
| "step": 112800 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.3660330772399902, |
| "learning_rate": 7.602157313669807e-07, |
| "loss": 4.7519, |
| "step": 112900 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.3007818460464478, |
| "learning_rate": 7.520316886135414e-07, |
| "loss": 4.7539, |
| "step": 113000 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.1533290147781372, |
| "learning_rate": 7.438476458601019e-07, |
| "loss": 4.7522, |
| "step": 113100 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.1087945699691772, |
| "learning_rate": 7.356636031066627e-07, |
| "loss": 4.7549, |
| "step": 113200 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.3991641998291016, |
| "learning_rate": 7.274795603532233e-07, |
| "loss": 4.7538, |
| "step": 113300 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.820162296295166, |
| "learning_rate": 7.19295517599784e-07, |
| "loss": 4.7521, |
| "step": 113400 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.1187483072280884, |
| "learning_rate": 7.111114748463446e-07, |
| "loss": 4.7577, |
| "step": 113500 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.0411303043365479, |
| "learning_rate": 7.029274320929053e-07, |
| "loss": 4.7514, |
| "step": 113600 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.2369052171707153, |
| "learning_rate": 6.947433893394661e-07, |
| "loss": 4.7497, |
| "step": 113700 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.505008578300476, |
| "learning_rate": 6.865593465860267e-07, |
| "loss": 4.7505, |
| "step": 113800 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.2643870115280151, |
| "learning_rate": 6.783753038325872e-07, |
| "loss": 4.7499, |
| "step": 113900 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.095914363861084, |
| "learning_rate": 6.701912610791479e-07, |
| "loss": 4.7478, |
| "step": 114000 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.4800920486450195, |
| "learning_rate": 6.620072183257085e-07, |
| "loss": 4.748, |
| "step": 114100 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.6267393827438354, |
| "learning_rate": 6.538231755722692e-07, |
| "loss": 4.7523, |
| "step": 114200 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.7788121700286865, |
| "learning_rate": 6.456391328188299e-07, |
| "loss": 4.7548, |
| "step": 114300 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.2876346111297607, |
| "learning_rate": 6.374550900653906e-07, |
| "loss": 4.7555, |
| "step": 114400 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.0011918544769287, |
| "learning_rate": 6.292710473119512e-07, |
| "loss": 4.7487, |
| "step": 114500 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.096606731414795, |
| "learning_rate": 6.210870045585119e-07, |
| "loss": 4.7527, |
| "step": 114600 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.2112175226211548, |
| "learning_rate": 6.129029618050726e-07, |
| "loss": 4.7482, |
| "step": 114700 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4160529375076294, |
| "learning_rate": 6.047189190516331e-07, |
| "loss": 4.7567, |
| "step": 114800 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.36955988407135, |
| "learning_rate": 5.965348762981938e-07, |
| "loss": 4.7562, |
| "step": 114900 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.577446460723877, |
| "learning_rate": 5.883508335447545e-07, |
| "loss": 4.7534, |
| "step": 115000 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.2918033599853516, |
| "learning_rate": 5.801667907913151e-07, |
| "loss": 4.7472, |
| "step": 115100 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.07747220993042, |
| "learning_rate": 5.719827480378758e-07, |
| "loss": 4.7495, |
| "step": 115200 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4656786918640137, |
| "learning_rate": 5.637987052844365e-07, |
| "loss": 4.7501, |
| "step": 115300 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.2017009258270264, |
| "learning_rate": 5.556146625309971e-07, |
| "loss": 4.7482, |
| "step": 115400 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.3578641414642334, |
| "learning_rate": 5.474306197775578e-07, |
| "loss": 4.7467, |
| "step": 115500 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.246904969215393, |
| "learning_rate": 5.392465770241185e-07, |
| "loss": 4.7564, |
| "step": 115600 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.289212942123413, |
| "learning_rate": 5.31062534270679e-07, |
| "loss": 4.7516, |
| "step": 115700 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.1466667652130127, |
| "learning_rate": 5.228784915172397e-07, |
| "loss": 4.7485, |
| "step": 115800 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.6222789287567139, |
| "learning_rate": 5.146944487638004e-07, |
| "loss": 4.7535, |
| "step": 115900 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.2257126569747925, |
| "learning_rate": 5.06510406010361e-07, |
| "loss": 4.7537, |
| "step": 116000 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.0743142366409302, |
| "learning_rate": 4.983263632569217e-07, |
| "loss": 4.7468, |
| "step": 116100 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.4326200485229492, |
| "learning_rate": 4.901423205034824e-07, |
| "loss": 4.7537, |
| "step": 116200 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.513900876045227, |
| "learning_rate": 4.81958277750043e-07, |
| "loss": 4.7492, |
| "step": 116300 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.3525514602661133, |
| "learning_rate": 4.7377423499660366e-07, |
| "loss": 4.7511, |
| "step": 116400 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.0643374919891357, |
| "learning_rate": 4.655901922431643e-07, |
| "loss": 4.756, |
| "step": 116500 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.0973927974700928, |
| "learning_rate": 4.57406149489725e-07, |
| "loss": 4.7512, |
| "step": 116600 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.498604416847229, |
| "learning_rate": 4.492221067362856e-07, |
| "loss": 4.7534, |
| "step": 116700 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.6713321208953857, |
| "learning_rate": 4.4103806398284626e-07, |
| "loss": 4.7497, |
| "step": 116800 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.2505509853363037, |
| "learning_rate": 4.3285402122940695e-07, |
| "loss": 4.7551, |
| "step": 116900 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.1062592267990112, |
| "learning_rate": 4.246699784759676e-07, |
| "loss": 4.7536, |
| "step": 117000 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.4121407270431519, |
| "learning_rate": 4.1648593572252823e-07, |
| "loss": 4.7548, |
| "step": 117100 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.2598930597305298, |
| "learning_rate": 4.083018929690889e-07, |
| "loss": 4.7559, |
| "step": 117200 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.1028268337249756, |
| "learning_rate": 4.0011785021564955e-07, |
| "loss": 4.7495, |
| "step": 117300 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.5532176494598389, |
| "learning_rate": 3.919338074622102e-07, |
| "loss": 4.7543, |
| "step": 117400 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.2546781301498413, |
| "learning_rate": 3.837497647087709e-07, |
| "loss": 4.7502, |
| "step": 117500 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.0731525421142578, |
| "learning_rate": 3.755657219553315e-07, |
| "loss": 4.7519, |
| "step": 117600 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.3289110660552979, |
| "learning_rate": 3.6738167920189216e-07, |
| "loss": 4.7495, |
| "step": 117700 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.2810921669006348, |
| "learning_rate": 3.5919763644845285e-07, |
| "loss": 4.7522, |
| "step": 117800 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.0300296545028687, |
| "learning_rate": 3.510135936950135e-07, |
| "loss": 4.7491, |
| "step": 117900 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.9749176502227783, |
| "learning_rate": 3.428295509415742e-07, |
| "loss": 4.7558, |
| "step": 118000 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.6398601531982422, |
| "learning_rate": 3.346455081881348e-07, |
| "loss": 4.7489, |
| "step": 118100 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.154733657836914, |
| "learning_rate": 3.2646146543469545e-07, |
| "loss": 4.747, |
| "step": 118200 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.1068767309188843, |
| "learning_rate": 3.1827742268125614e-07, |
| "loss": 4.7532, |
| "step": 118300 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.5782713890075684, |
| "learning_rate": 3.100933799278168e-07, |
| "loss": 4.7525, |
| "step": 118400 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.2185344696044922, |
| "learning_rate": 3.019093371743774e-07, |
| "loss": 4.7507, |
| "step": 118500 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.168750286102295, |
| "learning_rate": 2.9372529442093805e-07, |
| "loss": 4.7503, |
| "step": 118600 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.3794721364974976, |
| "learning_rate": 2.8554125166749874e-07, |
| "loss": 4.7539, |
| "step": 118700 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.0481081008911133, |
| "learning_rate": 2.773572089140594e-07, |
| "loss": 4.7493, |
| "step": 118800 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.1850849390029907, |
| "learning_rate": 2.6917316616062007e-07, |
| "loss": 4.752, |
| "step": 118900 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.3937416076660156, |
| "learning_rate": 2.609891234071807e-07, |
| "loss": 4.7518, |
| "step": 119000 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.0362590551376343, |
| "learning_rate": 2.5280508065374134e-07, |
| "loss": 4.7494, |
| "step": 119100 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.226879358291626, |
| "learning_rate": 2.4462103790030203e-07, |
| "loss": 4.7541, |
| "step": 119200 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.289158582687378, |
| "learning_rate": 2.3643699514686265e-07, |
| "loss": 4.7541, |
| "step": 119300 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.2052675485610962, |
| "learning_rate": 2.282529523934233e-07, |
| "loss": 4.7494, |
| "step": 119400 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.3836339712142944, |
| "learning_rate": 2.2006890963998397e-07, |
| "loss": 4.7501, |
| "step": 119500 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.075812578201294, |
| "learning_rate": 2.1188486688654464e-07, |
| "loss": 4.7466, |
| "step": 119600 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.0450024604797363, |
| "learning_rate": 2.0370082413310527e-07, |
| "loss": 4.7513, |
| "step": 119700 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6995435953140259, |
| "learning_rate": 1.9551678137966594e-07, |
| "loss": 4.7544, |
| "step": 119800 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.0586297512054443, |
| "learning_rate": 1.873327386262266e-07, |
| "loss": 4.7512, |
| "step": 119900 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.734843134880066, |
| "learning_rate": 1.7914869587278727e-07, |
| "loss": 4.752, |
| "step": 120000 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.8307639360427856, |
| "learning_rate": 1.709646531193479e-07, |
| "loss": 4.7486, |
| "step": 120100 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.0754849910736084, |
| "learning_rate": 1.6278061036590857e-07, |
| "loss": 4.7518, |
| "step": 120200 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.8558402061462402, |
| "learning_rate": 1.545965676124692e-07, |
| "loss": 4.7496, |
| "step": 120300 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.3178366422653198, |
| "learning_rate": 1.4641252485902987e-07, |
| "loss": 4.7474, |
| "step": 120400 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.176018238067627, |
| "learning_rate": 1.3822848210559053e-07, |
| "loss": 4.7517, |
| "step": 120500 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.0331361293792725, |
| "learning_rate": 1.300444393521512e-07, |
| "loss": 4.7445, |
| "step": 120600 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.2724260091781616, |
| "learning_rate": 1.2186039659871183e-07, |
| "loss": 4.7519, |
| "step": 120700 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.1402854919433594, |
| "learning_rate": 1.1367635384527251e-07, |
| "loss": 4.7547, |
| "step": 120800 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.6155171394348145, |
| "learning_rate": 1.0549231109183315e-07, |
| "loss": 4.7553, |
| "step": 120900 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.3488671779632568, |
| "learning_rate": 9.73082683383938e-08, |
| "loss": 4.7489, |
| "step": 121000 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.4476072788238525, |
| "learning_rate": 8.912422558495446e-08, |
| "loss": 4.7531, |
| "step": 121100 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.5881561040878296, |
| "learning_rate": 8.094018283151511e-08, |
| "loss": 4.7472, |
| "step": 121200 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.3526599407196045, |
| "learning_rate": 7.275614007807578e-08, |
| "loss": 4.7507, |
| "step": 121300 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.1709797382354736, |
| "learning_rate": 6.457209732463643e-08, |
| "loss": 4.7495, |
| "step": 121400 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.1517142057418823, |
| "learning_rate": 5.6388054571197084e-08, |
| "loss": 4.7534, |
| "step": 121500 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.6636133193969727, |
| "learning_rate": 4.820401181775774e-08, |
| "loss": 4.7524, |
| "step": 121600 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.1265342235565186, |
| "learning_rate": 4.001996906431839e-08, |
| "loss": 4.749, |
| "step": 121700 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4785326719284058, |
| "learning_rate": 3.183592631087905e-08, |
| "loss": 4.7507, |
| "step": 121800 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 3.0491678714752197, |
| "learning_rate": 2.3651883557439706e-08, |
| "loss": 4.7502, |
| "step": 121900 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.162163257598877, |
| "learning_rate": 1.5467840804000363e-08, |
| "loss": 4.7493, |
| "step": 122000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 122189, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "total_flos": 1.4891090143551898e+18, |
| "train_batch_size": 96, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|