| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9973474801061006, |
| "eval_steps": 500, |
| "global_step": 2826, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.010610079575596816, |
| "grad_norm": 4.634474754333496, |
| "learning_rate": 1.5901060070671379e-07, |
| "loss": 0.741, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021220159151193633, |
| "grad_norm": 2.9002726078033447, |
| "learning_rate": 3.356890459363958e-07, |
| "loss": 0.5551, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03183023872679045, |
| "grad_norm": 4.242003917694092, |
| "learning_rate": 5.123674911660778e-07, |
| "loss": 0.6185, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.042440318302387266, |
| "grad_norm": 3.8156638145446777, |
| "learning_rate": 6.890459363957598e-07, |
| "loss": 0.6358, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05305039787798409, |
| "grad_norm": 3.047624349594116, |
| "learning_rate": 8.657243816254418e-07, |
| "loss": 0.5922, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0636604774535809, |
| "grad_norm": 2.2943954467773438, |
| "learning_rate": 1.0424028268551239e-06, |
| "loss": 0.6282, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07427055702917772, |
| "grad_norm": 2.831937551498413, |
| "learning_rate": 1.2190812720848057e-06, |
| "loss": 0.5836, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08488063660477453, |
| "grad_norm": 3.941297769546509, |
| "learning_rate": 1.3957597173144876e-06, |
| "loss": 0.5836, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09549071618037135, |
| "grad_norm": 2.4598379135131836, |
| "learning_rate": 1.5724381625441699e-06, |
| "loss": 0.4983, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10610079575596817, |
| "grad_norm": 2.533829927444458, |
| "learning_rate": 1.7491166077738517e-06, |
| "loss": 0.6057, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11671087533156499, |
| "grad_norm": 2.412334442138672, |
| "learning_rate": 1.925795053003534e-06, |
| "loss": 0.5135, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1273209549071618, |
| "grad_norm": 2.7505877017974854, |
| "learning_rate": 2.1024734982332157e-06, |
| "loss": 0.4844, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13793103448275862, |
| "grad_norm": 2.701307535171509, |
| "learning_rate": 2.279151943462898e-06, |
| "loss": 0.5386, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14854111405835543, |
| "grad_norm": 2.8261961936950684, |
| "learning_rate": 2.45583038869258e-06, |
| "loss": 0.4774, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15915119363395225, |
| "grad_norm": 2.4490256309509277, |
| "learning_rate": 2.6325088339222617e-06, |
| "loss": 0.5035, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.16976127320954906, |
| "grad_norm": 2.418158769607544, |
| "learning_rate": 2.8091872791519436e-06, |
| "loss": 0.4897, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18037135278514588, |
| "grad_norm": 3.5972161293029785, |
| "learning_rate": 2.985865724381626e-06, |
| "loss": 0.5196, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1909814323607427, |
| "grad_norm": 2.814927577972412, |
| "learning_rate": 3.162544169611308e-06, |
| "loss": 0.4791, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.20159151193633953, |
| "grad_norm": 2.6151270866394043, |
| "learning_rate": 3.3392226148409896e-06, |
| "loss": 0.5024, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21220159151193635, |
| "grad_norm": 2.8331387042999268, |
| "learning_rate": 3.5159010600706715e-06, |
| "loss": 0.5781, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22281167108753316, |
| "grad_norm": 2.433027744293213, |
| "learning_rate": 3.6925795053003538e-06, |
| "loss": 0.4186, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.23342175066312998, |
| "grad_norm": 2.671696186065674, |
| "learning_rate": 3.869257950530036e-06, |
| "loss": 0.4819, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2440318302387268, |
| "grad_norm": 2.5337982177734375, |
| "learning_rate": 4.045936395759718e-06, |
| "loss": 0.547, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2546419098143236, |
| "grad_norm": 2.2034990787506104, |
| "learning_rate": 4.222614840989399e-06, |
| "loss": 0.5603, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.26525198938992045, |
| "grad_norm": 2.2893121242523193, |
| "learning_rate": 4.399293286219082e-06, |
| "loss": 0.4483, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.27586206896551724, |
| "grad_norm": 1.8757219314575195, |
| "learning_rate": 4.575971731448763e-06, |
| "loss": 0.5178, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2864721485411141, |
| "grad_norm": 2.3748602867126465, |
| "learning_rate": 4.752650176678445e-06, |
| "loss": 0.5264, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.29708222811671087, |
| "grad_norm": 3.0481033325195312, |
| "learning_rate": 4.929328621908128e-06, |
| "loss": 0.5124, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 2.682847023010254, |
| "learning_rate": 4.99993132201408e-06, |
| "loss": 0.4977, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3183023872679045, |
| "grad_norm": 2.472842216491699, |
| "learning_rate": 4.9995116368759e-06, |
| "loss": 0.5005, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.32891246684350134, |
| "grad_norm": 2.582815647125244, |
| "learning_rate": 4.998710485009401e-06, |
| "loss": 0.4857, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3395225464190981, |
| "grad_norm": 2.3572824001312256, |
| "learning_rate": 4.99752798868358e-06, |
| "loss": 0.4637, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.35013262599469497, |
| "grad_norm": 2.3432295322418213, |
| "learning_rate": 4.99596432836689e-06, |
| "loss": 0.4775, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.36074270557029176, |
| "grad_norm": 2.7486777305603027, |
| "learning_rate": 4.994019742699705e-06, |
| "loss": 0.5779, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3713527851458886, |
| "grad_norm": 2.3831562995910645, |
| "learning_rate": 4.991694528457891e-06, |
| "loss": 0.5057, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3819628647214854, |
| "grad_norm": 2.5414721965789795, |
| "learning_rate": 4.988989040507518e-06, |
| "loss": 0.5313, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3925729442970822, |
| "grad_norm": 2.4140472412109375, |
| "learning_rate": 4.985903691750697e-06, |
| "loss": 0.4441, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.40318302387267907, |
| "grad_norm": 2.4907593727111816, |
| "learning_rate": 4.982438953062572e-06, |
| "loss": 0.4778, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 2.579932928085327, |
| "learning_rate": 4.978595353219449e-06, |
| "loss": 0.4848, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4244031830238727, |
| "grad_norm": 2.5512266159057617, |
| "learning_rate": 4.974373478818098e-06, |
| "loss": 0.4891, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4350132625994695, |
| "grad_norm": 2.3293063640594482, |
| "learning_rate": 4.969773974186235e-06, |
| "loss": 0.4954, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.44562334217506633, |
| "grad_norm": 2.6347479820251465, |
| "learning_rate": 4.964797541284175e-06, |
| "loss": 0.5353, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4562334217506631, |
| "grad_norm": 2.7719151973724365, |
| "learning_rate": 4.959444939597712e-06, |
| "loss": 0.5726, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.46684350132625996, |
| "grad_norm": 2.1757211685180664, |
| "learning_rate": 4.953716986022204e-06, |
| "loss": 0.5642, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.47745358090185674, |
| "grad_norm": 2.432244300842285, |
| "learning_rate": 4.947614554737904e-06, |
| "loss": 0.4429, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4880636604774536, |
| "grad_norm": 1.972844123840332, |
| "learning_rate": 4.941138577076538e-06, |
| "loss": 0.4683, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4986737400530504, |
| "grad_norm": 2.484992742538452, |
| "learning_rate": 4.934290041379182e-06, |
| "loss": 0.4385, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5092838196286472, |
| "grad_norm": 2.0424418449401855, |
| "learning_rate": 4.92706999284541e-06, |
| "loss": 0.4935, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.519893899204244, |
| "grad_norm": 2.3754308223724365, |
| "learning_rate": 4.9194795333737925e-06, |
| "loss": 0.4548, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5305039787798409, |
| "grad_norm": 3.0801432132720947, |
| "learning_rate": 4.911519821393718e-06, |
| "loss": 0.5486, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5411140583554377, |
| "grad_norm": 2.2712507247924805, |
| "learning_rate": 4.9031920716886035e-06, |
| "loss": 0.5121, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5517241379310345, |
| "grad_norm": 2.0000548362731934, |
| "learning_rate": 4.894497555210499e-06, |
| "loss": 0.4495, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5623342175066313, |
| "grad_norm": 2.590303897857666, |
| "learning_rate": 4.8854375988861134e-06, |
| "loss": 0.5028, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5729442970822282, |
| "grad_norm": 2.377298355102539, |
| "learning_rate": 4.87601358541431e-06, |
| "loss": 0.5193, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.583554376657825, |
| "grad_norm": 2.966008186340332, |
| "learning_rate": 4.8662269530550825e-06, |
| "loss": 0.545, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5941644562334217, |
| "grad_norm": 2.250293254852295, |
| "learning_rate": 4.856079195410046e-06, |
| "loss": 0.5219, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6047745358090185, |
| "grad_norm": 2.437361240386963, |
| "learning_rate": 4.845571861194501e-06, |
| "loss": 0.4725, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 2.435994863510132, |
| "learning_rate": 4.834706554001065e-06, |
| "loss": 0.4232, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6259946949602122, |
| "grad_norm": 2.705902099609375, |
| "learning_rate": 4.823484932054937e-06, |
| "loss": 0.4834, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.636604774535809, |
| "grad_norm": 2.1471517086029053, |
| "learning_rate": 4.811908707960832e-06, |
| "loss": 0.5302, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6472148541114059, |
| "grad_norm": 2.0760443210601807, |
| "learning_rate": 4.799979648441602e-06, |
| "loss": 0.494, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6578249336870027, |
| "grad_norm": 2.334944009780884, |
| "learning_rate": 4.787699574068611e-06, |
| "loss": 0.487, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6684350132625995, |
| "grad_norm": 2.3444855213165283, |
| "learning_rate": 4.775070358983881e-06, |
| "loss": 0.4911, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6790450928381963, |
| "grad_norm": 2.127737045288086, |
| "learning_rate": 4.7620939306140696e-06, |
| "loss": 0.4744, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 2.2132568359375, |
| "learning_rate": 4.748772269376312e-06, |
| "loss": 0.4789, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7002652519893899, |
| "grad_norm": 1.9452372789382935, |
| "learning_rate": 4.735107408375977e-06, |
| "loss": 0.488, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7108753315649867, |
| "grad_norm": 2.7268893718719482, |
| "learning_rate": 4.721101433096381e-06, |
| "loss": 0.4462, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7214854111405835, |
| "grad_norm": 2.1095452308654785, |
| "learning_rate": 4.706756481080511e-06, |
| "loss": 0.5087, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7320954907161804, |
| "grad_norm": 2.278555154800415, |
| "learning_rate": 4.692074741604795e-06, |
| "loss": 0.5304, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7427055702917772, |
| "grad_norm": 2.455960512161255, |
| "learning_rate": 4.677058455344989e-06, |
| "loss": 0.5177, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.753315649867374, |
| "grad_norm": 2.1136856079101562, |
| "learning_rate": 4.661709914034209e-06, |
| "loss": 0.4841, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7639257294429708, |
| "grad_norm": 2.296614646911621, |
| "learning_rate": 4.646031460113175e-06, |
| "loss": 0.4544, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7745358090185677, |
| "grad_norm": 1.8733782768249512, |
| "learning_rate": 4.630025486372715e-06, |
| "loss": 0.4715, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7851458885941645, |
| "grad_norm": 2.526837110519409, |
| "learning_rate": 4.613694435588589e-06, |
| "loss": 0.4824, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7957559681697612, |
| "grad_norm": 2.2026150226593018, |
| "learning_rate": 4.597040800148679e-06, |
| "loss": 0.4852, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8063660477453581, |
| "grad_norm": 2.214277744293213, |
| "learning_rate": 4.580067121672607e-06, |
| "loss": 0.4134, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8169761273209549, |
| "grad_norm": 2.623305559158325, |
| "learning_rate": 4.562775990623847e-06, |
| "loss": 0.4493, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8275862068965517, |
| "grad_norm": 2.9433794021606445, |
| "learning_rate": 4.5451700459143735e-06, |
| "loss": 0.5255, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8381962864721485, |
| "grad_norm": 2.143739938735962, |
| "learning_rate": 4.527251974501923e-06, |
| "loss": 0.4503, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8488063660477454, |
| "grad_norm": 2.1592986583709717, |
| "learning_rate": 4.509024510979917e-06, |
| "loss": 0.4636, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8594164456233422, |
| "grad_norm": 2.2622759342193604, |
| "learning_rate": 4.4904904371601176e-06, |
| "loss": 0.4685, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.870026525198939, |
| "grad_norm": 2.3408522605895996, |
| "learning_rate": 4.4716525816480816e-06, |
| "loss": 0.5248, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8806366047745358, |
| "grad_norm": 2.5351459980010986, |
| "learning_rate": 4.4525138194114644e-06, |
| "loss": 0.4747, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8912466843501327, |
| "grad_norm": 2.4038591384887695, |
| "learning_rate": 4.4330770713412555e-06, |
| "loss": 0.4198, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9018567639257294, |
| "grad_norm": 2.2719292640686035, |
| "learning_rate": 4.413345303805996e-06, |
| "loss": 0.4545, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9124668435013262, |
| "grad_norm": 3.1209301948547363, |
| "learning_rate": 4.393321528199072e-06, |
| "loss": 0.5003, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 2.414945125579834, |
| "learning_rate": 4.373008800479118e-06, |
| "loss": 0.472, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9336870026525199, |
| "grad_norm": 2.21144437789917, |
| "learning_rate": 4.352410220703629e-06, |
| "loss": 0.4661, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9442970822281167, |
| "grad_norm": 2.210827589035034, |
| "learning_rate": 4.331528932555844e-06, |
| "loss": 0.4614, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9549071618037135, |
| "grad_norm": 2.403038740158081, |
| "learning_rate": 4.3103681228649626e-06, |
| "loss": 0.4623, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9655172413793104, |
| "grad_norm": 2.588114023208618, |
| "learning_rate": 4.288931021119788e-06, |
| "loss": 0.4902, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9761273209549072, |
| "grad_norm": 2.288691997528076, |
| "learning_rate": 4.267220898975848e-06, |
| "loss": 0.5047, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.986737400530504, |
| "grad_norm": 2.2487804889678955, |
| "learning_rate": 4.245241069756092e-06, |
| "loss": 0.5358, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9973474801061007, |
| "grad_norm": 2.5266008377075195, |
| "learning_rate": 4.222994887945219e-06, |
| "loss": 0.4928, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0074270557029177, |
| "grad_norm": 2.5962352752685547, |
| "learning_rate": 4.20048574867773e-06, |
| "loss": 0.3963, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0180371352785147, |
| "grad_norm": 2.707613229751587, |
| "learning_rate": 4.1777170872197725e-06, |
| "loss": 0.3125, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.0286472148541115, |
| "grad_norm": 2.4237964153289795, |
| "learning_rate": 4.1546923784448646e-06, |
| "loss": 0.3457, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0392572944297083, |
| "grad_norm": 1.6531928777694702, |
| "learning_rate": 4.1314151363035705e-06, |
| "loss": 0.3029, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.049867374005305, |
| "grad_norm": 2.1669981479644775, |
| "learning_rate": 4.1078889132872145e-06, |
| "loss": 0.3289, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.0604774535809018, |
| "grad_norm": 2.445012092590332, |
| "learning_rate": 4.084117299885712e-06, |
| "loss": 0.3234, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0710875331564986, |
| "grad_norm": 2.0615527629852295, |
| "learning_rate": 4.060103924039599e-06, |
| "loss": 0.3139, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.0816976127320954, |
| "grad_norm": 1.990400791168213, |
| "learning_rate": 4.035852450586352e-06, |
| "loss": 0.3144, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0923076923076924, |
| "grad_norm": 2.5510122776031494, |
| "learning_rate": 4.011366580701073e-06, |
| "loss": 0.323, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.1029177718832892, |
| "grad_norm": 2.462083101272583, |
| "learning_rate": 3.9866500513316274e-06, |
| "loss": 0.3694, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.113527851458886, |
| "grad_norm": 2.4385085105895996, |
| "learning_rate": 3.961706634628323e-06, |
| "loss": 0.3351, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1241379310344828, |
| "grad_norm": 1.7553578615188599, |
| "learning_rate": 3.936540137368222e-06, |
| "loss": 0.3459, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1347480106100796, |
| "grad_norm": 2.513950824737549, |
| "learning_rate": 3.911154400374159e-06, |
| "loss": 0.3186, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.1453580901856764, |
| "grad_norm": 2.6273515224456787, |
| "learning_rate": 3.885553297928573e-06, |
| "loss": 0.3333, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1559681697612731, |
| "grad_norm": 2.4155592918395996, |
| "learning_rate": 3.859740737182222e-06, |
| "loss": 0.3137, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.16657824933687, |
| "grad_norm": 2.719611644744873, |
| "learning_rate": 3.833720657557894e-06, |
| "loss": 0.3426, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1771883289124667, |
| "grad_norm": 2.5729358196258545, |
| "learning_rate": 3.807497030149181e-06, |
| "loss": 0.3709, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.1877984084880637, |
| "grad_norm": 1.9626141786575317, |
| "learning_rate": 3.7810738571144257e-06, |
| "loss": 0.329, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.1984084880636605, |
| "grad_norm": 2.601951837539673, |
| "learning_rate": 3.7544551710659296e-06, |
| "loss": 0.305, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2090185676392573, |
| "grad_norm": 2.4118540287017822, |
| "learning_rate": 3.7276450344545024e-06, |
| "loss": 0.3449, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.219628647214854, |
| "grad_norm": 2.5080604553222656, |
| "learning_rate": 3.7006475389494723e-06, |
| "loss": 0.3403, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2302387267904509, |
| "grad_norm": 2.6882951259613037, |
| "learning_rate": 3.6734668048142273e-06, |
| "loss": 0.3342, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2408488063660477, |
| "grad_norm": 2.3755247592926025, |
| "learning_rate": 3.646106980277394e-06, |
| "loss": 0.3589, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.2514588859416444, |
| "grad_norm": 2.4138166904449463, |
| "learning_rate": 3.618572240899748e-06, |
| "loss": 0.3447, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2620689655172415, |
| "grad_norm": 2.6930105686187744, |
| "learning_rate": 3.5908667889369603e-06, |
| "loss": 0.3787, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.2726790450928382, |
| "grad_norm": 2.732795476913452, |
| "learning_rate": 3.5629948526982563e-06, |
| "loss": 0.3376, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.283289124668435, |
| "grad_norm": 1.8468087911605835, |
| "learning_rate": 3.534960685901111e-06, |
| "loss": 0.3461, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.2938992042440318, |
| "grad_norm": 2.3408284187316895, |
| "learning_rate": 3.506768567022062e-06, |
| "loss": 0.3396, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.3045092838196286, |
| "grad_norm": 2.7420434951782227, |
| "learning_rate": 3.478422798643737e-06, |
| "loss": 0.3364, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3151193633952254, |
| "grad_norm": 2.634403705596924, |
| "learning_rate": 3.4499277067982177e-06, |
| "loss": 0.3126, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3257294429708222, |
| "grad_norm": 2.4217336177825928, |
| "learning_rate": 3.421287640306809e-06, |
| "loss": 0.3092, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3363395225464192, |
| "grad_norm": 1.7107937335968018, |
| "learning_rate": 3.3925069701163406e-06, |
| "loss": 0.3374, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.346949602122016, |
| "grad_norm": 2.1515822410583496, |
| "learning_rate": 3.363590088632085e-06, |
| "loss": 0.3436, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.3575596816976128, |
| "grad_norm": 2.0105717182159424, |
| "learning_rate": 3.334541409047408e-06, |
| "loss": 0.3283, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.3681697612732096, |
| "grad_norm": 1.8952791690826416, |
| "learning_rate": 3.3053653646702422e-06, |
| "loss": 0.358, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.3787798408488063, |
| "grad_norm": 1.8639928102493286, |
| "learning_rate": 3.276066408246487e-06, |
| "loss": 0.3084, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.3893899204244031, |
| "grad_norm": 2.563251256942749, |
| "learning_rate": 3.2466490112804484e-06, |
| "loss": 0.3508, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 2.214616060256958, |
| "learning_rate": 3.217117663352417e-06, |
| "loss": 0.3215, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.410610079575597, |
| "grad_norm": 1.793468952178955, |
| "learning_rate": 3.187476871433478e-06, |
| "loss": 0.3193, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.4212201591511937, |
| "grad_norm": 2.204789638519287, |
| "learning_rate": 3.1577311591976766e-06, |
| "loss": 0.3019, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.4318302387267905, |
| "grad_norm": 2.307568311691284, |
| "learning_rate": 3.1278850663316307e-06, |
| "loss": 0.3099, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4424403183023873, |
| "grad_norm": 2.485848903656006, |
| "learning_rate": 3.0979431478416987e-06, |
| "loss": 0.3085, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.453050397877984, |
| "grad_norm": 1.953053593635559, |
| "learning_rate": 3.067909973358811e-06, |
| "loss": 0.3211, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.4636604774535809, |
| "grad_norm": 2.2350101470947266, |
| "learning_rate": 3.0377901264410673e-06, |
| "loss": 0.3329, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.4742705570291776, |
| "grad_norm": 2.542452335357666, |
| "learning_rate": 3.0075882038742133e-06, |
| "loss": 0.3376, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.4848806366047747, |
| "grad_norm": 2.3203530311584473, |
| "learning_rate": 2.9773088149700923e-06, |
| "loss": 0.2896, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4954907161803712, |
| "grad_norm": 1.9708584547042847, |
| "learning_rate": 2.9469565808631888e-06, |
| "loss": 0.299, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.5061007957559682, |
| "grad_norm": 2.63698148727417, |
| "learning_rate": 2.9165361338053683e-06, |
| "loss": 0.3484, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.516710875331565, |
| "grad_norm": 2.091648578643799, |
| "learning_rate": 2.886052116458918e-06, |
| "loss": 0.3316, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.5273209549071618, |
| "grad_norm": 1.955355167388916, |
| "learning_rate": 2.8555091811880004e-06, |
| "loss": 0.328, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.5379310344827586, |
| "grad_norm": 1.6724951267242432, |
| "learning_rate": 2.8249119893486252e-06, |
| "loss": 0.3215, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5485411140583554, |
| "grad_norm": 2.1872570514678955, |
| "learning_rate": 2.7942652105772516e-06, |
| "loss": 0.3118, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.5591511936339524, |
| "grad_norm": 3.0710208415985107, |
| "learning_rate": 2.7635735220781214e-06, |
| "loss": 0.2973, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.569761273209549, |
| "grad_norm": 2.357663631439209, |
| "learning_rate": 2.7328416079094412e-06, |
| "loss": 0.3423, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.580371352785146, |
| "grad_norm": 2.2559144496917725, |
| "learning_rate": 2.7020741582685217e-06, |
| "loss": 0.3211, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.5909814323607427, |
| "grad_norm": 2.0730817317962646, |
| "learning_rate": 2.6712758687759706e-06, |
| "loss": 0.2733, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6015915119363395, |
| "grad_norm": 2.6119141578674316, |
| "learning_rate": 2.6404514397590657e-06, |
| "loss": 0.338, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.6122015915119363, |
| "grad_norm": 2.315875768661499, |
| "learning_rate": 2.6096055755344113e-06, |
| "loss": 0.3124, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.622811671087533, |
| "grad_norm": 2.2880892753601074, |
| "learning_rate": 2.578742983689973e-06, |
| "loss": 0.3538, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.6334217506631301, |
| "grad_norm": 2.2615041732788086, |
| "learning_rate": 2.547868374366631e-06, |
| "loss": 0.3353, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.6440318302387267, |
| "grad_norm": 1.9062315225601196, |
| "learning_rate": 2.5169864595393295e-06, |
| "loss": 0.302, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6546419098143237, |
| "grad_norm": 2.7016942501068115, |
| "learning_rate": 2.4861019522979537e-06, |
| "loss": 0.3124, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.6652519893899205, |
| "grad_norm": 2.4618184566497803, |
| "learning_rate": 2.455219566128034e-06, |
| "loss": 0.3497, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.6758620689655173, |
| "grad_norm": 2.8924951553344727, |
| "learning_rate": 2.4243440141913905e-06, |
| "loss": 0.3233, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.686472148541114, |
| "grad_norm": 2.32255482673645, |
| "learning_rate": 2.393480008606825e-06, |
| "loss": 0.3067, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.6970822281167108, |
| "grad_norm": 1.8984359502792358, |
| "learning_rate": 2.3626322597309774e-06, |
| "loss": 0.2893, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7076923076923078, |
| "grad_norm": 1.8360289335250854, |
| "learning_rate": 2.331805475439445e-06, |
| "loss": 0.2825, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.7183023872679044, |
| "grad_norm": 2.331998109817505, |
| "learning_rate": 2.3010043604082824e-06, |
| "loss": 0.3379, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.7289124668435014, |
| "grad_norm": 2.3304574489593506, |
| "learning_rate": 2.2702336153959925e-06, |
| "loss": 0.301, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.739522546419098, |
| "grad_norm": 2.534090518951416, |
| "learning_rate": 2.2394979365261134e-06, |
| "loss": 0.404, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.750132625994695, |
| "grad_norm": 2.273122549057007, |
| "learning_rate": 2.208802014570507e-06, |
| "loss": 0.3242, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.7607427055702918, |
| "grad_norm": 1.8859643936157227, |
| "learning_rate": 2.1781505342334775e-06, |
| "loss": 0.3152, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.7713527851458886, |
| "grad_norm": 2.567715644836426, |
| "learning_rate": 2.147548173436805e-06, |
| "loss": 0.3302, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.7819628647214856, |
| "grad_norm": 2.7930519580841064, |
| "learning_rate": 2.116999602605814e-06, |
| "loss": 0.293, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.7925729442970821, |
| "grad_norm": 2.646296262741089, |
| "learning_rate": 2.086509483956594e-06, |
| "loss": 0.2683, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.8031830238726791, |
| "grad_norm": 2.3010053634643555, |
| "learning_rate": 2.056082470784469e-06, |
| "loss": 0.313, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8137931034482757, |
| "grad_norm": 2.3864669799804688, |
| "learning_rate": 2.0257232067538213e-06, |
| "loss": 0.262, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.8244031830238727, |
| "grad_norm": 2.63028883934021, |
| "learning_rate": 1.9954363251894007e-06, |
| "loss": 0.3457, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.8350132625994695, |
| "grad_norm": 2.0011484622955322, |
| "learning_rate": 1.9652264483691933e-06, |
| "loss": 0.2739, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.8456233421750663, |
| "grad_norm": 2.6818690299987793, |
| "learning_rate": 1.9350981868189944e-06, |
| "loss": 0.3109, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.856233421750663, |
| "grad_norm": 2.6978225708007812, |
| "learning_rate": 1.9050561386087618e-06, |
| "loss": 0.3269, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.8668435013262599, |
| "grad_norm": 2.578031301498413, |
| "learning_rate": 1.8751048886508711e-06, |
| "loss": 0.3617, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.8774535809018569, |
| "grad_norm": 2.5525052547454834, |
| "learning_rate": 1.8452490080003888e-06, |
| "loss": 0.3228, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.8880636604774534, |
| "grad_norm": 2.1095635890960693, |
| "learning_rate": 1.8154930531574521e-06, |
| "loss": 0.2857, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.8986737400530505, |
| "grad_norm": 2.3965845108032227, |
| "learning_rate": 1.785841565371868e-06, |
| "loss": 0.3622, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.9092838196286472, |
| "grad_norm": 2.293715238571167, |
| "learning_rate": 1.7562990699500482e-06, |
| "loss": 0.3031, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.919893899204244, |
| "grad_norm": 2.026015281677246, |
| "learning_rate": 1.7268700755643708e-06, |
| "loss": 0.3019, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.9305039787798408, |
| "grad_norm": 1.7175791263580322, |
| "learning_rate": 1.6975590735650812e-06, |
| "loss": 0.3047, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.9411140583554376, |
| "grad_norm": 2.0024490356445312, |
| "learning_rate": 1.668370537294841e-06, |
| "loss": 0.3048, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.9517241379310346, |
| "grad_norm": 2.8226239681243896, |
| "learning_rate": 1.6393089214060204e-06, |
| "loss": 0.3205, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.9623342175066312, |
| "grad_norm": 1.9452221393585205, |
| "learning_rate": 1.6103786611808414e-06, |
| "loss": 0.321, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.9729442970822282, |
| "grad_norm": 2.304274320602417, |
| "learning_rate": 1.5815841718544884e-06, |
| "loss": 0.2954, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.983554376657825, |
| "grad_norm": 2.502206802368164, |
| "learning_rate": 1.5529298479412636e-06, |
| "loss": 0.2945, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.9941644562334218, |
| "grad_norm": 2.5796189308166504, |
| "learning_rate": 1.524420062563912e-06, |
| "loss": 0.3291, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.004244031830239, |
| "grad_norm": 1.9198871850967407, |
| "learning_rate": 1.4960591667862163e-06, |
| "loss": 0.234, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.0148541114058354, |
| "grad_norm": 1.7082706689834595, |
| "learning_rate": 1.4678514889489464e-06, |
| "loss": 0.1943, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0254641909814324, |
| "grad_norm": 1.8571817874908447, |
| "learning_rate": 1.4398013340092864e-06, |
| "loss": 0.1911, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.0360742705570294, |
| "grad_norm": 2.454561233520508, |
| "learning_rate": 1.4119129828838275e-06, |
| "loss": 0.1895, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.046684350132626, |
| "grad_norm": 2.3714683055877686, |
| "learning_rate": 1.384190691795226e-06, |
| "loss": 0.2177, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.057294429708223, |
| "grad_norm": 2.1356313228607178, |
| "learning_rate": 1.3566386916226373e-06, |
| "loss": 0.2252, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.0679045092838195, |
| "grad_norm": 2.446906089782715, |
| "learning_rate": 1.3292611872560134e-06, |
| "loss": 0.1982, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.0785145888594165, |
| "grad_norm": 2.1040875911712646, |
| "learning_rate": 1.302062356954365e-06, |
| "loss": 0.1696, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.089124668435013, |
| "grad_norm": 2.220742702484131, |
| "learning_rate": 1.2750463517080922e-06, |
| "loss": 0.1936, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.09973474801061, |
| "grad_norm": 2.7784054279327393, |
| "learning_rate": 1.2482172946054753e-06, |
| "loss": 0.1604, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.110344827586207, |
| "grad_norm": 2.0539498329162598, |
| "learning_rate": 1.2215792802034187e-06, |
| "loss": 0.2069, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.1209549071618037, |
| "grad_norm": 1.8337138891220093, |
| "learning_rate": 1.1951363739025618e-06, |
| "loss": 0.1964, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.1315649867374007, |
| "grad_norm": 1.7631642818450928, |
| "learning_rate": 1.168892611326827e-06, |
| "loss": 0.1871, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.1421750663129973, |
| "grad_norm": 2.386589527130127, |
| "learning_rate": 1.1428519977075136e-06, |
| "loss": 0.2595, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.1527851458885943, |
| "grad_norm": 2.553382635116577, |
| "learning_rate": 1.1170185072720434e-06, |
| "loss": 0.185, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.163395225464191, |
| "grad_norm": 2.870973825454712, |
| "learning_rate": 1.091396082637419e-06, |
| "loss": 0.228, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.174005305039788, |
| "grad_norm": 2.643745183944702, |
| "learning_rate": 1.065988634208516e-06, |
| "loss": 0.2098, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.184615384615385, |
| "grad_norm": 2.369596481323242, |
| "learning_rate": 1.0408000395812961e-06, |
| "loss": 0.1982, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.1952254641909814, |
| "grad_norm": 2.1093883514404297, |
| "learning_rate": 1.0158341429510194e-06, |
| "loss": 0.1844, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.2058355437665784, |
| "grad_norm": 1.951935052871704, |
| "learning_rate": 9.910947545255523e-07, |
| "loss": 0.1654, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.216445623342175, |
| "grad_norm": 2.230781078338623, |
| "learning_rate": 9.665856499438744e-07, |
| "loss": 0.2037, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.227055702917772, |
| "grad_norm": 2.6240904331207275, |
| "learning_rate": 9.423105696998491e-07, |
| "loss": 0.2087, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.2376657824933686, |
| "grad_norm": 1.712857723236084, |
| "learning_rate": 9.182732185713633e-07, |
| "loss": 0.2105, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.2482758620689656, |
| "grad_norm": 2.036086082458496, |
| "learning_rate": 8.94477265054918e-07, |
| "loss": 0.2186, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.2588859416445626, |
| "grad_norm": 2.3545398712158203, |
| "learning_rate": 8.709263408057522e-07, |
| "loss": 0.1879, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.269496021220159, |
| "grad_norm": 1.9098992347717285, |
| "learning_rate": 8.476240400835972e-07, |
| "loss": 0.2177, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.280106100795756, |
| "grad_norm": 2.107959270477295, |
| "learning_rate": 8.245739192041311e-07, |
| "loss": 0.165, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.2907161803713527, |
| "grad_norm": 2.550719976425171, |
| "learning_rate": 8.017794959962225e-07, |
| "loss": 0.2018, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.3013262599469497, |
| "grad_norm": 2.354701280593872, |
| "learning_rate": 7.792442492650587e-07, |
| "loss": 0.1955, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.3119363395225463, |
| "grad_norm": 2.3547091484069824, |
| "learning_rate": 7.569716182612177e-07, |
| "loss": 0.1976, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.3225464190981433, |
| "grad_norm": 1.4048022031784058, |
| "learning_rate": 7.349650021557839e-07, |
| "loss": 0.1685, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.33315649867374, |
| "grad_norm": 2.568500280380249, |
| "learning_rate": 7.132277595215773e-07, |
| "loss": 0.1519, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.343766578249337, |
| "grad_norm": 2.205993413925171, |
| "learning_rate": 6.917632078205805e-07, |
| "loss": 0.1573, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.3543766578249334, |
| "grad_norm": 2.067505121231079, |
| "learning_rate": 6.705746228976387e-07, |
| "loss": 0.184, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.3649867374005304, |
| "grad_norm": 2.4360201358795166, |
| "learning_rate": 6.496652384805125e-07, |
| "loss": 0.1968, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.3755968169761275, |
| "grad_norm": 2.042179584503174, |
| "learning_rate": 6.290382456863584e-07, |
| "loss": 0.1846, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.386206896551724, |
| "grad_norm": 2.849271535873413, |
| "learning_rate": 6.086967925347075e-07, |
| "loss": 0.1858, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.396816976127321, |
| "grad_norm": 2.0765082836151123, |
| "learning_rate": 5.88643983467033e-07, |
| "loss": 0.1837, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.4074270557029176, |
| "grad_norm": 1.9958840608596802, |
| "learning_rate": 5.688828788729547e-07, |
| "loss": 0.1659, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.4180371352785146, |
| "grad_norm": 2.253602981567383, |
| "learning_rate": 5.494164946231747e-07, |
| "loss": 0.2095, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.428647214854111, |
| "grad_norm": 1.5552992820739746, |
| "learning_rate": 5.302478016092075e-07, |
| "loss": 0.1862, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.439257294429708, |
| "grad_norm": 2.721445322036743, |
| "learning_rate": 5.113797252899728e-07, |
| "loss": 0.2085, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.449867374005305, |
| "grad_norm": 2.3488707542419434, |
| "learning_rate": 4.928151452453184e-07, |
| "loss": 0.1914, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.4604774535809018, |
| "grad_norm": 2.49068021774292, |
| "learning_rate": 4.745568947365542e-07, |
| "loss": 0.1718, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.4710875331564988, |
| "grad_norm": 1.4638549089431763, |
| "learning_rate": 4.5660776027404654e-07, |
| "loss": 0.1669, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.4816976127320953, |
| "grad_norm": 2.288776159286499, |
| "learning_rate": 4.389704811919507e-07, |
| "loss": 0.1731, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.4923076923076923, |
| "grad_norm": 2.385162115097046, |
| "learning_rate": 4.216477492301455e-07, |
| "loss": 0.1802, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.502917771883289, |
| "grad_norm": 2.0100815296173096, |
| "learning_rate": 4.0464220812342526e-07, |
| "loss": 0.2232, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.513527851458886, |
| "grad_norm": 1.8439091444015503, |
| "learning_rate": 3.87956453198027e-07, |
| "loss": 0.1432, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.524137931034483, |
| "grad_norm": 2.3093338012695312, |
| "learning_rate": 3.715930309755389e-07, |
| "loss": 0.1834, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.5347480106100795, |
| "grad_norm": 2.3250088691711426, |
| "learning_rate": 3.5555443878425635e-07, |
| "loss": 0.2123, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.5453580901856765, |
| "grad_norm": 1.8003133535385132, |
| "learning_rate": 3.398431243780531e-07, |
| "loss": 0.2034, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.555968169761273, |
| "grad_norm": 2.8948135375976562, |
| "learning_rate": 3.2446148556281117e-07, |
| "loss": 0.1778, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.56657824933687, |
| "grad_norm": 1.8556360006332397, |
| "learning_rate": 3.0941186983047543e-07, |
| "loss": 0.1892, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.5771883289124666, |
| "grad_norm": 2.771932363510132, |
| "learning_rate": 2.9469657400078925e-07, |
| "loss": 0.1935, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.5877984084880636, |
| "grad_norm": 2.5325114727020264, |
| "learning_rate": 2.8031784387076186e-07, |
| "loss": 0.1858, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.5984084880636606, |
| "grad_norm": 2.4069302082061768, |
| "learning_rate": 2.6627787387191934e-07, |
| "loss": 0.2118, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.609018567639257, |
| "grad_norm": 2.053656816482544, |
| "learning_rate": 2.5257880673540376e-07, |
| "loss": 0.1929, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.6196286472148542, |
| "grad_norm": 1.8820626735687256, |
| "learning_rate": 2.392227331649527e-07, |
| "loss": 0.1745, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.630238726790451, |
| "grad_norm": 1.9418586492538452, |
| "learning_rate": 2.2621169151782417e-07, |
| "loss": 0.1823, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.640848806366048, |
| "grad_norm": 2.519037961959839, |
| "learning_rate": 2.1354766749371093e-07, |
| "loss": 0.2037, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.6514588859416444, |
| "grad_norm": 2.010211944580078, |
| "learning_rate": 2.0123259383169031e-07, |
| "loss": 0.2196, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.6620689655172414, |
| "grad_norm": 1.9838532209396362, |
| "learning_rate": 1.8926835001525257e-07, |
| "loss": 0.1848, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.6726790450928384, |
| "grad_norm": 2.3488149642944336, |
| "learning_rate": 1.776567619854655e-07, |
| "loss": 0.1823, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.683289124668435, |
| "grad_norm": 2.839651584625244, |
| "learning_rate": 1.6639960186230293e-07, |
| "loss": 0.2039, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.693899204244032, |
| "grad_norm": 2.050480842590332, |
| "learning_rate": 1.5549858767419018e-07, |
| "loss": 0.1796, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.7045092838196285, |
| "grad_norm": 1.2738044261932373, |
| "learning_rate": 1.449553830958053e-07, |
| "loss": 0.1893, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.7151193633952255, |
| "grad_norm": 1.8912787437438965, |
| "learning_rate": 1.347715971941746e-07, |
| "loss": 0.1947, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.725729442970822, |
| "grad_norm": 1.8385730981826782, |
| "learning_rate": 1.2494878418310234e-07, |
| "loss": 0.1744, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.736339522546419, |
| "grad_norm": 2.1071712970733643, |
| "learning_rate": 1.1548844318597208e-07, |
| "loss": 0.2351, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.746949602122016, |
| "grad_norm": 2.054392099380493, |
| "learning_rate": 1.0639201800695553e-07, |
| "loss": 0.2245, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.7575596816976127, |
| "grad_norm": 1.656562328338623, |
| "learning_rate": 9.76608969106646e-08, |
| "loss": 0.2014, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.7681697612732097, |
| "grad_norm": 2.6887638568878174, |
| "learning_rate": 8.929641241027937e-08, |
| "loss": 0.1824, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.7787798408488062, |
| "grad_norm": 2.4606659412384033, |
| "learning_rate": 8.129984106418354e-08, |
| "loss": 0.1706, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.7893899204244033, |
| "grad_norm": 2.5548455715179443, |
| "learning_rate": 7.3672403281142e-08, |
| "loss": 0.2195, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 1.7952167987823486, |
| "learning_rate": 6.641526313404534e-08, |
| "loss": 0.1748, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.810610079575597, |
| "grad_norm": 2.376830816268921, |
| "learning_rate": 5.952952818225416e-08, |
| "loss": 0.2061, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.821220159151194, |
| "grad_norm": 1.7183632850646973, |
| "learning_rate": 5.3016249302565436e-08, |
| "loss": 0.1742, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.8318302387267904, |
| "grad_norm": 2.11011004447937, |
| "learning_rate": 4.6876420528833014e-08, |
| "loss": 0.2082, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.8424403183023874, |
| "grad_norm": 1.8799868822097778, |
| "learning_rate": 4.111097890026089e-08, |
| "loss": 0.1805, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.853050397877984, |
| "grad_norm": 2.5171291828155518, |
| "learning_rate": 3.5720804318395976e-08, |
| "loss": 0.2058, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.863660477453581, |
| "grad_norm": 2.142263650894165, |
| "learning_rate": 3.0706719412839926e-08, |
| "loss": 0.2027, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.8742705570291776, |
| "grad_norm": 2.2124040126800537, |
| "learning_rate": 2.6069489415703197e-08, |
| "loss": 0.1941, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.8848806366047746, |
| "grad_norm": 2.033259153366089, |
| "learning_rate": 2.18098220448168e-08, |
| "loss": 0.2029, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.8954907161803716, |
| "grad_norm": 2.416912794113159, |
| "learning_rate": 1.7928367395725066e-08, |
| "loss": 0.2062, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.906100795755968, |
| "grad_norm": 2.193751096725464, |
| "learning_rate": 1.442571784246699e-08, |
| "loss": 0.1873, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.916710875331565, |
| "grad_norm": 1.5729731321334839, |
| "learning_rate": 1.1302407947173522e-08, |
| "loss": 0.1653, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.9273209549071617, |
| "grad_norm": 1.7562044858932495, |
| "learning_rate": 8.558914378481996e-09, |
| "loss": 0.1743, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.9379310344827587, |
| "grad_norm": 2.183967351913452, |
| "learning_rate": 6.195655838790726e-09, |
| "loss": 0.1821, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.9485411140583553, |
| "grad_norm": 1.9312433004379272, |
| "learning_rate": 4.212993000356491e-09, |
| "loss": 0.1954, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.9591511936339523, |
| "grad_norm": 2.2055087089538574, |
| "learning_rate": 2.611228450250802e-09, |
| "loss": 0.1925, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.9697612732095493, |
| "grad_norm": 1.6606404781341553, |
| "learning_rate": 1.3906066441798927e-09, |
| "loss": 0.1805, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.980371352785146, |
| "grad_norm": 2.594404458999634, |
| "learning_rate": 5.513138691767839e-10, |
| "loss": 0.2084, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.9909814323607424, |
| "grad_norm": 2.007861375808716, |
| "learning_rate": 9.347821517069477e-11, |
| "loss": 0.2115, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.9973474801061006, |
| "step": 2826, |
| "total_flos": 1.0915292825780224e+17, |
| "train_loss": 0.34044326600333263, |
| "train_runtime": 16671.2674, |
| "train_samples_per_second": 2.713, |
| "train_steps_per_second": 0.17 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2826, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 943, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0915292825780224e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |