{ "best_global_step": 1500, "best_metric": 0.4265768229961395, "best_model_checkpoint": "/content/drive/MyDrive/gemma2_trading_finetune/gemma-2b-trader-summarizer/checkpoint-1500", "epoch": 3.0, "eval_steps": 500, "global_step": 1791, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001675392670157068, "grad_norm": NaN, "learning_rate": 0.0, "loss": 1.2713, "step": 1 }, { "epoch": 0.016753926701570682, "grad_norm": 0.6294093728065491, "learning_rate": 1.4000000000000001e-06, "loss": 0.9665, "step": 10 }, { "epoch": 0.033507853403141365, "grad_norm": 0.5912520885467529, "learning_rate": 3.4000000000000005e-06, "loss": 0.8862, "step": 20 }, { "epoch": 0.050261780104712044, "grad_norm": 0.5498993396759033, "learning_rate": 5.400000000000001e-06, "loss": 0.866, "step": 30 }, { "epoch": 0.06701570680628273, "grad_norm": 0.47078797221183777, "learning_rate": 7.4e-06, "loss": 0.7701, "step": 40 }, { "epoch": 0.08376963350785341, "grad_norm": 0.44182780385017395, "learning_rate": 9.4e-06, "loss": 0.7534, "step": 50 }, { "epoch": 0.10052356020942409, "grad_norm": 0.4943735897541046, "learning_rate": 1.14e-05, "loss": 0.6793, "step": 60 }, { "epoch": 0.11727748691099477, "grad_norm": 0.4289020001888275, "learning_rate": 1.3400000000000002e-05, "loss": 0.6589, "step": 70 }, { "epoch": 0.13403141361256546, "grad_norm": 0.3783618211746216, "learning_rate": 1.54e-05, "loss": 0.5734, "step": 80 }, { "epoch": 0.15078534031413612, "grad_norm": 0.43305572867393494, "learning_rate": 1.7400000000000003e-05, "loss": 0.6123, "step": 90 }, { "epoch": 0.16753926701570682, "grad_norm": 0.42297738790512085, "learning_rate": 1.94e-05, "loss": 0.5813, "step": 100 }, { "epoch": 0.18429319371727748, "grad_norm": 0.4422590732574463, "learning_rate": 1.9999154385359104e-05, "loss": 0.5432, "step": 110 }, { "epoch": 0.20104712041884817, "grad_norm": 0.4152766764163971, "learning_rate": 1.9995012943638305e-05, "loss": 0.5422, "step": 120 }, { "epoch": 0.21780104712041884, "grad_norm": 0.5250847339630127, "learning_rate": 1.9987421785468592e-05, "loss": 0.5781, "step": 130 }, { "epoch": 0.23455497382198953, "grad_norm": 0.4559013843536377, "learning_rate": 1.997638353089092e-05, "loss": 0.5234, "step": 140 }, { "epoch": 0.2513089005235602, "grad_norm": 0.49973589181900024, "learning_rate": 1.996190198969009e-05, "loss": 0.5465, "step": 150 }, { "epoch": 0.2680628272251309, "grad_norm": 0.4770303964614868, "learning_rate": 1.9943982160079823e-05, "loss": 0.5019, "step": 160 }, { "epoch": 0.2848167539267016, "grad_norm": 0.5174061059951782, "learning_rate": 1.9922630226977667e-05, "loss": 0.5366, "step": 170 }, { "epoch": 0.30157068062827225, "grad_norm": 0.5140427947044373, "learning_rate": 1.989785355987032e-05, "loss": 0.5331, "step": 180 }, { "epoch": 0.3183246073298429, "grad_norm": 0.49744996428489685, "learning_rate": 1.986966071027007e-05, "loss": 0.5043, "step": 190 }, { "epoch": 0.33507853403141363, "grad_norm": 0.5526700019836426, "learning_rate": 1.9838061408763314e-05, "loss": 0.5658, "step": 200 }, { "epoch": 0.3518324607329843, "grad_norm": 0.5416470170021057, "learning_rate": 1.9803066561652108e-05, "loss": 0.5356, "step": 210 }, { "epoch": 0.36858638743455496, "grad_norm": 0.5986471772193909, "learning_rate": 1.9764688247189918e-05, "loss": 0.5746, "step": 220 }, { "epoch": 0.38534031413612563, "grad_norm": 0.49069204926490784, "learning_rate": 1.972293971141288e-05, "loss": 0.4598, "step": 230 }, { "epoch": 0.40209424083769635, "grad_norm": 0.5061984658241272, "learning_rate": 1.9677835363568024e-05, "loss": 0.5232, "step": 240 }, { "epoch": 0.418848167539267, "grad_norm": 0.6716789603233337, "learning_rate": 1.962939077114002e-05, "loss": 0.5627, "step": 250 }, { "epoch": 0.4356020942408377, "grad_norm": 0.6077855229377747, "learning_rate": 1.9577622654478127e-05, "loss": 0.4972, "step": 260 }, { "epoch": 0.4523560209424084, "grad_norm": 0.5625795722007751, "learning_rate": 1.9522548881025304e-05, "loss": 0.4538, "step": 270 }, { "epoch": 0.46910994764397906, "grad_norm": 0.5840628743171692, "learning_rate": 1.9464188459151342e-05, "loss": 0.4707, "step": 280 }, { "epoch": 0.48586387434554973, "grad_norm": 0.6204021573066711, "learning_rate": 1.9402561531592273e-05, "loss": 0.4776, "step": 290 }, { "epoch": 0.5026178010471204, "grad_norm": 0.6092849969863892, "learning_rate": 1.9337689368498214e-05, "loss": 0.454, "step": 300 }, { "epoch": 0.5193717277486911, "grad_norm": 0.6331911087036133, "learning_rate": 1.92695943600921e-05, "loss": 0.5294, "step": 310 }, { "epoch": 0.5361256544502618, "grad_norm": 0.5908372402191162, "learning_rate": 1.919830000894185e-05, "loss": 0.4953, "step": 320 }, { "epoch": 0.5528795811518324, "grad_norm": 0.6011969447135925, "learning_rate": 1.912383092184861e-05, "loss": 0.5049, "step": 330 }, { "epoch": 0.5696335078534032, "grad_norm": 0.6391215920448303, "learning_rate": 1.9046212801353846e-05, "loss": 0.5501, "step": 340 }, { "epoch": 0.5863874345549738, "grad_norm": 0.587267279624939, "learning_rate": 1.8965472436868288e-05, "loss": 0.4655, "step": 350 }, { "epoch": 0.6031413612565445, "grad_norm": 0.6430619359016418, "learning_rate": 1.888163769542573e-05, "loss": 0.4778, "step": 360 }, { "epoch": 0.6198952879581152, "grad_norm": 0.6426651477813721, "learning_rate": 1.879473751206489e-05, "loss": 0.4926, "step": 370 }, { "epoch": 0.6366492146596858, "grad_norm": 0.5685374736785889, "learning_rate": 1.870480187984268e-05, "loss": 0.5288, "step": 380 }, { "epoch": 0.6534031413612565, "grad_norm": 0.6300281882286072, "learning_rate": 1.8611861839482252e-05, "loss": 0.4617, "step": 390 }, { "epoch": 0.6701570680628273, "grad_norm": 0.5899410843849182, "learning_rate": 1.8515949468659532e-05, "loss": 0.4669, "step": 400 }, { "epoch": 0.6869109947643979, "grad_norm": 0.6726074814796448, "learning_rate": 1.8417097870931777e-05, "loss": 0.5024, "step": 410 }, { "epoch": 0.7036649214659686, "grad_norm": 0.6307454109191895, "learning_rate": 1.8315341164312102e-05, "loss": 0.4651, "step": 420 }, { "epoch": 0.7204188481675393, "grad_norm": 0.6004384160041809, "learning_rate": 1.8210714469493852e-05, "loss": 0.4795, "step": 430 }, { "epoch": 0.7371727748691099, "grad_norm": 0.6285229325294495, "learning_rate": 1.810325389772892e-05, "loss": 0.4493, "step": 440 }, { "epoch": 0.7539267015706806, "grad_norm": 0.6102107167243958, "learning_rate": 1.7992996538364185e-05, "loss": 0.5082, "step": 450 }, { "epoch": 0.7706806282722513, "grad_norm": 0.6182498931884766, "learning_rate": 1.7879980446040327e-05, "loss": 0.488, "step": 460 }, { "epoch": 0.787434554973822, "grad_norm": 0.6647325754165649, "learning_rate": 1.7764244627557524e-05, "loss": 0.4537, "step": 470 }, { "epoch": 0.8041884816753927, "grad_norm": 0.588350772857666, "learning_rate": 1.7645829028412496e-05, "loss": 0.4487, "step": 480 }, { "epoch": 0.8209424083769633, "grad_norm": 0.7462045550346375, "learning_rate": 1.7524774519011565e-05, "loss": 0.4463, "step": 490 }, { "epoch": 0.837696335078534, "grad_norm": 0.6237714886665344, "learning_rate": 1.740112288056448e-05, "loss": 0.4952, "step": 500 }, { "epoch": 0.837696335078534, "eval_loss": 0.46747511625289917, "eval_runtime": 72.2811, "eval_samples_per_second": 14.679, "eval_steps_per_second": 7.346, "step": 500 }, { "epoch": 0.8544502617801047, "grad_norm": 0.6721609830856323, "learning_rate": 1.7274916790663914e-05, "loss": 0.4804, "step": 510 }, { "epoch": 0.8712041884816754, "grad_norm": 0.6287488341331482, "learning_rate": 1.714619980855553e-05, "loss": 0.4275, "step": 520 }, { "epoch": 0.8879581151832461, "grad_norm": 0.6959580183029175, "learning_rate": 1.70150163601038e-05, "loss": 0.4713, "step": 530 }, { "epoch": 0.9047120418848168, "grad_norm": 0.6396615505218506, "learning_rate": 1.6881411722458688e-05, "loss": 0.4553, "step": 540 }, { "epoch": 0.9214659685863874, "grad_norm": 0.6596575975418091, "learning_rate": 1.674543200842853e-05, "loss": 0.4765, "step": 550 }, { "epoch": 0.9382198952879581, "grad_norm": 0.7078624367713928, "learning_rate": 1.660712415056448e-05, "loss": 0.5032, "step": 560 }, { "epoch": 0.9549738219895288, "grad_norm": 0.7436346411705017, "learning_rate": 1.646653588496201e-05, "loss": 0.5238, "step": 570 }, { "epoch": 0.9717277486910995, "grad_norm": 0.5996255874633789, "learning_rate": 1.6323715734785143e-05, "loss": 0.4802, "step": 580 }, { "epoch": 0.9884816753926702, "grad_norm": 0.7176609039306641, "learning_rate": 1.6178712993518945e-05, "loss": 0.4705, "step": 590 }, { "epoch": 1.0050261780104712, "grad_norm": 0.714697539806366, "learning_rate": 1.6031577707956207e-05, "loss": 0.4417, "step": 600 }, { "epoch": 1.0217801047120418, "grad_norm": 0.7231177091598511, "learning_rate": 1.5882360660924084e-05, "loss": 0.4445, "step": 610 }, { "epoch": 1.0385340314136127, "grad_norm": 0.6571698784828186, "learning_rate": 1.5731113353756752e-05, "loss": 0.4514, "step": 620 }, { "epoch": 1.0552879581151833, "grad_norm": 0.6722269058227539, "learning_rate": 1.557788798852001e-05, "loss": 0.4953, "step": 630 }, { "epoch": 1.0720418848167539, "grad_norm": 0.64736407995224, "learning_rate": 1.5422737449994078e-05, "loss": 0.4674, "step": 640 }, { "epoch": 1.0887958115183247, "grad_norm": 0.7046222686767578, "learning_rate": 1.5265715287420752e-05, "loss": 0.4234, "step": 650 }, { "epoch": 1.1055497382198953, "grad_norm": 0.755739688873291, "learning_rate": 1.5106875696021192e-05, "loss": 0.4162, "step": 660 }, { "epoch": 1.122303664921466, "grad_norm": 0.7437321543693542, "learning_rate": 1.4946273498290815e-05, "loss": 0.4681, "step": 670 }, { "epoch": 1.1390575916230365, "grad_norm": 0.7438350915908813, "learning_rate": 1.4783964125077614e-05, "loss": 0.4529, "step": 680 }, { "epoch": 1.1558115183246074, "grad_norm": 0.9883790016174316, "learning_rate": 1.4620003596450576e-05, "loss": 0.4289, "step": 690 }, { "epoch": 1.172565445026178, "grad_norm": 0.7670640349388123, "learning_rate": 1.4454448502364687e-05, "loss": 0.4249, "step": 700 }, { "epoch": 1.1893193717277486, "grad_norm": 0.6871143579483032, "learning_rate": 1.4287355983129265e-05, "loss": 0.452, "step": 710 }, { "epoch": 1.2060732984293194, "grad_norm": 0.7382912039756775, "learning_rate": 1.411878370968635e-05, "loss": 0.4538, "step": 720 }, { "epoch": 1.22282722513089, "grad_norm": 0.819150984287262, "learning_rate": 1.3948789863705914e-05, "loss": 0.4031, "step": 730 }, { "epoch": 1.2395811518324606, "grad_norm": 0.8083910942077637, "learning_rate": 1.3777433117504849e-05, "loss": 0.449, "step": 740 }, { "epoch": 1.2563350785340315, "grad_norm": 0.7677626609802246, "learning_rate": 1.3604772613796551e-05, "loss": 0.4682, "step": 750 }, { "epoch": 1.273089005235602, "grad_norm": 0.7691996097564697, "learning_rate": 1.3430867945278212e-05, "loss": 0.4604, "step": 760 }, { "epoch": 1.2898429319371727, "grad_norm": 0.7367651462554932, "learning_rate": 1.3255779134062747e-05, "loss": 0.4282, "step": 770 }, { "epoch": 1.3065968586387435, "grad_norm": 0.8763916492462158, "learning_rate": 1.3079566610962557e-05, "loss": 0.4554, "step": 780 }, { "epoch": 1.3233507853403141, "grad_norm": 0.8387876749038696, "learning_rate": 1.2902291194632187e-05, "loss": 0.4407, "step": 790 }, { "epoch": 1.3401047120418847, "grad_norm": 0.7726860642433167, "learning_rate": 1.2724014070577159e-05, "loss": 0.4261, "step": 800 }, { "epoch": 1.3568586387434554, "grad_norm": 0.7063042521476746, "learning_rate": 1.254479677003619e-05, "loss": 0.4236, "step": 810 }, { "epoch": 1.3736125654450262, "grad_norm": 0.8544300198554993, "learning_rate": 1.2364701148744035e-05, "loss": 0.4715, "step": 820 }, { "epoch": 1.3903664921465968, "grad_norm": 0.7012007236480713, "learning_rate": 1.2183789365582402e-05, "loss": 0.4514, "step": 830 }, { "epoch": 1.4071204188481676, "grad_norm": 0.7686796188354492, "learning_rate": 1.2002123861126169e-05, "loss": 0.4714, "step": 840 }, { "epoch": 1.4238743455497382, "grad_norm": 0.8073607683181763, "learning_rate": 1.1819767336092434e-05, "loss": 0.4234, "step": 850 }, { "epoch": 1.4406282722513089, "grad_norm": 0.8263911604881287, "learning_rate": 1.163678272969973e-05, "loss": 0.4742, "step": 860 }, { "epoch": 1.4573821989528795, "grad_norm": 0.7900548577308655, "learning_rate": 1.1453233197944974e-05, "loss": 0.4329, "step": 870 }, { "epoch": 1.4741361256544503, "grad_norm": 0.7620972394943237, "learning_rate": 1.1269182091805537e-05, "loss": 0.4254, "step": 880 }, { "epoch": 1.490890052356021, "grad_norm": 0.7844494581222534, "learning_rate": 1.1084692935374075e-05, "loss": 0.4322, "step": 890 }, { "epoch": 1.5076439790575917, "grad_norm": 0.8092195987701416, "learning_rate": 1.0899829403933568e-05, "loss": 0.4773, "step": 900 }, { "epoch": 1.5243979057591623, "grad_norm": 0.7582876682281494, "learning_rate": 1.0714655301980201e-05, "loss": 0.4226, "step": 910 }, { "epoch": 1.541151832460733, "grad_norm": 0.7948154211044312, "learning_rate": 1.0529234541201631e-05, "loss": 0.4539, "step": 920 }, { "epoch": 1.5579057591623036, "grad_norm": 0.8422669172286987, "learning_rate": 1.0343631118418283e-05, "loss": 0.4429, "step": 930 }, { "epoch": 1.5746596858638744, "grad_norm": 0.9496159553527832, "learning_rate": 1.015790909349522e-05, "loss": 0.4667, "step": 940 }, { "epoch": 1.591413612565445, "grad_norm": 0.8045749068260193, "learning_rate": 9.972132567232301e-06, "loss": 0.4627, "step": 950 }, { "epoch": 1.6081675392670158, "grad_norm": 0.7890786528587341, "learning_rate": 9.786365659240194e-06, "loss": 0.4682, "step": 960 }, { "epoch": 1.6249214659685864, "grad_norm": 0.823998212814331, "learning_rate": 9.600672485809868e-06, "loss": 0.4823, "step": 970 }, { "epoch": 1.641675392670157, "grad_norm": 0.8117020726203918, "learning_rate": 9.41511713778326e-06, "loss": 0.4565, "step": 980 }, { "epoch": 1.6584293193717277, "grad_norm": 0.7982953786849976, "learning_rate": 9.229763658432752e-06, "loss": 0.4896, "step": 990 }, { "epoch": 1.6751832460732983, "grad_norm": 0.8489425778388977, "learning_rate": 9.044676021356988e-06, "loss": 0.451, "step": 1000 }, { "epoch": 1.6751832460732983, "eval_loss": 0.4372614920139313, "eval_runtime": 72.7911, "eval_samples_per_second": 14.576, "eval_steps_per_second": 7.295, "step": 1000 }, { "epoch": 1.691937172774869, "grad_norm": 0.8381561636924744, "learning_rate": 8.859918108400817e-06, "loss": 0.4762, "step": 1010 }, { "epoch": 1.70869109947644, "grad_norm": 0.8479191660881042, "learning_rate": 8.675553687606864e-06, "loss": 0.4385, "step": 1020 }, { "epoch": 1.7254450261780105, "grad_norm": 0.8599492311477661, "learning_rate": 8.491646391206374e-06, "loss": 0.4365, "step": 1030 }, { "epoch": 1.7421989528795812, "grad_norm": 0.8813064098358154, "learning_rate": 8.308259693656972e-06, "loss": 0.4494, "step": 1040 }, { "epoch": 1.7589528795811518, "grad_norm": 0.9143653512001038, "learning_rate": 8.125456889734816e-06, "loss": 0.4245, "step": 1050 }, { "epoch": 1.7757068062827224, "grad_norm": 0.8547804951667786, "learning_rate": 7.943301072688832e-06, "loss": 0.4275, "step": 1060 }, { "epoch": 1.7924607329842932, "grad_norm": 0.8245564699172974, "learning_rate": 7.761855112464447e-06, "loss": 0.4065, "step": 1070 }, { "epoch": 1.809214659685864, "grad_norm": 0.9589866995811462, "learning_rate": 7.581181634004423e-06, "loss": 0.4725, "step": 1080 }, { "epoch": 1.8259685863874346, "grad_norm": 0.8072324395179749, "learning_rate": 7.401342995634275e-06, "loss": 0.4779, "step": 1090 }, { "epoch": 1.8427225130890053, "grad_norm": 0.8404145240783691, "learning_rate": 7.222401267539634e-06, "loss": 0.4746, "step": 1100 }, { "epoch": 1.8594764397905759, "grad_norm": 0.8116699457168579, "learning_rate": 7.044418210343161e-06, "loss": 0.4744, "step": 1110 }, { "epoch": 1.8762303664921465, "grad_norm": 0.8185380697250366, "learning_rate": 6.867455253788214e-06, "loss": 0.3999, "step": 1120 }, { "epoch": 1.8929842931937173, "grad_norm": 0.9066283106803894, "learning_rate": 6.691573475536774e-06, "loss": 0.4411, "step": 1130 }, { "epoch": 1.909738219895288, "grad_norm": 0.9375002980232239, "learning_rate": 6.516833580088873e-06, "loss": 0.4061, "step": 1140 }, { "epoch": 1.9264921465968587, "grad_norm": 0.8629588484764099, "learning_rate": 6.343295877830797e-06, "loss": 0.4575, "step": 1150 }, { "epoch": 1.9432460732984294, "grad_norm": 0.8223278522491455, "learning_rate": 6.171020264219344e-06, "loss": 0.4422, "step": 1160 }, { "epoch": 1.96, "grad_norm": 0.8139267563819885, "learning_rate": 6.000066199109287e-06, "loss": 0.4382, "step": 1170 }, { "epoch": 1.9767539267015706, "grad_norm": 0.8527988195419312, "learning_rate": 5.830492686231151e-06, "loss": 0.3895, "step": 1180 }, { "epoch": 1.9935078534031414, "grad_norm": 0.9887182116508484, "learning_rate": 5.662358252826483e-06, "loss": 0.3828, "step": 1190 }, { "epoch": 2.0100523560209425, "grad_norm": 0.8452683687210083, "learning_rate": 5.49572092944749e-06, "loss": 0.3828, "step": 1200 }, { "epoch": 2.026806282722513, "grad_norm": 0.9050988554954529, "learning_rate": 5.330638229928193e-06, "loss": 0.4497, "step": 1210 }, { "epoch": 2.0435602094240837, "grad_norm": 1.036914587020874, "learning_rate": 5.167167131533877e-06, "loss": 0.4706, "step": 1220 }, { "epoch": 2.0603141361256543, "grad_norm": 0.8268985748291016, "learning_rate": 5.005364055295727e-06, "loss": 0.4536, "step": 1230 }, { "epoch": 2.0770680628272253, "grad_norm": 0.9379895329475403, "learning_rate": 4.84528484653748e-06, "loss": 0.3812, "step": 1240 }, { "epoch": 2.093821989528796, "grad_norm": 0.9189339876174927, "learning_rate": 4.686984755600757e-06, "loss": 0.3778, "step": 1250 }, { "epoch": 2.1105759162303666, "grad_norm": 0.900139570236206, "learning_rate": 4.530518418775734e-06, "loss": 0.3908, "step": 1260 }, { "epoch": 2.127329842931937, "grad_norm": 0.8743225336074829, "learning_rate": 4.375939839443787e-06, "loss": 0.4186, "step": 1270 }, { "epoch": 2.1440837696335078, "grad_norm": 0.9526062607765198, "learning_rate": 4.223302369438554e-06, "loss": 0.46, "step": 1280 }, { "epoch": 2.1608376963350784, "grad_norm": 0.8931829333305359, "learning_rate": 4.072658690631887e-06, "loss": 0.4238, "step": 1290 }, { "epoch": 2.1775916230366494, "grad_norm": 0.9619183540344238, "learning_rate": 3.924060796751012e-06, "loss": 0.4357, "step": 1300 }, { "epoch": 2.19434554973822, "grad_norm": 1.0413931608200073, "learning_rate": 3.7775599754332457e-06, "loss": 0.4443, "step": 1310 }, { "epoch": 2.2110994764397907, "grad_norm": 0.9470687508583069, "learning_rate": 3.6332067905243728e-06, "loss": 0.4208, "step": 1320 }, { "epoch": 2.2278534031413613, "grad_norm": 0.9725884199142456, "learning_rate": 3.491051064626847e-06, "loss": 0.4307, "step": 1330 }, { "epoch": 2.244607329842932, "grad_norm": 0.9030916094779968, "learning_rate": 3.3511418619038406e-06, "loss": 0.4386, "step": 1340 }, { "epoch": 2.2613612565445025, "grad_norm": 0.85715651512146, "learning_rate": 3.213527471145037e-06, "loss": 0.4329, "step": 1350 }, { "epoch": 2.278115183246073, "grad_norm": 0.9860045909881592, "learning_rate": 3.078255389100078e-06, "loss": 0.3981, "step": 1360 }, { "epoch": 2.294869109947644, "grad_norm": 0.9435902237892151, "learning_rate": 2.9453723040853433e-06, "loss": 0.3627, "step": 1370 }, { "epoch": 2.3116230366492148, "grad_norm": 0.9470138549804688, "learning_rate": 2.8149240798697632e-06, "loss": 0.3991, "step": 1380 }, { "epoch": 2.3283769633507854, "grad_norm": 0.8298546075820923, "learning_rate": 2.6869557398452397e-06, "loss": 0.3823, "step": 1390 }, { "epoch": 2.345130890052356, "grad_norm": 0.8885378837585449, "learning_rate": 2.5615114514870966e-06, "loss": 0.4355, "step": 1400 }, { "epoch": 2.3618848167539266, "grad_norm": 1.0156524181365967, "learning_rate": 2.438634511109941e-06, "loss": 0.4289, "step": 1410 }, { "epoch": 2.378638743455497, "grad_norm": 0.9947994351387024, "learning_rate": 2.318367328924234e-06, "loss": 0.3928, "step": 1420 }, { "epoch": 2.3953926701570682, "grad_norm": 0.8874132037162781, "learning_rate": 2.2007514143986386e-06, "loss": 0.374, "step": 1430 }, { "epoch": 2.412146596858639, "grad_norm": 0.9219970703125, "learning_rate": 2.0858273619333135e-06, "loss": 0.4454, "step": 1440 }, { "epoch": 2.4289005235602095, "grad_norm": 0.9621909856796265, "learning_rate": 1.9736348368489966e-06, "loss": 0.401, "step": 1450 }, { "epoch": 2.44565445026178, "grad_norm": 1.1088659763336182, "learning_rate": 1.8642125616967587e-06, "loss": 0.4059, "step": 1460 }, { "epoch": 2.4624083769633507, "grad_norm": 0.9601513147354126, "learning_rate": 1.7575983028931775e-06, "loss": 0.4326, "step": 1470 }, { "epoch": 2.4791623036649213, "grad_norm": 0.936352550983429, "learning_rate": 1.6538288576854721e-06, "loss": 0.4374, "step": 1480 }, { "epoch": 2.4959162303664923, "grad_norm": 1.0005784034729004, "learning_rate": 1.5529400414511809e-06, "loss": 0.4076, "step": 1490 }, { "epoch": 2.512670157068063, "grad_norm": 0.990437388420105, "learning_rate": 1.4549666753366875e-06, "loss": 0.419, "step": 1500 }, { "epoch": 2.512670157068063, "eval_loss": 0.4265768229961395, "eval_runtime": 72.3773, "eval_samples_per_second": 14.659, "eval_steps_per_second": 7.337, "step": 1500 }, { "epoch": 2.5294240837696336, "grad_norm": 1.0548429489135742, "learning_rate": 1.3599425742389382e-06, "loss": 0.4199, "step": 1510 }, { "epoch": 2.546178010471204, "grad_norm": 0.9754648208618164, "learning_rate": 1.2679005351344308e-06, "loss": 0.4377, "step": 1520 }, { "epoch": 2.562931937172775, "grad_norm": 1.028577208518982, "learning_rate": 1.1788723257595403e-06, "loss": 0.4506, "step": 1530 }, { "epoch": 2.5796858638743454, "grad_norm": 0.9221422076225281, "learning_rate": 1.0928886736460954e-06, "loss": 0.3728, "step": 1540 }, { "epoch": 2.596439790575916, "grad_norm": 0.9151352643966675, "learning_rate": 1.0099792555159437e-06, "loss": 0.4214, "step": 1550 }, { "epoch": 2.613193717277487, "grad_norm": 1.017935872077942, "learning_rate": 9.301726870382277e-07, "loss": 0.4464, "step": 1560 }, { "epoch": 2.6299476439790577, "grad_norm": 0.9608612060546875, "learning_rate": 8.534965129528716e-07, "loss": 0.4239, "step": 1570 }, { "epoch": 2.6467015706806283, "grad_norm": 0.9908237457275391, "learning_rate": 7.799771975636672e-07, "loss": 0.393, "step": 1580 }, { "epoch": 2.663455497382199, "grad_norm": 0.9740267992019653, "learning_rate": 7.096401156042975e-07, "loss": 0.3993, "step": 1590 }, { "epoch": 2.6802094240837695, "grad_norm": 0.9356936812400818, "learning_rate": 6.425095434804074e-07, "loss": 0.3985, "step": 1600 }, { "epoch": 2.6969633507853406, "grad_norm": 0.9094387888908386, "learning_rate": 5.786086508907396e-07, "loss": 0.4152, "step": 1610 }, { "epoch": 2.7137172774869107, "grad_norm": 0.9659162759780884, "learning_rate": 5.179594928302723e-07, "loss": 0.4123, "step": 1620 }, { "epoch": 2.7304712041884818, "grad_norm": 0.9622294902801514, "learning_rate": 4.6058300197806994e-07, "loss": 0.4131, "step": 1630 }, { "epoch": 2.7472251308900524, "grad_norm": 1.0013716220855713, "learning_rate": 4.064989814724951e-07, "loss": 0.4437, "step": 1640 }, { "epoch": 2.763979057591623, "grad_norm": 1.0195082426071167, "learning_rate": 3.557260980762833e-07, "loss": 0.3943, "step": 1650 }, { "epoch": 2.7807329842931936, "grad_norm": 1.0120123624801636, "learning_rate": 3.0828187573382575e-07, "loss": 0.434, "step": 1660 }, { "epoch": 2.797486910994764, "grad_norm": 0.9160566926002502, "learning_rate": 2.641826895228894e-07, "loss": 0.4643, "step": 1670 }, { "epoch": 2.8142408376963353, "grad_norm": 0.9706994295120239, "learning_rate": 2.2344376000285606e-07, "loss": 0.4638, "step": 1680 }, { "epoch": 2.830994764397906, "grad_norm": 1.0439672470092773, "learning_rate": 1.8607914796144988e-07, "loss": 0.4458, "step": 1690 }, { "epoch": 2.8477486910994765, "grad_norm": 0.947834849357605, "learning_rate": 1.5210174956173495e-07, "loss": 0.4396, "step": 1700 }, { "epoch": 2.864502617801047, "grad_norm": 1.0074108839035034, "learning_rate": 1.2152329189109136e-07, "loss": 0.4331, "step": 1710 }, { "epoch": 2.8812565445026177, "grad_norm": 0.9114189147949219, "learning_rate": 9.435432891368279e-08, "loss": 0.4221, "step": 1720 }, { "epoch": 2.8980104712041888, "grad_norm": 0.9793130159378052, "learning_rate": 7.060423782781712e-08, "loss": 0.3817, "step": 1730 }, { "epoch": 2.914764397905759, "grad_norm": 0.9325648546218872, "learning_rate": 5.028121582947076e-08, "loss": 0.483, "step": 1740 }, { "epoch": 2.93151832460733, "grad_norm": 0.9619839191436768, "learning_rate": 3.339227728307637e-08, "loss": 0.4234, "step": 1750 }, { "epoch": 2.9482722513089006, "grad_norm": 0.9810802340507507, "learning_rate": 1.9943251300553745e-08, "loss": 0.4377, "step": 1760 }, { "epoch": 2.965026178010471, "grad_norm": 0.981601357460022, "learning_rate": 9.9387797294348e-09, "loss": 0.4259, "step": 1770 }, { "epoch": 2.981780104712042, "grad_norm": 0.981569766998291, "learning_rate": 3.3823155507550775e-09, "loss": 0.4402, "step": 1780 }, { "epoch": 2.9985340314136124, "grad_norm": 1.0313425064086914, "learning_rate": 2.761216872781969e-10, "loss": 0.432, "step": 1790 }, { "epoch": 3.0, "step": 1791, "total_flos": 3.5998475160807014e+17, "train_loss": 0.468212741586764, "train_runtime": 7355.8498, "train_samples_per_second": 3.894, "train_steps_per_second": 0.243 } ], "logging_steps": 10, "max_steps": 1791, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5998475160807014e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }