{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 1, "global_step": 48810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03073140749846343, "grad_norm": 5.49942684173584, "learning_rate": 0.0001, "loss": 11.0245, "step": 100, "train_loss_gtc": 4.04046875, "train_loss_gtm": 0.6575, "train_loss_lm": 6.331875 }, { "epoch": 0.06146281499692686, "grad_norm": 6.315459251403809, "learning_rate": 9.999896007507038e-05, "loss": 6.9106, "step": 200, "train_loss_gtc": 2.85453125, "train_loss_gtm": 0.6456640625, "train_loss_lm": 3.41375 }, { "epoch": 0.09219422249539029, "grad_norm": 4.463039398193359, "learning_rate": 9.999584034353926e-05, "loss": 5.6825, "step": 300, "train_loss_gtc": 2.053828125, "train_loss_gtm": 0.625390625, "train_loss_lm": 3.02734375 }, { "epoch": 0.12292562999385372, "grad_norm": 8.906160354614258, "learning_rate": 9.999064093517811e-05, "loss": 4.8225, "step": 400, "train_loss_gtc": 1.5528125, "train_loss_gtm": 0.550078125, "train_loss_lm": 2.7178125 }, { "epoch": 0.15365703749231716, "grad_norm": 5.962785720825195, "learning_rate": 9.99833620662667e-05, "loss": 4.3188, "step": 500, "train_loss_gtc": 1.3146875, "train_loss_gtm": 0.4680859375, "train_loss_lm": 2.55734375 }, { "epoch": 0.18438844499078058, "grad_norm": 15.813605308532715, "learning_rate": 9.997400403958414e-05, "loss": 3.9968, "step": 600, "train_loss_gtc": 1.151953125, "train_loss_gtm": 0.404775390625, "train_loss_lm": 2.43796875 }, { "epoch": 0.215119852489244, "grad_norm": 9.086814880371094, "learning_rate": 9.99625672443962e-05, "loss": 3.8064, "step": 700, "train_loss_gtc": 1.0492578125, "train_loss_gtm": 0.37134765625, "train_loss_lm": 2.38078125 }, { "epoch": 0.24585125998770743, "grad_norm": 10.451958656311035, "learning_rate": 9.994905215643926e-05, "loss": 3.6012, "step": 800, "train_loss_gtc": 0.9308203125, "train_loss_gtm": 0.342978515625, "train_loss_lm": 2.32328125 }, { "epoch": 0.2765826674861709, "grad_norm": 2.9532716274261475, "learning_rate": 9.993345933790036e-05, "loss": 3.4027, "step": 900, "train_loss_gtc": 0.80484375, "train_loss_gtm": 0.313017578125, "train_loss_lm": 2.28640625 }, { "epoch": 0.3073140749846343, "grad_norm": 5.513271331787109, "learning_rate": 9.991578943739396e-05, "loss": 3.2882, "step": 1000, "train_loss_gtc": 0.748984375, "train_loss_gtm": 0.29857421875, "train_loss_lm": 2.23609375 }, { "epoch": 0.33804548248309774, "grad_norm": 9.901054382324219, "learning_rate": 9.989604318993484e-05, "loss": 3.1962, "step": 1100, "train_loss_gtc": 0.694609375, "train_loss_gtm": 0.293974609375, "train_loss_lm": 2.22203125 }, { "epoch": 0.36877688998156116, "grad_norm": 4.640697956085205, "learning_rate": 9.987422141690761e-05, "loss": 3.0563, "step": 1200, "train_loss_gtc": 0.62521484375, "train_loss_gtm": 0.266708984375, "train_loss_lm": 2.163359375 }, { "epoch": 0.3995082974800246, "grad_norm": 6.046248435974121, "learning_rate": 9.98503250260325e-05, "loss": 2.9871, "step": 1300, "train_loss_gtc": 0.60119140625, "train_loss_gtm": 0.250830078125, "train_loss_lm": 2.1440625 }, { "epoch": 0.430239704978488, "grad_norm": 6.06462287902832, "learning_rate": 9.982435501132761e-05, "loss": 2.918, "step": 1400, "train_loss_gtc": 0.54515625, "train_loss_gtm": 0.228916015625, "train_loss_lm": 2.139453125 }, { "epoch": 0.46097111247695144, "grad_norm": 4.272490978240967, "learning_rate": 9.979631245306756e-05, "loss": 2.8624, "step": 1500, "train_loss_gtc": 0.5327734375, "train_loss_gtm": 0.236171875, "train_loss_lm": 2.10140625 }, { "epoch": 0.49170251997541486, "grad_norm": 2.8239645957946777, "learning_rate": 9.976619851773859e-05, "loss": 2.7952, "step": 1600, "train_loss_gtc": 0.49181640625, "train_loss_gtm": 0.216806640625, "train_loss_lm": 2.087890625 }, { "epoch": 0.5224339274738783, "grad_norm": 4.718524932861328, "learning_rate": 9.973401445798997e-05, "loss": 2.744, "step": 1700, "train_loss_gtc": 0.47638671875, "train_loss_gtm": 0.2067578125, "train_loss_lm": 2.06015625 }, { "epoch": 0.5531653349723418, "grad_norm": 7.191675662994385, "learning_rate": 9.969976161258194e-05, "loss": 2.6875, "step": 1800, "train_loss_gtc": 0.4446484375, "train_loss_gtm": 0.1912353515625, "train_loss_lm": 2.05390625 }, { "epoch": 0.5838967424708051, "grad_norm": 4.1135640144348145, "learning_rate": 9.966344140633001e-05, "loss": 2.6366, "step": 1900, "train_loss_gtc": 0.42078125, "train_loss_gtm": 0.187529296875, "train_loss_lm": 2.03140625 }, { "epoch": 0.6146281499692686, "grad_norm": 4.423370361328125, "learning_rate": 9.962505535004571e-05, "loss": 2.6245, "step": 2000, "train_loss_gtc": 0.40998046875, "train_loss_gtm": 0.1915087890625, "train_loss_lm": 2.026328125 }, { "epoch": 0.645359557467732, "grad_norm": 3.297764778137207, "learning_rate": 9.958460504047372e-05, "loss": 2.5585, "step": 2100, "train_loss_gtc": 0.38455078125, "train_loss_gtm": 0.176318359375, "train_loss_lm": 1.99703125 }, { "epoch": 0.6760909649661955, "grad_norm": 8.061768531799316, "learning_rate": 9.954209216022543e-05, "loss": 2.5188, "step": 2200, "train_loss_gtc": 0.36263671875, "train_loss_gtm": 0.1635595703125, "train_loss_lm": 1.987578125 }, { "epoch": 0.7068223724646588, "grad_norm": 4.367369174957275, "learning_rate": 9.949751847770904e-05, "loss": 2.5078, "step": 2300, "train_loss_gtc": 0.3640234375, "train_loss_gtm": 0.174541015625, "train_loss_lm": 1.973359375 }, { "epoch": 0.7375537799631223, "grad_norm": 1.8874632120132446, "learning_rate": 9.945088584705584e-05, "loss": 2.4485, "step": 2400, "train_loss_gtc": 0.33607421875, "train_loss_gtm": 0.14620361328125, "train_loss_lm": 1.95671875 }, { "epoch": 0.7682851874615857, "grad_norm": 2.437286376953125, "learning_rate": 9.940219620804327e-05, "loss": 2.4232, "step": 2500, "train_loss_gtc": 0.3200390625, "train_loss_gtm": 0.149814453125, "train_loss_lm": 1.95375 }, { "epoch": 0.7990165949600492, "grad_norm": 2.414090871810913, "learning_rate": 9.935145158601411e-05, "loss": 2.4102, "step": 2600, "train_loss_gtc": 0.317177734375, "train_loss_gtm": 0.153583984375, "train_loss_lm": 1.937109375 }, { "epoch": 0.8297480024585125, "grad_norm": 3.1979939937591553, "learning_rate": 9.929865409179224e-05, "loss": 2.3885, "step": 2700, "train_loss_gtc": 0.30353515625, "train_loss_gtm": 0.135172119140625, "train_loss_lm": 1.940078125 }, { "epoch": 0.860479409956976, "grad_norm": 5.63021993637085, "learning_rate": 9.92438059215949e-05, "loss": 2.3431, "step": 2800, "train_loss_gtc": 0.286796875, "train_loss_gtm": 0.1349951171875, "train_loss_lm": 1.92109375 }, { "epoch": 0.8912108174554395, "grad_norm": 2.3982977867126465, "learning_rate": 9.918690935694126e-05, "loss": 2.3297, "step": 2900, "train_loss_gtc": 0.28318359375, "train_loss_gtm": 0.134306640625, "train_loss_lm": 1.91375 }, { "epoch": 0.9219422249539029, "grad_norm": 3.208214282989502, "learning_rate": 9.912796676455757e-05, "loss": 2.3016, "step": 3000, "train_loss_gtc": 0.272578125, "train_loss_gtm": 0.124122314453125, "train_loss_lm": 1.913046875 }, { "epoch": 0.9526736324523664, "grad_norm": 2.7042226791381836, "learning_rate": 9.906698059627866e-05, "loss": 2.2748, "step": 3100, "train_loss_gtc": 0.25408203125, "train_loss_gtm": 0.121632080078125, "train_loss_lm": 1.896484375 }, { "epoch": 0.9834050399508297, "grad_norm": 3.007841110229492, "learning_rate": 9.900395338894601e-05, "loss": 2.2726, "step": 3200, "train_loss_gtc": 0.254970703125, "train_loss_gtm": 0.1286865234375, "train_loss_lm": 1.881171875 }, { "epoch": 1.0, "eval_loss": 2.549999952316284, "eval_runtime": 4.223, "eval_samples_per_second": 235.615, "eval_steps_per_second": 2.605, "step": 3254, "train_loss_gtc": 0.26175491898148145, "train_loss_gtm": 0.12819191261574073, "train_loss_lm": 1.8826678240740742, "val_loss_gtc": 0.39130859375, "val_loss_gtm": 0.2362060546875, "val_loss_lm": 1.94765625 }, { "epoch": 1.014136447449293, "grad_norm": 3.212726354598999, "learning_rate": 9.89388877643022e-05, "loss": 2.2582, "step": 3300, "train_loss_gtc": 0.24630604619565216, "train_loss_gtm": 0.11464259935461957, "train_loss_lm": 1.8685461956521738 }, { "epoch": 1.0448678549477566, "grad_norm": 2.5640087127685547, "learning_rate": 9.887178642888182e-05, "loss": 2.2174, "step": 3400, "train_loss_gtc": 0.23337890625, "train_loss_gtm": 0.11256591796875, "train_loss_lm": 1.86703125 }, { "epoch": 1.07559926244622, "grad_norm": 1.9021731615066528, "learning_rate": 9.880265217389893e-05, "loss": 2.2195, "step": 3500, "train_loss_gtc": 0.234921875, "train_loss_gtm": 0.125850830078125, "train_loss_lm": 1.86671875 }, { "epoch": 1.1063306699446835, "grad_norm": 2.093270778656006, "learning_rate": 9.873148787513093e-05, "loss": 2.2154, "step": 3600, "train_loss_gtc": 0.241884765625, "train_loss_gtm": 0.12220458984375, "train_loss_lm": 1.8584375 }, { "epoch": 1.1370620774431468, "grad_norm": 4.833855152130127, "learning_rate": 9.865829649279898e-05, "loss": 2.1983, "step": 3700, "train_loss_gtc": 0.2290625, "train_loss_gtm": 0.108033447265625, "train_loss_lm": 1.843671875 }, { "epoch": 1.1677934849416103, "grad_norm": 4.2011518478393555, "learning_rate": 9.858308107144479e-05, "loss": 2.1765, "step": 3800, "train_loss_gtc": 0.223466796875, "train_loss_gtm": 0.109500732421875, "train_loss_lm": 1.837578125 }, { "epoch": 1.1985248924400738, "grad_norm": 6.385040760040283, "learning_rate": 9.850584473980405e-05, "loss": 2.1558, "step": 3900, "train_loss_gtc": 0.21083984375, "train_loss_gtm": 0.102752685546875, "train_loss_lm": 1.843203125 }, { "epoch": 1.2292562999385372, "grad_norm": 2.9829607009887695, "learning_rate": 9.84265907106762e-05, "loss": 2.1519, "step": 4000, "train_loss_gtc": 0.208603515625, "train_loss_gtm": 0.11494384765625, "train_loss_lm": 1.833828125 }, { "epoch": 1.2599877074370007, "grad_norm": 1.9834630489349365, "learning_rate": 9.834532228079088e-05, "loss": 2.1325, "step": 4100, "train_loss_gtc": 0.201494140625, "train_loss_gtm": 0.102637939453125, "train_loss_lm": 1.829453125 }, { "epoch": 1.290719114935464, "grad_norm": 2.685356378555298, "learning_rate": 9.826204283067073e-05, "loss": 2.1218, "step": 4200, "train_loss_gtc": 0.19677734375, "train_loss_gtm": 0.100631103515625, "train_loss_lm": 1.82484375 }, { "epoch": 1.3214505224339275, "grad_norm": 1.5945948362350464, "learning_rate": 9.817675582449082e-05, "loss": 2.1261, "step": 4300, "train_loss_gtc": 0.19837890625, "train_loss_gtm": 0.11297119140625, "train_loss_lm": 1.828828125 }, { "epoch": 1.352181929932391, "grad_norm": 2.5570931434631348, "learning_rate": 9.80894648099345e-05, "loss": 2.1039, "step": 4400, "train_loss_gtc": 0.18681640625, "train_loss_gtm": 0.09926513671875, "train_loss_lm": 1.81125 }, { "epoch": 1.3829133374308542, "grad_norm": 1.3465452194213867, "learning_rate": 9.800017341804584e-05, "loss": 2.0879, "step": 4500, "train_loss_gtc": 0.1895263671875, "train_loss_gtm": 0.101627197265625, "train_loss_lm": 1.799453125 }, { "epoch": 1.4136447449293177, "grad_norm": 1.4922404289245605, "learning_rate": 9.790888536307865e-05, "loss": 2.0743, "step": 4600, "train_loss_gtc": 0.1802734375, "train_loss_gtm": 0.0869610595703125, "train_loss_lm": 1.797890625 }, { "epoch": 1.4443761524277812, "grad_norm": 3.5552456378936768, "learning_rate": 9.781560444234187e-05, "loss": 2.077, "step": 4700, "train_loss_gtc": 0.1821533203125, "train_loss_gtm": 0.10408935546875, "train_loss_lm": 1.79203125 }, { "epoch": 1.4751075599262446, "grad_norm": 4.11555290222168, "learning_rate": 9.77203345360417e-05, "loss": 2.0674, "step": 4800, "train_loss_gtc": 0.178369140625, "train_loss_gtm": 0.09654052734375, "train_loss_lm": 1.797109375 }, { "epoch": 1.5058389674247081, "grad_norm": 1.795981764793396, "learning_rate": 9.762307960712018e-05, "loss": 2.0636, "step": 4900, "train_loss_gtc": 0.179599609375, "train_loss_gtm": 0.10112548828125, "train_loss_lm": 1.788359375 }, { "epoch": 1.5365703749231714, "grad_norm": 1.9280930757522583, "learning_rate": 9.75238437010903e-05, "loss": 2.0431, "step": 5000, "train_loss_gtc": 0.170859375, "train_loss_gtm": 0.0848968505859375, "train_loss_lm": 1.785546875 }, { "epoch": 1.5673017824216349, "grad_norm": 1.1412756443023682, "learning_rate": 9.742263094586775e-05, "loss": 2.0316, "step": 5100, "train_loss_gtc": 0.171064453125, "train_loss_gtm": 0.0872491455078125, "train_loss_lm": 1.7815625 }, { "epoch": 1.5980331899200984, "grad_norm": 1.9907689094543457, "learning_rate": 9.731944555159926e-05, "loss": 2.0229, "step": 5200, "train_loss_gtc": 0.1637890625, "train_loss_gtm": 0.080010986328125, "train_loss_lm": 1.778359375 }, { "epoch": 1.6287645974185616, "grad_norm": 1.9761877059936523, "learning_rate": 9.721429181048736e-05, "loss": 2.0141, "step": 5300, "train_loss_gtc": 0.163154296875, "train_loss_gtm": 0.08893798828125, "train_loss_lm": 1.76640625 }, { "epoch": 1.6594960049170253, "grad_norm": 2.6358389854431152, "learning_rate": 9.710717409661191e-05, "loss": 2.0137, "step": 5400, "train_loss_gtc": 0.1599267578125, "train_loss_gtm": 0.08393280029296875, "train_loss_lm": 1.7646875 }, { "epoch": 1.6902274124154886, "grad_norm": 1.707661509513855, "learning_rate": 9.699809686574819e-05, "loss": 2.0079, "step": 5500, "train_loss_gtc": 0.157626953125, "train_loss_gtm": 0.0876666259765625, "train_loss_lm": 1.757421875 }, { "epoch": 1.720958819913952, "grad_norm": 2.0161757469177246, "learning_rate": 9.688706465518145e-05, "loss": 2.0002, "step": 5600, "train_loss_gtc": 0.1531884765625, "train_loss_gtm": 0.0824560546875, "train_loss_lm": 1.757421875 }, { "epoch": 1.7516902274124155, "grad_norm": 1.4757572412490845, "learning_rate": 9.677408208351822e-05, "loss": 1.9837, "step": 5700, "train_loss_gtc": 0.152705078125, "train_loss_gtm": 0.080008544921875, "train_loss_lm": 1.756640625 }, { "epoch": 1.7824216349108788, "grad_norm": 1.820297360420227, "learning_rate": 9.665915385049424e-05, "loss": 1.9852, "step": 5800, "train_loss_gtc": 0.1525732421875, "train_loss_gtm": 0.0837579345703125, "train_loss_lm": 1.7565625 }, { "epoch": 1.8131530424093425, "grad_norm": 2.550199031829834, "learning_rate": 9.65422847367789e-05, "loss": 1.9628, "step": 5900, "train_loss_gtc": 0.1431494140625, "train_loss_gtm": 0.070325927734375, "train_loss_lm": 1.748359375 }, { "epoch": 1.8438844499078058, "grad_norm": 3.284813642501831, "learning_rate": 9.642347960377638e-05, "loss": 1.9785, "step": 6000, "train_loss_gtc": 0.149755859375, "train_loss_gtm": 0.085838623046875, "train_loss_lm": 1.74515625 }, { "epoch": 1.8746158574062692, "grad_norm": 2.071235418319702, "learning_rate": 9.630274339342344e-05, "loss": 1.9699, "step": 6100, "train_loss_gtc": 0.1496533203125, "train_loss_gtm": 0.08025115966796875, "train_loss_lm": 1.75 }, { "epoch": 1.9053472649047327, "grad_norm": 1.5470776557922363, "learning_rate": 9.618008112798393e-05, "loss": 1.9727, "step": 6200, "train_loss_gtc": 0.1493896484375, "train_loss_gtm": 0.080283203125, "train_loss_lm": 1.739921875 }, { "epoch": 1.936078672403196, "grad_norm": 6.53104305267334, "learning_rate": 9.605549790983973e-05, "loss": 1.9612, "step": 6300, "train_loss_gtc": 0.1470556640625, "train_loss_gtm": 0.0838421630859375, "train_loss_lm": 1.736328125 }, { "epoch": 1.9668100799016595, "grad_norm": 1.8970906734466553, "learning_rate": 9.592899892127863e-05, "loss": 1.9457, "step": 6400, "train_loss_gtc": 0.1391943359375, "train_loss_gtm": 0.0737689208984375, "train_loss_lm": 1.730859375 }, { "epoch": 1.997541487400123, "grad_norm": 1.675215721130371, "learning_rate": 9.580058942427867e-05, "loss": 1.9364, "step": 6500, "train_loss_gtc": 0.1374560546875, "train_loss_gtm": 0.0687762451171875, "train_loss_lm": 1.72921875 }, { "epoch": 2.0, "eval_loss": 2.19921875, "eval_runtime": 3.8844, "eval_samples_per_second": 256.154, "eval_steps_per_second": 2.832, "step": 6508, "train_loss_gtc": 0.137939453125, "train_loss_gtm": 0.0904998779296875, "train_loss_lm": 1.70703125, "val_loss_gtc": 0.2357421875, "val_loss_gtm": 0.210223388671875, "val_loss_lm": 1.7640625 }, { "epoch": 2.028272894898586, "grad_norm": 1.4447669982910156, "learning_rate": 9.567027476028937e-05, "loss": 1.9201, "step": 6600, "train_loss_gtc": 0.13188901154891305, "train_loss_gtm": 0.06937939187754756, "train_loss_lm": 1.7201086956521738 }, { "epoch": 2.05900430239705, "grad_norm": 2.3873608112335205, "learning_rate": 9.553806035000945e-05, "loss": 1.9203, "step": 6700, "train_loss_gtc": 0.12853515625, "train_loss_gtm": 0.0747589111328125, "train_loss_lm": 1.7165625 }, { "epoch": 2.089735709895513, "grad_norm": 3.420974016189575, "learning_rate": 9.540395169316132e-05, "loss": 1.9248, "step": 6800, "train_loss_gtc": 0.1289501953125, "train_loss_gtm": 0.0793853759765625, "train_loss_lm": 1.7153125 }, { "epoch": 2.120467117393977, "grad_norm": 2.3101561069488525, "learning_rate": 9.526795436826242e-05, "loss": 1.9149, "step": 6900, "train_loss_gtc": 0.128876953125, "train_loss_gtm": 0.071492919921875, "train_loss_lm": 1.713671875 }, { "epoch": 2.15119852489244, "grad_norm": 1.0823053121566772, "learning_rate": 9.513007403239311e-05, "loss": 1.8968, "step": 7000, "train_loss_gtc": 0.1226025390625, "train_loss_gtm": 0.0626873779296875, "train_loss_lm": 1.707890625 }, { "epoch": 2.1819299323909034, "grad_norm": 2.2665770053863525, "learning_rate": 9.49903164209613e-05, "loss": 1.9036, "step": 7100, "train_loss_gtc": 0.128154296875, "train_loss_gtm": 0.06544097900390625, "train_loss_lm": 1.702578125 }, { "epoch": 2.212661339889367, "grad_norm": 0.9536680579185486, "learning_rate": 9.484868734746399e-05, "loss": 1.8943, "step": 7200, "train_loss_gtc": 0.119287109375, "train_loss_gtm": 0.0679217529296875, "train_loss_lm": 1.700703125 }, { "epoch": 2.2433927473878303, "grad_norm": 1.799402117729187, "learning_rate": 9.470519270324532e-05, "loss": 1.8917, "step": 7300, "train_loss_gtc": 0.121845703125, "train_loss_gtm": 0.05860137939453125, "train_loss_lm": 1.7115625 }, { "epoch": 2.2741241548862936, "grad_norm": 1.3167532682418823, "learning_rate": 9.455983845725164e-05, "loss": 1.8896, "step": 7400, "train_loss_gtc": 0.12458984375, "train_loss_gtm": 0.06630035400390626, "train_loss_lm": 1.698984375 }, { "epoch": 2.3048555623847573, "grad_norm": 3.1567189693450928, "learning_rate": 9.441263065578308e-05, "loss": 1.8849, "step": 7500, "train_loss_gtc": 0.120859375, "train_loss_gtm": 0.063575439453125, "train_loss_lm": 1.69515625 }, { "epoch": 2.3355869698832206, "grad_norm": 2.949071168899536, "learning_rate": 9.426357542224215e-05, "loss": 1.8767, "step": 7600, "train_loss_gtc": 0.1182275390625, "train_loss_gtm": 0.067989501953125, "train_loss_lm": 1.691875 }, { "epoch": 2.3663183773816843, "grad_norm": 2.110520362854004, "learning_rate": 9.411267895687898e-05, "loss": 1.8791, "step": 7700, "train_loss_gtc": 0.11953125, "train_loss_gtm": 0.068671875, "train_loss_lm": 1.693046875 }, { "epoch": 2.3970497848801475, "grad_norm": 1.1845890283584595, "learning_rate": 9.395994753653343e-05, "loss": 1.8692, "step": 7800, "train_loss_gtc": 0.1122509765625, "train_loss_gtm": 0.06687744140625, "train_loss_lm": 1.690078125 }, { "epoch": 2.427781192378611, "grad_norm": 1.572401762008667, "learning_rate": 9.380538751437396e-05, "loss": 1.869, "step": 7900, "train_loss_gtc": 0.1185498046875, "train_loss_gtm": 0.05891082763671875, "train_loss_lm": 1.69109375 }, { "epoch": 2.4585125998770745, "grad_norm": 1.395868182182312, "learning_rate": 9.364900531963336e-05, "loss": 1.8866, "step": 8000, "train_loss_gtc": 0.125126953125, "train_loss_gtm": 0.0729669189453125, "train_loss_lm": 1.689609375 }, { "epoch": 2.4892440073755377, "grad_norm": 1.1641755104064941, "learning_rate": 9.349080745734135e-05, "loss": 1.867, "step": 8100, "train_loss_gtc": 0.1189306640625, "train_loss_gtm": 0.06694183349609376, "train_loss_lm": 1.68921875 }, { "epoch": 2.5199754148740015, "grad_norm": 2.092716932296753, "learning_rate": 9.333080050805396e-05, "loss": 1.8538, "step": 8200, "train_loss_gtc": 0.114306640625, "train_loss_gtm": 0.0646319580078125, "train_loss_lm": 1.68078125 }, { "epoch": 2.5507068223724647, "grad_norm": 1.8535902500152588, "learning_rate": 9.316899112757982e-05, "loss": 1.8524, "step": 8300, "train_loss_gtc": 0.1098681640625, "train_loss_gtm": 0.06549957275390625, "train_loss_lm": 1.6834375 }, { "epoch": 2.581438229870928, "grad_norm": 1.1401584148406982, "learning_rate": 9.300538604670325e-05, "loss": 1.8498, "step": 8400, "train_loss_gtc": 0.109970703125, "train_loss_gtm": 0.0634844970703125, "train_loss_lm": 1.677734375 }, { "epoch": 2.6121696373693917, "grad_norm": 1.7290570735931396, "learning_rate": 9.283999207090439e-05, "loss": 1.8523, "step": 8500, "train_loss_gtc": 0.1066796875, "train_loss_gtm": 0.06089630126953125, "train_loss_lm": 1.683203125 }, { "epoch": 2.642901044867855, "grad_norm": 0.7238840460777283, "learning_rate": 9.267281608007592e-05, "loss": 1.8537, "step": 8600, "train_loss_gtc": 0.1138037109375, "train_loss_gtm": 0.066612548828125, "train_loss_lm": 1.6834375 }, { "epoch": 2.673632452366318, "grad_norm": 2.260568380355835, "learning_rate": 9.250386502823712e-05, "loss": 1.8303, "step": 8700, "train_loss_gtc": 0.102099609375, "train_loss_gtm": 0.0620458984375, "train_loss_lm": 1.6715625 }, { "epoch": 2.704363859864782, "grad_norm": 1.1292821168899536, "learning_rate": 9.233314594324437e-05, "loss": 1.8346, "step": 8800, "train_loss_gtc": 0.106123046875, "train_loss_gtm": 0.06123687744140625, "train_loss_lm": 1.66734375 }, { "epoch": 2.735095267363245, "grad_norm": 1.3726723194122314, "learning_rate": 9.216066592649899e-05, "loss": 1.835, "step": 8900, "train_loss_gtc": 0.1031640625, "train_loss_gtm": 0.055390625, "train_loss_lm": 1.670390625 }, { "epoch": 2.7658266748617084, "grad_norm": 0.7613235712051392, "learning_rate": 9.198643215265175e-05, "loss": 1.8289, "step": 9000, "train_loss_gtc": 0.1032861328125, "train_loss_gtm": 0.05791168212890625, "train_loss_lm": 1.670234375 }, { "epoch": 2.796558082360172, "grad_norm": 1.4104223251342773, "learning_rate": 9.181045186930446e-05, "loss": 1.8226, "step": 9100, "train_loss_gtc": 0.10169921875, "train_loss_gtm": 0.05517242431640625, "train_loss_lm": 1.665 }, { "epoch": 2.8272894898586354, "grad_norm": 2.643277406692505, "learning_rate": 9.163273239670845e-05, "loss": 1.8278, "step": 9200, "train_loss_gtc": 0.1060205078125, "train_loss_gtm": 0.0587158203125, "train_loss_lm": 1.66890625 }, { "epoch": 2.858020897357099, "grad_norm": 0.7709031105041504, "learning_rate": 9.145328112746013e-05, "loss": 1.8159, "step": 9300, "train_loss_gtc": 0.10208984375, "train_loss_gtm": 0.05981475830078125, "train_loss_lm": 1.654921875 }, { "epoch": 2.8887523048555623, "grad_norm": 1.2432808876037598, "learning_rate": 9.127210552619346e-05, "loss": 1.8186, "step": 9400, "train_loss_gtc": 0.104443359375, "train_loss_gtm": 0.05574615478515625, "train_loss_lm": 1.65515625 }, { "epoch": 2.919483712354026, "grad_norm": 0.7463958859443665, "learning_rate": 9.108921312926937e-05, "loss": 1.8259, "step": 9500, "train_loss_gtc": 0.1081005859375, "train_loss_gtm": 0.06411102294921875, "train_loss_lm": 1.660234375 }, { "epoch": 2.9502151198524893, "grad_norm": 1.5550811290740967, "learning_rate": 9.090461154446243e-05, "loss": 1.8085, "step": 9600, "train_loss_gtc": 0.0987890625, "train_loss_gtm": 0.0575555419921875, "train_loss_lm": 1.650625 }, { "epoch": 2.9809465273509526, "grad_norm": 1.3831332921981812, "learning_rate": 9.071830845064421e-05, "loss": 1.8021, "step": 9700, "train_loss_gtc": 0.093544921875, "train_loss_gtm": 0.04638153076171875, "train_loss_lm": 1.653125 }, { "epoch": 3.0, "eval_loss": 2.033203125, "eval_runtime": 3.9269, "eval_samples_per_second": 253.383, "eval_steps_per_second": 2.801, "step": 9762, "train_loss_gtc": 0.09482500630040322, "train_loss_gtm": 0.03868521413495464, "train_loss_lm": 1.6529737903225807, "val_loss_gtc": 0.195166015625, "val_loss_gtm": 0.1783203125, "val_loss_lm": 1.67734375 }, { "epoch": 3.0116779348494163, "grad_norm": 0.6389613747596741, "learning_rate": 9.0530311597464e-05, "loss": 1.7867, "step": 9800, "train_loss_gtc": 0.0934094880756579, "train_loss_gtm": 0.05592105263157895, "train_loss_lm": 1.6383634868421053 }, { "epoch": 3.0424093423478795, "grad_norm": 4.752490520477295, "learning_rate": 9.034062880502636e-05, "loss": 1.8109, "step": 9900, "train_loss_gtc": 0.106484375, "train_loss_gtm": 0.065299072265625, "train_loss_lm": 1.645078125 }, { "epoch": 3.073140749846343, "grad_norm": 0.8840903043746948, "learning_rate": 9.014926796356588e-05, "loss": 1.7886, "step": 10000, "train_loss_gtc": 0.0953857421875, "train_loss_gtm": 0.05199127197265625, "train_loss_lm": 1.638125 }, { "epoch": 3.1038721573448065, "grad_norm": 1.790175199508667, "learning_rate": 8.995623703311894e-05, "loss": 1.7777, "step": 10100, "train_loss_gtc": 0.09615478515625, "train_loss_gtm": 0.04876113891601563, "train_loss_lm": 1.631875 }, { "epoch": 3.1346035648432697, "grad_norm": 1.167677879333496, "learning_rate": 8.976154404319261e-05, "loss": 1.7916, "step": 10200, "train_loss_gtc": 0.0942919921875, "train_loss_gtm": 0.0605712890625, "train_loss_lm": 1.639140625 }, { "epoch": 3.1653349723417334, "grad_norm": 1.2573415040969849, "learning_rate": 8.956519709243065e-05, "loss": 1.7905, "step": 10300, "train_loss_gtc": 0.097001953125, "train_loss_gtm": 0.06192230224609375, "train_loss_lm": 1.638203125 }, { "epoch": 3.1960663798401967, "grad_norm": 0.9823325276374817, "learning_rate": 8.93672043482766e-05, "loss": 1.7674, "step": 10400, "train_loss_gtc": 0.09020751953125, "train_loss_gtm": 0.0457440185546875, "train_loss_lm": 1.631875 }, { "epoch": 3.22679778733866, "grad_norm": 0.8545394539833069, "learning_rate": 8.91675740466341e-05, "loss": 1.7727, "step": 10500, "train_loss_gtc": 0.09113525390625, "train_loss_gtm": 0.04704864501953125, "train_loss_lm": 1.63609375 }, { "epoch": 3.2575291948371237, "grad_norm": 0.8178197741508484, "learning_rate": 8.896631449152425e-05, "loss": 1.7856, "step": 10600, "train_loss_gtc": 0.09496826171875, "train_loss_gtm": 0.058369140625, "train_loss_lm": 1.636484375 }, { "epoch": 3.288260602335587, "grad_norm": 1.6585794687271118, "learning_rate": 8.876343405474018e-05, "loss": 1.7747, "step": 10700, "train_loss_gtc": 0.0904931640625, "train_loss_gtm": 0.0434954833984375, "train_loss_lm": 1.6378125 }, { "epoch": 3.3189920098340506, "grad_norm": 1.3470587730407715, "learning_rate": 8.855894117549885e-05, "loss": 1.7657, "step": 10800, "train_loss_gtc": 0.08614501953125, "train_loss_gtm": 0.04624908447265625, "train_loss_lm": 1.6303125 }, { "epoch": 3.349723417332514, "grad_norm": 0.6378850936889648, "learning_rate": 8.835284436009e-05, "loss": 1.7683, "step": 10900, "train_loss_gtc": 0.0873779296875, "train_loss_gtm": 0.04869110107421875, "train_loss_lm": 1.6296875 }, { "epoch": 3.380454824830977, "grad_norm": 1.9016733169555664, "learning_rate": 8.814515218152226e-05, "loss": 1.7686, "step": 11000, "train_loss_gtc": 0.08774169921875, "train_loss_gtm": 0.04988037109375, "train_loss_lm": 1.6275 }, { "epoch": 3.411186232329441, "grad_norm": 6.191075325012207, "learning_rate": 8.793587327916661e-05, "loss": 1.7661, "step": 11100, "train_loss_gtc": 0.08765625, "train_loss_gtm": 0.04734375, "train_loss_lm": 1.626875 }, { "epoch": 3.441917639827904, "grad_norm": 1.3823864459991455, "learning_rate": 8.772501635839694e-05, "loss": 1.7634, "step": 11200, "train_loss_gtc": 0.0852392578125, "train_loss_gtm": 0.047673492431640624, "train_loss_lm": 1.630546875 }, { "epoch": 3.4726490473263674, "grad_norm": 0.9048540592193604, "learning_rate": 8.751259019022801e-05, "loss": 1.763, "step": 11300, "train_loss_gtc": 0.0891015625, "train_loss_gtm": 0.0477813720703125, "train_loss_lm": 1.626171875 }, { "epoch": 3.503380454824831, "grad_norm": 1.5553096532821655, "learning_rate": 8.729860361095056e-05, "loss": 1.7607, "step": 11400, "train_loss_gtc": 0.086826171875, "train_loss_gtm": 0.05027008056640625, "train_loss_lm": 1.627890625 }, { "epoch": 3.5341118623232943, "grad_norm": 1.639862060546875, "learning_rate": 8.708306552176368e-05, "loss": 1.7502, "step": 11500, "train_loss_gtc": 0.0829248046875, "train_loss_gtm": 0.046551055908203125, "train_loss_lm": 1.6209375 }, { "epoch": 3.5648432698217576, "grad_norm": 0.8203203678131104, "learning_rate": 8.68659848884047e-05, "loss": 1.7439, "step": 11600, "train_loss_gtc": 0.0837255859375, "train_loss_gtm": 0.04283203125, "train_loss_lm": 1.6215625 }, { "epoch": 3.5955746773202213, "grad_norm": 0.7728437781333923, "learning_rate": 8.664737074077606e-05, "loss": 1.7501, "step": 11700, "train_loss_gtc": 0.08592041015625, "train_loss_gtm": 0.0465325927734375, "train_loss_lm": 1.62140625 }, { "epoch": 3.6263060848186845, "grad_norm": 2.764263391494751, "learning_rate": 8.642723217256991e-05, "loss": 1.748, "step": 11800, "train_loss_gtc": 0.08718017578125, "train_loss_gtm": 0.05325942993164062, "train_loss_lm": 1.614921875 }, { "epoch": 3.6570374923171483, "grad_norm": 1.381459355354309, "learning_rate": 8.620557834088962e-05, "loss": 1.7502, "step": 11900, "train_loss_gtc": 0.08507568359375, "train_loss_gtm": 0.04700302124023437, "train_loss_lm": 1.6196875 }, { "epoch": 3.6877688998156115, "grad_norm": 3.3425018787384033, "learning_rate": 8.598241846586899e-05, "loss": 1.7493, "step": 12000, "train_loss_gtc": 0.0854345703125, "train_loss_gtm": 0.04688232421875, "train_loss_lm": 1.61484375 }, { "epoch": 3.718500307314075, "grad_norm": 1.8849352598190308, "learning_rate": 8.575776183028873e-05, "loss": 1.7444, "step": 12100, "train_loss_gtc": 0.08189208984375, "train_loss_gtm": 0.05115646362304688, "train_loss_lm": 1.619765625 }, { "epoch": 3.7492317148125385, "grad_norm": 0.8946220278739929, "learning_rate": 8.553161777919028e-05, "loss": 1.7366, "step": 12200, "train_loss_gtc": 0.08007568359375, "train_loss_gtm": 0.04531814575195312, "train_loss_lm": 1.614140625 }, { "epoch": 3.7799631223110017, "grad_norm": 0.682151734828949, "learning_rate": 8.530399571948708e-05, "loss": 1.7215, "step": 12300, "train_loss_gtc": 0.0752392578125, "train_loss_gtm": 0.034449920654296876, "train_loss_lm": 1.606171875 }, { "epoch": 3.8106945298094654, "grad_norm": 0.6967170238494873, "learning_rate": 8.507490511957333e-05, "loss": 1.7367, "step": 12400, "train_loss_gtc": 0.0777197265625, "train_loss_gtm": 0.045133056640625, "train_loss_lm": 1.610703125 }, { "epoch": 3.8414259373079287, "grad_norm": 1.3819748163223267, "learning_rate": 8.484435550893006e-05, "loss": 1.7275, "step": 12500, "train_loss_gtc": 0.07723388671875, "train_loss_gtm": 0.037061309814453124, "train_loss_lm": 1.611875 }, { "epoch": 3.872157344806392, "grad_norm": 1.2844618558883667, "learning_rate": 8.461235647772877e-05, "loss": 1.7219, "step": 12600, "train_loss_gtc": 0.07914794921875, "train_loss_gtm": 0.039013671875, "train_loss_lm": 1.610703125 }, { "epoch": 3.9028887523048557, "grad_norm": 1.2111929655075073, "learning_rate": 8.437891767643251e-05, "loss": 1.7278, "step": 12700, "train_loss_gtc": 0.078544921875, "train_loss_gtm": 0.038914642333984374, "train_loss_lm": 1.610078125 }, { "epoch": 3.933620159803319, "grad_norm": 0.8348441123962402, "learning_rate": 8.414404881539443e-05, "loss": 1.7255, "step": 12800, "train_loss_gtc": 0.0780419921875, "train_loss_gtm": 0.044105224609375, "train_loss_lm": 1.602890625 }, { "epoch": 3.964351567301782, "grad_norm": 1.4092820882797241, "learning_rate": 8.39077596644539e-05, "loss": 1.7195, "step": 12900, "train_loss_gtc": 0.07587646484375, "train_loss_gtm": 0.0384991455078125, "train_loss_lm": 1.60671875 }, { "epoch": 3.995082974800246, "grad_norm": 3.6042683124542236, "learning_rate": 8.367006005253006e-05, "loss": 1.7295, "step": 13000, "train_loss_gtc": 0.08053466796875, "train_loss_gtm": 0.04183273315429688, "train_loss_lm": 1.60703125 }, { "epoch": 4.0, "eval_loss": 1.9796874523162842, "eval_runtime": 3.8758, "eval_samples_per_second": 256.721, "eval_steps_per_second": 2.838, "step": 13016, "train_loss_gtc": 0.079498291015625, "train_loss_gtm": 0.044208526611328125, "train_loss_lm": 1.59814453125, "val_loss_gtc": 0.159912109375, "val_loss_gtm": 0.183050537109375, "val_loss_lm": 1.61953125 }, { "epoch": 4.02581438229871, "grad_norm": 1.8121000528335571, "learning_rate": 8.343095986721301e-05, "loss": 1.7206, "step": 13100, "train_loss_gtc": 0.07986014229910714, "train_loss_gtm": 0.055745079403831846, "train_loss_lm": 1.5932849702380953 }, { "epoch": 4.056545789797172, "grad_norm": 1.3698956966400146, "learning_rate": 8.319046905435246e-05, "loss": 1.7096, "step": 13200, "train_loss_gtc": 0.0751611328125, "train_loss_gtm": 0.042149658203125, "train_loss_lm": 1.59296875 }, { "epoch": 4.087277197295636, "grad_norm": 1.6034717559814453, "learning_rate": 8.294859761764408e-05, "loss": 1.7046, "step": 13300, "train_loss_gtc": 0.072431640625, "train_loss_gtm": 0.046780548095703124, "train_loss_lm": 1.58609375 }, { "epoch": 4.1180086047941, "grad_norm": 1.3316949605941772, "learning_rate": 8.270535561821336e-05, "loss": 1.7095, "step": 13400, "train_loss_gtc": 0.077919921875, "train_loss_gtm": 0.04380218505859375, "train_loss_lm": 1.591953125 }, { "epoch": 4.148740012292563, "grad_norm": 0.6827447414398193, "learning_rate": 8.246075317419706e-05, "loss": 1.7173, "step": 13500, "train_loss_gtc": 0.07958740234375, "train_loss_gtm": 0.0477728271484375, "train_loss_lm": 1.598125 }, { "epoch": 4.179471419791026, "grad_norm": 1.5629603862762451, "learning_rate": 8.221480046032233e-05, "loss": 1.6964, "step": 13600, "train_loss_gtc": 0.07267578125, "train_loss_gtm": 0.03870758056640625, "train_loss_lm": 1.582734375 }, { "epoch": 4.21020282728949, "grad_norm": 1.0047539472579956, "learning_rate": 8.196750770748355e-05, "loss": 1.7064, "step": 13700, "train_loss_gtc": 0.0744775390625, "train_loss_gtm": 0.0351849365234375, "train_loss_lm": 1.5890625 }, { "epoch": 4.240934234787954, "grad_norm": 0.7914025187492371, "learning_rate": 8.171888520231666e-05, "loss": 1.7175, "step": 13800, "train_loss_gtc": 0.0821630859375, "train_loss_gtm": 0.05238189697265625, "train_loss_lm": 1.58953125 }, { "epoch": 4.2716656422864165, "grad_norm": 1.05272376537323, "learning_rate": 8.146894328677128e-05, "loss": 1.6928, "step": 13900, "train_loss_gtc": 0.0702734375, "train_loss_gtm": 0.033878173828125, "train_loss_lm": 1.58375 }, { "epoch": 4.30239704978488, "grad_norm": 1.6808894872665405, "learning_rate": 8.12176923576806e-05, "loss": 1.6968, "step": 14000, "train_loss_gtc": 0.07257080078125, "train_loss_gtm": 0.034125747680664065, "train_loss_lm": 1.585 }, { "epoch": 4.333128457283344, "grad_norm": 0.815800130367279, "learning_rate": 8.096514286632879e-05, "loss": 1.6977, "step": 14100, "train_loss_gtc": 0.070693359375, "train_loss_gtm": 0.030710296630859377, "train_loss_lm": 1.585390625 }, { "epoch": 4.363859864781807, "grad_norm": 0.7311274409294128, "learning_rate": 8.071130531801635e-05, "loss": 1.7137, "step": 14200, "train_loss_gtc": 0.079658203125, "train_loss_gtm": 0.0499749755859375, "train_loss_lm": 1.59171875 }, { "epoch": 4.3945912722802705, "grad_norm": 0.8525009155273438, "learning_rate": 8.045619027162303e-05, "loss": 1.6995, "step": 14300, "train_loss_gtc": 0.07261474609375, "train_loss_gtm": 0.03937774658203125, "train_loss_lm": 1.588359375 }, { "epoch": 4.425322679778734, "grad_norm": 1.277293086051941, "learning_rate": 8.019980833916874e-05, "loss": 1.6991, "step": 14400, "train_loss_gtc": 0.07212158203125, "train_loss_gtm": 0.042982177734375, "train_loss_lm": 1.585625 }, { "epoch": 4.456054087277197, "grad_norm": 0.7727832794189453, "learning_rate": 7.994217018537195e-05, "loss": 1.6925, "step": 14500, "train_loss_gtc": 0.07016845703125, "train_loss_gtm": 0.041646270751953124, "train_loss_lm": 1.580859375 }, { "epoch": 4.486785494775661, "grad_norm": 1.5050898790359497, "learning_rate": 7.968328652720627e-05, "loss": 1.6898, "step": 14600, "train_loss_gtc": 0.07381591796875, "train_loss_gtm": 0.035602569580078125, "train_loss_lm": 1.583203125 }, { "epoch": 4.517516902274124, "grad_norm": 0.8743451833724976, "learning_rate": 7.942316813345447e-05, "loss": 1.6976, "step": 14700, "train_loss_gtc": 0.07141357421875, "train_loss_gtm": 0.03574203491210937, "train_loss_lm": 1.58015625 }, { "epoch": 4.548248309772587, "grad_norm": 4.071852684020996, "learning_rate": 7.916182582426064e-05, "loss": 1.6793, "step": 14800, "train_loss_gtc": 0.065556640625, "train_loss_gtm": 0.03599624633789063, "train_loss_lm": 1.578359375 }, { "epoch": 4.578979717271051, "grad_norm": 1.2412759065628052, "learning_rate": 7.88992704706801e-05, "loss": 1.6891, "step": 14900, "train_loss_gtc": 0.07296875, "train_loss_gtm": 0.04167098999023437, "train_loss_lm": 1.58109375 }, { "epoch": 4.609711124769515, "grad_norm": 1.0076960325241089, "learning_rate": 7.863551299422714e-05, "loss": 1.6928, "step": 15000, "train_loss_gtc": 0.07355712890625, "train_loss_gtm": 0.040593414306640624, "train_loss_lm": 1.5796875 }, { "epoch": 4.640442532267977, "grad_norm": 1.8155709505081177, "learning_rate": 7.837056436642077e-05, "loss": 1.6972, "step": 15100, "train_loss_gtc": 0.07208251953125, "train_loss_gtm": 0.03765533447265625, "train_loss_lm": 1.5828125 }, { "epoch": 4.671173939766441, "grad_norm": 4.2761101722717285, "learning_rate": 7.810443560832832e-05, "loss": 1.6779, "step": 15200, "train_loss_gtc": 0.0666650390625, "train_loss_gtm": 0.03232818603515625, "train_loss_lm": 1.5771875 }, { "epoch": 4.701905347264905, "grad_norm": 1.0301436185836792, "learning_rate": 7.783713779010697e-05, "loss": 1.6814, "step": 15300, "train_loss_gtc": 0.0691845703125, "train_loss_gtm": 0.03757865905761719, "train_loss_lm": 1.57953125 }, { "epoch": 4.7326367547633685, "grad_norm": 3.180100679397583, "learning_rate": 7.756868203054334e-05, "loss": 1.6773, "step": 15400, "train_loss_gtc": 0.06718994140625, "train_loss_gtm": 0.030146408081054687, "train_loss_lm": 1.57796875 }, { "epoch": 4.763368162261831, "grad_norm": 0.845735490322113, "learning_rate": 7.729907949659089e-05, "loss": 1.6662, "step": 15500, "train_loss_gtc": 0.06385986328125, "train_loss_gtm": 0.027723541259765627, "train_loss_lm": 1.573125 }, { "epoch": 4.794099569760295, "grad_norm": 0.8206067681312561, "learning_rate": 7.702834140290547e-05, "loss": 1.6742, "step": 15600, "train_loss_gtc": 0.067158203125, "train_loss_gtm": 0.035130157470703124, "train_loss_lm": 1.571953125 }, { "epoch": 4.824830977258759, "grad_norm": 0.7254693508148193, "learning_rate": 7.675647901137879e-05, "loss": 1.6833, "step": 15700, "train_loss_gtc": 0.06796142578125, "train_loss_gtm": 0.03723342895507813, "train_loss_lm": 1.573984375 }, { "epoch": 4.855562384757222, "grad_norm": 1.2930517196655273, "learning_rate": 7.648350363066998e-05, "loss": 1.6783, "step": 15800, "train_loss_gtc": 0.0690478515625, "train_loss_gtm": 0.03417861938476562, "train_loss_lm": 1.574296875 }, { "epoch": 4.886293792255685, "grad_norm": 0.46316060423851013, "learning_rate": 7.620942661573523e-05, "loss": 1.6772, "step": 15900, "train_loss_gtc": 0.0691015625, "train_loss_gtm": 0.03562210083007813, "train_loss_lm": 1.568046875 }, { "epoch": 4.917025199754149, "grad_norm": 1.149546504020691, "learning_rate": 7.59342593673553e-05, "loss": 1.668, "step": 16000, "train_loss_gtc": 0.0667919921875, "train_loss_gtm": 0.035623931884765626, "train_loss_lm": 1.56515625 }, { "epoch": 4.947756607252612, "grad_norm": 0.4385952949523926, "learning_rate": 7.56580133316615e-05, "loss": 1.6674, "step": 16100, "train_loss_gtc": 0.06619873046875, "train_loss_gtm": 0.034098358154296876, "train_loss_lm": 1.56875 }, { "epoch": 4.9784880147510755, "grad_norm": 0.670734167098999, "learning_rate": 7.538069999965934e-05, "loss": 1.6746, "step": 16200, "train_loss_gtc": 0.067392578125, "train_loss_gtm": 0.04040283203125, "train_loss_lm": 1.568984375 }, { "epoch": 5.0, "eval_loss": 1.859765648841858, "eval_runtime": 3.9207, "eval_samples_per_second": 253.782, "eval_steps_per_second": 2.806, "step": 16270, "train_loss_gtc": 0.06170131138392857, "train_loss_gtm": 0.02779693603515625, "train_loss_lm": 1.574330357142857, "val_loss_gtc": 0.124658203125, "val_loss_gtm": 0.1553466796875, "val_loss_lm": 1.5875 }, { "epoch": 5.009219422249539, "grad_norm": 0.8819429278373718, "learning_rate": 7.510233090675076e-05, "loss": 1.6639, "step": 16300, "train_loss_gtc": 0.06167805989583333, "train_loss_gtm": 0.03746388753255208, "train_loss_lm": 1.55 }, { "epoch": 5.039950829748003, "grad_norm": 1.934515118598938, "learning_rate": 7.482291763225411e-05, "loss": 1.6471, "step": 16400, "train_loss_gtc": 0.0614111328125, "train_loss_gtm": 0.0246038818359375, "train_loss_lm": 1.560078125 }, { "epoch": 5.070682237246466, "grad_norm": 0.4644189476966858, "learning_rate": 7.454247179892258e-05, "loss": 1.6539, "step": 16500, "train_loss_gtc": 0.06216796875, "train_loss_gtm": 0.029619293212890627, "train_loss_lm": 1.55984375 }, { "epoch": 5.101413644744929, "grad_norm": 0.6986903548240662, "learning_rate": 7.426100507246073e-05, "loss": 1.654, "step": 16600, "train_loss_gtc": 0.06435791015625, "train_loss_gtm": 0.029658050537109376, "train_loss_lm": 1.554609375 }, { "epoch": 5.132145052243393, "grad_norm": 1.399057149887085, "learning_rate": 7.397852916103918e-05, "loss": 1.6514, "step": 16700, "train_loss_gtc": 0.06365234375, "train_loss_gtm": 0.032920303344726565, "train_loss_lm": 1.556171875 }, { "epoch": 5.162876459741856, "grad_norm": 1.2212918996810913, "learning_rate": 7.369505581480761e-05, "loss": 1.6591, "step": 16800, "train_loss_gtc": 0.06535888671875, "train_loss_gtm": 0.03793792724609375, "train_loss_lm": 1.55328125 }, { "epoch": 5.19360786724032, "grad_norm": 1.227950930595398, "learning_rate": 7.341059682540601e-05, "loss": 1.6542, "step": 16900, "train_loss_gtc": 0.06419921875, "train_loss_gtm": 0.03715614318847656, "train_loss_lm": 1.558828125 }, { "epoch": 5.224339274738783, "grad_norm": 0.7415390610694885, "learning_rate": 7.312516402547418e-05, "loss": 1.6535, "step": 17000, "train_loss_gtc": 0.06427001953125, "train_loss_gtm": 0.038449249267578124, "train_loss_lm": 1.5575 }, { "epoch": 5.255070682237246, "grad_norm": 0.4711204767227173, "learning_rate": 7.283876928815944e-05, "loss": 1.6536, "step": 17100, "train_loss_gtc": 0.062666015625, "train_loss_gtm": 0.032582550048828124, "train_loss_lm": 1.558359375 }, { "epoch": 5.28580208973571, "grad_norm": 0.8353385925292969, "learning_rate": 7.255142452662295e-05, "loss": 1.6433, "step": 17200, "train_loss_gtc": 0.0605859375, "train_loss_gtm": 0.029074554443359376, "train_loss_lm": 1.557421875 }, { "epoch": 5.316533497234174, "grad_norm": 0.8035210371017456, "learning_rate": 7.226314169354391e-05, "loss": 1.6511, "step": 17300, "train_loss_gtc": 0.0600830078125, "train_loss_gtm": 0.029854888916015624, "train_loss_lm": 1.558125 }, { "epoch": 5.347264904732636, "grad_norm": 1.1498240232467651, "learning_rate": 7.197393278062251e-05, "loss": 1.6475, "step": 17400, "train_loss_gtc": 0.0640478515625, "train_loss_gtm": 0.039508514404296875, "train_loss_lm": 1.549453125 }, { "epoch": 5.3779963122311, "grad_norm": 0.510197639465332, "learning_rate": 7.168380981808108e-05, "loss": 1.6438, "step": 17500, "train_loss_gtc": 0.06174072265625, "train_loss_gtm": 0.0270050048828125, "train_loss_lm": 1.551328125 }, { "epoch": 5.408727719729564, "grad_norm": 1.0153284072875977, "learning_rate": 7.139278487416369e-05, "loss": 1.6418, "step": 17600, "train_loss_gtc": 0.05983154296875, "train_loss_gtm": 0.031028366088867186, "train_loss_lm": 1.553203125 }, { "epoch": 5.439459127228027, "grad_norm": 3.0183732509613037, "learning_rate": 7.110087005463413e-05, "loss": 1.6466, "step": 17700, "train_loss_gtc": 0.0623046875, "train_loss_gtm": 0.03655166625976562, "train_loss_lm": 1.55625 }, { "epoch": 5.47019053472649, "grad_norm": 0.8955859541893005, "learning_rate": 7.080807750227229e-05, "loss": 1.6351, "step": 17800, "train_loss_gtc": 0.05905029296875, "train_loss_gtm": 0.029703750610351562, "train_loss_lm": 1.544765625 }, { "epoch": 5.500921942224954, "grad_norm": 0.4188254773616791, "learning_rate": 7.051441939636915e-05, "loss": 1.6359, "step": 17900, "train_loss_gtc": 0.05901123046875, "train_loss_gtm": 0.02728248596191406, "train_loss_lm": 1.54921875 }, { "epoch": 5.531653349723418, "grad_norm": 0.733801543712616, "learning_rate": 7.021990795222015e-05, "loss": 1.6387, "step": 18000, "train_loss_gtc": 0.0610791015625, "train_loss_gtm": 0.033560562133789065, "train_loss_lm": 1.550390625 }, { "epoch": 5.5623847572218805, "grad_norm": 1.336493968963623, "learning_rate": 6.992455542061697e-05, "loss": 1.6385, "step": 18100, "train_loss_gtc": 0.0579931640625, "train_loss_gtm": 0.0305810546875, "train_loss_lm": 1.5415625 }, { "epoch": 5.593116164720344, "grad_norm": 0.9108180999755859, "learning_rate": 6.962837408733806e-05, "loss": 1.6326, "step": 18200, "train_loss_gtc": 0.0611328125, "train_loss_gtm": 0.027354583740234376, "train_loss_lm": 1.54671875 }, { "epoch": 5.623847572218808, "grad_norm": 0.6176537871360779, "learning_rate": 6.933137627263747e-05, "loss": 1.6387, "step": 18300, "train_loss_gtc": 0.05935791015625, "train_loss_gtm": 0.027886962890625, "train_loss_lm": 1.550859375 }, { "epoch": 5.654578979717271, "grad_norm": 0.6086634993553162, "learning_rate": 6.903357433073251e-05, "loss": 1.6463, "step": 18400, "train_loss_gtc": 0.06116943359375, "train_loss_gtm": 0.029478378295898437, "train_loss_lm": 1.5525 }, { "epoch": 5.6853103872157345, "grad_norm": 0.6187928318977356, "learning_rate": 6.873498064928969e-05, "loss": 1.6362, "step": 18500, "train_loss_gtc": 0.059130859375, "train_loss_gtm": 0.0319903564453125, "train_loss_lm": 1.546171875 }, { "epoch": 5.716041794714198, "grad_norm": 0.7199295163154602, "learning_rate": 6.843560764890953e-05, "loss": 1.6304, "step": 18600, "train_loss_gtc": 0.0573388671875, "train_loss_gtm": 0.026798248291015625, "train_loss_lm": 1.54875 }, { "epoch": 5.746773202212661, "grad_norm": 0.48185673356056213, "learning_rate": 6.81354677826099e-05, "loss": 1.6356, "step": 18700, "train_loss_gtc": 0.05507568359375, "train_loss_gtm": 0.023614425659179688, "train_loss_lm": 1.552265625 }, { "epoch": 5.777504609711125, "grad_norm": 0.44653263688087463, "learning_rate": 6.783457353530797e-05, "loss": 1.629, "step": 18800, "train_loss_gtc": 0.057138671875, "train_loss_gtm": 0.02418853759765625, "train_loss_lm": 1.546953125 }, { "epoch": 5.808236017209588, "grad_norm": 0.6734046936035156, "learning_rate": 6.75329374233009e-05, "loss": 1.6311, "step": 18900, "train_loss_gtc": 0.05802734375, "train_loss_gtm": 0.025522842407226562, "train_loss_lm": 1.54484375 }, { "epoch": 5.838967424708052, "grad_norm": 1.3264803886413574, "learning_rate": 6.723057199374518e-05, "loss": 1.6371, "step": 19000, "train_loss_gtc": 0.057373046875, "train_loss_gtm": 0.030865325927734374, "train_loss_lm": 1.548671875 }, { "epoch": 5.869698832206515, "grad_norm": 1.0278254747390747, "learning_rate": 6.692748982413474e-05, "loss": 1.6338, "step": 19100, "train_loss_gtc": 0.05820068359375, "train_loss_gtm": 0.030936508178710936, "train_loss_lm": 1.550859375 }, { "epoch": 5.900430239704979, "grad_norm": 0.6339811086654663, "learning_rate": 6.662370352177774e-05, "loss": 1.6301, "step": 19200, "train_loss_gtc": 0.0586328125, "train_loss_gtm": 0.02888214111328125, "train_loss_lm": 1.5375 }, { "epoch": 5.931161647203442, "grad_norm": 0.892484724521637, "learning_rate": 6.631922572327213e-05, "loss": 1.6294, "step": 19300, "train_loss_gtc": 0.05766357421875, "train_loss_gtm": 0.03270828247070313, "train_loss_lm": 1.542890625 }, { "epoch": 5.961893054701905, "grad_norm": 0.8719256520271301, "learning_rate": 6.601406909398007e-05, "loss": 1.6334, "step": 19400, "train_loss_gtc": 0.05709716796875, "train_loss_gtm": 0.034244384765625, "train_loss_lm": 1.544453125 }, { "epoch": 5.992624462200369, "grad_norm": 0.5898253917694092, "learning_rate": 6.570824632750099e-05, "loss": 1.6308, "step": 19500, "train_loss_gtc": 0.05718017578125, "train_loss_gtm": 0.028017425537109376, "train_loss_lm": 1.5475 }, { "epoch": 6.0, "eval_loss": 1.8289062976837158, "eval_runtime": 3.955, "eval_samples_per_second": 251.581, "eval_steps_per_second": 2.781, "step": 19524, "train_loss_gtc": 0.059417724609375, "train_loss_gtm": 0.021376291910807293, "train_loss_lm": 1.5485026041666667, "val_loss_gtc": 0.1081298828125, "val_loss_gtm": 0.14790267944335939, "val_loss_lm": 1.559375 }, { "epoch": 6.0233558696988325, "grad_norm": 0.5566962361335754, "learning_rate": 6.540177014514361e-05, "loss": 1.6229, "step": 19600, "train_loss_gtc": 0.05809583162006579, "train_loss_gtm": 0.027290545011821547, "train_loss_lm": 1.5270353618421053 }, { "epoch": 6.054087277197295, "grad_norm": 0.7923322319984436, "learning_rate": 6.509465329539689e-05, "loss": 1.6096, "step": 19700, "train_loss_gtc": 0.0559228515625, "train_loss_gtm": 0.02381988525390625, "train_loss_lm": 1.527734375 }, { "epoch": 6.084818684695759, "grad_norm": 0.633963942527771, "learning_rate": 6.478690855339953e-05, "loss": 1.6261, "step": 19800, "train_loss_gtc": 0.0565234375, "train_loss_gtm": 0.03267807006835938, "train_loss_lm": 1.532734375 }, { "epoch": 6.115550092194223, "grad_norm": 0.9739740490913391, "learning_rate": 6.44785487204087e-05, "loss": 1.6157, "step": 19900, "train_loss_gtc": 0.05468017578125, "train_loss_gtm": 0.029143524169921876, "train_loss_lm": 1.53 }, { "epoch": 6.146281499692686, "grad_norm": 1.191219449043274, "learning_rate": 6.416958662326749e-05, "loss": 1.6127, "step": 20000, "train_loss_gtc": 0.05240966796875, "train_loss_gtm": 0.02735198974609375, "train_loss_lm": 1.531171875 }, { "epoch": 6.177012907191149, "grad_norm": 0.9735581278800964, "learning_rate": 6.38600351138714e-05, "loss": 1.6113, "step": 20100, "train_loss_gtc": 0.0530419921875, "train_loss_gtm": 0.027030487060546875, "train_loss_lm": 1.5346875 }, { "epoch": 6.207744314689613, "grad_norm": 1.2206913232803345, "learning_rate": 6.35499070686337e-05, "loss": 1.6212, "step": 20200, "train_loss_gtc": 0.055166015625, "train_loss_gtm": 0.026912918090820314, "train_loss_lm": 1.535625 }, { "epoch": 6.238475722188076, "grad_norm": 0.8422713279724121, "learning_rate": 6.323921538794981e-05, "loss": 1.6118, "step": 20300, "train_loss_gtc": 0.05383056640625, "train_loss_gtm": 0.029865264892578125, "train_loss_lm": 1.52765625 }, { "epoch": 6.2692071296865395, "grad_norm": 1.286847472190857, "learning_rate": 6.292797299566072e-05, "loss": 1.6112, "step": 20400, "train_loss_gtc": 0.055625, "train_loss_gtm": 0.0314874267578125, "train_loss_lm": 1.525234375 }, { "epoch": 6.299938537185003, "grad_norm": 0.5895647406578064, "learning_rate": 6.261619283851527e-05, "loss": 1.6021, "step": 20500, "train_loss_gtc": 0.050849609375, "train_loss_gtm": 0.027188568115234374, "train_loss_lm": 1.52734375 }, { "epoch": 6.330669944683467, "grad_norm": 0.6928810477256775, "learning_rate": 6.230388788563187e-05, "loss": 1.6008, "step": 20600, "train_loss_gtc": 0.05047119140625, "train_loss_gtm": 0.02188018798828125, "train_loss_lm": 1.530703125 }, { "epoch": 6.36140135218193, "grad_norm": 1.0124385356903076, "learning_rate": 6.199107112795872e-05, "loss": 1.6071, "step": 20700, "train_loss_gtc": 0.05262939453125, "train_loss_gtm": 0.028003463745117186, "train_loss_lm": 1.52765625 }, { "epoch": 6.392132759680393, "grad_norm": 1.7495094537734985, "learning_rate": 6.167775557773363e-05, "loss": 1.6069, "step": 20800, "train_loss_gtc": 0.0532470703125, "train_loss_gtm": 0.027218780517578124, "train_loss_lm": 1.525546875 }, { "epoch": 6.422864167178857, "grad_norm": 0.7303450703620911, "learning_rate": 6.136395426794261e-05, "loss": 1.5961, "step": 20900, "train_loss_gtc": 0.04982177734375, "train_loss_gtm": 0.019435043334960937, "train_loss_lm": 1.521875 }, { "epoch": 6.45359557467732, "grad_norm": 0.797379732131958, "learning_rate": 6.104968025177791e-05, "loss": 1.607, "step": 21000, "train_loss_gtc": 0.0555908203125, "train_loss_gtm": 0.024838104248046874, "train_loss_lm": 1.529375 }, { "epoch": 6.484326982175784, "grad_norm": 0.5462325811386108, "learning_rate": 6.073494660209491e-05, "loss": 1.6088, "step": 21100, "train_loss_gtc": 0.0543115234375, "train_loss_gtm": 0.03119972229003906, "train_loss_lm": 1.52734375 }, { "epoch": 6.515058389674247, "grad_norm": 0.46476686000823975, "learning_rate": 6.0419766410868294e-05, "loss": 1.6075, "step": 21200, "train_loss_gtc": 0.05191650390625, "train_loss_gtm": 0.027312164306640626, "train_loss_lm": 1.5278125 }, { "epoch": 6.54578979717271, "grad_norm": 0.6704521179199219, "learning_rate": 6.010415278864762e-05, "loss": 1.6081, "step": 21300, "train_loss_gtc": 0.05267822265625, "train_loss_gtm": 0.025740814208984376, "train_loss_lm": 1.522734375 }, { "epoch": 6.576521204671174, "grad_norm": 0.513566792011261, "learning_rate": 5.978811886401183e-05, "loss": 1.6077, "step": 21400, "train_loss_gtc": 0.05446533203125, "train_loss_gtm": 0.03446975708007813, "train_loss_lm": 1.523359375 }, { "epoch": 6.6072526121696376, "grad_norm": 1.2353570461273193, "learning_rate": 5.947167778302323e-05, "loss": 1.5954, "step": 21500, "train_loss_gtc": 0.04914306640625, "train_loss_gtm": 0.019849777221679688, "train_loss_lm": 1.52546875 }, { "epoch": 6.637984019668101, "grad_norm": 2.1153972148895264, "learning_rate": 5.9154842708680544e-05, "loss": 1.6048, "step": 21600, "train_loss_gtc": 0.052568359375, "train_loss_gtm": 0.028261795043945312, "train_loss_lm": 1.52453125 }, { "epoch": 6.668715427166564, "grad_norm": 1.2410842180252075, "learning_rate": 5.8837626820371486e-05, "loss": 1.6103, "step": 21700, "train_loss_gtc": 0.0537890625, "train_loss_gtm": 0.027198944091796875, "train_loss_lm": 1.52640625 }, { "epoch": 6.699446834665028, "grad_norm": 0.39238986372947693, "learning_rate": 5.852004331332443e-05, "loss": 1.6068, "step": 21800, "train_loss_gtc": 0.05417724609375, "train_loss_gtm": 0.025730323791503907, "train_loss_lm": 1.5265625 }, { "epoch": 6.7301782421634915, "grad_norm": 0.8881044983863831, "learning_rate": 5.820210539805968e-05, "loss": 1.5946, "step": 21900, "train_loss_gtc": 0.0499072265625, "train_loss_gtm": 0.019407730102539062, "train_loss_lm": 1.521875 }, { "epoch": 6.760909649661954, "grad_norm": 0.5124359130859375, "learning_rate": 5.788382629983977e-05, "loss": 1.612, "step": 22000, "train_loss_gtc": 0.0574853515625, "train_loss_gtm": 0.031860885620117185, "train_loss_lm": 1.523984375 }, { "epoch": 6.791641057160418, "grad_norm": 0.6098849773406982, "learning_rate": 5.7565219258119455e-05, "loss": 1.5961, "step": 22100, "train_loss_gtc": 0.05323974609375, "train_loss_gtm": 0.02882041931152344, "train_loss_lm": 1.521953125 }, { "epoch": 6.822372464658882, "grad_norm": 1.027600884437561, "learning_rate": 5.724629752599495e-05, "loss": 1.5928, "step": 22200, "train_loss_gtc": 0.0508203125, "train_loss_gtm": 0.02281818389892578, "train_loss_lm": 1.52421875 }, { "epoch": 6.8531038721573445, "grad_norm": 1.004398226737976, "learning_rate": 5.692707436965267e-05, "loss": 1.5929, "step": 22300, "train_loss_gtc": 0.04905517578125, "train_loss_gtm": 0.025001983642578125, "train_loss_lm": 1.521328125 }, { "epoch": 6.883835279655808, "grad_norm": 0.8874416351318359, "learning_rate": 5.660756306781733e-05, "loss": 1.5983, "step": 22400, "train_loss_gtc": 0.04990234375, "train_loss_gtm": 0.025154190063476564, "train_loss_lm": 1.52375 }, { "epoch": 6.914566687154272, "grad_norm": 0.5866090059280396, "learning_rate": 5.628777691119965e-05, "loss": 1.5958, "step": 22500, "train_loss_gtc": 0.0502880859375, "train_loss_gtm": 0.024204254150390625, "train_loss_lm": 1.521328125 }, { "epoch": 6.945298094652735, "grad_norm": 0.48130372166633606, "learning_rate": 5.59677292019435e-05, "loss": 1.594, "step": 22600, "train_loss_gtc": 0.05079833984375, "train_loss_gtm": 0.02796661376953125, "train_loss_lm": 1.51875 }, { "epoch": 6.976029502151198, "grad_norm": 0.6554698944091797, "learning_rate": 5.564743325307254e-05, "loss": 1.5964, "step": 22700, "train_loss_gtc": 0.0513427734375, "train_loss_gtm": 0.025988922119140626, "train_loss_lm": 1.521171875 }, { "epoch": 7.0, "eval_loss": 1.8093750476837158, "eval_runtime": 3.9611, "eval_samples_per_second": 251.194, "eval_steps_per_second": 2.777, "step": 22778, "train_loss_gtc": 0.051851712740384616, "train_loss_gtm": 0.024179898775540866, "train_loss_lm": 1.5157251602564104, "val_loss_gtc": 0.11328125, "val_loss_gtm": 0.15882568359375, "val_loss_lm": 1.53984375 }, { "epoch": 7.006760909649662, "grad_norm": 1.2214024066925049, "learning_rate": 5.5326902387936454e-05, "loss": 1.5932, "step": 22800, "train_loss_gtc": 0.04629794034090909, "train_loss_gtm": 0.011040774258700285, "train_loss_lm": 1.5095880681818181 }, { "epoch": 7.037492317148125, "grad_norm": 0.78125, "learning_rate": 5.500614993965673e-05, "loss": 1.5774, "step": 22900, "train_loss_gtc": 0.048642578125, "train_loss_gtm": 0.028121871948242186, "train_loss_lm": 1.504296875 }, { "epoch": 7.068223724646589, "grad_norm": 0.5814157724380493, "learning_rate": 5.468518925057203e-05, "loss": 1.5826, "step": 23000, "train_loss_gtc": 0.049710693359375, "train_loss_gtm": 0.02605010986328125, "train_loss_lm": 1.508125 }, { "epoch": 7.098955132145052, "grad_norm": 0.7798097133636475, "learning_rate": 5.4364033671683304e-05, "loss": 1.5849, "step": 23100, "train_loss_gtc": 0.049805908203125, "train_loss_gtm": 0.024519424438476562, "train_loss_lm": 1.512890625 }, { "epoch": 7.129686539643516, "grad_norm": 0.8778783679008484, "learning_rate": 5.404269656209819e-05, "loss": 1.5775, "step": 23200, "train_loss_gtc": 0.04724853515625, "train_loss_gtm": 0.021280136108398438, "train_loss_lm": 1.509140625 }, { "epoch": 7.160417947141979, "grad_norm": 0.8768311142921448, "learning_rate": 5.3721191288475595e-05, "loss": 1.5768, "step": 23300, "train_loss_gtc": 0.0488720703125, "train_loss_gtm": 0.020770683288574218, "train_loss_lm": 1.50484375 }, { "epoch": 7.191149354640443, "grad_norm": 1.3236780166625977, "learning_rate": 5.3399531224469424e-05, "loss": 1.5761, "step": 23400, "train_loss_gtc": 0.047967529296875, "train_loss_gtm": 0.016504249572753905, "train_loss_lm": 1.507578125 }, { "epoch": 7.221880762138906, "grad_norm": 0.4845696985721588, "learning_rate": 5.307772975017249e-05, "loss": 1.58, "step": 23500, "train_loss_gtc": 0.04843017578125, "train_loss_gtm": 0.021038818359375, "train_loss_lm": 1.51203125 }, { "epoch": 7.252612169637369, "grad_norm": 0.6816074848175049, "learning_rate": 5.2755800251559794e-05, "loss": 1.5807, "step": 23600, "train_loss_gtc": 0.0488525390625, "train_loss_gtm": 0.025988388061523437, "train_loss_lm": 1.50859375 }, { "epoch": 7.283343577135833, "grad_norm": 0.8071028590202332, "learning_rate": 5.24337561199318e-05, "loss": 1.5757, "step": 23700, "train_loss_gtc": 0.0470068359375, "train_loss_gtm": 0.02268218994140625, "train_loss_lm": 1.510703125 }, { "epoch": 7.3140749846342965, "grad_norm": 1.132927656173706, "learning_rate": 5.211161075135733e-05, "loss": 1.5746, "step": 23800, "train_loss_gtc": 0.04585205078125, "train_loss_gtm": 0.020586471557617187, "train_loss_lm": 1.508203125 }, { "epoch": 7.344806392132759, "grad_norm": 0.6981713771820068, "learning_rate": 5.178937754611637e-05, "loss": 1.5759, "step": 23900, "train_loss_gtc": 0.045491943359375, "train_loss_gtm": 0.0174371337890625, "train_loss_lm": 1.508671875 }, { "epoch": 7.375537799631223, "grad_norm": 0.689810574054718, "learning_rate": 5.1467069908142684e-05, "loss": 1.5719, "step": 24000, "train_loss_gtc": 0.046361083984375, "train_loss_gtm": 0.02007720947265625, "train_loss_lm": 1.50734375 }, { "epoch": 7.406269207129687, "grad_norm": 0.5761317610740662, "learning_rate": 5.1144701244466144e-05, "loss": 1.5774, "step": 24100, "train_loss_gtc": 0.047037353515625, "train_loss_gtm": 0.025342483520507813, "train_loss_lm": 1.505390625 }, { "epoch": 7.43700061462815, "grad_norm": 0.9547802805900574, "learning_rate": 5.082228496465517e-05, "loss": 1.5723, "step": 24200, "train_loss_gtc": 0.046898193359375, "train_loss_gtm": 0.019998626708984377, "train_loss_lm": 1.5040625 }, { "epoch": 7.467732022126613, "grad_norm": 1.58182954788208, "learning_rate": 5.049983448025881e-05, "loss": 1.5752, "step": 24300, "train_loss_gtc": 0.047181396484375, "train_loss_gtm": 0.019326019287109374, "train_loss_lm": 1.5034375 }, { "epoch": 7.498463429625077, "grad_norm": 1.1392496824264526, "learning_rate": 5.0177363204249016e-05, "loss": 1.567, "step": 24400, "train_loss_gtc": 0.0444873046875, "train_loss_gtm": 0.02104278564453125, "train_loss_lm": 1.503828125 }, { "epoch": 7.529194837123541, "grad_norm": 0.9969751238822937, "learning_rate": 4.985488455046249e-05, "loss": 1.5918, "step": 24500, "train_loss_gtc": 0.05201904296875, "train_loss_gtm": 0.026438446044921876, "train_loss_lm": 1.50671875 }, { "epoch": 7.5599262446220035, "grad_norm": 0.6485080122947693, "learning_rate": 4.953241193304291e-05, "loss": 1.5678, "step": 24600, "train_loss_gtc": 0.04556884765625, "train_loss_gtm": 0.01871406555175781, "train_loss_lm": 1.50484375 }, { "epoch": 7.590657652120467, "grad_norm": 0.5488921403884888, "learning_rate": 4.920995876588286e-05, "loss": 1.5709, "step": 24700, "train_loss_gtc": 0.045516357421875, "train_loss_gtm": 0.017727508544921874, "train_loss_lm": 1.507890625 }, { "epoch": 7.621389059618931, "grad_norm": 1.2782403230667114, "learning_rate": 4.888753846206578e-05, "loss": 1.5708, "step": 24800, "train_loss_gtc": 0.045699462890625, "train_loss_gtm": 0.019001045227050782, "train_loss_lm": 1.5021875 }, { "epoch": 7.652120467117394, "grad_norm": 1.2111992835998535, "learning_rate": 4.856516443330818e-05, "loss": 1.5671, "step": 24900, "train_loss_gtc": 0.04524169921875, "train_loss_gtm": 0.015474700927734375, "train_loss_lm": 1.50671875 }, { "epoch": 7.682851874615857, "grad_norm": 0.9381042122840881, "learning_rate": 4.824285008940159e-05, "loss": 1.5682, "step": 25000, "train_loss_gtc": 0.04477783203125, "train_loss_gtm": 0.016591415405273438, "train_loss_lm": 1.50328125 }, { "epoch": 7.713583282114321, "grad_norm": 0.41880643367767334, "learning_rate": 4.79206088376549e-05, "loss": 1.5699, "step": 25100, "train_loss_gtc": 0.04564697265625, "train_loss_gtm": 0.022302398681640623, "train_loss_lm": 1.50265625 }, { "epoch": 7.744314689612784, "grad_norm": 0.41994112730026245, "learning_rate": 4.7598454082336525e-05, "loss": 1.5593, "step": 25200, "train_loss_gtc": 0.0431494140625, "train_loss_gtm": 0.01353099822998047, "train_loss_lm": 1.501328125 }, { "epoch": 7.775046097111248, "grad_norm": 0.41959813237190247, "learning_rate": 4.727639922411693e-05, "loss": 1.5675, "step": 25300, "train_loss_gtc": 0.045030517578125, "train_loss_gtm": 0.018340682983398436, "train_loss_lm": 1.498359375 }, { "epoch": 7.805777504609711, "grad_norm": 1.3286911249160767, "learning_rate": 4.695445765951113e-05, "loss": 1.5671, "step": 25400, "train_loss_gtc": 0.044442138671875, "train_loss_gtm": 0.017482261657714843, "train_loss_lm": 1.50640625 }, { "epoch": 7.836508912108174, "grad_norm": 0.5046520233154297, "learning_rate": 4.6632642780321506e-05, "loss": 1.5625, "step": 25500, "train_loss_gtc": 0.04425048828125, "train_loss_gtm": 0.01410266876220703, "train_loss_lm": 1.501953125 }, { "epoch": 7.867240319606638, "grad_norm": 0.7728056907653809, "learning_rate": 4.631096797308068e-05, "loss": 1.5739, "step": 25600, "train_loss_gtc": 0.048016357421875, "train_loss_gtm": 0.026591949462890625, "train_loss_lm": 1.502890625 }, { "epoch": 7.8979717271051015, "grad_norm": 0.549649178981781, "learning_rate": 4.598944661849467e-05, "loss": 1.5654, "step": 25700, "train_loss_gtc": 0.045203857421875, "train_loss_gtm": 0.019275131225585936, "train_loss_lm": 1.500703125 }, { "epoch": 7.928703134603564, "grad_norm": 0.4454677999019623, "learning_rate": 4.566809209088641e-05, "loss": 1.5661, "step": 25800, "train_loss_gtc": 0.044942626953125, "train_loss_gtm": 0.01573017120361328, "train_loss_lm": 1.50234375 }, { "epoch": 7.959434542102028, "grad_norm": 0.5023268461227417, "learning_rate": 4.534691775763923e-05, "loss": 1.5643, "step": 25900, "train_loss_gtc": 0.045194091796875, "train_loss_gtm": 0.020731773376464844, "train_loss_lm": 1.498359375 }, { "epoch": 7.990165949600492, "grad_norm": 0.4675215780735016, "learning_rate": 4.5025936978640993e-05, "loss": 1.5646, "step": 26000, "train_loss_gtc": 0.04420166015625, "train_loss_gtm": 0.0233331298828125, "train_loss_lm": 1.50140625 }, { "epoch": 8.0, "eval_loss": 1.7390625476837158, "eval_runtime": 3.9419, "eval_samples_per_second": 252.418, "eval_steps_per_second": 2.791, "step": 26032, "train_loss_gtc": 0.043849945068359375, "train_loss_gtm": 0.01909458637237549, "train_loss_lm": 1.50341796875, "val_loss_gtc": 0.085546875, "val_loss_gtm": 0.1235870361328125, "val_loss_lm": 1.51875 }, { "epoch": 8.020897357098955, "grad_norm": 0.9887075424194336, "learning_rate": 4.470516310572825e-05, "loss": 1.5523, "step": 26100, "train_loss_gtc": 0.04299388212316176, "train_loss_gtm": 0.015683286330279184, "train_loss_lm": 1.4872472426470589 }, { "epoch": 8.05162876459742, "grad_norm": 0.7514944076538086, "learning_rate": 4.43846094821309e-05, "loss": 1.5613, "step": 26200, "train_loss_gtc": 0.04583251953125, "train_loss_gtm": 0.026337127685546875, "train_loss_lm": 1.490859375 }, { "epoch": 8.082360172095882, "grad_norm": 1.092617154121399, "learning_rate": 4.406428944191709e-05, "loss": 1.5533, "step": 26300, "train_loss_gtc": 0.04384765625, "train_loss_gtm": 0.016444091796875, "train_loss_lm": 1.488046875 }, { "epoch": 8.113091579594345, "grad_norm": 1.1750010251998901, "learning_rate": 4.374421630943868e-05, "loss": 1.5543, "step": 26400, "train_loss_gtc": 0.043507080078125, "train_loss_gtm": 0.018485107421875, "train_loss_lm": 1.493203125 }, { "epoch": 8.14382298709281, "grad_norm": 0.5995994806289673, "learning_rate": 4.3424403398776835e-05, "loss": 1.5558, "step": 26500, "train_loss_gtc": 0.045775146484375, "train_loss_gtm": 0.0213360595703125, "train_loss_lm": 1.486953125 }, { "epoch": 8.174554394591272, "grad_norm": 0.40138596296310425, "learning_rate": 4.310486401318829e-05, "loss": 1.5414, "step": 26600, "train_loss_gtc": 0.04089599609375, "train_loss_gtm": 0.011089859008789062, "train_loss_lm": 1.488828125 }, { "epoch": 8.205285802089735, "grad_norm": 0.4291875958442688, "learning_rate": 4.278561144455199e-05, "loss": 1.5511, "step": 26700, "train_loss_gtc": 0.0429052734375, "train_loss_gtm": 0.014610671997070312, "train_loss_lm": 1.4884375 }, { "epoch": 8.2360172095882, "grad_norm": 0.5274336934089661, "learning_rate": 4.246665897281612e-05, "loss": 1.5493, "step": 26800, "train_loss_gtc": 0.04279296875, "train_loss_gtm": 0.015193328857421876, "train_loss_lm": 1.49359375 }, { "epoch": 8.266748617086662, "grad_norm": 0.7654374837875366, "learning_rate": 4.214801986544575e-05, "loss": 1.5566, "step": 26900, "train_loss_gtc": 0.042926025390625, "train_loss_gtm": 0.018515548706054687, "train_loss_lm": 1.49296875 }, { "epoch": 8.297480024585125, "grad_norm": 0.9065292477607727, "learning_rate": 4.182970737687093e-05, "loss": 1.5538, "step": 27000, "train_loss_gtc": 0.04357177734375, "train_loss_gtm": 0.016671829223632813, "train_loss_lm": 1.491875 }, { "epoch": 8.32821143208359, "grad_norm": 1.0985864400863647, "learning_rate": 4.151173474793534e-05, "loss": 1.5566, "step": 27100, "train_loss_gtc": 0.045074462890625, "train_loss_gtm": 0.02417022705078125, "train_loss_lm": 1.488515625 }, { "epoch": 8.358942839582053, "grad_norm": 0.43155065178871155, "learning_rate": 4.1194115205345574e-05, "loss": 1.5593, "step": 27200, "train_loss_gtc": 0.04392822265625, "train_loss_gtm": 0.024323196411132814, "train_loss_lm": 1.490078125 }, { "epoch": 8.389674247080515, "grad_norm": 0.6603362560272217, "learning_rate": 4.0876861961120806e-05, "loss": 1.5456, "step": 27300, "train_loss_gtc": 0.043385009765625, "train_loss_gtm": 0.011190872192382812, "train_loss_lm": 1.486015625 }, { "epoch": 8.42040565457898, "grad_norm": 0.5204278826713562, "learning_rate": 4.055998821204337e-05, "loss": 1.5511, "step": 27400, "train_loss_gtc": 0.04381103515625, "train_loss_gtm": 0.017749443054199218, "train_loss_lm": 1.491953125 }, { "epoch": 8.451137062077443, "grad_norm": 0.7329652309417725, "learning_rate": 4.024350713910969e-05, "loss": 1.5452, "step": 27500, "train_loss_gtc": 0.041251220703125, "train_loss_gtm": 0.012794952392578125, "train_loss_lm": 1.48953125 }, { "epoch": 8.481868469575907, "grad_norm": 1.1227164268493652, "learning_rate": 3.9927431906982095e-05, "loss": 1.5508, "step": 27600, "train_loss_gtc": 0.04261962890625, "train_loss_gtm": 0.01765655517578125, "train_loss_lm": 1.48875 }, { "epoch": 8.51259987707437, "grad_norm": 0.6496936678886414, "learning_rate": 3.9611775663441094e-05, "loss": 1.5491, "step": 27700, "train_loss_gtc": 0.04417724609375, "train_loss_gtm": 0.023344078063964845, "train_loss_lm": 1.48734375 }, { "epoch": 8.543331284572833, "grad_norm": 0.4676097333431244, "learning_rate": 3.92965515388386e-05, "loss": 1.5494, "step": 27800, "train_loss_gtc": 0.0420458984375, "train_loss_gtm": 0.020555419921875, "train_loss_lm": 1.48703125 }, { "epoch": 8.574062692071298, "grad_norm": 1.0823791027069092, "learning_rate": 3.8981772645551595e-05, "loss": 1.5512, "step": 27900, "train_loss_gtc": 0.042501220703125, "train_loss_gtm": 0.022169036865234373, "train_loss_lm": 1.4890625 }, { "epoch": 8.60479409956976, "grad_norm": 0.40729042887687683, "learning_rate": 3.866745207743683e-05, "loss": 1.543, "step": 28000, "train_loss_gtc": 0.03969482421875, "train_loss_gtm": 0.009343986511230468, "train_loss_lm": 1.487421875 }, { "epoch": 8.635525507068223, "grad_norm": 1.4600690603256226, "learning_rate": 3.835360290928612e-05, "loss": 1.549, "step": 28100, "train_loss_gtc": 0.04197265625, "train_loss_gtm": 0.016862869262695312, "train_loss_lm": 1.484921875 }, { "epoch": 8.666256914566688, "grad_norm": 0.43790164589881897, "learning_rate": 3.8040238196282395e-05, "loss": 1.5401, "step": 28200, "train_loss_gtc": 0.03960205078125, "train_loss_gtm": 0.01627326965332031, "train_loss_lm": 1.482890625 }, { "epoch": 8.69698832206515, "grad_norm": 0.4079265892505646, "learning_rate": 3.772737097345676e-05, "loss": 1.5519, "step": 28300, "train_loss_gtc": 0.04443603515625, "train_loss_gtm": 0.01917346954345703, "train_loss_lm": 1.486328125 }, { "epoch": 8.727719729563614, "grad_norm": 2.1502716541290283, "learning_rate": 3.741501425514618e-05, "loss": 1.5453, "step": 28400, "train_loss_gtc": 0.04140380859375, "train_loss_gtm": 0.016539077758789062, "train_loss_lm": 1.489453125 }, { "epoch": 8.758451137062078, "grad_norm": 2.0536539554595947, "learning_rate": 3.710318103445223e-05, "loss": 1.5478, "step": 28500, "train_loss_gtc": 0.04205078125, "train_loss_gtm": 0.019853744506835937, "train_loss_lm": 1.48765625 }, { "epoch": 8.789182544560541, "grad_norm": 0.8067043423652649, "learning_rate": 3.6791884282700464e-05, "loss": 1.5401, "step": 28600, "train_loss_gtc": 0.042589111328125, "train_loss_gtm": 0.01223979949951172, "train_loss_lm": 1.487265625 }, { "epoch": 8.819913952059004, "grad_norm": 1.0549793243408203, "learning_rate": 3.6481136948901016e-05, "loss": 1.5449, "step": 28700, "train_loss_gtc": 0.039984130859375, "train_loss_gtm": 0.013403701782226562, "train_loss_lm": 1.4865625 }, { "epoch": 8.850645359557468, "grad_norm": 0.3913937211036682, "learning_rate": 3.617095195920983e-05, "loss": 1.5392, "step": 28800, "train_loss_gtc": 0.038916015625, "train_loss_gtm": 0.014700355529785157, "train_loss_lm": 1.48515625 }, { "epoch": 8.881376767055931, "grad_norm": 0.6485953330993652, "learning_rate": 3.5861342216391083e-05, "loss": 1.5398, "step": 28900, "train_loss_gtc": 0.0403515625, "train_loss_gtm": 0.008778877258300781, "train_loss_lm": 1.48671875 }, { "epoch": 8.912108174554394, "grad_norm": 0.42979031801223755, "learning_rate": 3.555232059928037e-05, "loss": 1.5443, "step": 29000, "train_loss_gtc": 0.040491943359375, "train_loss_gtm": 0.020406494140625, "train_loss_lm": 1.487421875 }, { "epoch": 8.942839582052859, "grad_norm": 0.4814371168613434, "learning_rate": 3.524389996224899e-05, "loss": 1.5404, "step": 29100, "train_loss_gtc": 0.038388671875, "train_loss_gtm": 0.01521839141845703, "train_loss_lm": 1.486171875 }, { "epoch": 8.973570989551321, "grad_norm": 1.2739533185958862, "learning_rate": 3.4936093134669375e-05, "loss": 1.5411, "step": 29200, "train_loss_gtc": 0.04125, "train_loss_gtm": 0.01296173095703125, "train_loss_lm": 1.482421875 }, { "epoch": 9.0, "eval_loss": 1.7078125476837158, "eval_runtime": 3.925, "eval_samples_per_second": 253.505, "eval_steps_per_second": 2.803, "step": 29286, "train_loss_gtc": 0.03917747320130814, "train_loss_gtm": 0.016063379686932232, "train_loss_lm": 1.4876453488372092, "val_loss_gtc": 0.0817138671875, "val_loss_gtm": 0.13273239135742188, "val_loss_lm": 1.5046875 }, { "epoch": 9.004302397049784, "grad_norm": 0.49189862608909607, "learning_rate": 3.4628912920381206e-05, "loss": 1.5363, "step": 29300, "train_loss_gtc": 0.03465053013392857, "train_loss_gtm": 0.006429399762834821, "train_loss_lm": 1.4709821428571428 }, { "epoch": 9.035033804548249, "grad_norm": 0.4804949164390564, "learning_rate": 3.432237209715904e-05, "loss": 1.5311, "step": 29400, "train_loss_gtc": 0.040050048828125, "train_loss_gtm": 0.015148849487304687, "train_loss_lm": 1.471484375 }, { "epoch": 9.065765212046712, "grad_norm": 2.241997241973877, "learning_rate": 3.40164834161806e-05, "loss": 1.5311, "step": 29500, "train_loss_gtc": 0.039757080078125, "train_loss_gtm": 0.017725067138671877, "train_loss_lm": 1.476484375 }, { "epoch": 9.096496619545174, "grad_norm": 0.48794323205947876, "learning_rate": 3.371125960149651e-05, "loss": 1.5284, "step": 29600, "train_loss_gtc": 0.040618896484375, "train_loss_gtm": 0.014891014099121094, "train_loss_lm": 1.47359375 }, { "epoch": 9.127228027043639, "grad_norm": 0.9154407978057861, "learning_rate": 3.340671334950091e-05, "loss": 1.5308, "step": 29700, "train_loss_gtc": 0.04009521484375, "train_loss_gtm": 0.01613304138183594, "train_loss_lm": 1.476953125 }, { "epoch": 9.157959434542102, "grad_norm": 0.3826013505458832, "learning_rate": 3.31028573284034e-05, "loss": 1.5269, "step": 29800, "train_loss_gtc": 0.03857177734375, "train_loss_gtm": 0.012269973754882812, "train_loss_lm": 1.47546875 }, { "epoch": 9.188690842040565, "grad_norm": 0.4480116665363312, "learning_rate": 3.279970417770206e-05, "loss": 1.5314, "step": 29900, "train_loss_gtc": 0.0409716796875, "train_loss_gtm": 0.01894462585449219, "train_loss_lm": 1.477578125 }, { "epoch": 9.21942224953903, "grad_norm": 0.5610605478286743, "learning_rate": 3.24972665076576e-05, "loss": 1.5302, "step": 30000, "train_loss_gtc": 0.037857666015625, "train_loss_gtm": 0.015255851745605469, "train_loss_lm": 1.47703125 }, { "epoch": 9.250153657037492, "grad_norm": 0.4201144576072693, "learning_rate": 3.219555689876896e-05, "loss": 1.5277, "step": 30100, "train_loss_gtc": 0.03964599609375, "train_loss_gtm": 0.012595443725585938, "train_loss_lm": 1.47546875 }, { "epoch": 9.280885064535955, "grad_norm": 1.114909291267395, "learning_rate": 3.1894587901249875e-05, "loss": 1.5265, "step": 30200, "train_loss_gtc": 0.036279296875, "train_loss_gtm": 0.009384765625, "train_loss_lm": 1.475390625 }, { "epoch": 9.31161647203442, "grad_norm": 0.41764217615127563, "learning_rate": 3.159437203450691e-05, "loss": 1.5256, "step": 30300, "train_loss_gtc": 0.037474365234375, "train_loss_gtm": 0.010728912353515625, "train_loss_lm": 1.4775 }, { "epoch": 9.342347879532882, "grad_norm": 1.1266087293624878, "learning_rate": 3.1294921786618595e-05, "loss": 1.522, "step": 30400, "train_loss_gtc": 0.036798095703125, "train_loss_gtm": 0.007729339599609375, "train_loss_lm": 1.4746875 }, { "epoch": 9.373079287031347, "grad_norm": 0.4223707616329193, "learning_rate": 3.099624961381606e-05, "loss": 1.5262, "step": 30500, "train_loss_gtc": 0.039088134765625, "train_loss_gtm": 0.013626289367675782, "train_loss_lm": 1.471875 }, { "epoch": 9.40381069452981, "grad_norm": 0.4733109176158905, "learning_rate": 3.069836793996486e-05, "loss": 1.5268, "step": 30600, "train_loss_gtc": 0.038968505859375, "train_loss_gtm": 0.015171966552734374, "train_loss_lm": 1.4746875 }, { "epoch": 9.434542102028272, "grad_norm": 0.8515746593475342, "learning_rate": 3.0401289156048117e-05, "loss": 1.524, "step": 30700, "train_loss_gtc": 0.038099365234375, "train_loss_gtm": 0.011698036193847657, "train_loss_lm": 1.475234375 }, { "epoch": 9.465273509526737, "grad_norm": 0.3740207850933075, "learning_rate": 3.0105025619651193e-05, "loss": 1.5272, "step": 30800, "train_loss_gtc": 0.038729248046875, "train_loss_gtm": 0.012548446655273438, "train_loss_lm": 1.474765625 }, { "epoch": 9.4960049170252, "grad_norm": 0.42126893997192383, "learning_rate": 2.9809589654447555e-05, "loss": 1.5232, "step": 30900, "train_loss_gtc": 0.0394775390625, "train_loss_gtm": 0.012857398986816405, "train_loss_lm": 1.47125 }, { "epoch": 9.526736324523663, "grad_norm": 0.4131476581096649, "learning_rate": 2.951499354968623e-05, "loss": 1.5289, "step": 31000, "train_loss_gtc": 0.03717041015625, "train_loss_gtm": 0.010989189147949219, "train_loss_lm": 1.476328125 }, { "epoch": 9.557467732022127, "grad_norm": 1.3864574432373047, "learning_rate": 2.922124955968054e-05, "loss": 1.5302, "step": 31100, "train_loss_gtc": 0.040264892578125, "train_loss_gtm": 0.014952011108398437, "train_loss_lm": 1.4771875 }, { "epoch": 9.58819913952059, "grad_norm": 0.6983849406242371, "learning_rate": 2.892836990329844e-05, "loss": 1.5228, "step": 31200, "train_loss_gtc": 0.037857666015625, "train_loss_gtm": 0.014338626861572265, "train_loss_lm": 1.475390625 }, { "epoch": 9.618930547019053, "grad_norm": 0.9399222731590271, "learning_rate": 2.8636366763454153e-05, "loss": 1.5205, "step": 31300, "train_loss_gtc": 0.03775146484375, "train_loss_gtm": 0.011002845764160156, "train_loss_lm": 1.4725 }, { "epoch": 9.649661954517518, "grad_norm": 0.7803316712379456, "learning_rate": 2.8345252286601448e-05, "loss": 1.5214, "step": 31400, "train_loss_gtc": 0.03853271484375, "train_loss_gtm": 0.014136924743652343, "train_loss_lm": 1.473671875 }, { "epoch": 9.68039336201598, "grad_norm": 1.0166672468185425, "learning_rate": 2.805503858222842e-05, "loss": 1.525, "step": 31500, "train_loss_gtc": 0.03795654296875, "train_loss_gtm": 0.013683624267578125, "train_loss_lm": 1.472109375 }, { "epoch": 9.711124769514443, "grad_norm": 1.386081576347351, "learning_rate": 2.7765737722353725e-05, "loss": 1.5211, "step": 31600, "train_loss_gtc": 0.037562255859375, "train_loss_gtm": 0.0162908935546875, "train_loss_lm": 1.471484375 }, { "epoch": 9.741856177012908, "grad_norm": 1.487998366355896, "learning_rate": 2.747736174102441e-05, "loss": 1.5211, "step": 31700, "train_loss_gtc": 0.037666015625, "train_loss_gtm": 0.009608421325683594, "train_loss_lm": 1.4721875 }, { "epoch": 9.77258758451137, "grad_norm": 0.4993577301502228, "learning_rate": 2.7189922633815346e-05, "loss": 1.5286, "step": 31800, "train_loss_gtc": 0.04015380859375, "train_loss_gtm": 0.015623245239257812, "train_loss_lm": 1.476171875 }, { "epoch": 9.803318992009833, "grad_norm": 2.035013437271118, "learning_rate": 2.690343235733026e-05, "loss": 1.5297, "step": 31900, "train_loss_gtc": 0.03919921875, "train_loss_gtm": 0.01642772674560547, "train_loss_lm": 1.4703125 }, { "epoch": 9.834050399508298, "grad_norm": 0.44986504316329956, "learning_rate": 2.66179028287044e-05, "loss": 1.5191, "step": 32000, "train_loss_gtc": 0.0372119140625, "train_loss_gtm": 0.01147369384765625, "train_loss_lm": 1.474140625 }, { "epoch": 9.86478180700676, "grad_norm": 0.44800782203674316, "learning_rate": 2.633334592510876e-05, "loss": 1.5229, "step": 32100, "train_loss_gtc": 0.037117919921875, "train_loss_gtm": 0.020374336242675782, "train_loss_lm": 1.472421875 }, { "epoch": 9.895513214505224, "grad_norm": 0.4471757113933563, "learning_rate": 2.6049773483256046e-05, "loss": 1.5197, "step": 32200, "train_loss_gtc": 0.03855224609375, "train_loss_gtm": 0.012574348449707031, "train_loss_lm": 1.4709375 }, { "epoch": 9.926244622003688, "grad_norm": 1.0153461694717407, "learning_rate": 2.5767197298908296e-05, "loss": 1.522, "step": 32300, "train_loss_gtc": 0.0387353515625, "train_loss_gtm": 0.013848609924316406, "train_loss_lm": 1.470703125 }, { "epoch": 9.956976029502151, "grad_norm": 0.35531821846961975, "learning_rate": 2.5485629126386323e-05, "loss": 1.5207, "step": 32400, "train_loss_gtc": 0.0349658203125, "train_loss_gtm": 0.00917278289794922, "train_loss_lm": 1.471484375 }, { "epoch": 9.987707437000614, "grad_norm": 0.4289498031139374, "learning_rate": 2.5205080678080573e-05, "loss": 1.5159, "step": 32500, "train_loss_gtc": 0.03526123046875, "train_loss_gtm": 0.006147556304931641, "train_loss_lm": 1.469765625 }, { "epoch": 10.0, "eval_loss": 1.663671851158142, "eval_runtime": 3.934, "eval_samples_per_second": 252.92, "eval_steps_per_second": 2.796, "step": 32540, "train_loss_gtc": 0.0366180419921875, "train_loss_gtm": 0.016598081588745116, "train_loss_lm": 1.4673828125, "val_loss_gtc": 0.075244140625, "val_loss_gtm": 0.091632080078125, "val_loss_lm": 1.49296875 }, { "epoch": 10.018438844499078, "grad_norm": 0.6605350971221924, "learning_rate": 2.4925563623964055e-05, "loss": 1.5146, "step": 32600, "train_loss_gtc": 0.037335205078125, "train_loss_gtm": 0.014607747395833334, "train_loss_lm": 1.459765625 }, { "epoch": 10.049170251997541, "grad_norm": 0.5100732445716858, "learning_rate": 2.4647089591106885e-05, "loss": 1.5074, "step": 32700, "train_loss_gtc": 0.035672607421875, "train_loss_gtm": 0.010171089172363281, "train_loss_lm": 1.461875 }, { "epoch": 10.079901659496006, "grad_norm": 1.4332607984542847, "learning_rate": 2.4369670163192603e-05, "loss": 1.5097, "step": 32800, "train_loss_gtc": 0.038460693359375, "train_loss_gtm": 0.012800846099853515, "train_loss_lm": 1.461484375 }, { "epoch": 10.110633066994469, "grad_norm": 0.3127327263355255, "learning_rate": 2.409331688003642e-05, "loss": 1.5074, "step": 32900, "train_loss_gtc": 0.037158203125, "train_loss_gtm": 0.007948532104492187, "train_loss_lm": 1.46296875 }, { "epoch": 10.141364474492931, "grad_norm": 0.49944329261779785, "learning_rate": 2.3818041237105047e-05, "loss": 1.5138, "step": 33000, "train_loss_gtc": 0.036298828125, "train_loss_gtm": 0.01034515380859375, "train_loss_lm": 1.46390625 }, { "epoch": 10.172095881991396, "grad_norm": 0.38113901019096375, "learning_rate": 2.3543854685038612e-05, "loss": 1.5096, "step": 33100, "train_loss_gtc": 0.035716552734375, "train_loss_gtm": 0.010895004272460937, "train_loss_lm": 1.46328125 }, { "epoch": 10.202827289489859, "grad_norm": 0.8738096952438354, "learning_rate": 2.3270768629174366e-05, "loss": 1.5107, "step": 33200, "train_loss_gtc": 0.03684326171875, "train_loss_gtm": 0.01412738800048828, "train_loss_lm": 1.4628125 }, { "epoch": 10.233558696988322, "grad_norm": 0.7059551477432251, "learning_rate": 2.2998794429072228e-05, "loss": 1.511, "step": 33300, "train_loss_gtc": 0.035848388671875, "train_loss_gtm": 0.010251865386962891, "train_loss_lm": 1.46515625 }, { "epoch": 10.264290104486786, "grad_norm": 0.49285122752189636, "learning_rate": 2.2727943398042223e-05, "loss": 1.5166, "step": 33400, "train_loss_gtc": 0.03899169921875, "train_loss_gtm": 0.01776031494140625, "train_loss_lm": 1.46421875 }, { "epoch": 10.295021511985249, "grad_norm": 0.3343373239040375, "learning_rate": 2.245822680267391e-05, "loss": 1.5063, "step": 33500, "train_loss_gtc": 0.034970703125, "train_loss_gtm": 0.00969287872314453, "train_loss_lm": 1.46109375 }, { "epoch": 10.325752919483712, "grad_norm": 0.6031121611595154, "learning_rate": 2.2189655862367736e-05, "loss": 1.5091, "step": 33600, "train_loss_gtc": 0.036680908203125, "train_loss_gtm": 0.013059463500976563, "train_loss_lm": 1.46125 }, { "epoch": 10.356484326982176, "grad_norm": 0.35346755385398865, "learning_rate": 2.1922241748868395e-05, "loss": 1.5055, "step": 33700, "train_loss_gtc": 0.033951416015625, "train_loss_gtm": 0.005552330017089844, "train_loss_lm": 1.4603125 }, { "epoch": 10.38721573448064, "grad_norm": 1.6642231941223145, "learning_rate": 2.1655995585799977e-05, "loss": 1.51, "step": 33800, "train_loss_gtc": 0.036239013671875, "train_loss_gtm": 0.012279739379882812, "train_loss_lm": 1.460546875 }, { "epoch": 10.417947141979102, "grad_norm": 1.5294814109802246, "learning_rate": 2.1390928448203397e-05, "loss": 1.5046, "step": 33900, "train_loss_gtc": 0.03334716796875, "train_loss_gtm": 0.00482635498046875, "train_loss_lm": 1.46125 }, { "epoch": 10.448678549477567, "grad_norm": 0.9640972018241882, "learning_rate": 2.1127051362075596e-05, "loss": 1.5085, "step": 34000, "train_loss_gtc": 0.03734619140625, "train_loss_gtm": 0.012679977416992188, "train_loss_lm": 1.4615625 }, { "epoch": 10.47940995697603, "grad_norm": 2.8935489654541016, "learning_rate": 2.086437530391101e-05, "loss": 1.5037, "step": 34100, "train_loss_gtc": 0.034757080078125, "train_loss_gtm": 0.006779251098632813, "train_loss_lm": 1.46296875 }, { "epoch": 10.510141364474492, "grad_norm": 0.4859734773635864, "learning_rate": 2.0602911200244907e-05, "loss": 1.5141, "step": 34200, "train_loss_gtc": 0.037239990234375, "train_loss_gtm": 0.014754142761230469, "train_loss_lm": 1.462109375 }, { "epoch": 10.540872771972957, "grad_norm": 0.4255363643169403, "learning_rate": 2.034266992719886e-05, "loss": 1.5048, "step": 34300, "train_loss_gtc": 0.0356005859375, "train_loss_gtm": 0.009561195373535156, "train_loss_lm": 1.459375 }, { "epoch": 10.57160417947142, "grad_norm": 0.41498520970344543, "learning_rate": 2.008366231002836e-05, "loss": 1.5094, "step": 34400, "train_loss_gtc": 0.0361181640625, "train_loss_gtm": 0.013895111083984375, "train_loss_lm": 1.458828125 }, { "epoch": 10.602335586969883, "grad_norm": 0.4691818058490753, "learning_rate": 1.9825899122672516e-05, "loss": 1.5088, "step": 34500, "train_loss_gtc": 0.036781005859375, "train_loss_gtm": 0.016254196166992186, "train_loss_lm": 1.4590625 }, { "epoch": 10.633066994468347, "grad_norm": 0.3247811496257782, "learning_rate": 1.9569391087305944e-05, "loss": 1.5095, "step": 34600, "train_loss_gtc": 0.036104736328125, "train_loss_gtm": 0.011984748840332031, "train_loss_lm": 1.458671875 }, { "epoch": 10.66379840196681, "grad_norm": 0.48939141631126404, "learning_rate": 1.931414887389265e-05, "loss": 1.5032, "step": 34700, "train_loss_gtc": 0.035, "train_loss_gtm": 0.00996623992919922, "train_loss_lm": 1.45953125 }, { "epoch": 10.694529809465273, "grad_norm": 0.5067106485366821, "learning_rate": 1.906018309974225e-05, "loss": 1.5118, "step": 34800, "train_loss_gtc": 0.036153564453125, "train_loss_gtm": 0.017694778442382812, "train_loss_lm": 1.46171875 }, { "epoch": 10.725261216963737, "grad_norm": 0.4321945607662201, "learning_rate": 1.8807504329068377e-05, "loss": 1.5052, "step": 34900, "train_loss_gtc": 0.0354345703125, "train_loss_gtm": 0.012692756652832031, "train_loss_lm": 1.461015625 }, { "epoch": 10.7559926244622, "grad_norm": 0.39166566729545593, "learning_rate": 1.8556123072549097e-05, "loss": 1.5078, "step": 35000, "train_loss_gtc": 0.037042236328125, "train_loss_gtm": 0.011860542297363282, "train_loss_lm": 1.4615625 }, { "epoch": 10.786724031960663, "grad_norm": 0.4959773123264313, "learning_rate": 1.8306049786889872e-05, "loss": 1.5037, "step": 35100, "train_loss_gtc": 0.036055908203125, "train_loss_gtm": 0.007551231384277344, "train_loss_lm": 1.46109375 }, { "epoch": 10.817455439459128, "grad_norm": 0.5386573076248169, "learning_rate": 1.8057294874388443e-05, "loss": 1.5052, "step": 35200, "train_loss_gtc": 0.034755859375, "train_loss_gtm": 0.011582107543945312, "train_loss_lm": 1.461328125 }, { "epoch": 10.84818684695759, "grad_norm": 0.38217893242836, "learning_rate": 1.78098686825022e-05, "loss": 1.502, "step": 35300, "train_loss_gtc": 0.034422607421875, "train_loss_gtm": 0.0069885444641113285, "train_loss_lm": 1.4628125 }, { "epoch": 10.878918254456053, "grad_norm": 0.3977510929107666, "learning_rate": 1.7563781503417743e-05, "loss": 1.5027, "step": 35400, "train_loss_gtc": 0.034517822265625, "train_loss_gtm": 0.012902565002441406, "train_loss_lm": 1.461015625 }, { "epoch": 10.909649661954518, "grad_norm": 1.0005662441253662, "learning_rate": 1.7319043573622796e-05, "loss": 1.5068, "step": 35500, "train_loss_gtc": 0.034649658203125, "train_loss_gtm": 0.00762664794921875, "train_loss_lm": 1.46140625 }, { "epoch": 10.94038106945298, "grad_norm": 0.8638070225715637, "learning_rate": 1.707566507348032e-05, "loss": 1.5069, "step": 35600, "train_loss_gtc": 0.03516845703125, "train_loss_gtm": 0.013119163513183594, "train_loss_lm": 1.46453125 }, { "epoch": 10.971112476951445, "grad_norm": 0.7276130318641663, "learning_rate": 1.6833656126805075e-05, "loss": 1.5038, "step": 35700, "train_loss_gtc": 0.034442138671875, "train_loss_gtm": 0.008318862915039062, "train_loss_lm": 1.4596875 }, { "epoch": 11.0, "eval_loss": 1.6515624523162842, "eval_runtime": 3.9297, "eval_samples_per_second": 253.199, "eval_steps_per_second": 2.799, "step": 35794, "train_loss_gtc": 0.03423682679521277, "train_loss_gtm": 0.011414101783265459, "train_loss_lm": 1.4602726063829787, "val_loss_gtc": 0.0715576171875, "val_loss_gtm": 0.08521461486816406, "val_loss_lm": 1.484375 }, { "epoch": 11.001843884449908, "grad_norm": 0.355080246925354, "learning_rate": 1.6593026800442584e-05, "loss": 1.5059, "step": 35800, "train_loss_gtc": 0.038492838541666664, "train_loss_gtm": 0.0019823710123697915, "train_loss_lm": 1.4466145833333333 }, { "epoch": 11.03257529194837, "grad_norm": 0.3804630935192108, "learning_rate": 1.6353787103850214e-05, "loss": 1.4999, "step": 35900, "train_loss_gtc": 0.034288330078125, "train_loss_gtm": 0.013097267150878906, "train_loss_lm": 1.454375 }, { "epoch": 11.063306699446835, "grad_norm": 0.43270865082740784, "learning_rate": 1.611594698868099e-05, "loss": 1.4984, "step": 36000, "train_loss_gtc": 0.034847412109375, "train_loss_gtm": 0.010229988098144531, "train_loss_lm": 1.45125 }, { "epoch": 11.094038106945298, "grad_norm": 0.35577720403671265, "learning_rate": 1.587951634836949e-05, "loss": 1.4972, "step": 36100, "train_loss_gtc": 0.03463623046875, "train_loss_gtm": 0.006039161682128907, "train_loss_lm": 1.45390625 }, { "epoch": 11.124769514443761, "grad_norm": 0.3876980245113373, "learning_rate": 1.5644505017720396e-05, "loss": 1.4942, "step": 36200, "train_loss_gtc": 0.032666015625, "train_loss_gtm": 0.00611663818359375, "train_loss_lm": 1.451171875 }, { "epoch": 11.155500921942226, "grad_norm": 0.675238847732544, "learning_rate": 1.5410922772499352e-05, "loss": 1.503, "step": 36300, "train_loss_gtc": 0.035501708984375, "train_loss_gtm": 0.013241043090820312, "train_loss_lm": 1.45578125 }, { "epoch": 11.186232329440688, "grad_norm": 0.4091513752937317, "learning_rate": 1.5178779329026393e-05, "loss": 1.5001, "step": 36400, "train_loss_gtc": 0.03492431640625, "train_loss_gtm": 0.013411216735839844, "train_loss_lm": 1.4534375 }, { "epoch": 11.216963736939151, "grad_norm": 0.4007122814655304, "learning_rate": 1.494808434377164e-05, "loss": 1.4959, "step": 36500, "train_loss_gtc": 0.0340380859375, "train_loss_gtm": 0.010790367126464844, "train_loss_lm": 1.45296875 }, { "epoch": 11.247695144437616, "grad_norm": 0.3332425057888031, "learning_rate": 1.4718847412953784e-05, "loss": 1.4964, "step": 36600, "train_loss_gtc": 0.035784912109375, "train_loss_gtm": 0.013795166015625, "train_loss_lm": 1.4509375 }, { "epoch": 11.278426551936079, "grad_norm": 0.42536449432373047, "learning_rate": 1.4491078072140779e-05, "loss": 1.4959, "step": 36700, "train_loss_gtc": 0.035238037109375, "train_loss_gtm": 0.008274612426757812, "train_loss_lm": 1.453203125 }, { "epoch": 11.309157959434541, "grad_norm": 0.4789024889469147, "learning_rate": 1.4264785795853231e-05, "loss": 1.4947, "step": 36800, "train_loss_gtc": 0.0340283203125, "train_loss_gtm": 0.007297935485839843, "train_loss_lm": 1.4525 }, { "epoch": 11.339889366933006, "grad_norm": 0.436238557100296, "learning_rate": 1.4039979997170349e-05, "loss": 1.4954, "step": 36900, "train_loss_gtc": 0.035128173828125, "train_loss_gtm": 0.010289707183837891, "train_loss_lm": 1.45390625 }, { "epoch": 11.370620774431469, "grad_norm": 0.37121227383613586, "learning_rate": 1.3816670027338297e-05, "loss": 1.4961, "step": 37000, "train_loss_gtc": 0.0336767578125, "train_loss_gtm": 0.011312313079833984, "train_loss_lm": 1.451328125 }, { "epoch": 11.401352181929932, "grad_norm": 0.3737700581550598, "learning_rate": 1.3594865175381267e-05, "loss": 1.4941, "step": 37100, "train_loss_gtc": 0.034173583984375, "train_loss_gtm": 0.011153717041015625, "train_loss_lm": 1.453828125 }, { "epoch": 11.432083589428396, "grad_norm": 0.40509167313575745, "learning_rate": 1.3374574667715033e-05, "loss": 1.4974, "step": 37200, "train_loss_gtc": 0.034654541015625, "train_loss_gtm": 0.013001708984375, "train_loss_lm": 1.452421875 }, { "epoch": 11.46281499692686, "grad_norm": 0.38259902596473694, "learning_rate": 1.3155807667763265e-05, "loss": 1.4975, "step": 37300, "train_loss_gtc": 0.03426025390625, "train_loss_gtm": 0.011098213195800781, "train_loss_lm": 1.45296875 }, { "epoch": 11.493546404425322, "grad_norm": 2.280012369155884, "learning_rate": 1.2938573275576204e-05, "loss": 1.4933, "step": 37400, "train_loss_gtc": 0.034439697265625, "train_loss_gtm": 0.009605464935302734, "train_loss_lm": 1.451640625 }, { "epoch": 11.524277811923787, "grad_norm": 0.8614688515663147, "learning_rate": 1.2722880527452285e-05, "loss": 1.4916, "step": 37500, "train_loss_gtc": 0.032637939453125, "train_loss_gtm": 0.0070468330383300784, "train_loss_lm": 1.454375 }, { "epoch": 11.55500921942225, "grad_norm": 0.40161266922950745, "learning_rate": 1.250873839556213e-05, "loss": 1.4943, "step": 37600, "train_loss_gtc": 0.033919677734375, "train_loss_gtm": 0.005923271179199219, "train_loss_lm": 1.45078125 }, { "epoch": 11.585740626920712, "grad_norm": 0.4867040514945984, "learning_rate": 1.2296155787575386e-05, "loss": 1.4963, "step": 37700, "train_loss_gtc": 0.03362060546875, "train_loss_gtm": 0.01107696533203125, "train_loss_lm": 1.453515625 }, { "epoch": 11.616472034419177, "grad_norm": 0.40651935338974, "learning_rate": 1.208514154629022e-05, "loss": 1.4943, "step": 37800, "train_loss_gtc": 0.034439697265625, "train_loss_gtm": 0.00758575439453125, "train_loss_lm": 1.454609375 }, { "epoch": 11.64720344191764, "grad_norm": 0.43702617287635803, "learning_rate": 1.1875704449265423e-05, "loss": 1.4957, "step": 37900, "train_loss_gtc": 0.034952392578125, "train_loss_gtm": 0.010952072143554687, "train_loss_lm": 1.454921875 }, { "epoch": 11.677934849416104, "grad_norm": 0.3727381229400635, "learning_rate": 1.1667853208455325e-05, "loss": 1.4978, "step": 38000, "train_loss_gtc": 0.03486572265625, "train_loss_gtm": 0.015162067413330078, "train_loss_lm": 1.450859375 }, { "epoch": 11.708666256914567, "grad_norm": 0.3844757080078125, "learning_rate": 1.1461596469847402e-05, "loss": 1.4953, "step": 38100, "train_loss_gtc": 0.035777587890625, "train_loss_gtm": 0.011620597839355469, "train_loss_lm": 1.450546875 }, { "epoch": 11.73939766441303, "grad_norm": 0.40840184688568115, "learning_rate": 1.1256942813102634e-05, "loss": 1.4928, "step": 38200, "train_loss_gtc": 0.031209716796875, "train_loss_gtm": 0.00724945068359375, "train_loss_lm": 1.45421875 }, { "epoch": 11.770129071911494, "grad_norm": 0.6461498141288757, "learning_rate": 1.1053900751198614e-05, "loss": 1.4896, "step": 38300, "train_loss_gtc": 0.033707275390625, "train_loss_gtm": 0.007514209747314453, "train_loss_lm": 1.45015625 }, { "epoch": 11.800860479409957, "grad_norm": 0.46932530403137207, "learning_rate": 1.0852478730075422e-05, "loss": 1.4971, "step": 38400, "train_loss_gtc": 0.0347412109375, "train_loss_gtm": 0.014281749725341797, "train_loss_lm": 1.45265625 }, { "epoch": 11.83159188690842, "grad_norm": 0.417879194021225, "learning_rate": 1.0652685128284285e-05, "loss": 1.493, "step": 38500, "train_loss_gtc": 0.034190673828125, "train_loss_gtm": 0.007110633850097656, "train_loss_lm": 1.451796875 }, { "epoch": 11.862323294406885, "grad_norm": 0.38669833540916443, "learning_rate": 1.0454528256639095e-05, "loss": 1.4928, "step": 38600, "train_loss_gtc": 0.032156982421875, "train_loss_gtm": 0.008788909912109375, "train_loss_lm": 1.45203125 }, { "epoch": 11.893054701905347, "grad_norm": 1.0371503829956055, "learning_rate": 1.0258016357870703e-05, "loss": 1.4918, "step": 38700, "train_loss_gtc": 0.03337646484375, "train_loss_gtm": 0.007540702819824219, "train_loss_lm": 1.450390625 }, { "epoch": 11.92378610940381, "grad_norm": 0.7227888703346252, "learning_rate": 1.0063157606284001e-05, "loss": 1.4903, "step": 38800, "train_loss_gtc": 0.032996826171875, "train_loss_gtm": 0.005477218627929687, "train_loss_lm": 1.452578125 }, { "epoch": 11.954517516902275, "grad_norm": 0.44045162200927734, "learning_rate": 9.869960107417924e-06, "loss": 1.4931, "step": 38900, "train_loss_gtc": 0.034642333984375, "train_loss_gtm": 0.009967632293701172, "train_loss_lm": 1.4534375 }, { "epoch": 11.985248924400738, "grad_norm": 0.36739978194236755, "learning_rate": 9.678431897708279e-06, "loss": 1.4923, "step": 39000, "train_loss_gtc": 0.03304931640625, "train_loss_gtm": 0.007914905548095702, "train_loss_lm": 1.45109375 }, { "epoch": 12.0, "eval_loss": 1.6339843273162842, "eval_runtime": 3.8887, "eval_samples_per_second": 255.872, "eval_steps_per_second": 2.829, "step": 39048, "train_loss_gtc": 0.032511393229166664, "train_loss_gtm": 0.011383334795633951, "train_loss_lm": 1.4518229166666667, "val_loss_gtc": 0.067724609375, "val_loss_gtm": 0.07337799072265624, "val_loss_lm": 1.47890625 }, { "epoch": 12.0159803318992, "grad_norm": 0.5276215672492981, "learning_rate": 9.48858094415348e-06, "loss": 1.4867, "step": 39100, "train_loss_gtc": 0.031123234675480768, "train_loss_gtm": 0.007110412304217999, "train_loss_lm": 1.4439603365384615 }, { "epoch": 12.046711739397665, "grad_norm": 1.3187389373779297, "learning_rate": 9.300415143983122e-06, "loss": 1.4823, "step": 39200, "train_loss_gtc": 0.03217041015625, "train_loss_gtm": 0.007877159118652343, "train_loss_lm": 1.44421875 }, { "epoch": 12.077443146896128, "grad_norm": 0.37951648235321045, "learning_rate": 9.113942324329445e-06, "loss": 1.4868, "step": 39300, "train_loss_gtc": 0.032154541015625, "train_loss_gtm": 0.006891098022460938, "train_loss_lm": 1.446171875 }, { "epoch": 12.10817455439459, "grad_norm": 0.6352601051330566, "learning_rate": 8.929170241901807e-06, "loss": 1.4818, "step": 39400, "train_loss_gtc": 0.032747802734375, "train_loss_gtm": 0.007182502746582031, "train_loss_lm": 1.445390625 }, { "epoch": 12.138905961893055, "grad_norm": 0.46073710918426514, "learning_rate": 8.746106582663994e-06, "loss": 1.4839, "step": 39500, "train_loss_gtc": 0.03167236328125, "train_loss_gtm": 0.009096622467041016, "train_loss_lm": 1.447734375 }, { "epoch": 12.169637369391518, "grad_norm": 0.3877211809158325, "learning_rate": 8.56475896151454e-06, "loss": 1.4845, "step": 39600, "train_loss_gtc": 0.032230224609375, "train_loss_gtm": 0.005583648681640625, "train_loss_lm": 1.445 }, { "epoch": 12.200368776889981, "grad_norm": 0.5160537362098694, "learning_rate": 8.385134921969923e-06, "loss": 1.4865, "step": 39700, "train_loss_gtc": 0.032567138671875, "train_loss_gtm": 0.012664890289306641, "train_loss_lm": 1.44546875 }, { "epoch": 12.231100184388445, "grad_norm": 0.34780940413475037, "learning_rate": 8.207241935850812e-06, "loss": 1.4859, "step": 39800, "train_loss_gtc": 0.031810302734375, "train_loss_gtm": 0.00482290267944336, "train_loss_lm": 1.4471875 }, { "epoch": 12.261831591886908, "grad_norm": 0.39777079224586487, "learning_rate": 8.031087402971232e-06, "loss": 1.488, "step": 39900, "train_loss_gtc": 0.0323828125, "train_loss_gtm": 0.015415172576904296, "train_loss_lm": 1.444921875 }, { "epoch": 12.292562999385371, "grad_norm": 0.5161352753639221, "learning_rate": 7.856678650830806e-06, "loss": 1.4832, "step": 40000, "train_loss_gtc": 0.03137939453125, "train_loss_gtm": 0.0043726348876953125, "train_loss_lm": 1.44625 }, { "epoch": 12.323294406883836, "grad_norm": 0.3717089295387268, "learning_rate": 7.684022934309926e-06, "loss": 1.4859, "step": 40100, "train_loss_gtc": 0.032230224609375, "train_loss_gtm": 0.008196029663085937, "train_loss_lm": 1.44453125 }, { "epoch": 12.354025814382299, "grad_norm": 0.4823426902294159, "learning_rate": 7.513127435367923e-06, "loss": 1.4862, "step": 40200, "train_loss_gtc": 0.032799072265625, "train_loss_gtm": 0.008565444946289063, "train_loss_lm": 1.446953125 }, { "epoch": 12.384757221880761, "grad_norm": 0.3817342221736908, "learning_rate": 7.343999262744389e-06, "loss": 1.4889, "step": 40300, "train_loss_gtc": 0.033624267578125, "train_loss_gtm": 0.00685495376586914, "train_loss_lm": 1.445234375 }, { "epoch": 12.415488629379226, "grad_norm": 0.38065531849861145, "learning_rate": 7.176645451663433e-06, "loss": 1.4908, "step": 40400, "train_loss_gtc": 0.034915771484375, "train_loss_gtm": 0.011385536193847657, "train_loss_lm": 1.443984375 }, { "epoch": 12.446220036877689, "grad_norm": 0.39833277463912964, "learning_rate": 7.011072963541088e-06, "loss": 1.4832, "step": 40500, "train_loss_gtc": 0.031995849609375, "train_loss_gtm": 0.006886463165283203, "train_loss_lm": 1.44546875 }, { "epoch": 12.476951444376152, "grad_norm": 0.3548543453216553, "learning_rate": 6.847288685695663e-06, "loss": 1.4845, "step": 40600, "train_loss_gtc": 0.031795654296875, "train_loss_gtm": 0.010219860076904296, "train_loss_lm": 1.446171875 }, { "epoch": 12.507682851874616, "grad_norm": 0.46865567564964294, "learning_rate": 6.6852994310613035e-06, "loss": 1.4804, "step": 40700, "train_loss_gtc": 0.03116455078125, "train_loss_gtm": 0.0027751541137695313, "train_loss_lm": 1.444921875 }, { "epoch": 12.538414259373079, "grad_norm": 0.3493591547012329, "learning_rate": 6.525111937904565e-06, "loss": 1.4867, "step": 40800, "train_loss_gtc": 0.03113525390625, "train_loss_gtm": 0.006133708953857422, "train_loss_lm": 1.446484375 }, { "epoch": 12.569145666871542, "grad_norm": 0.3806462287902832, "learning_rate": 6.366732869544167e-06, "loss": 1.4847, "step": 40900, "train_loss_gtc": 0.032784423828125, "train_loss_gtm": 0.009026336669921874, "train_loss_lm": 1.444609375 }, { "epoch": 12.599877074370006, "grad_norm": 0.3359711170196533, "learning_rate": 6.210168814073775e-06, "loss": 1.4844, "step": 41000, "train_loss_gtc": 0.033193359375, "train_loss_gtm": 0.013145980834960937, "train_loss_lm": 1.4425 }, { "epoch": 12.63060848186847, "grad_norm": 0.3647012412548065, "learning_rate": 6.0554262840879505e-06, "loss": 1.4819, "step": 41100, "train_loss_gtc": 0.03174072265625, "train_loss_gtm": 0.0059863471984863284, "train_loss_lm": 1.445078125 }, { "epoch": 12.661339889366934, "grad_norm": 0.3800066411495209, "learning_rate": 5.902511716411286e-06, "loss": 1.4832, "step": 41200, "train_loss_gtc": 0.03176025390625, "train_loss_gtm": 0.004956302642822266, "train_loss_lm": 1.445703125 }, { "epoch": 12.692071296865397, "grad_norm": 6.271182060241699, "learning_rate": 5.75143147183061e-06, "loss": 1.4843, "step": 41300, "train_loss_gtc": 0.032977294921875, "train_loss_gtm": 0.008317089080810547, "train_loss_lm": 1.445390625 }, { "epoch": 12.72280270436386, "grad_norm": 1.3521143198013306, "learning_rate": 5.602191834830445e-06, "loss": 1.4785, "step": 41400, "train_loss_gtc": 0.030087890625, "train_loss_gtm": 0.0036014556884765626, "train_loss_lm": 1.4446875 }, { "epoch": 12.753534111862324, "grad_norm": 0.38900676369667053, "learning_rate": 5.454799013331546e-06, "loss": 1.4838, "step": 41500, "train_loss_gtc": 0.031859130859375, "train_loss_gtm": 0.003786640167236328, "train_loss_lm": 1.444453125 }, { "epoch": 12.784265519360787, "grad_norm": 0.36797913908958435, "learning_rate": 5.309259138432693e-06, "loss": 1.4843, "step": 41600, "train_loss_gtc": 0.031395263671875, "train_loss_gtm": 0.005061054229736328, "train_loss_lm": 1.444609375 }, { "epoch": 12.81499692685925, "grad_norm": 2.0185465812683105, "learning_rate": 5.165578264155646e-06, "loss": 1.4854, "step": 41700, "train_loss_gtc": 0.03158447265625, "train_loss_gtm": 0.007223720550537109, "train_loss_lm": 1.444609375 }, { "epoch": 12.845728334357714, "grad_norm": 0.37788382172584534, "learning_rate": 5.023762367193336e-06, "loss": 1.4802, "step": 41800, "train_loss_gtc": 0.031046142578125, "train_loss_gtm": 0.0037957191467285155, "train_loss_lm": 1.4475 }, { "epoch": 12.876459741856177, "grad_norm": 0.31019526720046997, "learning_rate": 4.883817346661234e-06, "loss": 1.4895, "step": 41900, "train_loss_gtc": 0.033118896484375, "train_loss_gtm": 0.00923778533935547, "train_loss_lm": 1.445546875 }, { "epoch": 12.90719114935464, "grad_norm": 0.3986211121082306, "learning_rate": 4.745749023851964e-06, "loss": 1.483, "step": 42000, "train_loss_gtc": 0.03188232421875, "train_loss_gtm": 0.008430919647216796, "train_loss_lm": 1.44296875 }, { "epoch": 12.937922556853104, "grad_norm": 0.3529811501502991, "learning_rate": 4.609563141993156e-06, "loss": 1.4812, "step": 42100, "train_loss_gtc": 0.030782470703125, "train_loss_gtm": 0.0027103614807128906, "train_loss_lm": 1.442265625 }, { "epoch": 12.968653964351567, "grad_norm": 0.3418220579624176, "learning_rate": 4.475265366008547e-06, "loss": 1.4829, "step": 42200, "train_loss_gtc": 0.03141357421875, "train_loss_gtm": 0.007238006591796875, "train_loss_lm": 1.44453125 }, { "epoch": 12.99938537185003, "grad_norm": 0.385499507188797, "learning_rate": 4.342861282282362e-06, "loss": 1.4841, "step": 42300, "train_loss_gtc": 0.032645263671875, "train_loss_gtm": 0.0034380340576171875, "train_loss_lm": 1.444921875 }, { "epoch": 13.0, "eval_loss": 1.618749976158142, "eval_runtime": 3.9049, "eval_samples_per_second": 254.805, "eval_steps_per_second": 2.817, "step": 42302, "train_loss_gtc": 0.043212890625, "train_loss_gtm": 0.05727386474609375, "train_loss_lm": 1.453125, "val_loss_gtc": 0.06478271484375, "val_loss_gtm": 0.07197847366333007, "val_loss_lm": 1.47578125 }, { "epoch": 13.030116779348495, "grad_norm": 0.4604727327823639, "learning_rate": 4.212356398426892e-06, "loss": 1.481, "step": 42400, "train_loss_gtc": 0.03175447425063776, "train_loss_gtm": 0.006234383096500319, "train_loss_lm": 1.4418845663265305 }, { "epoch": 13.060848186846957, "grad_norm": 0.41722559928894043, "learning_rate": 4.0837561430534135e-06, "loss": 1.4805, "step": 42500, "train_loss_gtc": 0.03138427734375, "train_loss_gtm": 0.006003303527832031, "train_loss_lm": 1.4434375 }, { "epoch": 13.09157959434542, "grad_norm": 0.3385833501815796, "learning_rate": 3.957065865546406e-06, "loss": 1.4773, "step": 42600, "train_loss_gtc": 0.032398681640625, "train_loss_gtm": 0.005317020416259766, "train_loss_lm": 1.4409375 }, { "epoch": 13.122311001843885, "grad_norm": 0.35626187920570374, "learning_rate": 3.832290835840974e-06, "loss": 1.4767, "step": 42700, "train_loss_gtc": 0.03093505859375, "train_loss_gtm": 0.0037181663513183596, "train_loss_lm": 1.440625 }, { "epoch": 13.153042409342348, "grad_norm": 0.3810971975326538, "learning_rate": 3.7094362442036845e-06, "loss": 1.4776, "step": 42800, "train_loss_gtc": 0.03213134765625, "train_loss_gtm": 0.004517803192138672, "train_loss_lm": 1.441328125 }, { "epoch": 13.18377381684081, "grad_norm": 0.36459001898765564, "learning_rate": 3.588507201016633e-06, "loss": 1.4797, "step": 42900, "train_loss_gtc": 0.031527099609375, "train_loss_gtm": 0.009501018524169923, "train_loss_lm": 1.440546875 }, { "epoch": 13.214505224339275, "grad_norm": 0.505306601524353, "learning_rate": 3.469508736564897e-06, "loss": 1.4807, "step": 43000, "train_loss_gtc": 0.0320068359375, "train_loss_gtm": 0.006435070037841797, "train_loss_lm": 1.4425 }, { "epoch": 13.245236631837738, "grad_norm": 1.4232553243637085, "learning_rate": 3.3524458008272475e-06, "loss": 1.4775, "step": 43100, "train_loss_gtc": 0.03030517578125, "train_loss_gtm": 0.004405345916748047, "train_loss_lm": 1.44171875 }, { "epoch": 13.275968039336203, "grad_norm": 0.37980103492736816, "learning_rate": 3.2373232632703197e-06, "loss": 1.4816, "step": 43200, "train_loss_gtc": 0.0322021484375, "train_loss_gtm": 0.0026582717895507813, "train_loss_lm": 1.4415625 }, { "epoch": 13.306699446834665, "grad_norm": 2.2766940593719482, "learning_rate": 3.1241459126459706e-06, "loss": 1.4808, "step": 43300, "train_loss_gtc": 0.03172119140625, "train_loss_gtm": 0.008477497100830077, "train_loss_lm": 1.44171875 }, { "epoch": 13.337430854333128, "grad_norm": 0.427611768245697, "learning_rate": 3.01291845679213e-06, "loss": 1.4783, "step": 43400, "train_loss_gtc": 0.03244873046875, "train_loss_gtm": 0.0040134239196777345, "train_loss_lm": 1.440390625 }, { "epoch": 13.368162261831593, "grad_norm": 0.30551981925964355, "learning_rate": 2.9036455224369765e-06, "loss": 1.4762, "step": 43500, "train_loss_gtc": 0.032469482421875, "train_loss_gtm": 0.003238506317138672, "train_loss_lm": 1.439453125 }, { "epoch": 13.398893669330056, "grad_norm": 0.42366525530815125, "learning_rate": 2.7963316550064455e-06, "loss": 1.4821, "step": 43600, "train_loss_gtc": 0.033074951171875, "train_loss_gtm": 0.010958194732666016, "train_loss_lm": 1.439765625 }, { "epoch": 13.429625076828518, "grad_norm": 0.33650246262550354, "learning_rate": 2.6909813184351873e-06, "loss": 1.4795, "step": 43700, "train_loss_gtc": 0.032664794921875, "train_loss_gtm": 0.0049641036987304685, "train_loss_lm": 1.4409375 }, { "epoch": 13.460356484326983, "grad_norm": 0.34106993675231934, "learning_rate": 2.5875988949808472e-06, "loss": 1.4846, "step": 43800, "train_loss_gtc": 0.03381591796875, "train_loss_gtm": 0.013602008819580078, "train_loss_lm": 1.441796875 }, { "epoch": 13.491087891825446, "grad_norm": 0.3917493224143982, "learning_rate": 2.486188685041807e-06, "loss": 1.4821, "step": 43900, "train_loss_gtc": 0.031900634765625, "train_loss_gtm": 0.008092212677001952, "train_loss_lm": 1.440859375 }, { "epoch": 13.521819299323909, "grad_norm": 0.3950476050376892, "learning_rate": 2.386754906978278e-06, "loss": 1.4819, "step": 44000, "train_loss_gtc": 0.03089111328125, "train_loss_gtm": 0.008052177429199218, "train_loss_lm": 1.4409375 }, { "epoch": 13.552550706822373, "grad_norm": 1.4824786186218262, "learning_rate": 2.2893016969368575e-06, "loss": 1.4889, "step": 44100, "train_loss_gtc": 0.03397705078125, "train_loss_gtm": 0.019720077514648438, "train_loss_lm": 1.441640625 }, { "epoch": 13.583282114320836, "grad_norm": 0.4232785999774933, "learning_rate": 2.1938331086784335e-06, "loss": 1.4796, "step": 44200, "train_loss_gtc": 0.030982666015625, "train_loss_gtm": 0.0070129776000976566, "train_loss_lm": 1.44140625 }, { "epoch": 13.614013521819299, "grad_norm": 0.3128654956817627, "learning_rate": 2.1003531134096255e-06, "loss": 1.4759, "step": 44300, "train_loss_gtc": 0.03089111328125, "train_loss_gtm": 0.005992927551269531, "train_loss_lm": 1.43984375 }, { "epoch": 13.644744929317763, "grad_norm": 0.3076239824295044, "learning_rate": 2.0088655996175097e-06, "loss": 1.4805, "step": 44400, "train_loss_gtc": 0.031224365234375, "train_loss_gtm": 0.006224002838134766, "train_loss_lm": 1.4428125 }, { "epoch": 13.675476336816226, "grad_norm": 0.3875581622123718, "learning_rate": 1.9193743729079507e-06, "loss": 1.4787, "step": 44500, "train_loss_gtc": 0.030994873046875, "train_loss_gtm": 0.005257759094238281, "train_loss_lm": 1.44125 }, { "epoch": 13.706207744314689, "grad_norm": 0.3164869546890259, "learning_rate": 1.8318831558472582e-06, "loss": 1.4788, "step": 44600, "train_loss_gtc": 0.032825927734375, "train_loss_gtm": 0.008005275726318359, "train_loss_lm": 1.4421875 }, { "epoch": 13.736939151813154, "grad_norm": 0.40070638060569763, "learning_rate": 1.7463955878073424e-06, "loss": 1.4785, "step": 44700, "train_loss_gtc": 0.031497802734375, "train_loss_gtm": 0.0062798881530761715, "train_loss_lm": 1.44125 }, { "epoch": 13.767670559311616, "grad_norm": 0.39651504158973694, "learning_rate": 1.662915224814321e-06, "loss": 1.4769, "step": 44800, "train_loss_gtc": 0.031278076171875, "train_loss_gtm": 0.00752462387084961, "train_loss_lm": 1.44140625 }, { "epoch": 13.79840196681008, "grad_norm": 0.3495580554008484, "learning_rate": 1.5814455394006167e-06, "loss": 1.4801, "step": 44900, "train_loss_gtc": 0.03222900390625, "train_loss_gtm": 0.008823738098144532, "train_loss_lm": 1.4409375 }, { "epoch": 13.829133374308544, "grad_norm": 0.33641329407691956, "learning_rate": 1.501989920460517e-06, "loss": 1.4793, "step": 45000, "train_loss_gtc": 0.03152587890625, "train_loss_gtm": 0.005212993621826172, "train_loss_lm": 1.441484375 }, { "epoch": 13.859864781807007, "grad_norm": 0.4456912577152252, "learning_rate": 1.4245516731091646e-06, "loss": 1.4772, "step": 45100, "train_loss_gtc": 0.03094482421875, "train_loss_gtm": 0.002681427001953125, "train_loss_lm": 1.43921875 }, { "epoch": 13.89059618930547, "grad_norm": 0.34826260805130005, "learning_rate": 1.349134018545134e-06, "loss": 1.4777, "step": 45200, "train_loss_gtc": 0.032086181640625, "train_loss_gtm": 0.006848697662353516, "train_loss_lm": 1.43953125 }, { "epoch": 13.921327596803934, "grad_norm": 0.3667930066585541, "learning_rate": 1.2757400939163833e-06, "loss": 1.4779, "step": 45300, "train_loss_gtc": 0.032056884765625, "train_loss_gtm": 0.005229644775390625, "train_loss_lm": 1.44171875 }, { "epoch": 13.952059004302397, "grad_norm": 0.2943558096885681, "learning_rate": 1.2043729521897752e-06, "loss": 1.4775, "step": 45400, "train_loss_gtc": 0.03116455078125, "train_loss_gtm": 0.005698661804199218, "train_loss_lm": 1.440625 }, { "epoch": 13.98279041180086, "grad_norm": 0.3945413827896118, "learning_rate": 1.1350355620241226e-06, "loss": 1.4789, "step": 45500, "train_loss_gtc": 0.03146484375, "train_loss_gtm": 0.005878944396972657, "train_loss_lm": 1.442421875 }, { "epoch": 14.0, "eval_loss": 1.60546875, "eval_runtime": 3.9133, "eval_samples_per_second": 254.264, "eval_steps_per_second": 2.811, "step": 45556, "train_loss_gtc": 0.030979701450892856, "train_loss_gtm": 0.002463647297450474, "train_loss_lm": 1.4439174107142858, "val_loss_gtc": 0.06396484375, "val_loss_gtm": 0.06325559616088867, "val_loss_lm": 1.47578125 }, { "epoch": 14.013521819299324, "grad_norm": 0.37518543004989624, "learning_rate": 1.0677308076466385e-06, "loss": 1.478, "step": 45600, "train_loss_gtc": 0.03331687233664773, "train_loss_gtm": 0.010442083532159979, "train_loss_lm": 1.4401633522727273 }, { "epoch": 14.044253226797787, "grad_norm": 0.3532905876636505, "learning_rate": 1.002461488733003e-06, "loss": 1.4728, "step": 45700, "train_loss_gtc": 0.0307568359375, "train_loss_gtm": 0.0067650794982910155, "train_loss_lm": 1.436328125 }, { "epoch": 14.07498463429625, "grad_norm": 0.40793976187705994, "learning_rate": 9.392303202908848e-07, "loss": 1.473, "step": 45800, "train_loss_gtc": 0.02984375, "train_loss_gtm": 0.002398052215576172, "train_loss_lm": 1.439375 }, { "epoch": 14.105716041794714, "grad_norm": 0.3508872985839844, "learning_rate": 8.780399325470313e-07, "loss": 1.4732, "step": 45900, "train_loss_gtc": 0.031231689453125, "train_loss_gtm": 0.004491233825683593, "train_loss_lm": 1.43890625 }, { "epoch": 14.136447449293177, "grad_norm": 1.9087783098220825, "learning_rate": 8.188928708378229e-07, "loss": 1.4757, "step": 46000, "train_loss_gtc": 0.030797119140625, "train_loss_gtm": 0.0036650848388671874, "train_loss_lm": 1.437265625 }, { "epoch": 14.167178856791642, "grad_norm": 0.5606856942176819, "learning_rate": 7.61791595503425e-07, "loss": 1.4788, "step": 46100, "train_loss_gtc": 0.03298828125, "train_loss_gtm": 0.008520011901855468, "train_loss_lm": 1.440625 }, { "epoch": 14.197910264290105, "grad_norm": 0.5114173293113708, "learning_rate": 7.067384817854184e-07, "loss": 1.4751, "step": 46200, "train_loss_gtc": 0.031270751953125, "train_loss_gtm": 0.006043624877929687, "train_loss_lm": 1.438828125 }, { "epoch": 14.228641671788568, "grad_norm": 0.3046382963657379, "learning_rate": 6.537358197280241e-07, "loss": 1.4759, "step": 46300, "train_loss_gtc": 0.029881591796875, "train_loss_gtm": 0.00199981689453125, "train_loss_lm": 1.44015625 }, { "epoch": 14.259373079287032, "grad_norm": 0.3361985683441162, "learning_rate": 6.027858140828235e-07, "loss": 1.48, "step": 46400, "train_loss_gtc": 0.032630615234375, "train_loss_gtm": 0.00764068603515625, "train_loss_lm": 1.438125 }, { "epoch": 14.290104486785495, "grad_norm": 0.332077294588089, "learning_rate": 5.538905842170649e-07, "loss": 1.4752, "step": 46500, "train_loss_gtc": 0.03033203125, "train_loss_gtm": 0.008144855499267578, "train_loss_lm": 1.4384375 }, { "epoch": 14.320835894283958, "grad_norm": 0.3548417389392853, "learning_rate": 5.070521640254788e-07, "loss": 1.4765, "step": 46600, "train_loss_gtc": 0.031827392578125, "train_loss_gtm": 0.005391826629638672, "train_loss_lm": 1.438671875 }, { "epoch": 14.351567301782422, "grad_norm": 0.35536205768585205, "learning_rate": 4.622725018457008e-07, "loss": 1.4791, "step": 46700, "train_loss_gtc": 0.031151123046875, "train_loss_gtm": 0.005690097808837891, "train_loss_lm": 1.439765625 }, { "epoch": 14.382298709280885, "grad_norm": 0.36706480383872986, "learning_rate": 4.1955346037721445e-07, "loss": 1.4791, "step": 46800, "train_loss_gtc": 0.032288818359375, "train_loss_gtm": 0.011700859069824219, "train_loss_lm": 1.438515625 }, { "epoch": 14.413030116779348, "grad_norm": 0.3751659393310547, "learning_rate": 3.7889681660386866e-07, "loss": 1.4776, "step": 46900, "train_loss_gtc": 0.03252685546875, "train_loss_gtm": 0.009815158843994141, "train_loss_lm": 1.438125 }, { "epoch": 14.443761524277813, "grad_norm": 0.32809144258499146, "learning_rate": 3.403042617199592e-07, "loss": 1.4792, "step": 47000, "train_loss_gtc": 0.032452392578125, "train_loss_gtm": 0.0022745895385742187, "train_loss_lm": 1.440546875 }, { "epoch": 14.474492931776275, "grad_norm": 0.3384574055671692, "learning_rate": 3.037774010598793e-07, "loss": 1.4798, "step": 47100, "train_loss_gtc": 0.03126708984375, "train_loss_gtm": 0.00357696533203125, "train_loss_lm": 1.44046875 }, { "epoch": 14.505224339274738, "grad_norm": 0.3188628554344177, "learning_rate": 2.6931775403135074e-07, "loss": 1.4742, "step": 47200, "train_loss_gtc": 0.030872802734375, "train_loss_gtm": 0.0019446945190429688, "train_loss_lm": 1.439609375 }, { "epoch": 14.535955746773203, "grad_norm": 0.6094369292259216, "learning_rate": 2.369267540522191e-07, "loss": 1.4732, "step": 47300, "train_loss_gtc": 0.031270751953125, "train_loss_gtm": 0.0019117927551269532, "train_loss_lm": 1.44015625 }, { "epoch": 14.566687154271666, "grad_norm": 0.3574078381061554, "learning_rate": 2.0660574849081237e-07, "loss": 1.477, "step": 47400, "train_loss_gtc": 0.030413818359375, "train_loss_gtm": 0.0030179214477539062, "train_loss_lm": 1.44078125 }, { "epoch": 14.597418561770128, "grad_norm": 0.34775152802467346, "learning_rate": 1.783559986099137e-07, "loss": 1.4796, "step": 47500, "train_loss_gtc": 0.031693115234375, "train_loss_gtm": 0.0033152008056640623, "train_loss_lm": 1.4409375 }, { "epoch": 14.628149969268593, "grad_norm": 0.3424850106239319, "learning_rate": 1.521786795142921e-07, "loss": 1.4761, "step": 47600, "train_loss_gtc": 0.029925537109375, "train_loss_gtm": 0.0017087364196777344, "train_loss_lm": 1.43875 }, { "epoch": 14.658881376767056, "grad_norm": 0.31570205092430115, "learning_rate": 1.2807488010181945e-07, "loss": 1.4798, "step": 47700, "train_loss_gtc": 0.032105712890625, "train_loss_gtm": 0.0031629753112792968, "train_loss_lm": 1.441171875 }, { "epoch": 14.689612784265519, "grad_norm": 0.3260713517665863, "learning_rate": 1.0604560301816224e-07, "loss": 1.4788, "step": 47800, "train_loss_gtc": 0.031483154296875, "train_loss_gtm": 0.006717433929443359, "train_loss_lm": 1.439140625 }, { "epoch": 14.720344191763983, "grad_norm": 0.3307570219039917, "learning_rate": 8.609176461510938e-08, "loss": 1.4739, "step": 47900, "train_loss_gtc": 0.03117919921875, "train_loss_gtm": 0.005090217590332031, "train_loss_lm": 1.438203125 }, { "epoch": 14.751075599262446, "grad_norm": 0.29519036412239075, "learning_rate": 6.821419491241376e-08, "loss": 1.472, "step": 48000, "train_loss_gtc": 0.030118408203125, "train_loss_gtm": 0.002861900329589844, "train_loss_lm": 1.440078125 }, { "epoch": 14.781807006760909, "grad_norm": 0.38826602697372437, "learning_rate": 5.2413637563292205e-08, "loss": 1.4736, "step": 48100, "train_loss_gtc": 0.030203857421875, "train_loss_gtm": 0.004720573425292969, "train_loss_lm": 1.43859375 }, { "epoch": 14.812538414259373, "grad_norm": 0.4058314263820648, "learning_rate": 3.8690749823488967e-08, "loss": 1.4767, "step": 48200, "train_loss_gtc": 0.03099365234375, "train_loss_gtm": 0.005015640258789062, "train_loss_lm": 1.4378125 }, { "epoch": 14.843269821757836, "grad_norm": 0.38652893900871277, "learning_rate": 2.7046102523919927e-08, "loss": 1.471, "step": 48300, "train_loss_gtc": 0.030880126953125, "train_loss_gtm": 0.0031046104431152344, "train_loss_lm": 1.439609375 }, { "epoch": 14.8740012292563, "grad_norm": 0.3828943967819214, "learning_rate": 1.748018004694707e-08, "loss": 1.4734, "step": 48400, "train_loss_gtc": 0.029947509765625, "train_loss_gtm": 0.0023802757263183595, "train_loss_lm": 1.43984375 }, { "epoch": 14.904732636754764, "grad_norm": 0.3241247832775116, "learning_rate": 9.993380306222432e-09, "loss": 1.4751, "step": 48500, "train_loss_gtc": 0.0314208984375, "train_loss_gtm": 0.005055904388427734, "train_loss_lm": 1.440234375 }, { "epoch": 14.935464044253226, "grad_norm": 0.3689234256744385, "learning_rate": 4.586014730140198e-09, "loss": 1.477, "step": 48600, "train_loss_gtc": 0.032603759765625, "train_loss_gtm": 0.004544639587402343, "train_loss_lm": 1.43734375 }, { "epoch": 14.966195451751691, "grad_norm": 0.376676082611084, "learning_rate": 1.2583082488581976e-09, "loss": 1.4825, "step": 48700, "train_loss_gtc": 0.03310546875, "train_loss_gtm": 0.009465179443359374, "train_loss_lm": 1.439765625 }, { "epoch": 14.996926859250154, "grad_norm": 0.3576776385307312, "learning_rate": 1.0399284983142465e-11, "loss": 1.4777, "step": 48800, "train_loss_gtc": 0.0317724609375, "train_loss_gtm": 0.005117168426513672, "train_loss_lm": 1.439921875 }, { "epoch": 15.0, "eval_loss": 1.6144530773162842, "eval_runtime": 3.9176, "eval_samples_per_second": 253.984, "eval_steps_per_second": 2.808, "step": 48810, "train_loss_gtc": 0.03424072265625, "train_loss_gtm": 0.010280990600585937, "train_loss_lm": 1.44140625, "val_loss_gtc": 0.0637451171875, "val_loss_gtm": 0.06403846740722656, "val_loss_lm": 1.475 } ], "logging_steps": 100, "max_steps": 48810, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 48, "trial_name": null, "trial_params": null }