mridangAI / trainer_state.json
mridangteam's picture
🚀 First upload of finetuned MridangAI model
cae511e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 26790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011198208286674132,
"grad_norm": 4.2997212409973145,
"learning_rate": 4.9983202687569994e-05,
"loss": 3.1991,
"step": 10
},
{
"epoch": 0.0022396416573348264,
"grad_norm": 3.1073286533355713,
"learning_rate": 4.9964539007092206e-05,
"loss": 2.9692,
"step": 20
},
{
"epoch": 0.0033594624860022394,
"grad_norm": 1.520981788635254,
"learning_rate": 4.994587532661441e-05,
"loss": 3.2009,
"step": 30
},
{
"epoch": 0.004479283314669653,
"grad_norm": 4.404178142547607,
"learning_rate": 4.9927211646136616e-05,
"loss": 2.8,
"step": 40
},
{
"epoch": 0.005599104143337066,
"grad_norm": 6.26295280456543,
"learning_rate": 4.990854796565883e-05,
"loss": 3.2157,
"step": 50
},
{
"epoch": 0.006718924972004479,
"grad_norm": 2.1947414875030518,
"learning_rate": 4.988988428518104e-05,
"loss": 3.358,
"step": 60
},
{
"epoch": 0.007838745800671893,
"grad_norm": 2.184293746948242,
"learning_rate": 4.987122060470325e-05,
"loss": 3.1155,
"step": 70
},
{
"epoch": 0.008958566629339306,
"grad_norm": 2.162921905517578,
"learning_rate": 4.9852556924225456e-05,
"loss": 2.113,
"step": 80
},
{
"epoch": 0.010078387458006719,
"grad_norm": 6.238914966583252,
"learning_rate": 4.983389324374767e-05,
"loss": 2.9041,
"step": 90
},
{
"epoch": 0.011198208286674132,
"grad_norm": 2.3711066246032715,
"learning_rate": 4.981522956326988e-05,
"loss": 2.6368,
"step": 100
},
{
"epoch": 0.012318029115341545,
"grad_norm": 6.883894443511963,
"learning_rate": 4.979656588279209e-05,
"loss": 3.1278,
"step": 110
},
{
"epoch": 0.013437849944008958,
"grad_norm": 2.7835214138031006,
"learning_rate": 4.9777902202314296e-05,
"loss": 3.2703,
"step": 120
},
{
"epoch": 0.014557670772676373,
"grad_norm": 2.707707643508911,
"learning_rate": 4.975923852183651e-05,
"loss": 3.2012,
"step": 130
},
{
"epoch": 0.015677491601343786,
"grad_norm": 7.556955337524414,
"learning_rate": 4.974057484135872e-05,
"loss": 3.0857,
"step": 140
},
{
"epoch": 0.0167973124300112,
"grad_norm": 9.100250244140625,
"learning_rate": 4.972191116088093e-05,
"loss": 2.8656,
"step": 150
},
{
"epoch": 0.01791713325867861,
"grad_norm": 6.301916122436523,
"learning_rate": 4.9703247480403136e-05,
"loss": 2.6866,
"step": 160
},
{
"epoch": 0.019036954087346025,
"grad_norm": 2.9455721378326416,
"learning_rate": 4.968458379992535e-05,
"loss": 2.4591,
"step": 170
},
{
"epoch": 0.020156774916013438,
"grad_norm": 2.6669416427612305,
"learning_rate": 4.966592011944756e-05,
"loss": 2.73,
"step": 180
},
{
"epoch": 0.02127659574468085,
"grad_norm": 7.3631086349487305,
"learning_rate": 4.964725643896977e-05,
"loss": 2.5342,
"step": 190
},
{
"epoch": 0.022396416573348264,
"grad_norm": 2.861095905303955,
"learning_rate": 4.9628592758491976e-05,
"loss": 3.152,
"step": 200
},
{
"epoch": 0.023516237402015677,
"grad_norm": 6.908902645111084,
"learning_rate": 4.960992907801419e-05,
"loss": 2.4202,
"step": 210
},
{
"epoch": 0.02463605823068309,
"grad_norm": 3.1885159015655518,
"learning_rate": 4.95912653975364e-05,
"loss": 2.6223,
"step": 220
},
{
"epoch": 0.025755879059350503,
"grad_norm": 3.509582042694092,
"learning_rate": 4.957260171705861e-05,
"loss": 2.2328,
"step": 230
},
{
"epoch": 0.026875699888017916,
"grad_norm": 7.881849765777588,
"learning_rate": 4.9553938036580816e-05,
"loss": 3.1846,
"step": 240
},
{
"epoch": 0.027995520716685332,
"grad_norm": 6.826298236846924,
"learning_rate": 4.953527435610303e-05,
"loss": 2.5674,
"step": 250
},
{
"epoch": 0.029115341545352745,
"grad_norm": 2.763533353805542,
"learning_rate": 4.951661067562523e-05,
"loss": 2.9195,
"step": 260
},
{
"epoch": 0.030235162374020158,
"grad_norm": 2.4903926849365234,
"learning_rate": 4.9497946995147444e-05,
"loss": 2.8041,
"step": 270
},
{
"epoch": 0.03135498320268757,
"grad_norm": 5.462828636169434,
"learning_rate": 4.9479283314669656e-05,
"loss": 2.7518,
"step": 280
},
{
"epoch": 0.032474804031354984,
"grad_norm": 2.7415122985839844,
"learning_rate": 4.946061963419186e-05,
"loss": 2.2559,
"step": 290
},
{
"epoch": 0.0335946248600224,
"grad_norm": 3.042797803878784,
"learning_rate": 4.944195595371407e-05,
"loss": 2.3064,
"step": 300
},
{
"epoch": 0.03471444568868981,
"grad_norm": 7.439578533172607,
"learning_rate": 4.9423292273236284e-05,
"loss": 2.2869,
"step": 310
},
{
"epoch": 0.03583426651735722,
"grad_norm": 7.290367603302002,
"learning_rate": 4.9404628592758496e-05,
"loss": 2.4284,
"step": 320
},
{
"epoch": 0.036954087346024636,
"grad_norm": 3.0886971950531006,
"learning_rate": 4.93859649122807e-05,
"loss": 2.8264,
"step": 330
},
{
"epoch": 0.03807390817469205,
"grad_norm": 2.817957878112793,
"learning_rate": 4.936730123180291e-05,
"loss": 2.631,
"step": 340
},
{
"epoch": 0.03919372900335946,
"grad_norm": 2.4056355953216553,
"learning_rate": 4.9348637551325124e-05,
"loss": 2.538,
"step": 350
},
{
"epoch": 0.040313549832026875,
"grad_norm": 5.376706600189209,
"learning_rate": 4.9329973870847336e-05,
"loss": 2.8889,
"step": 360
},
{
"epoch": 0.04143337066069429,
"grad_norm": 2.869654893875122,
"learning_rate": 4.931131019036954e-05,
"loss": 2.4983,
"step": 370
},
{
"epoch": 0.0425531914893617,
"grad_norm": 4.797430515289307,
"learning_rate": 4.929264650989175e-05,
"loss": 3.0132,
"step": 380
},
{
"epoch": 0.043673012318029114,
"grad_norm": 7.970874786376953,
"learning_rate": 4.9273982829413964e-05,
"loss": 2.8745,
"step": 390
},
{
"epoch": 0.04479283314669653,
"grad_norm": 5.253184795379639,
"learning_rate": 4.9255319148936176e-05,
"loss": 2.6653,
"step": 400
},
{
"epoch": 0.04591265397536394,
"grad_norm": 3.0611302852630615,
"learning_rate": 4.923665546845838e-05,
"loss": 2.5853,
"step": 410
},
{
"epoch": 0.04703247480403135,
"grad_norm": 5.2951884269714355,
"learning_rate": 4.921799178798059e-05,
"loss": 2.5193,
"step": 420
},
{
"epoch": 0.048152295632698766,
"grad_norm": 2.9463164806365967,
"learning_rate": 4.9199328107502804e-05,
"loss": 2.5797,
"step": 430
},
{
"epoch": 0.04927211646136618,
"grad_norm": 3.1109468936920166,
"learning_rate": 4.9180664427025016e-05,
"loss": 2.7147,
"step": 440
},
{
"epoch": 0.05039193729003359,
"grad_norm": 4.468992710113525,
"learning_rate": 4.916200074654722e-05,
"loss": 2.5695,
"step": 450
},
{
"epoch": 0.051511758118701005,
"grad_norm": 8.419249534606934,
"learning_rate": 4.914333706606943e-05,
"loss": 2.7006,
"step": 460
},
{
"epoch": 0.05263157894736842,
"grad_norm": 6.9784722328186035,
"learning_rate": 4.912467338559164e-05,
"loss": 2.6604,
"step": 470
},
{
"epoch": 0.05375139977603583,
"grad_norm": 3.6676979064941406,
"learning_rate": 4.9106009705113856e-05,
"loss": 2.5849,
"step": 480
},
{
"epoch": 0.054871220604703244,
"grad_norm": 2.428481101989746,
"learning_rate": 4.908734602463606e-05,
"loss": 2.7081,
"step": 490
},
{
"epoch": 0.055991041433370664,
"grad_norm": 4.069552898406982,
"learning_rate": 4.9068682344158266e-05,
"loss": 2.7358,
"step": 500
},
{
"epoch": 0.05711086226203808,
"grad_norm": 4.768444538116455,
"learning_rate": 4.905001866368048e-05,
"loss": 2.3977,
"step": 510
},
{
"epoch": 0.05823068309070549,
"grad_norm": 7.9206342697143555,
"learning_rate": 4.903135498320269e-05,
"loss": 2.615,
"step": 520
},
{
"epoch": 0.0593505039193729,
"grad_norm": 4.9245476722717285,
"learning_rate": 4.90126913027249e-05,
"loss": 2.1996,
"step": 530
},
{
"epoch": 0.060470324748040316,
"grad_norm": 3.498934745788574,
"learning_rate": 4.8994027622247106e-05,
"loss": 2.4096,
"step": 540
},
{
"epoch": 0.06159014557670773,
"grad_norm": 2.8137447834014893,
"learning_rate": 4.897536394176932e-05,
"loss": 2.3293,
"step": 550
},
{
"epoch": 0.06270996640537514,
"grad_norm": 2.2534000873565674,
"learning_rate": 4.895670026129153e-05,
"loss": 2.5449,
"step": 560
},
{
"epoch": 0.06382978723404255,
"grad_norm": 5.295638561248779,
"learning_rate": 4.893803658081374e-05,
"loss": 2.4056,
"step": 570
},
{
"epoch": 0.06494960806270997,
"grad_norm": 2.56434965133667,
"learning_rate": 4.8919372900335946e-05,
"loss": 2.4592,
"step": 580
},
{
"epoch": 0.06606942889137737,
"grad_norm": 2.1598501205444336,
"learning_rate": 4.890070921985816e-05,
"loss": 2.3764,
"step": 590
},
{
"epoch": 0.0671892497200448,
"grad_norm": 2.71020245552063,
"learning_rate": 4.888204553938037e-05,
"loss": 2.8345,
"step": 600
},
{
"epoch": 0.0683090705487122,
"grad_norm": 6.304147243499756,
"learning_rate": 4.886338185890258e-05,
"loss": 2.6056,
"step": 610
},
{
"epoch": 0.06942889137737962,
"grad_norm": 6.749722003936768,
"learning_rate": 4.8844718178424786e-05,
"loss": 2.1934,
"step": 620
},
{
"epoch": 0.07054871220604703,
"grad_norm": 7.15731954574585,
"learning_rate": 4.8826054497947e-05,
"loss": 2.3531,
"step": 630
},
{
"epoch": 0.07166853303471445,
"grad_norm": 2.1473021507263184,
"learning_rate": 4.880739081746921e-05,
"loss": 2.6166,
"step": 640
},
{
"epoch": 0.07278835386338185,
"grad_norm": 2.7124907970428467,
"learning_rate": 4.878872713699142e-05,
"loss": 2.3902,
"step": 650
},
{
"epoch": 0.07390817469204927,
"grad_norm": 11.411026954650879,
"learning_rate": 4.8770063456513626e-05,
"loss": 2.4434,
"step": 660
},
{
"epoch": 0.07502799552071669,
"grad_norm": 2.7030553817749023,
"learning_rate": 4.875139977603584e-05,
"loss": 2.3203,
"step": 670
},
{
"epoch": 0.0761478163493841,
"grad_norm": 8.861196517944336,
"learning_rate": 4.873273609555804e-05,
"loss": 2.5645,
"step": 680
},
{
"epoch": 0.07726763717805152,
"grad_norm": 4.628374099731445,
"learning_rate": 4.871407241508026e-05,
"loss": 2.8101,
"step": 690
},
{
"epoch": 0.07838745800671892,
"grad_norm": 9.408851623535156,
"learning_rate": 4.8695408734602466e-05,
"loss": 2.2799,
"step": 700
},
{
"epoch": 0.07950727883538634,
"grad_norm": 9.125829696655273,
"learning_rate": 4.867674505412468e-05,
"loss": 2.6383,
"step": 710
},
{
"epoch": 0.08062709966405375,
"grad_norm": 3.9296653270721436,
"learning_rate": 4.865808137364688e-05,
"loss": 2.0646,
"step": 720
},
{
"epoch": 0.08174692049272117,
"grad_norm": 5.768338203430176,
"learning_rate": 4.8639417693169094e-05,
"loss": 2.3089,
"step": 730
},
{
"epoch": 0.08286674132138858,
"grad_norm": 2.457167148590088,
"learning_rate": 4.8620754012691306e-05,
"loss": 2.2855,
"step": 740
},
{
"epoch": 0.083986562150056,
"grad_norm": 8.373284339904785,
"learning_rate": 4.860209033221351e-05,
"loss": 2.2884,
"step": 750
},
{
"epoch": 0.0851063829787234,
"grad_norm": 2.7077553272247314,
"learning_rate": 4.858342665173572e-05,
"loss": 2.5055,
"step": 760
},
{
"epoch": 0.08622620380739082,
"grad_norm": 7.299142360687256,
"learning_rate": 4.8564762971257934e-05,
"loss": 2.2387,
"step": 770
},
{
"epoch": 0.08734602463605823,
"grad_norm": 2.6765339374542236,
"learning_rate": 4.8546099290780146e-05,
"loss": 2.6105,
"step": 780
},
{
"epoch": 0.08846584546472565,
"grad_norm": 3.2736477851867676,
"learning_rate": 4.852743561030235e-05,
"loss": 2.3101,
"step": 790
},
{
"epoch": 0.08958566629339305,
"grad_norm": 8.751072883605957,
"learning_rate": 4.850877192982456e-05,
"loss": 2.7699,
"step": 800
},
{
"epoch": 0.09070548712206047,
"grad_norm": 2.8005926609039307,
"learning_rate": 4.8490108249346774e-05,
"loss": 2.5564,
"step": 810
},
{
"epoch": 0.09182530795072788,
"grad_norm": 5.777060031890869,
"learning_rate": 4.8471444568868986e-05,
"loss": 2.5826,
"step": 820
},
{
"epoch": 0.0929451287793953,
"grad_norm": 5.9840803146362305,
"learning_rate": 4.845278088839119e-05,
"loss": 2.7461,
"step": 830
},
{
"epoch": 0.0940649496080627,
"grad_norm": 5.613245010375977,
"learning_rate": 4.84341172079134e-05,
"loss": 2.2355,
"step": 840
},
{
"epoch": 0.09518477043673013,
"grad_norm": 2.8910045623779297,
"learning_rate": 4.8415453527435614e-05,
"loss": 2.5613,
"step": 850
},
{
"epoch": 0.09630459126539753,
"grad_norm": 2.2605295181274414,
"learning_rate": 4.8396789846957826e-05,
"loss": 2.3108,
"step": 860
},
{
"epoch": 0.09742441209406495,
"grad_norm": 2.1678943634033203,
"learning_rate": 4.837812616648003e-05,
"loss": 2.5849,
"step": 870
},
{
"epoch": 0.09854423292273236,
"grad_norm": 3.4123549461364746,
"learning_rate": 4.835946248600224e-05,
"loss": 2.5897,
"step": 880
},
{
"epoch": 0.09966405375139978,
"grad_norm": 2.3803961277008057,
"learning_rate": 4.834079880552445e-05,
"loss": 2.3515,
"step": 890
},
{
"epoch": 0.10078387458006718,
"grad_norm": 2.3979332447052,
"learning_rate": 4.8322135125046666e-05,
"loss": 2.5587,
"step": 900
},
{
"epoch": 0.1019036954087346,
"grad_norm": 4.127903461456299,
"learning_rate": 4.830347144456887e-05,
"loss": 1.8338,
"step": 910
},
{
"epoch": 0.10302351623740201,
"grad_norm": 3.460048198699951,
"learning_rate": 4.828480776409108e-05,
"loss": 3.0448,
"step": 920
},
{
"epoch": 0.10414333706606943,
"grad_norm": 2.7010154724121094,
"learning_rate": 4.826614408361329e-05,
"loss": 2.5111,
"step": 930
},
{
"epoch": 0.10526315789473684,
"grad_norm": 2.2253668308258057,
"learning_rate": 4.8247480403135506e-05,
"loss": 2.1311,
"step": 940
},
{
"epoch": 0.10638297872340426,
"grad_norm": 2.3751561641693115,
"learning_rate": 4.822881672265771e-05,
"loss": 2.3021,
"step": 950
},
{
"epoch": 0.10750279955207166,
"grad_norm": 4.968678951263428,
"learning_rate": 4.8210153042179916e-05,
"loss": 2.7204,
"step": 960
},
{
"epoch": 0.10862262038073908,
"grad_norm": 2.429736375808716,
"learning_rate": 4.819148936170213e-05,
"loss": 2.4528,
"step": 970
},
{
"epoch": 0.10974244120940649,
"grad_norm": 8.118196487426758,
"learning_rate": 4.817282568122434e-05,
"loss": 2.8347,
"step": 980
},
{
"epoch": 0.11086226203807391,
"grad_norm": 9.025050163269043,
"learning_rate": 4.815416200074655e-05,
"loss": 2.2329,
"step": 990
},
{
"epoch": 0.11198208286674133,
"grad_norm": 2.610757827758789,
"learning_rate": 4.8135498320268756e-05,
"loss": 2.0086,
"step": 1000
},
{
"epoch": 0.11310190369540873,
"grad_norm": 3.6983273029327393,
"learning_rate": 4.811683463979097e-05,
"loss": 2.6589,
"step": 1010
},
{
"epoch": 0.11422172452407615,
"grad_norm": 6.618379592895508,
"learning_rate": 4.809817095931318e-05,
"loss": 2.2701,
"step": 1020
},
{
"epoch": 0.11534154535274356,
"grad_norm": 2.155717372894287,
"learning_rate": 4.807950727883539e-05,
"loss": 2.3469,
"step": 1030
},
{
"epoch": 0.11646136618141098,
"grad_norm": 2.49660325050354,
"learning_rate": 4.8060843598357596e-05,
"loss": 2.1965,
"step": 1040
},
{
"epoch": 0.11758118701007839,
"grad_norm": 9.351076126098633,
"learning_rate": 4.804217991787981e-05,
"loss": 3.0117,
"step": 1050
},
{
"epoch": 0.1187010078387458,
"grad_norm": 4.395270824432373,
"learning_rate": 4.802351623740202e-05,
"loss": 2.091,
"step": 1060
},
{
"epoch": 0.11982082866741321,
"grad_norm": 2.891835927963257,
"learning_rate": 4.800485255692423e-05,
"loss": 2.4642,
"step": 1070
},
{
"epoch": 0.12094064949608063,
"grad_norm": 3.1243512630462646,
"learning_rate": 4.7986188876446436e-05,
"loss": 2.3218,
"step": 1080
},
{
"epoch": 0.12206047032474804,
"grad_norm": 4.109086513519287,
"learning_rate": 4.796752519596865e-05,
"loss": 2.2233,
"step": 1090
},
{
"epoch": 0.12318029115341546,
"grad_norm": 8.871736526489258,
"learning_rate": 4.794886151549086e-05,
"loss": 2.6833,
"step": 1100
},
{
"epoch": 0.12430011198208286,
"grad_norm": 2.5556600093841553,
"learning_rate": 4.793019783501307e-05,
"loss": 2.3963,
"step": 1110
},
{
"epoch": 0.12541993281075028,
"grad_norm": 2.431551694869995,
"learning_rate": 4.7911534154535276e-05,
"loss": 2.2135,
"step": 1120
},
{
"epoch": 0.1265397536394177,
"grad_norm": 6.504064559936523,
"learning_rate": 4.789287047405749e-05,
"loss": 2.0713,
"step": 1130
},
{
"epoch": 0.1276595744680851,
"grad_norm": 8.992396354675293,
"learning_rate": 4.787420679357969e-05,
"loss": 2.3186,
"step": 1140
},
{
"epoch": 0.12877939529675253,
"grad_norm": 7.831729888916016,
"learning_rate": 4.785554311310191e-05,
"loss": 2.5546,
"step": 1150
},
{
"epoch": 0.12989921612541994,
"grad_norm": 2.7570407390594482,
"learning_rate": 4.7836879432624116e-05,
"loss": 2.5008,
"step": 1160
},
{
"epoch": 0.13101903695408734,
"grad_norm": 10.529077529907227,
"learning_rate": 4.781821575214633e-05,
"loss": 2.3034,
"step": 1170
},
{
"epoch": 0.13213885778275475,
"grad_norm": 7.510254383087158,
"learning_rate": 4.779955207166853e-05,
"loss": 2.5347,
"step": 1180
},
{
"epoch": 0.13325867861142218,
"grad_norm": 6.021450519561768,
"learning_rate": 4.7780888391190744e-05,
"loss": 2.3394,
"step": 1190
},
{
"epoch": 0.1343784994400896,
"grad_norm": 2.8167929649353027,
"learning_rate": 4.7762224710712956e-05,
"loss": 2.4706,
"step": 1200
},
{
"epoch": 0.135498320268757,
"grad_norm": 2.655770778656006,
"learning_rate": 4.774356103023516e-05,
"loss": 2.5604,
"step": 1210
},
{
"epoch": 0.1366181410974244,
"grad_norm": 5.053645610809326,
"learning_rate": 4.772489734975737e-05,
"loss": 2.468,
"step": 1220
},
{
"epoch": 0.13773796192609183,
"grad_norm": 7.558941841125488,
"learning_rate": 4.7706233669279584e-05,
"loss": 2.5013,
"step": 1230
},
{
"epoch": 0.13885778275475924,
"grad_norm": 7.665897369384766,
"learning_rate": 4.7687569988801796e-05,
"loss": 2.1974,
"step": 1240
},
{
"epoch": 0.13997760358342665,
"grad_norm": 9.41537094116211,
"learning_rate": 4.7668906308324e-05,
"loss": 2.2467,
"step": 1250
},
{
"epoch": 0.14109742441209405,
"grad_norm": 9.68034839630127,
"learning_rate": 4.765024262784621e-05,
"loss": 2.5209,
"step": 1260
},
{
"epoch": 0.1422172452407615,
"grad_norm": 6.756275177001953,
"learning_rate": 4.7631578947368424e-05,
"loss": 2.4288,
"step": 1270
},
{
"epoch": 0.1433370660694289,
"grad_norm": 7.971835613250732,
"learning_rate": 4.7612915266890636e-05,
"loss": 2.059,
"step": 1280
},
{
"epoch": 0.1444568868980963,
"grad_norm": 7.094338893890381,
"learning_rate": 4.759425158641284e-05,
"loss": 2.7495,
"step": 1290
},
{
"epoch": 0.1455767077267637,
"grad_norm": 6.793420791625977,
"learning_rate": 4.757558790593505e-05,
"loss": 1.8302,
"step": 1300
},
{
"epoch": 0.14669652855543114,
"grad_norm": 6.490263938903809,
"learning_rate": 4.7556924225457264e-05,
"loss": 2.4643,
"step": 1310
},
{
"epoch": 0.14781634938409854,
"grad_norm": 2.3416640758514404,
"learning_rate": 4.7538260544979476e-05,
"loss": 2.4583,
"step": 1320
},
{
"epoch": 0.14893617021276595,
"grad_norm": 6.409727096557617,
"learning_rate": 4.751959686450168e-05,
"loss": 1.7198,
"step": 1330
},
{
"epoch": 0.15005599104143338,
"grad_norm": 3.995352029800415,
"learning_rate": 4.750093318402389e-05,
"loss": 2.1065,
"step": 1340
},
{
"epoch": 0.1511758118701008,
"grad_norm": 4.906558036804199,
"learning_rate": 4.74822695035461e-05,
"loss": 2.3096,
"step": 1350
},
{
"epoch": 0.1522956326987682,
"grad_norm": 6.388749122619629,
"learning_rate": 4.7463605823068316e-05,
"loss": 1.9702,
"step": 1360
},
{
"epoch": 0.1534154535274356,
"grad_norm": 2.246985673904419,
"learning_rate": 4.744494214259052e-05,
"loss": 2.1969,
"step": 1370
},
{
"epoch": 0.15453527435610304,
"grad_norm": 6.625758647918701,
"learning_rate": 4.742627846211273e-05,
"loss": 2.4131,
"step": 1380
},
{
"epoch": 0.15565509518477044,
"grad_norm": 2.558464527130127,
"learning_rate": 4.740761478163494e-05,
"loss": 2.5493,
"step": 1390
},
{
"epoch": 0.15677491601343785,
"grad_norm": 4.546473979949951,
"learning_rate": 4.7388951101157156e-05,
"loss": 2.8893,
"step": 1400
},
{
"epoch": 0.15789473684210525,
"grad_norm": 2.1784298419952393,
"learning_rate": 4.737028742067936e-05,
"loss": 2.3148,
"step": 1410
},
{
"epoch": 0.1590145576707727,
"grad_norm": 2.0193071365356445,
"learning_rate": 4.735162374020157e-05,
"loss": 2.6046,
"step": 1420
},
{
"epoch": 0.1601343784994401,
"grad_norm": 2.74428653717041,
"learning_rate": 4.733296005972378e-05,
"loss": 2.6196,
"step": 1430
},
{
"epoch": 0.1612541993281075,
"grad_norm": 10.364500045776367,
"learning_rate": 4.731429637924599e-05,
"loss": 2.473,
"step": 1440
},
{
"epoch": 0.1623740201567749,
"grad_norm": 7.268424034118652,
"learning_rate": 4.72956326987682e-05,
"loss": 2.9277,
"step": 1450
},
{
"epoch": 0.16349384098544234,
"grad_norm": 7.980413913726807,
"learning_rate": 4.7276969018290406e-05,
"loss": 2.4812,
"step": 1460
},
{
"epoch": 0.16461366181410975,
"grad_norm": 4.977534770965576,
"learning_rate": 4.725830533781262e-05,
"loss": 2.3111,
"step": 1470
},
{
"epoch": 0.16573348264277715,
"grad_norm": 2.0615103244781494,
"learning_rate": 4.723964165733483e-05,
"loss": 2.4883,
"step": 1480
},
{
"epoch": 0.16685330347144456,
"grad_norm": 2.4058101177215576,
"learning_rate": 4.722097797685704e-05,
"loss": 2.2982,
"step": 1490
},
{
"epoch": 0.167973124300112,
"grad_norm": 5.251309871673584,
"learning_rate": 4.7202314296379246e-05,
"loss": 2.186,
"step": 1500
},
{
"epoch": 0.1690929451287794,
"grad_norm": 4.544527053833008,
"learning_rate": 4.718365061590146e-05,
"loss": 1.8374,
"step": 1510
},
{
"epoch": 0.1702127659574468,
"grad_norm": 8.125224113464355,
"learning_rate": 4.716498693542367e-05,
"loss": 2.3848,
"step": 1520
},
{
"epoch": 0.1713325867861142,
"grad_norm": 2.274805784225464,
"learning_rate": 4.714632325494588e-05,
"loss": 2.1904,
"step": 1530
},
{
"epoch": 0.17245240761478164,
"grad_norm": 7.999364376068115,
"learning_rate": 4.7127659574468086e-05,
"loss": 2.4646,
"step": 1540
},
{
"epoch": 0.17357222844344905,
"grad_norm": 4.198975086212158,
"learning_rate": 4.71089958939903e-05,
"loss": 2.2459,
"step": 1550
},
{
"epoch": 0.17469204927211646,
"grad_norm": 8.396190643310547,
"learning_rate": 4.70903322135125e-05,
"loss": 2.75,
"step": 1560
},
{
"epoch": 0.17581187010078386,
"grad_norm": 2.833841562271118,
"learning_rate": 4.707166853303472e-05,
"loss": 2.5594,
"step": 1570
},
{
"epoch": 0.1769316909294513,
"grad_norm": 2.6558115482330322,
"learning_rate": 4.7053004852556926e-05,
"loss": 2.2972,
"step": 1580
},
{
"epoch": 0.1780515117581187,
"grad_norm": 7.599963188171387,
"learning_rate": 4.703434117207914e-05,
"loss": 2.5468,
"step": 1590
},
{
"epoch": 0.1791713325867861,
"grad_norm": 2.6800622940063477,
"learning_rate": 4.701567749160134e-05,
"loss": 2.4275,
"step": 1600
},
{
"epoch": 0.18029115341545351,
"grad_norm": 9.46832275390625,
"learning_rate": 4.699701381112356e-05,
"loss": 2.2797,
"step": 1610
},
{
"epoch": 0.18141097424412095,
"grad_norm": 2.8210015296936035,
"learning_rate": 4.6978350130645766e-05,
"loss": 2.3262,
"step": 1620
},
{
"epoch": 0.18253079507278835,
"grad_norm": 6.384908676147461,
"learning_rate": 4.695968645016798e-05,
"loss": 1.852,
"step": 1630
},
{
"epoch": 0.18365061590145576,
"grad_norm": 11.738371849060059,
"learning_rate": 4.694102276969018e-05,
"loss": 2.4613,
"step": 1640
},
{
"epoch": 0.18477043673012317,
"grad_norm": 2.614558696746826,
"learning_rate": 4.6922359089212394e-05,
"loss": 2.3428,
"step": 1650
},
{
"epoch": 0.1858902575587906,
"grad_norm": 3.371556282043457,
"learning_rate": 4.6903695408734606e-05,
"loss": 2.8777,
"step": 1660
},
{
"epoch": 0.187010078387458,
"grad_norm": 2.5485849380493164,
"learning_rate": 4.688503172825681e-05,
"loss": 2.1301,
"step": 1670
},
{
"epoch": 0.1881298992161254,
"grad_norm": 4.2750935554504395,
"learning_rate": 4.686636804777902e-05,
"loss": 2.5627,
"step": 1680
},
{
"epoch": 0.18924972004479285,
"grad_norm": 2.555360794067383,
"learning_rate": 4.6847704367301234e-05,
"loss": 2.19,
"step": 1690
},
{
"epoch": 0.19036954087346025,
"grad_norm": 6.980922698974609,
"learning_rate": 4.6829040686823446e-05,
"loss": 2.2301,
"step": 1700
},
{
"epoch": 0.19148936170212766,
"grad_norm": 4.802427768707275,
"learning_rate": 4.681037700634565e-05,
"loss": 2.1102,
"step": 1710
},
{
"epoch": 0.19260918253079506,
"grad_norm": 6.685520172119141,
"learning_rate": 4.679171332586786e-05,
"loss": 2.3725,
"step": 1720
},
{
"epoch": 0.1937290033594625,
"grad_norm": 2.2345573902130127,
"learning_rate": 4.6773049645390074e-05,
"loss": 2.1639,
"step": 1730
},
{
"epoch": 0.1948488241881299,
"grad_norm": 2.531062364578247,
"learning_rate": 4.6754385964912286e-05,
"loss": 1.8379,
"step": 1740
},
{
"epoch": 0.1959686450167973,
"grad_norm": 7.543485164642334,
"learning_rate": 4.673572228443449e-05,
"loss": 2.5206,
"step": 1750
},
{
"epoch": 0.19708846584546472,
"grad_norm": 4.684238910675049,
"learning_rate": 4.67170586039567e-05,
"loss": 2.1743,
"step": 1760
},
{
"epoch": 0.19820828667413215,
"grad_norm": 8.91139030456543,
"learning_rate": 4.669839492347891e-05,
"loss": 2.3441,
"step": 1770
},
{
"epoch": 0.19932810750279956,
"grad_norm": 2.0204806327819824,
"learning_rate": 4.6679731243001126e-05,
"loss": 2.3745,
"step": 1780
},
{
"epoch": 0.20044792833146696,
"grad_norm": 10.537651062011719,
"learning_rate": 4.666106756252333e-05,
"loss": 2.587,
"step": 1790
},
{
"epoch": 0.20156774916013437,
"grad_norm": 3.3336009979248047,
"learning_rate": 4.664240388204554e-05,
"loss": 2.7144,
"step": 1800
},
{
"epoch": 0.2026875699888018,
"grad_norm": 3.2457361221313477,
"learning_rate": 4.662374020156775e-05,
"loss": 2.1114,
"step": 1810
},
{
"epoch": 0.2038073908174692,
"grad_norm": 6.266234874725342,
"learning_rate": 4.6605076521089966e-05,
"loss": 2.3444,
"step": 1820
},
{
"epoch": 0.20492721164613661,
"grad_norm": 5.921943187713623,
"learning_rate": 4.658641284061217e-05,
"loss": 2.2345,
"step": 1830
},
{
"epoch": 0.20604703247480402,
"grad_norm": 2.481746196746826,
"learning_rate": 4.656774916013438e-05,
"loss": 2.3925,
"step": 1840
},
{
"epoch": 0.20716685330347145,
"grad_norm": 6.096205711364746,
"learning_rate": 4.654908547965659e-05,
"loss": 2.4366,
"step": 1850
},
{
"epoch": 0.20828667413213886,
"grad_norm": 7.671387672424316,
"learning_rate": 4.65304217991788e-05,
"loss": 2.5033,
"step": 1860
},
{
"epoch": 0.20940649496080627,
"grad_norm": 4.001086711883545,
"learning_rate": 4.651175811870101e-05,
"loss": 2.0047,
"step": 1870
},
{
"epoch": 0.21052631578947367,
"grad_norm": 7.602363586425781,
"learning_rate": 4.649309443822322e-05,
"loss": 2.3823,
"step": 1880
},
{
"epoch": 0.2116461366181411,
"grad_norm": 5.483312129974365,
"learning_rate": 4.647443075774543e-05,
"loss": 2.4785,
"step": 1890
},
{
"epoch": 0.2127659574468085,
"grad_norm": 2.5652925968170166,
"learning_rate": 4.645576707726764e-05,
"loss": 2.1959,
"step": 1900
},
{
"epoch": 0.21388577827547592,
"grad_norm": 8.491823196411133,
"learning_rate": 4.643710339678985e-05,
"loss": 2.9472,
"step": 1910
},
{
"epoch": 0.21500559910414332,
"grad_norm": 5.945290565490723,
"learning_rate": 4.6418439716312056e-05,
"loss": 2.2415,
"step": 1920
},
{
"epoch": 0.21612541993281076,
"grad_norm": 4.045243263244629,
"learning_rate": 4.639977603583427e-05,
"loss": 2.4932,
"step": 1930
},
{
"epoch": 0.21724524076147816,
"grad_norm": 2.715601921081543,
"learning_rate": 4.638111235535648e-05,
"loss": 2.4262,
"step": 1940
},
{
"epoch": 0.21836506159014557,
"grad_norm": 3.0143299102783203,
"learning_rate": 4.636244867487869e-05,
"loss": 2.6019,
"step": 1950
},
{
"epoch": 0.21948488241881298,
"grad_norm": 9.742323875427246,
"learning_rate": 4.6343784994400896e-05,
"loss": 2.9155,
"step": 1960
},
{
"epoch": 0.2206047032474804,
"grad_norm": 5.9390788078308105,
"learning_rate": 4.632512131392311e-05,
"loss": 2.1963,
"step": 1970
},
{
"epoch": 0.22172452407614782,
"grad_norm": 5.941153049468994,
"learning_rate": 4.630645763344531e-05,
"loss": 2.2618,
"step": 1980
},
{
"epoch": 0.22284434490481522,
"grad_norm": 4.004471778869629,
"learning_rate": 4.628779395296753e-05,
"loss": 2.6587,
"step": 1990
},
{
"epoch": 0.22396416573348266,
"grad_norm": 8.82131576538086,
"learning_rate": 4.6269130272489736e-05,
"loss": 2.1719,
"step": 2000
},
{
"epoch": 0.22508398656215006,
"grad_norm": 2.8698363304138184,
"learning_rate": 4.625046659201195e-05,
"loss": 2.4046,
"step": 2010
},
{
"epoch": 0.22620380739081747,
"grad_norm": 6.006710529327393,
"learning_rate": 4.623180291153415e-05,
"loss": 2.3737,
"step": 2020
},
{
"epoch": 0.22732362821948487,
"grad_norm": 2.5947604179382324,
"learning_rate": 4.621313923105637e-05,
"loss": 2.4821,
"step": 2030
},
{
"epoch": 0.2284434490481523,
"grad_norm": 2.4432547092437744,
"learning_rate": 4.6194475550578576e-05,
"loss": 2.5101,
"step": 2040
},
{
"epoch": 0.22956326987681971,
"grad_norm": 12.777518272399902,
"learning_rate": 4.617581187010079e-05,
"loss": 2.89,
"step": 2050
},
{
"epoch": 0.23068309070548712,
"grad_norm": 8.881490707397461,
"learning_rate": 4.615714818962299e-05,
"loss": 3.0737,
"step": 2060
},
{
"epoch": 0.23180291153415453,
"grad_norm": 11.968159675598145,
"learning_rate": 4.613848450914521e-05,
"loss": 2.4398,
"step": 2070
},
{
"epoch": 0.23292273236282196,
"grad_norm": 2.413706064224243,
"learning_rate": 4.6119820828667416e-05,
"loss": 2.1109,
"step": 2080
},
{
"epoch": 0.23404255319148937,
"grad_norm": 8.401453971862793,
"learning_rate": 4.610115714818963e-05,
"loss": 2.5007,
"step": 2090
},
{
"epoch": 0.23516237402015677,
"grad_norm": 2.3912086486816406,
"learning_rate": 4.608249346771183e-05,
"loss": 2.4239,
"step": 2100
},
{
"epoch": 0.23628219484882418,
"grad_norm": 8.813179016113281,
"learning_rate": 4.6063829787234044e-05,
"loss": 2.4587,
"step": 2110
},
{
"epoch": 0.2374020156774916,
"grad_norm": 10.839656829833984,
"learning_rate": 4.6045166106756256e-05,
"loss": 2.4691,
"step": 2120
},
{
"epoch": 0.23852183650615902,
"grad_norm": 4.4540252685546875,
"learning_rate": 4.602650242627846e-05,
"loss": 2.4054,
"step": 2130
},
{
"epoch": 0.23964165733482642,
"grad_norm": 2.7125473022460938,
"learning_rate": 4.600783874580067e-05,
"loss": 2.7286,
"step": 2140
},
{
"epoch": 0.24076147816349383,
"grad_norm": 2.332322359085083,
"learning_rate": 4.5989175065322884e-05,
"loss": 2.5039,
"step": 2150
},
{
"epoch": 0.24188129899216126,
"grad_norm": 2.539842367172241,
"learning_rate": 4.5970511384845096e-05,
"loss": 2.2403,
"step": 2160
},
{
"epoch": 0.24300111982082867,
"grad_norm": 6.839804649353027,
"learning_rate": 4.59518477043673e-05,
"loss": 2.0901,
"step": 2170
},
{
"epoch": 0.24412094064949608,
"grad_norm": 2.5890653133392334,
"learning_rate": 4.593318402388951e-05,
"loss": 2.2586,
"step": 2180
},
{
"epoch": 0.24524076147816348,
"grad_norm": 2.5026495456695557,
"learning_rate": 4.591452034341172e-05,
"loss": 2.1888,
"step": 2190
},
{
"epoch": 0.24636058230683092,
"grad_norm": 3.8693251609802246,
"learning_rate": 4.5895856662933936e-05,
"loss": 2.6531,
"step": 2200
},
{
"epoch": 0.24748040313549832,
"grad_norm": 8.573837280273438,
"learning_rate": 4.587719298245614e-05,
"loss": 2.5078,
"step": 2210
},
{
"epoch": 0.24860022396416573,
"grad_norm": 3.1866371631622314,
"learning_rate": 4.585852930197835e-05,
"loss": 2.2594,
"step": 2220
},
{
"epoch": 0.24972004479283313,
"grad_norm": 7.868608474731445,
"learning_rate": 4.583986562150056e-05,
"loss": 2.2759,
"step": 2230
},
{
"epoch": 0.25083986562150057,
"grad_norm": 3.183617353439331,
"learning_rate": 4.5821201941022776e-05,
"loss": 2.5888,
"step": 2240
},
{
"epoch": 0.251959686450168,
"grad_norm": 2.5060982704162598,
"learning_rate": 4.580253826054498e-05,
"loss": 2.1064,
"step": 2250
},
{
"epoch": 0.2530795072788354,
"grad_norm": 2.9019861221313477,
"learning_rate": 4.578387458006719e-05,
"loss": 2.1935,
"step": 2260
},
{
"epoch": 0.2541993281075028,
"grad_norm": 13.326761245727539,
"learning_rate": 4.57652108995894e-05,
"loss": 2.5056,
"step": 2270
},
{
"epoch": 0.2553191489361702,
"grad_norm": 7.620180130004883,
"learning_rate": 4.5746547219111616e-05,
"loss": 2.6445,
"step": 2280
},
{
"epoch": 0.2564389697648376,
"grad_norm": 6.347967147827148,
"learning_rate": 4.572788353863382e-05,
"loss": 2.1387,
"step": 2290
},
{
"epoch": 0.25755879059350506,
"grad_norm": 7.242101192474365,
"learning_rate": 4.570921985815603e-05,
"loss": 2.4346,
"step": 2300
},
{
"epoch": 0.25867861142217247,
"grad_norm": 7.027688503265381,
"learning_rate": 4.569055617767824e-05,
"loss": 2.2526,
"step": 2310
},
{
"epoch": 0.2597984322508399,
"grad_norm": 6.494021892547607,
"learning_rate": 4.567189249720045e-05,
"loss": 1.9165,
"step": 2320
},
{
"epoch": 0.2609182530795073,
"grad_norm": 8.93453311920166,
"learning_rate": 4.565322881672266e-05,
"loss": 2.1397,
"step": 2330
},
{
"epoch": 0.2620380739081747,
"grad_norm": 2.471494197845459,
"learning_rate": 4.563456513624487e-05,
"loss": 2.2892,
"step": 2340
},
{
"epoch": 0.2631578947368421,
"grad_norm": 10.424552917480469,
"learning_rate": 4.561590145576708e-05,
"loss": 2.514,
"step": 2350
},
{
"epoch": 0.2642777155655095,
"grad_norm": 7.312840938568115,
"learning_rate": 4.559723777528929e-05,
"loss": 2.2164,
"step": 2360
},
{
"epoch": 0.26539753639417696,
"grad_norm": 11.861546516418457,
"learning_rate": 4.55785740948115e-05,
"loss": 2.2371,
"step": 2370
},
{
"epoch": 0.26651735722284436,
"grad_norm": 9.549253463745117,
"learning_rate": 4.5559910414333706e-05,
"loss": 2.6811,
"step": 2380
},
{
"epoch": 0.26763717805151177,
"grad_norm": 2.9422247409820557,
"learning_rate": 4.554124673385592e-05,
"loss": 2.2888,
"step": 2390
},
{
"epoch": 0.2687569988801792,
"grad_norm": 7.779324054718018,
"learning_rate": 4.552258305337813e-05,
"loss": 2.3416,
"step": 2400
},
{
"epoch": 0.2698768197088466,
"grad_norm": 2.1986162662506104,
"learning_rate": 4.550391937290034e-05,
"loss": 2.4494,
"step": 2410
},
{
"epoch": 0.270996640537514,
"grad_norm": 2.420370578765869,
"learning_rate": 4.5485255692422546e-05,
"loss": 2.6193,
"step": 2420
},
{
"epoch": 0.2721164613661814,
"grad_norm": 2.281414747238159,
"learning_rate": 4.546659201194476e-05,
"loss": 2.7233,
"step": 2430
},
{
"epoch": 0.2732362821948488,
"grad_norm": 2.4500784873962402,
"learning_rate": 4.544792833146696e-05,
"loss": 2.658,
"step": 2440
},
{
"epoch": 0.27435610302351626,
"grad_norm": 8.000895500183105,
"learning_rate": 4.542926465098918e-05,
"loss": 2.7245,
"step": 2450
},
{
"epoch": 0.27547592385218367,
"grad_norm": 2.6147563457489014,
"learning_rate": 4.5410600970511386e-05,
"loss": 2.4904,
"step": 2460
},
{
"epoch": 0.2765957446808511,
"grad_norm": 12.0834321975708,
"learning_rate": 4.53919372900336e-05,
"loss": 2.6572,
"step": 2470
},
{
"epoch": 0.2777155655095185,
"grad_norm": 5.857783317565918,
"learning_rate": 4.53732736095558e-05,
"loss": 1.98,
"step": 2480
},
{
"epoch": 0.2788353863381859,
"grad_norm": 5.242463111877441,
"learning_rate": 4.535460992907802e-05,
"loss": 2.4122,
"step": 2490
},
{
"epoch": 0.2799552071668533,
"grad_norm": 9.532788276672363,
"learning_rate": 4.5335946248600226e-05,
"loss": 2.6636,
"step": 2500
},
{
"epoch": 0.2810750279955207,
"grad_norm": 8.554610252380371,
"learning_rate": 4.531728256812244e-05,
"loss": 2.4047,
"step": 2510
},
{
"epoch": 0.2821948488241881,
"grad_norm": 7.8059234619140625,
"learning_rate": 4.529861888764464e-05,
"loss": 2.4136,
"step": 2520
},
{
"epoch": 0.28331466965285557,
"grad_norm": 4.768645286560059,
"learning_rate": 4.5279955207166854e-05,
"loss": 2.6977,
"step": 2530
},
{
"epoch": 0.284434490481523,
"grad_norm": 6.272426128387451,
"learning_rate": 4.5261291526689066e-05,
"loss": 2.7218,
"step": 2540
},
{
"epoch": 0.2855543113101904,
"grad_norm": 2.5695507526397705,
"learning_rate": 4.524262784621128e-05,
"loss": 2.8431,
"step": 2550
},
{
"epoch": 0.2866741321388578,
"grad_norm": 2.400848865509033,
"learning_rate": 4.522396416573348e-05,
"loss": 2.3621,
"step": 2560
},
{
"epoch": 0.2877939529675252,
"grad_norm": 10.295741081237793,
"learning_rate": 4.5205300485255694e-05,
"loss": 1.9837,
"step": 2570
},
{
"epoch": 0.2889137737961926,
"grad_norm": 2.625807762145996,
"learning_rate": 4.5186636804777906e-05,
"loss": 2.3817,
"step": 2580
},
{
"epoch": 0.29003359462486,
"grad_norm": 2.6179468631744385,
"learning_rate": 4.516797312430011e-05,
"loss": 2.6555,
"step": 2590
},
{
"epoch": 0.2911534154535274,
"grad_norm": 2.512031316757202,
"learning_rate": 4.514930944382232e-05,
"loss": 2.5653,
"step": 2600
},
{
"epoch": 0.29227323628219487,
"grad_norm": 2.6077969074249268,
"learning_rate": 4.5130645763344534e-05,
"loss": 2.222,
"step": 2610
},
{
"epoch": 0.2933930571108623,
"grad_norm": 2.072172164916992,
"learning_rate": 4.5111982082866746e-05,
"loss": 2.1437,
"step": 2620
},
{
"epoch": 0.2945128779395297,
"grad_norm": 4.034156799316406,
"learning_rate": 4.509331840238895e-05,
"loss": 2.4836,
"step": 2630
},
{
"epoch": 0.2956326987681971,
"grad_norm": 6.953413963317871,
"learning_rate": 4.507465472191116e-05,
"loss": 2.1186,
"step": 2640
},
{
"epoch": 0.2967525195968645,
"grad_norm": 7.338948726654053,
"learning_rate": 4.505599104143337e-05,
"loss": 2.1858,
"step": 2650
},
{
"epoch": 0.2978723404255319,
"grad_norm": 6.163172245025635,
"learning_rate": 4.5037327360955586e-05,
"loss": 2.2643,
"step": 2660
},
{
"epoch": 0.2989921612541993,
"grad_norm": 7.6946563720703125,
"learning_rate": 4.501866368047779e-05,
"loss": 2.294,
"step": 2670
},
{
"epoch": 0.30011198208286677,
"grad_norm": 12.317503929138184,
"learning_rate": 4.5e-05,
"loss": 2.5286,
"step": 2680
},
{
"epoch": 0.3012318029115342,
"grad_norm": 7.581274509429932,
"learning_rate": 4.498133631952221e-05,
"loss": 2.146,
"step": 2690
},
{
"epoch": 0.3023516237402016,
"grad_norm": 6.5440778732299805,
"learning_rate": 4.4962672639044426e-05,
"loss": 2.4251,
"step": 2700
},
{
"epoch": 0.303471444568869,
"grad_norm": 4.657285213470459,
"learning_rate": 4.494400895856663e-05,
"loss": 1.8409,
"step": 2710
},
{
"epoch": 0.3045912653975364,
"grad_norm": 1.9951245784759521,
"learning_rate": 4.492534527808884e-05,
"loss": 2.2119,
"step": 2720
},
{
"epoch": 0.3057110862262038,
"grad_norm": 6.937575340270996,
"learning_rate": 4.490668159761105e-05,
"loss": 1.8932,
"step": 2730
},
{
"epoch": 0.3068309070548712,
"grad_norm": 12.604211807250977,
"learning_rate": 4.488801791713326e-05,
"loss": 2.1911,
"step": 2740
},
{
"epoch": 0.3079507278835386,
"grad_norm": 2.306835412979126,
"learning_rate": 4.486935423665547e-05,
"loss": 2.225,
"step": 2750
},
{
"epoch": 0.3090705487122061,
"grad_norm": 7.850268840789795,
"learning_rate": 4.485069055617768e-05,
"loss": 2.2062,
"step": 2760
},
{
"epoch": 0.3101903695408735,
"grad_norm": 8.962443351745605,
"learning_rate": 4.483202687569989e-05,
"loss": 2.0101,
"step": 2770
},
{
"epoch": 0.3113101903695409,
"grad_norm": 2.3884646892547607,
"learning_rate": 4.48133631952221e-05,
"loss": 2.4539,
"step": 2780
},
{
"epoch": 0.3124300111982083,
"grad_norm": 4.534022808074951,
"learning_rate": 4.479469951474431e-05,
"loss": 2.0423,
"step": 2790
},
{
"epoch": 0.3135498320268757,
"grad_norm": 2.491356372833252,
"learning_rate": 4.477603583426652e-05,
"loss": 2.3449,
"step": 2800
},
{
"epoch": 0.3146696528555431,
"grad_norm": 5.900778293609619,
"learning_rate": 4.475737215378873e-05,
"loss": 2.3496,
"step": 2810
},
{
"epoch": 0.3157894736842105,
"grad_norm": 3.9138317108154297,
"learning_rate": 4.473870847331094e-05,
"loss": 2.2142,
"step": 2820
},
{
"epoch": 0.3169092945128779,
"grad_norm": 9.516107559204102,
"learning_rate": 4.472004479283315e-05,
"loss": 2.0819,
"step": 2830
},
{
"epoch": 0.3180291153415454,
"grad_norm": 2.504873275756836,
"learning_rate": 4.4701381112355356e-05,
"loss": 2.0613,
"step": 2840
},
{
"epoch": 0.3191489361702128,
"grad_norm": 8.265789031982422,
"learning_rate": 4.468271743187757e-05,
"loss": 2.2209,
"step": 2850
},
{
"epoch": 0.3202687569988802,
"grad_norm": 3.2764816284179688,
"learning_rate": 4.466405375139977e-05,
"loss": 2.7096,
"step": 2860
},
{
"epoch": 0.3213885778275476,
"grad_norm": 10.983661651611328,
"learning_rate": 4.464539007092199e-05,
"loss": 2.638,
"step": 2870
},
{
"epoch": 0.322508398656215,
"grad_norm": 2.8227787017822266,
"learning_rate": 4.4626726390444196e-05,
"loss": 2.6929,
"step": 2880
},
{
"epoch": 0.3236282194848824,
"grad_norm": 2.553760528564453,
"learning_rate": 4.460806270996641e-05,
"loss": 1.8541,
"step": 2890
},
{
"epoch": 0.3247480403135498,
"grad_norm": 9.215750694274902,
"learning_rate": 4.458939902948861e-05,
"loss": 2.3142,
"step": 2900
},
{
"epoch": 0.3258678611422172,
"grad_norm": 3.168344020843506,
"learning_rate": 4.457073534901083e-05,
"loss": 2.4242,
"step": 2910
},
{
"epoch": 0.3269876819708847,
"grad_norm": 3.0249898433685303,
"learning_rate": 4.4552071668533036e-05,
"loss": 2.0356,
"step": 2920
},
{
"epoch": 0.3281075027995521,
"grad_norm": 7.524886608123779,
"learning_rate": 4.453340798805525e-05,
"loss": 2.7437,
"step": 2930
},
{
"epoch": 0.3292273236282195,
"grad_norm": 8.902599334716797,
"learning_rate": 4.451474430757745e-05,
"loss": 2.0116,
"step": 2940
},
{
"epoch": 0.3303471444568869,
"grad_norm": 3.2997946739196777,
"learning_rate": 4.4496080627099664e-05,
"loss": 2.359,
"step": 2950
},
{
"epoch": 0.3314669652855543,
"grad_norm": 3.123281717300415,
"learning_rate": 4.4477416946621876e-05,
"loss": 2.2666,
"step": 2960
},
{
"epoch": 0.3325867861142217,
"grad_norm": 10.098536491394043,
"learning_rate": 4.445875326614409e-05,
"loss": 2.501,
"step": 2970
},
{
"epoch": 0.3337066069428891,
"grad_norm": 11.130685806274414,
"learning_rate": 4.444008958566629e-05,
"loss": 2.4014,
"step": 2980
},
{
"epoch": 0.3348264277715566,
"grad_norm": 8.4888334274292,
"learning_rate": 4.4421425905188505e-05,
"loss": 2.3178,
"step": 2990
},
{
"epoch": 0.335946248600224,
"grad_norm": 8.757832527160645,
"learning_rate": 4.4402762224710716e-05,
"loss": 2.1205,
"step": 3000
},
{
"epoch": 0.3370660694288914,
"grad_norm": 8.70385456085205,
"learning_rate": 4.438409854423293e-05,
"loss": 2.4269,
"step": 3010
},
{
"epoch": 0.3381858902575588,
"grad_norm": 8.281830787658691,
"learning_rate": 4.436543486375513e-05,
"loss": 2.5491,
"step": 3020
},
{
"epoch": 0.3393057110862262,
"grad_norm": 9.058775901794434,
"learning_rate": 4.4346771183277345e-05,
"loss": 2.3167,
"step": 3030
},
{
"epoch": 0.3404255319148936,
"grad_norm": 5.364592552185059,
"learning_rate": 4.4328107502799556e-05,
"loss": 1.9588,
"step": 3040
},
{
"epoch": 0.341545352743561,
"grad_norm": 2.446974277496338,
"learning_rate": 4.430944382232177e-05,
"loss": 2.3064,
"step": 3050
},
{
"epoch": 0.3426651735722284,
"grad_norm": 2.6895692348480225,
"learning_rate": 4.429078014184397e-05,
"loss": 2.3751,
"step": 3060
},
{
"epoch": 0.3437849944008959,
"grad_norm": 7.783231735229492,
"learning_rate": 4.427211646136618e-05,
"loss": 1.6835,
"step": 3070
},
{
"epoch": 0.3449048152295633,
"grad_norm": 3.170950412750244,
"learning_rate": 4.4253452780888396e-05,
"loss": 2.5539,
"step": 3080
},
{
"epoch": 0.3460246360582307,
"grad_norm": 7.711115837097168,
"learning_rate": 4.42347891004106e-05,
"loss": 2.78,
"step": 3090
},
{
"epoch": 0.3471444568868981,
"grad_norm": 8.71380615234375,
"learning_rate": 4.421612541993281e-05,
"loss": 1.7815,
"step": 3100
},
{
"epoch": 0.3482642777155655,
"grad_norm": 2.3626303672790527,
"learning_rate": 4.419746173945502e-05,
"loss": 2.3035,
"step": 3110
},
{
"epoch": 0.3493840985442329,
"grad_norm": 2.5161445140838623,
"learning_rate": 4.4178798058977236e-05,
"loss": 2.3065,
"step": 3120
},
{
"epoch": 0.3505039193729003,
"grad_norm": 2.395263433456421,
"learning_rate": 4.416013437849944e-05,
"loss": 2.3419,
"step": 3130
},
{
"epoch": 0.3516237402015677,
"grad_norm": 6.902035713195801,
"learning_rate": 4.414147069802165e-05,
"loss": 2.3343,
"step": 3140
},
{
"epoch": 0.3527435610302352,
"grad_norm": 5.079914093017578,
"learning_rate": 4.412280701754386e-05,
"loss": 2.4017,
"step": 3150
},
{
"epoch": 0.3538633818589026,
"grad_norm": 3.483292579650879,
"learning_rate": 4.410414333706607e-05,
"loss": 2.3309,
"step": 3160
},
{
"epoch": 0.35498320268757,
"grad_norm": 7.4583940505981445,
"learning_rate": 4.408547965658828e-05,
"loss": 2.1556,
"step": 3170
},
{
"epoch": 0.3561030235162374,
"grad_norm": 16.233184814453125,
"learning_rate": 4.406681597611049e-05,
"loss": 2.2442,
"step": 3180
},
{
"epoch": 0.3572228443449048,
"grad_norm": 9.553163528442383,
"learning_rate": 4.40481522956327e-05,
"loss": 2.1811,
"step": 3190
},
{
"epoch": 0.3583426651735722,
"grad_norm": 5.221775531768799,
"learning_rate": 4.402948861515491e-05,
"loss": 2.8077,
"step": 3200
},
{
"epoch": 0.3594624860022396,
"grad_norm": 2.419001579284668,
"learning_rate": 4.401082493467712e-05,
"loss": 1.9981,
"step": 3210
},
{
"epoch": 0.36058230683090703,
"grad_norm": 2.5910959243774414,
"learning_rate": 4.399216125419933e-05,
"loss": 1.9731,
"step": 3220
},
{
"epoch": 0.3617021276595745,
"grad_norm": 3.1020877361297607,
"learning_rate": 4.397349757372154e-05,
"loss": 2.0817,
"step": 3230
},
{
"epoch": 0.3628219484882419,
"grad_norm": 3.0343470573425293,
"learning_rate": 4.395483389324375e-05,
"loss": 2.3142,
"step": 3240
},
{
"epoch": 0.3639417693169093,
"grad_norm": 12.035741806030273,
"learning_rate": 4.393617021276596e-05,
"loss": 2.4769,
"step": 3250
},
{
"epoch": 0.3650615901455767,
"grad_norm": 3.116953134536743,
"learning_rate": 4.391750653228817e-05,
"loss": 2.347,
"step": 3260
},
{
"epoch": 0.3661814109742441,
"grad_norm": 2.565833330154419,
"learning_rate": 4.389884285181038e-05,
"loss": 1.9123,
"step": 3270
},
{
"epoch": 0.3673012318029115,
"grad_norm": 2.983285427093506,
"learning_rate": 4.388017917133259e-05,
"loss": 2.4622,
"step": 3280
},
{
"epoch": 0.3684210526315789,
"grad_norm": 11.630106925964355,
"learning_rate": 4.38615154908548e-05,
"loss": 2.4459,
"step": 3290
},
{
"epoch": 0.36954087346024633,
"grad_norm": 6.8500285148620605,
"learning_rate": 4.3842851810377006e-05,
"loss": 2.143,
"step": 3300
},
{
"epoch": 0.3706606942889138,
"grad_norm": 2.3746914863586426,
"learning_rate": 4.382418812989922e-05,
"loss": 2.143,
"step": 3310
},
{
"epoch": 0.3717805151175812,
"grad_norm": 2.91323184967041,
"learning_rate": 4.380552444942142e-05,
"loss": 2.2025,
"step": 3320
},
{
"epoch": 0.3729003359462486,
"grad_norm": 2.4903807640075684,
"learning_rate": 4.378686076894364e-05,
"loss": 2.3756,
"step": 3330
},
{
"epoch": 0.374020156774916,
"grad_norm": 4.964207172393799,
"learning_rate": 4.3768197088465846e-05,
"loss": 1.7364,
"step": 3340
},
{
"epoch": 0.3751399776035834,
"grad_norm": 7.413595199584961,
"learning_rate": 4.374953340798806e-05,
"loss": 2.3942,
"step": 3350
},
{
"epoch": 0.3762597984322508,
"grad_norm": 2.7675399780273438,
"learning_rate": 4.373086972751026e-05,
"loss": 2.5504,
"step": 3360
},
{
"epoch": 0.37737961926091823,
"grad_norm": 1.9106221199035645,
"learning_rate": 4.3712206047032475e-05,
"loss": 2.2394,
"step": 3370
},
{
"epoch": 0.3784994400895857,
"grad_norm": 5.255868911743164,
"learning_rate": 4.3693542366554686e-05,
"loss": 1.7869,
"step": 3380
},
{
"epoch": 0.3796192609182531,
"grad_norm": 4.734898567199707,
"learning_rate": 4.36748786860769e-05,
"loss": 2.1864,
"step": 3390
},
{
"epoch": 0.3807390817469205,
"grad_norm": 11.226783752441406,
"learning_rate": 4.36562150055991e-05,
"loss": 2.4792,
"step": 3400
},
{
"epoch": 0.3818589025755879,
"grad_norm": 8.230179786682129,
"learning_rate": 4.3637551325121315e-05,
"loss": 1.9946,
"step": 3410
},
{
"epoch": 0.3829787234042553,
"grad_norm": 2.981816291809082,
"learning_rate": 4.3618887644643526e-05,
"loss": 1.9636,
"step": 3420
},
{
"epoch": 0.3840985442329227,
"grad_norm": 7.890393257141113,
"learning_rate": 4.360022396416574e-05,
"loss": 2.0774,
"step": 3430
},
{
"epoch": 0.38521836506159013,
"grad_norm": 2.7089128494262695,
"learning_rate": 4.358156028368794e-05,
"loss": 2.1393,
"step": 3440
},
{
"epoch": 0.38633818589025753,
"grad_norm": 7.063770771026611,
"learning_rate": 4.3562896603210155e-05,
"loss": 1.9429,
"step": 3450
},
{
"epoch": 0.387458006718925,
"grad_norm": 2.608469247817993,
"learning_rate": 4.3544232922732366e-05,
"loss": 2.8053,
"step": 3460
},
{
"epoch": 0.3885778275475924,
"grad_norm": 2.1650965213775635,
"learning_rate": 4.352556924225458e-05,
"loss": 2.4611,
"step": 3470
},
{
"epoch": 0.3896976483762598,
"grad_norm": 7.017950057983398,
"learning_rate": 4.350690556177678e-05,
"loss": 2.0722,
"step": 3480
},
{
"epoch": 0.3908174692049272,
"grad_norm": 2.769286870956421,
"learning_rate": 4.3488241881298995e-05,
"loss": 2.4542,
"step": 3490
},
{
"epoch": 0.3919372900335946,
"grad_norm": 9.565979957580566,
"learning_rate": 4.3469578200821206e-05,
"loss": 2.6218,
"step": 3500
},
{
"epoch": 0.393057110862262,
"grad_norm": 12.220897674560547,
"learning_rate": 4.345091452034342e-05,
"loss": 2.248,
"step": 3510
},
{
"epoch": 0.39417693169092943,
"grad_norm": 12.827961921691895,
"learning_rate": 4.343225083986562e-05,
"loss": 1.9856,
"step": 3520
},
{
"epoch": 0.39529675251959684,
"grad_norm": 2.4457015991210938,
"learning_rate": 4.341358715938783e-05,
"loss": 2.6235,
"step": 3530
},
{
"epoch": 0.3964165733482643,
"grad_norm": 5.266937255859375,
"learning_rate": 4.3394923478910046e-05,
"loss": 2.4887,
"step": 3540
},
{
"epoch": 0.3975363941769317,
"grad_norm": 8.347966194152832,
"learning_rate": 4.337625979843225e-05,
"loss": 2.212,
"step": 3550
},
{
"epoch": 0.3986562150055991,
"grad_norm": 7.743762969970703,
"learning_rate": 4.335759611795446e-05,
"loss": 2.3995,
"step": 3560
},
{
"epoch": 0.3997760358342665,
"grad_norm": 3.587676763534546,
"learning_rate": 4.333893243747667e-05,
"loss": 2.355,
"step": 3570
},
{
"epoch": 0.4008958566629339,
"grad_norm": 3.1175928115844727,
"learning_rate": 4.3320268756998886e-05,
"loss": 2.5916,
"step": 3580
},
{
"epoch": 0.40201567749160133,
"grad_norm": 8.9489107131958,
"learning_rate": 4.330160507652109e-05,
"loss": 2.6132,
"step": 3590
},
{
"epoch": 0.40313549832026874,
"grad_norm": 12.342984199523926,
"learning_rate": 4.32829413960433e-05,
"loss": 2.3637,
"step": 3600
},
{
"epoch": 0.40425531914893614,
"grad_norm": 2.721482276916504,
"learning_rate": 4.326427771556551e-05,
"loss": 2.3011,
"step": 3610
},
{
"epoch": 0.4053751399776036,
"grad_norm": 2.5782060623168945,
"learning_rate": 4.324561403508772e-05,
"loss": 2.1841,
"step": 3620
},
{
"epoch": 0.406494960806271,
"grad_norm": 2.5713908672332764,
"learning_rate": 4.322695035460993e-05,
"loss": 2.3976,
"step": 3630
},
{
"epoch": 0.4076147816349384,
"grad_norm": 7.063972473144531,
"learning_rate": 4.320828667413214e-05,
"loss": 2.4057,
"step": 3640
},
{
"epoch": 0.4087346024636058,
"grad_norm": 8.767318725585938,
"learning_rate": 4.318962299365435e-05,
"loss": 2.3432,
"step": 3650
},
{
"epoch": 0.40985442329227323,
"grad_norm": 9.010395050048828,
"learning_rate": 4.317095931317656e-05,
"loss": 2.3386,
"step": 3660
},
{
"epoch": 0.41097424412094063,
"grad_norm": 5.226011276245117,
"learning_rate": 4.315229563269877e-05,
"loss": 1.9751,
"step": 3670
},
{
"epoch": 0.41209406494960804,
"grad_norm": 2.9475603103637695,
"learning_rate": 4.313363195222098e-05,
"loss": 2.0421,
"step": 3680
},
{
"epoch": 0.4132138857782755,
"grad_norm": 2.4759316444396973,
"learning_rate": 4.311496827174319e-05,
"loss": 2.0569,
"step": 3690
},
{
"epoch": 0.4143337066069429,
"grad_norm": 2.6276895999908447,
"learning_rate": 4.30963045912654e-05,
"loss": 1.8686,
"step": 3700
},
{
"epoch": 0.4154535274356103,
"grad_norm": 5.415910243988037,
"learning_rate": 4.307764091078761e-05,
"loss": 2.2398,
"step": 3710
},
{
"epoch": 0.4165733482642777,
"grad_norm": 9.693281173706055,
"learning_rate": 4.305897723030982e-05,
"loss": 2.4323,
"step": 3720
},
{
"epoch": 0.4176931690929451,
"grad_norm": 6.599532127380371,
"learning_rate": 4.304031354983203e-05,
"loss": 2.0143,
"step": 3730
},
{
"epoch": 0.41881298992161253,
"grad_norm": 4.097227096557617,
"learning_rate": 4.302164986935424e-05,
"loss": 2.3824,
"step": 3740
},
{
"epoch": 0.41993281075027994,
"grad_norm": 3.5678653717041016,
"learning_rate": 4.300298618887645e-05,
"loss": 2.0583,
"step": 3750
},
{
"epoch": 0.42105263157894735,
"grad_norm": 13.594582557678223,
"learning_rate": 4.2984322508398656e-05,
"loss": 2.3702,
"step": 3760
},
{
"epoch": 0.4221724524076148,
"grad_norm": 10.508759498596191,
"learning_rate": 4.296565882792087e-05,
"loss": 1.9474,
"step": 3770
},
{
"epoch": 0.4232922732362822,
"grad_norm": 2.452303647994995,
"learning_rate": 4.294699514744307e-05,
"loss": 2.3192,
"step": 3780
},
{
"epoch": 0.4244120940649496,
"grad_norm": 7.144927978515625,
"learning_rate": 4.292833146696529e-05,
"loss": 2.1651,
"step": 3790
},
{
"epoch": 0.425531914893617,
"grad_norm": 8.945828437805176,
"learning_rate": 4.2909667786487496e-05,
"loss": 2.3486,
"step": 3800
},
{
"epoch": 0.42665173572228443,
"grad_norm": 2.609912633895874,
"learning_rate": 4.289100410600971e-05,
"loss": 1.8995,
"step": 3810
},
{
"epoch": 0.42777155655095184,
"grad_norm": 7.373888969421387,
"learning_rate": 4.287234042553191e-05,
"loss": 2.0976,
"step": 3820
},
{
"epoch": 0.42889137737961924,
"grad_norm": 2.694624662399292,
"learning_rate": 4.2853676745054125e-05,
"loss": 1.9521,
"step": 3830
},
{
"epoch": 0.43001119820828665,
"grad_norm": 2.8247783184051514,
"learning_rate": 4.2835013064576336e-05,
"loss": 2.7777,
"step": 3840
},
{
"epoch": 0.4311310190369541,
"grad_norm": 9.070876121520996,
"learning_rate": 4.281634938409855e-05,
"loss": 2.4438,
"step": 3850
},
{
"epoch": 0.4322508398656215,
"grad_norm": 5.014525890350342,
"learning_rate": 4.279768570362075e-05,
"loss": 2.1444,
"step": 3860
},
{
"epoch": 0.4333706606942889,
"grad_norm": 3.661271333694458,
"learning_rate": 4.2779022023142965e-05,
"loss": 2.052,
"step": 3870
},
{
"epoch": 0.43449048152295633,
"grad_norm": 6.962841033935547,
"learning_rate": 4.2760358342665176e-05,
"loss": 2.1674,
"step": 3880
},
{
"epoch": 0.43561030235162373,
"grad_norm": 2.9479000568389893,
"learning_rate": 4.274169466218739e-05,
"loss": 2.0745,
"step": 3890
},
{
"epoch": 0.43673012318029114,
"grad_norm": 2.4860355854034424,
"learning_rate": 4.272303098170959e-05,
"loss": 2.2103,
"step": 3900
},
{
"epoch": 0.43784994400895855,
"grad_norm": 3.2063636779785156,
"learning_rate": 4.2704367301231805e-05,
"loss": 2.6216,
"step": 3910
},
{
"epoch": 0.43896976483762595,
"grad_norm": 8.925811767578125,
"learning_rate": 4.2685703620754016e-05,
"loss": 1.8027,
"step": 3920
},
{
"epoch": 0.4400895856662934,
"grad_norm": 2.438516616821289,
"learning_rate": 4.266703994027623e-05,
"loss": 2.2463,
"step": 3930
},
{
"epoch": 0.4412094064949608,
"grad_norm": 3.3323545455932617,
"learning_rate": 4.264837625979843e-05,
"loss": 2.0737,
"step": 3940
},
{
"epoch": 0.4423292273236282,
"grad_norm": 8.51876163482666,
"learning_rate": 4.2629712579320645e-05,
"loss": 2.366,
"step": 3950
},
{
"epoch": 0.44344904815229563,
"grad_norm": 15.145380973815918,
"learning_rate": 4.2611048898842856e-05,
"loss": 2.3532,
"step": 3960
},
{
"epoch": 0.44456886898096304,
"grad_norm": 7.819403171539307,
"learning_rate": 4.259238521836507e-05,
"loss": 2.0457,
"step": 3970
},
{
"epoch": 0.44568868980963045,
"grad_norm": 2.681534767150879,
"learning_rate": 4.257372153788727e-05,
"loss": 2.159,
"step": 3980
},
{
"epoch": 0.44680851063829785,
"grad_norm": 2.585684061050415,
"learning_rate": 4.255505785740948e-05,
"loss": 2.5404,
"step": 3990
},
{
"epoch": 0.4479283314669653,
"grad_norm": 6.74754524230957,
"learning_rate": 4.2536394176931696e-05,
"loss": 2.3685,
"step": 4000
},
{
"epoch": 0.4490481522956327,
"grad_norm": 2.9003231525421143,
"learning_rate": 4.25177304964539e-05,
"loss": 2.1618,
"step": 4010
},
{
"epoch": 0.4501679731243001,
"grad_norm": 13.633071899414062,
"learning_rate": 4.249906681597611e-05,
"loss": 2.375,
"step": 4020
},
{
"epoch": 0.45128779395296753,
"grad_norm": 9.91258430480957,
"learning_rate": 4.248040313549832e-05,
"loss": 2.0909,
"step": 4030
},
{
"epoch": 0.45240761478163494,
"grad_norm": 4.096499443054199,
"learning_rate": 4.246173945502053e-05,
"loss": 2.1711,
"step": 4040
},
{
"epoch": 0.45352743561030234,
"grad_norm": 10.29516887664795,
"learning_rate": 4.244307577454274e-05,
"loss": 2.4019,
"step": 4050
},
{
"epoch": 0.45464725643896975,
"grad_norm": 9.679535865783691,
"learning_rate": 4.242441209406495e-05,
"loss": 2.1732,
"step": 4060
},
{
"epoch": 0.45576707726763716,
"grad_norm": 2.7053027153015137,
"learning_rate": 4.240574841358716e-05,
"loss": 2.0555,
"step": 4070
},
{
"epoch": 0.4568868980963046,
"grad_norm": 7.90255069732666,
"learning_rate": 4.238708473310937e-05,
"loss": 2.3904,
"step": 4080
},
{
"epoch": 0.458006718924972,
"grad_norm": 3.999415397644043,
"learning_rate": 4.236842105263158e-05,
"loss": 2.2267,
"step": 4090
},
{
"epoch": 0.45912653975363943,
"grad_norm": 4.677366256713867,
"learning_rate": 4.234975737215379e-05,
"loss": 1.7037,
"step": 4100
},
{
"epoch": 0.46024636058230683,
"grad_norm": 10.746310234069824,
"learning_rate": 4.2331093691676e-05,
"loss": 2.3778,
"step": 4110
},
{
"epoch": 0.46136618141097424,
"grad_norm": 2.9237968921661377,
"learning_rate": 4.231243001119821e-05,
"loss": 2.2335,
"step": 4120
},
{
"epoch": 0.46248600223964165,
"grad_norm": 3.3537890911102295,
"learning_rate": 4.229376633072042e-05,
"loss": 2.0904,
"step": 4130
},
{
"epoch": 0.46360582306830905,
"grad_norm": 4.9723358154296875,
"learning_rate": 4.227510265024263e-05,
"loss": 2.0794,
"step": 4140
},
{
"epoch": 0.46472564389697646,
"grad_norm": 3.5420267581939697,
"learning_rate": 4.225643896976484e-05,
"loss": 1.95,
"step": 4150
},
{
"epoch": 0.4658454647256439,
"grad_norm": 5.858832359313965,
"learning_rate": 4.223777528928705e-05,
"loss": 2.3874,
"step": 4160
},
{
"epoch": 0.4669652855543113,
"grad_norm": 3.2437384128570557,
"learning_rate": 4.221911160880926e-05,
"loss": 1.9892,
"step": 4170
},
{
"epoch": 0.46808510638297873,
"grad_norm": 8.610901832580566,
"learning_rate": 4.220044792833147e-05,
"loss": 2.8553,
"step": 4180
},
{
"epoch": 0.46920492721164614,
"grad_norm": 8.59118938446045,
"learning_rate": 4.218178424785368e-05,
"loss": 2.4644,
"step": 4190
},
{
"epoch": 0.47032474804031354,
"grad_norm": 6.852227210998535,
"learning_rate": 4.216312056737589e-05,
"loss": 2.1337,
"step": 4200
},
{
"epoch": 0.47144456886898095,
"grad_norm": 6.020224571228027,
"learning_rate": 4.21444568868981e-05,
"loss": 1.9491,
"step": 4210
},
{
"epoch": 0.47256438969764836,
"grad_norm": 4.580352783203125,
"learning_rate": 4.2125793206420306e-05,
"loss": 2.6378,
"step": 4220
},
{
"epoch": 0.47368421052631576,
"grad_norm": 10.987154006958008,
"learning_rate": 4.210712952594252e-05,
"loss": 2.3521,
"step": 4230
},
{
"epoch": 0.4748040313549832,
"grad_norm": 2.8252968788146973,
"learning_rate": 4.208846584546472e-05,
"loss": 2.3216,
"step": 4240
},
{
"epoch": 0.47592385218365063,
"grad_norm": 5.822597503662109,
"learning_rate": 4.2069802164986935e-05,
"loss": 1.9804,
"step": 4250
},
{
"epoch": 0.47704367301231804,
"grad_norm": 3.373899221420288,
"learning_rate": 4.2051138484509146e-05,
"loss": 1.9312,
"step": 4260
},
{
"epoch": 0.47816349384098544,
"grad_norm": 8.621574401855469,
"learning_rate": 4.203247480403136e-05,
"loss": 2.1056,
"step": 4270
},
{
"epoch": 0.47928331466965285,
"grad_norm": 3.1795461177825928,
"learning_rate": 4.201381112355356e-05,
"loss": 1.9162,
"step": 4280
},
{
"epoch": 0.48040313549832026,
"grad_norm": 3.5701396465301514,
"learning_rate": 4.1995147443075775e-05,
"loss": 2.3904,
"step": 4290
},
{
"epoch": 0.48152295632698766,
"grad_norm": 2.4584646224975586,
"learning_rate": 4.1976483762597986e-05,
"loss": 1.9739,
"step": 4300
},
{
"epoch": 0.4826427771556551,
"grad_norm": 9.168150901794434,
"learning_rate": 4.19578200821202e-05,
"loss": 2.242,
"step": 4310
},
{
"epoch": 0.48376259798432253,
"grad_norm": 6.235483169555664,
"learning_rate": 4.19391564016424e-05,
"loss": 2.0577,
"step": 4320
},
{
"epoch": 0.48488241881298993,
"grad_norm": 4.428210735321045,
"learning_rate": 4.1920492721164615e-05,
"loss": 2.3219,
"step": 4330
},
{
"epoch": 0.48600223964165734,
"grad_norm": 10.322796821594238,
"learning_rate": 4.1901829040686826e-05,
"loss": 2.4196,
"step": 4340
},
{
"epoch": 0.48712206047032475,
"grad_norm": 11.971220016479492,
"learning_rate": 4.188316536020904e-05,
"loss": 2.221,
"step": 4350
},
{
"epoch": 0.48824188129899215,
"grad_norm": 2.4789071083068848,
"learning_rate": 4.186450167973124e-05,
"loss": 1.9658,
"step": 4360
},
{
"epoch": 0.48936170212765956,
"grad_norm": 3.5437817573547363,
"learning_rate": 4.1845837999253455e-05,
"loss": 2.3086,
"step": 4370
},
{
"epoch": 0.49048152295632697,
"grad_norm": 2.637206554412842,
"learning_rate": 4.1827174318775666e-05,
"loss": 2.0656,
"step": 4380
},
{
"epoch": 0.4916013437849944,
"grad_norm": 2.3072986602783203,
"learning_rate": 4.180851063829788e-05,
"loss": 1.8875,
"step": 4390
},
{
"epoch": 0.49272116461366183,
"grad_norm": 11.45031452178955,
"learning_rate": 4.178984695782008e-05,
"loss": 1.8249,
"step": 4400
},
{
"epoch": 0.49384098544232924,
"grad_norm": 8.976868629455566,
"learning_rate": 4.1771183277342295e-05,
"loss": 2.4993,
"step": 4410
},
{
"epoch": 0.49496080627099664,
"grad_norm": 2.619194507598877,
"learning_rate": 4.1752519596864506e-05,
"loss": 2.221,
"step": 4420
},
{
"epoch": 0.49608062709966405,
"grad_norm": 10.117256164550781,
"learning_rate": 4.173385591638672e-05,
"loss": 1.8587,
"step": 4430
},
{
"epoch": 0.49720044792833146,
"grad_norm": 2.874436140060425,
"learning_rate": 4.171519223590892e-05,
"loss": 1.5831,
"step": 4440
},
{
"epoch": 0.49832026875699886,
"grad_norm": 10.60855484008789,
"learning_rate": 4.1696528555431135e-05,
"loss": 2.483,
"step": 4450
},
{
"epoch": 0.49944008958566627,
"grad_norm": 5.419251441955566,
"learning_rate": 4.167786487495334e-05,
"loss": 1.905,
"step": 4460
},
{
"epoch": 0.5005599104143337,
"grad_norm": 4.474939823150635,
"learning_rate": 4.165920119447555e-05,
"loss": 2.1842,
"step": 4470
},
{
"epoch": 0.5016797312430011,
"grad_norm": 2.611745595932007,
"learning_rate": 4.164053751399776e-05,
"loss": 2.2377,
"step": 4480
},
{
"epoch": 0.5027995520716685,
"grad_norm": 3.1102752685546875,
"learning_rate": 4.162187383351997e-05,
"loss": 1.7457,
"step": 4490
},
{
"epoch": 0.503919372900336,
"grad_norm": 3.362260341644287,
"learning_rate": 4.160321015304218e-05,
"loss": 2.2162,
"step": 4500
},
{
"epoch": 0.5050391937290034,
"grad_norm": 5.862063407897949,
"learning_rate": 4.158454647256439e-05,
"loss": 1.9277,
"step": 4510
},
{
"epoch": 0.5061590145576708,
"grad_norm": 2.268481969833374,
"learning_rate": 4.15658827920866e-05,
"loss": 2.0642,
"step": 4520
},
{
"epoch": 0.5072788353863382,
"grad_norm": 3.2130661010742188,
"learning_rate": 4.154721911160881e-05,
"loss": 2.2511,
"step": 4530
},
{
"epoch": 0.5083986562150056,
"grad_norm": 9.958855628967285,
"learning_rate": 4.152855543113102e-05,
"loss": 2.8674,
"step": 4540
},
{
"epoch": 0.509518477043673,
"grad_norm": 3.7821731567382812,
"learning_rate": 4.150989175065323e-05,
"loss": 2.2098,
"step": 4550
},
{
"epoch": 0.5106382978723404,
"grad_norm": 3.210670232772827,
"learning_rate": 4.149122807017544e-05,
"loss": 2.1041,
"step": 4560
},
{
"epoch": 0.5117581187010078,
"grad_norm": 12.174056053161621,
"learning_rate": 4.147256438969765e-05,
"loss": 2.1001,
"step": 4570
},
{
"epoch": 0.5128779395296752,
"grad_norm": 10.776714324951172,
"learning_rate": 4.145390070921986e-05,
"loss": 2.2956,
"step": 4580
},
{
"epoch": 0.5139977603583427,
"grad_norm": 7.005626201629639,
"learning_rate": 4.143523702874207e-05,
"loss": 2.2044,
"step": 4590
},
{
"epoch": 0.5151175811870101,
"grad_norm": 11.280997276306152,
"learning_rate": 4.141657334826428e-05,
"loss": 2.3835,
"step": 4600
},
{
"epoch": 0.5162374020156775,
"grad_norm": 8.00539779663086,
"learning_rate": 4.139790966778649e-05,
"loss": 2.15,
"step": 4610
},
{
"epoch": 0.5173572228443449,
"grad_norm": 11.379185676574707,
"learning_rate": 4.13792459873087e-05,
"loss": 2.2607,
"step": 4620
},
{
"epoch": 0.5184770436730123,
"grad_norm": 3.4828784465789795,
"learning_rate": 4.136058230683091e-05,
"loss": 2.6439,
"step": 4630
},
{
"epoch": 0.5195968645016797,
"grad_norm": 8.438654899597168,
"learning_rate": 4.134191862635312e-05,
"loss": 1.7996,
"step": 4640
},
{
"epoch": 0.5207166853303471,
"grad_norm": 3.8132407665252686,
"learning_rate": 4.132325494587533e-05,
"loss": 2.1626,
"step": 4650
},
{
"epoch": 0.5218365061590146,
"grad_norm": 11.904292106628418,
"learning_rate": 4.130459126539754e-05,
"loss": 2.5126,
"step": 4660
},
{
"epoch": 0.522956326987682,
"grad_norm": 8.020877838134766,
"learning_rate": 4.1285927584919745e-05,
"loss": 2.3528,
"step": 4670
},
{
"epoch": 0.5240761478163494,
"grad_norm": 2.708252191543579,
"learning_rate": 4.126726390444196e-05,
"loss": 2.0487,
"step": 4680
},
{
"epoch": 0.5251959686450168,
"grad_norm": 3.0927486419677734,
"learning_rate": 4.124860022396417e-05,
"loss": 1.6742,
"step": 4690
},
{
"epoch": 0.5263157894736842,
"grad_norm": 12.992857933044434,
"learning_rate": 4.122993654348637e-05,
"loss": 1.9681,
"step": 4700
},
{
"epoch": 0.5274356103023516,
"grad_norm": 6.5138325691223145,
"learning_rate": 4.1211272863008585e-05,
"loss": 1.923,
"step": 4710
},
{
"epoch": 0.528555431131019,
"grad_norm": 3.025493621826172,
"learning_rate": 4.1192609182530796e-05,
"loss": 1.6204,
"step": 4720
},
{
"epoch": 0.5296752519596865,
"grad_norm": 3.9649546146392822,
"learning_rate": 4.117394550205301e-05,
"loss": 2.3652,
"step": 4730
},
{
"epoch": 0.5307950727883539,
"grad_norm": 3.212306499481201,
"learning_rate": 4.115528182157521e-05,
"loss": 2.0342,
"step": 4740
},
{
"epoch": 0.5319148936170213,
"grad_norm": 9.27729320526123,
"learning_rate": 4.1136618141097425e-05,
"loss": 1.9462,
"step": 4750
},
{
"epoch": 0.5330347144456887,
"grad_norm": 8.476268768310547,
"learning_rate": 4.1117954460619636e-05,
"loss": 2.3341,
"step": 4760
},
{
"epoch": 0.5341545352743561,
"grad_norm": 3.2790377140045166,
"learning_rate": 4.109929078014185e-05,
"loss": 2.1268,
"step": 4770
},
{
"epoch": 0.5352743561030235,
"grad_norm": 2.6565237045288086,
"learning_rate": 4.108062709966405e-05,
"loss": 1.9036,
"step": 4780
},
{
"epoch": 0.5363941769316909,
"grad_norm": 6.67348051071167,
"learning_rate": 4.1061963419186265e-05,
"loss": 1.9455,
"step": 4790
},
{
"epoch": 0.5375139977603584,
"grad_norm": 8.719578742980957,
"learning_rate": 4.1043299738708476e-05,
"loss": 2.1615,
"step": 4800
},
{
"epoch": 0.5386338185890257,
"grad_norm": 5.975245475769043,
"learning_rate": 4.102463605823069e-05,
"loss": 2.1647,
"step": 4810
},
{
"epoch": 0.5397536394176932,
"grad_norm": 2.856062412261963,
"learning_rate": 4.100597237775289e-05,
"loss": 2.0456,
"step": 4820
},
{
"epoch": 0.5408734602463606,
"grad_norm": 2.8386130332946777,
"learning_rate": 4.0987308697275105e-05,
"loss": 2.1015,
"step": 4830
},
{
"epoch": 0.541993281075028,
"grad_norm": 3.556990623474121,
"learning_rate": 4.0968645016797316e-05,
"loss": 2.2417,
"step": 4840
},
{
"epoch": 0.5431131019036954,
"grad_norm": 7.924489498138428,
"learning_rate": 4.094998133631953e-05,
"loss": 1.9925,
"step": 4850
},
{
"epoch": 0.5442329227323628,
"grad_norm": 11.762128829956055,
"learning_rate": 4.093131765584173e-05,
"loss": 2.5949,
"step": 4860
},
{
"epoch": 0.5453527435610303,
"grad_norm": 7.913935661315918,
"learning_rate": 4.0912653975363945e-05,
"loss": 2.0106,
"step": 4870
},
{
"epoch": 0.5464725643896976,
"grad_norm": 8.176780700683594,
"learning_rate": 4.0893990294886156e-05,
"loss": 2.1813,
"step": 4880
},
{
"epoch": 0.5475923852183651,
"grad_norm": 2.9236576557159424,
"learning_rate": 4.087532661440837e-05,
"loss": 2.1256,
"step": 4890
},
{
"epoch": 0.5487122060470325,
"grad_norm": 12.421939849853516,
"learning_rate": 4.085666293393057e-05,
"loss": 2.2168,
"step": 4900
},
{
"epoch": 0.5498320268756999,
"grad_norm": 2.374150514602661,
"learning_rate": 4.0837999253452785e-05,
"loss": 2.1841,
"step": 4910
},
{
"epoch": 0.5509518477043673,
"grad_norm": 5.778265953063965,
"learning_rate": 4.081933557297499e-05,
"loss": 1.866,
"step": 4920
},
{
"epoch": 0.5520716685330347,
"grad_norm": 2.9031143188476562,
"learning_rate": 4.08006718924972e-05,
"loss": 2.1778,
"step": 4930
},
{
"epoch": 0.5531914893617021,
"grad_norm": 2.8752217292785645,
"learning_rate": 4.078200821201941e-05,
"loss": 2.1301,
"step": 4940
},
{
"epoch": 0.5543113101903695,
"grad_norm": 6.815023899078369,
"learning_rate": 4.076334453154162e-05,
"loss": 2.1037,
"step": 4950
},
{
"epoch": 0.555431131019037,
"grad_norm": 3.5605039596557617,
"learning_rate": 4.074468085106383e-05,
"loss": 2.181,
"step": 4960
},
{
"epoch": 0.5565509518477044,
"grad_norm": 3.3536183834075928,
"learning_rate": 4.072601717058604e-05,
"loss": 2.1846,
"step": 4970
},
{
"epoch": 0.5576707726763718,
"grad_norm": 14.845537185668945,
"learning_rate": 4.070735349010825e-05,
"loss": 2.332,
"step": 4980
},
{
"epoch": 0.5587905935050392,
"grad_norm": 6.083976745605469,
"learning_rate": 4.068868980963046e-05,
"loss": 2.1927,
"step": 4990
},
{
"epoch": 0.5599104143337066,
"grad_norm": 3.194537401199341,
"learning_rate": 4.067002612915267e-05,
"loss": 2.3715,
"step": 5000
},
{
"epoch": 0.561030235162374,
"grad_norm": 2.682272434234619,
"learning_rate": 4.065136244867488e-05,
"loss": 1.95,
"step": 5010
},
{
"epoch": 0.5621500559910414,
"grad_norm": 3.429429054260254,
"learning_rate": 4.063269876819709e-05,
"loss": 2.2317,
"step": 5020
},
{
"epoch": 0.5632698768197089,
"grad_norm": 6.22359037399292,
"learning_rate": 4.06140350877193e-05,
"loss": 2.2734,
"step": 5030
},
{
"epoch": 0.5643896976483762,
"grad_norm": 12.685219764709473,
"learning_rate": 4.059537140724151e-05,
"loss": 2.3362,
"step": 5040
},
{
"epoch": 0.5655095184770437,
"grad_norm": 3.122385025024414,
"learning_rate": 4.057670772676372e-05,
"loss": 2.2147,
"step": 5050
},
{
"epoch": 0.5666293393057111,
"grad_norm": 3.515317678451538,
"learning_rate": 4.055804404628593e-05,
"loss": 2.3661,
"step": 5060
},
{
"epoch": 0.5677491601343785,
"grad_norm": 5.837533473968506,
"learning_rate": 4.053938036580814e-05,
"loss": 2.057,
"step": 5070
},
{
"epoch": 0.568868980963046,
"grad_norm": 2.728402614593506,
"learning_rate": 4.052071668533035e-05,
"loss": 1.9779,
"step": 5080
},
{
"epoch": 0.5699888017917133,
"grad_norm": 5.042017459869385,
"learning_rate": 4.050205300485256e-05,
"loss": 1.8092,
"step": 5090
},
{
"epoch": 0.5711086226203808,
"grad_norm": 9.7918701171875,
"learning_rate": 4.048338932437477e-05,
"loss": 2.4933,
"step": 5100
},
{
"epoch": 0.5722284434490481,
"grad_norm": 3.005107879638672,
"learning_rate": 4.046472564389698e-05,
"loss": 2.2101,
"step": 5110
},
{
"epoch": 0.5733482642777156,
"grad_norm": 4.867323875427246,
"learning_rate": 4.044606196341919e-05,
"loss": 2.0726,
"step": 5120
},
{
"epoch": 0.574468085106383,
"grad_norm": 4.434531211853027,
"learning_rate": 4.0427398282941395e-05,
"loss": 2.5219,
"step": 5130
},
{
"epoch": 0.5755879059350504,
"grad_norm": 9.07414722442627,
"learning_rate": 4.040873460246361e-05,
"loss": 2.2031,
"step": 5140
},
{
"epoch": 0.5767077267637178,
"grad_norm": 2.6722495555877686,
"learning_rate": 4.039007092198582e-05,
"loss": 2.2475,
"step": 5150
},
{
"epoch": 0.5778275475923852,
"grad_norm": 6.318906784057617,
"learning_rate": 4.037140724150802e-05,
"loss": 2.1646,
"step": 5160
},
{
"epoch": 0.5789473684210527,
"grad_norm": 2.754269599914551,
"learning_rate": 4.0352743561030235e-05,
"loss": 2.6936,
"step": 5170
},
{
"epoch": 0.58006718924972,
"grad_norm": 8.491806983947754,
"learning_rate": 4.0334079880552446e-05,
"loss": 2.308,
"step": 5180
},
{
"epoch": 0.5811870100783875,
"grad_norm": 4.256706714630127,
"learning_rate": 4.031541620007466e-05,
"loss": 1.9769,
"step": 5190
},
{
"epoch": 0.5823068309070548,
"grad_norm": 8.133840560913086,
"learning_rate": 4.029675251959686e-05,
"loss": 1.8873,
"step": 5200
},
{
"epoch": 0.5834266517357223,
"grad_norm": 3.6523616313934326,
"learning_rate": 4.0278088839119075e-05,
"loss": 1.9991,
"step": 5210
},
{
"epoch": 0.5845464725643897,
"grad_norm": 2.976468324661255,
"learning_rate": 4.0259425158641286e-05,
"loss": 2.6573,
"step": 5220
},
{
"epoch": 0.5856662933930571,
"grad_norm": 8.252400398254395,
"learning_rate": 4.02407614781635e-05,
"loss": 2.221,
"step": 5230
},
{
"epoch": 0.5867861142217246,
"grad_norm": 3.0009639263153076,
"learning_rate": 4.02220977976857e-05,
"loss": 2.2523,
"step": 5240
},
{
"epoch": 0.5879059350503919,
"grad_norm": 2.764678955078125,
"learning_rate": 4.0203434117207915e-05,
"loss": 2.0534,
"step": 5250
},
{
"epoch": 0.5890257558790594,
"grad_norm": 2.818638563156128,
"learning_rate": 4.0184770436730126e-05,
"loss": 1.9171,
"step": 5260
},
{
"epoch": 0.5901455767077267,
"grad_norm": 3.9487977027893066,
"learning_rate": 4.016610675625234e-05,
"loss": 2.1506,
"step": 5270
},
{
"epoch": 0.5912653975363942,
"grad_norm": 4.936847686767578,
"learning_rate": 4.014744307577454e-05,
"loss": 2.1909,
"step": 5280
},
{
"epoch": 0.5923852183650616,
"grad_norm": 14.60064697265625,
"learning_rate": 4.0128779395296755e-05,
"loss": 2.0533,
"step": 5290
},
{
"epoch": 0.593505039193729,
"grad_norm": 9.342129707336426,
"learning_rate": 4.0110115714818966e-05,
"loss": 2.0769,
"step": 5300
},
{
"epoch": 0.5946248600223965,
"grad_norm": 16.89434242248535,
"learning_rate": 4.009145203434118e-05,
"loss": 2.2267,
"step": 5310
},
{
"epoch": 0.5957446808510638,
"grad_norm": 6.977470397949219,
"learning_rate": 4.007278835386338e-05,
"loss": 2.954,
"step": 5320
},
{
"epoch": 0.5968645016797313,
"grad_norm": 2.928067922592163,
"learning_rate": 4.0054124673385595e-05,
"loss": 1.9651,
"step": 5330
},
{
"epoch": 0.5979843225083986,
"grad_norm": 2.7120723724365234,
"learning_rate": 4.00354609929078e-05,
"loss": 2.5949,
"step": 5340
},
{
"epoch": 0.5991041433370661,
"grad_norm": 2.3959896564483643,
"learning_rate": 4.001679731243002e-05,
"loss": 2.5269,
"step": 5350
},
{
"epoch": 0.6002239641657335,
"grad_norm": 4.766486644744873,
"learning_rate": 3.999813363195222e-05,
"loss": 2.1063,
"step": 5360
},
{
"epoch": 0.6013437849944009,
"grad_norm": 2.9101717472076416,
"learning_rate": 3.9979469951474435e-05,
"loss": 2.4363,
"step": 5370
},
{
"epoch": 0.6024636058230683,
"grad_norm": 2.3980298042297363,
"learning_rate": 3.996080627099664e-05,
"loss": 2.166,
"step": 5380
},
{
"epoch": 0.6035834266517357,
"grad_norm": 3.109349012374878,
"learning_rate": 3.994214259051885e-05,
"loss": 2.3349,
"step": 5390
},
{
"epoch": 0.6047032474804032,
"grad_norm": 3.364403486251831,
"learning_rate": 3.992347891004106e-05,
"loss": 2.036,
"step": 5400
},
{
"epoch": 0.6058230683090705,
"grad_norm": 6.84296989440918,
"learning_rate": 3.990481522956327e-05,
"loss": 2.1545,
"step": 5410
},
{
"epoch": 0.606942889137738,
"grad_norm": 3.494910717010498,
"learning_rate": 3.988615154908548e-05,
"loss": 2.0747,
"step": 5420
},
{
"epoch": 0.6080627099664053,
"grad_norm": 11.233692169189453,
"learning_rate": 3.986748786860769e-05,
"loss": 2.1933,
"step": 5430
},
{
"epoch": 0.6091825307950728,
"grad_norm": 2.6794285774230957,
"learning_rate": 3.98488241881299e-05,
"loss": 1.9889,
"step": 5440
},
{
"epoch": 0.6103023516237402,
"grad_norm": 6.740621089935303,
"learning_rate": 3.983016050765211e-05,
"loss": 2.4853,
"step": 5450
},
{
"epoch": 0.6114221724524076,
"grad_norm": 3.250119686126709,
"learning_rate": 3.981149682717432e-05,
"loss": 2.2843,
"step": 5460
},
{
"epoch": 0.6125419932810751,
"grad_norm": 5.2820940017700195,
"learning_rate": 3.979283314669653e-05,
"loss": 2.0738,
"step": 5470
},
{
"epoch": 0.6136618141097424,
"grad_norm": 5.155092716217041,
"learning_rate": 3.977416946621874e-05,
"loss": 2.0983,
"step": 5480
},
{
"epoch": 0.6147816349384099,
"grad_norm": 10.836530685424805,
"learning_rate": 3.975550578574095e-05,
"loss": 1.8994,
"step": 5490
},
{
"epoch": 0.6159014557670772,
"grad_norm": 4.8996968269348145,
"learning_rate": 3.973684210526316e-05,
"loss": 2.0153,
"step": 5500
},
{
"epoch": 0.6170212765957447,
"grad_norm": 12.028742790222168,
"learning_rate": 3.971817842478537e-05,
"loss": 2.4317,
"step": 5510
},
{
"epoch": 0.6181410974244121,
"grad_norm": 10.310006141662598,
"learning_rate": 3.969951474430758e-05,
"loss": 2.5903,
"step": 5520
},
{
"epoch": 0.6192609182530795,
"grad_norm": 3.7316179275512695,
"learning_rate": 3.968085106382979e-05,
"loss": 2.148,
"step": 5530
},
{
"epoch": 0.620380739081747,
"grad_norm": 4.745426177978516,
"learning_rate": 3.9662187383352e-05,
"loss": 2.3905,
"step": 5540
},
{
"epoch": 0.6215005599104143,
"grad_norm": 12.419564247131348,
"learning_rate": 3.9643523702874205e-05,
"loss": 2.6076,
"step": 5550
},
{
"epoch": 0.6226203807390818,
"grad_norm": 3.6548733711242676,
"learning_rate": 3.962486002239642e-05,
"loss": 2.2913,
"step": 5560
},
{
"epoch": 0.6237402015677491,
"grad_norm": 2.8344454765319824,
"learning_rate": 3.960619634191863e-05,
"loss": 2.3774,
"step": 5570
},
{
"epoch": 0.6248600223964166,
"grad_norm": 4.7610321044921875,
"learning_rate": 3.958753266144084e-05,
"loss": 2.2121,
"step": 5580
},
{
"epoch": 0.6259798432250839,
"grad_norm": 3.1101725101470947,
"learning_rate": 3.9568868980963045e-05,
"loss": 2.3034,
"step": 5590
},
{
"epoch": 0.6270996640537514,
"grad_norm": 2.6766905784606934,
"learning_rate": 3.955020530048526e-05,
"loss": 1.7205,
"step": 5600
},
{
"epoch": 0.6282194848824189,
"grad_norm": 3.271083116531372,
"learning_rate": 3.953154162000747e-05,
"loss": 2.4918,
"step": 5610
},
{
"epoch": 0.6293393057110862,
"grad_norm": 7.914976119995117,
"learning_rate": 3.951287793952967e-05,
"loss": 2.1463,
"step": 5620
},
{
"epoch": 0.6304591265397537,
"grad_norm": 3.1537246704101562,
"learning_rate": 3.9494214259051885e-05,
"loss": 2.0338,
"step": 5630
},
{
"epoch": 0.631578947368421,
"grad_norm": 10.18811321258545,
"learning_rate": 3.9475550578574096e-05,
"loss": 2.1563,
"step": 5640
},
{
"epoch": 0.6326987681970885,
"grad_norm": 11.715261459350586,
"learning_rate": 3.945688689809631e-05,
"loss": 2.3083,
"step": 5650
},
{
"epoch": 0.6338185890257558,
"grad_norm": 2.7163820266723633,
"learning_rate": 3.943822321761851e-05,
"loss": 2.3445,
"step": 5660
},
{
"epoch": 0.6349384098544233,
"grad_norm": 6.3636627197265625,
"learning_rate": 3.9419559537140725e-05,
"loss": 2.2905,
"step": 5670
},
{
"epoch": 0.6360582306830908,
"grad_norm": 3.9701311588287354,
"learning_rate": 3.9400895856662936e-05,
"loss": 1.8656,
"step": 5680
},
{
"epoch": 0.6371780515117581,
"grad_norm": 5.779101848602295,
"learning_rate": 3.938223217618515e-05,
"loss": 2.1488,
"step": 5690
},
{
"epoch": 0.6382978723404256,
"grad_norm": 3.038818359375,
"learning_rate": 3.936356849570735e-05,
"loss": 2.2644,
"step": 5700
},
{
"epoch": 0.6394176931690929,
"grad_norm": 2.684335470199585,
"learning_rate": 3.9344904815229565e-05,
"loss": 2.1819,
"step": 5710
},
{
"epoch": 0.6405375139977604,
"grad_norm": 11.84406566619873,
"learning_rate": 3.9326241134751776e-05,
"loss": 2.2329,
"step": 5720
},
{
"epoch": 0.6416573348264277,
"grad_norm": 2.978997230529785,
"learning_rate": 3.930757745427399e-05,
"loss": 2.2307,
"step": 5730
},
{
"epoch": 0.6427771556550952,
"grad_norm": 3.1249163150787354,
"learning_rate": 3.928891377379619e-05,
"loss": 1.8239,
"step": 5740
},
{
"epoch": 0.6438969764837627,
"grad_norm": 8.90749454498291,
"learning_rate": 3.9270250093318405e-05,
"loss": 2.3477,
"step": 5750
},
{
"epoch": 0.64501679731243,
"grad_norm": 8.59403133392334,
"learning_rate": 3.925158641284061e-05,
"loss": 1.6853,
"step": 5760
},
{
"epoch": 0.6461366181410975,
"grad_norm": 3.567573070526123,
"learning_rate": 3.923292273236283e-05,
"loss": 1.7603,
"step": 5770
},
{
"epoch": 0.6472564389697648,
"grad_norm": 2.913238286972046,
"learning_rate": 3.921425905188503e-05,
"loss": 1.9284,
"step": 5780
},
{
"epoch": 0.6483762597984323,
"grad_norm": 5.024287700653076,
"learning_rate": 3.9195595371407245e-05,
"loss": 1.9221,
"step": 5790
},
{
"epoch": 0.6494960806270996,
"grad_norm": 13.80717945098877,
"learning_rate": 3.917693169092945e-05,
"loss": 2.1171,
"step": 5800
},
{
"epoch": 0.6506159014557671,
"grad_norm": 2.929304599761963,
"learning_rate": 3.915826801045167e-05,
"loss": 1.929,
"step": 5810
},
{
"epoch": 0.6517357222844344,
"grad_norm": 2.820366859436035,
"learning_rate": 3.913960432997387e-05,
"loss": 2.1108,
"step": 5820
},
{
"epoch": 0.6528555431131019,
"grad_norm": 3.1681950092315674,
"learning_rate": 3.9120940649496085e-05,
"loss": 2.4005,
"step": 5830
},
{
"epoch": 0.6539753639417694,
"grad_norm": 10.097253799438477,
"learning_rate": 3.910227696901829e-05,
"loss": 1.8262,
"step": 5840
},
{
"epoch": 0.6550951847704367,
"grad_norm": 3.0544557571411133,
"learning_rate": 3.90836132885405e-05,
"loss": 2.2202,
"step": 5850
},
{
"epoch": 0.6562150055991042,
"grad_norm": 7.928321838378906,
"learning_rate": 3.906494960806271e-05,
"loss": 1.8896,
"step": 5860
},
{
"epoch": 0.6573348264277715,
"grad_norm": 12.526985168457031,
"learning_rate": 3.904628592758492e-05,
"loss": 2.263,
"step": 5870
},
{
"epoch": 0.658454647256439,
"grad_norm": 12.4088716506958,
"learning_rate": 3.902762224710713e-05,
"loss": 2.3226,
"step": 5880
},
{
"epoch": 0.6595744680851063,
"grad_norm": 8.499159812927246,
"learning_rate": 3.900895856662934e-05,
"loss": 1.9171,
"step": 5890
},
{
"epoch": 0.6606942889137738,
"grad_norm": 6.148478031158447,
"learning_rate": 3.899029488615155e-05,
"loss": 1.7816,
"step": 5900
},
{
"epoch": 0.6618141097424413,
"grad_norm": 2.6093831062316895,
"learning_rate": 3.897163120567376e-05,
"loss": 1.874,
"step": 5910
},
{
"epoch": 0.6629339305711086,
"grad_norm": 2.9577527046203613,
"learning_rate": 3.895296752519597e-05,
"loss": 2.4124,
"step": 5920
},
{
"epoch": 0.6640537513997761,
"grad_norm": 2.769073724746704,
"learning_rate": 3.893430384471818e-05,
"loss": 2.3388,
"step": 5930
},
{
"epoch": 0.6651735722284434,
"grad_norm": 3.398643970489502,
"learning_rate": 3.891564016424039e-05,
"loss": 2.2894,
"step": 5940
},
{
"epoch": 0.6662933930571109,
"grad_norm": 3.1375699043273926,
"learning_rate": 3.88969764837626e-05,
"loss": 2.4933,
"step": 5950
},
{
"epoch": 0.6674132138857782,
"grad_norm": 8.086012840270996,
"learning_rate": 3.887831280328481e-05,
"loss": 2.1847,
"step": 5960
},
{
"epoch": 0.6685330347144457,
"grad_norm": 7.541558742523193,
"learning_rate": 3.8859649122807015e-05,
"loss": 2.0018,
"step": 5970
},
{
"epoch": 0.6696528555431132,
"grad_norm": 8.947002410888672,
"learning_rate": 3.884098544232923e-05,
"loss": 2.3184,
"step": 5980
},
{
"epoch": 0.6707726763717805,
"grad_norm": 9.351658821105957,
"learning_rate": 3.882232176185144e-05,
"loss": 2.3077,
"step": 5990
},
{
"epoch": 0.671892497200448,
"grad_norm": 6.452417850494385,
"learning_rate": 3.880365808137365e-05,
"loss": 1.9899,
"step": 6000
},
{
"epoch": 0.6730123180291153,
"grad_norm": 7.518797874450684,
"learning_rate": 3.8784994400895855e-05,
"loss": 2.3504,
"step": 6010
},
{
"epoch": 0.6741321388577828,
"grad_norm": 11.191749572753906,
"learning_rate": 3.876633072041807e-05,
"loss": 2.2785,
"step": 6020
},
{
"epoch": 0.6752519596864501,
"grad_norm": 3.318284273147583,
"learning_rate": 3.874766703994028e-05,
"loss": 2.1379,
"step": 6030
},
{
"epoch": 0.6763717805151176,
"grad_norm": 11.762707710266113,
"learning_rate": 3.872900335946249e-05,
"loss": 2.1742,
"step": 6040
},
{
"epoch": 0.6774916013437849,
"grad_norm": 2.576070785522461,
"learning_rate": 3.8710339678984695e-05,
"loss": 2.033,
"step": 6050
},
{
"epoch": 0.6786114221724524,
"grad_norm": 3.21813702583313,
"learning_rate": 3.869167599850691e-05,
"loss": 2.1262,
"step": 6060
},
{
"epoch": 0.6797312430011199,
"grad_norm": 5.945693016052246,
"learning_rate": 3.867301231802912e-05,
"loss": 2.3859,
"step": 6070
},
{
"epoch": 0.6808510638297872,
"grad_norm": 4.385049819946289,
"learning_rate": 3.865434863755133e-05,
"loss": 1.8462,
"step": 6080
},
{
"epoch": 0.6819708846584547,
"grad_norm": 3.1201934814453125,
"learning_rate": 3.8635684957073535e-05,
"loss": 2.2583,
"step": 6090
},
{
"epoch": 0.683090705487122,
"grad_norm": 3.0420446395874023,
"learning_rate": 3.8617021276595746e-05,
"loss": 2.3901,
"step": 6100
},
{
"epoch": 0.6842105263157895,
"grad_norm": 2.6930365562438965,
"learning_rate": 3.859835759611796e-05,
"loss": 2.2662,
"step": 6110
},
{
"epoch": 0.6853303471444568,
"grad_norm": 4.4401984214782715,
"learning_rate": 3.857969391564016e-05,
"loss": 2.472,
"step": 6120
},
{
"epoch": 0.6864501679731243,
"grad_norm": 2.8523037433624268,
"learning_rate": 3.8561030235162375e-05,
"loss": 1.9654,
"step": 6130
},
{
"epoch": 0.6875699888017918,
"grad_norm": 13.0241060256958,
"learning_rate": 3.8542366554684587e-05,
"loss": 2.3012,
"step": 6140
},
{
"epoch": 0.6886898096304591,
"grad_norm": 3.5604312419891357,
"learning_rate": 3.85237028742068e-05,
"loss": 2.0856,
"step": 6150
},
{
"epoch": 0.6898096304591266,
"grad_norm": 2.850850820541382,
"learning_rate": 3.8505039193729e-05,
"loss": 2.0274,
"step": 6160
},
{
"epoch": 0.6909294512877939,
"grad_norm": 8.735082626342773,
"learning_rate": 3.8486375513251215e-05,
"loss": 1.6773,
"step": 6170
},
{
"epoch": 0.6920492721164614,
"grad_norm": 3.8772952556610107,
"learning_rate": 3.8467711832773427e-05,
"loss": 2.5705,
"step": 6180
},
{
"epoch": 0.6931690929451287,
"grad_norm": 8.324105262756348,
"learning_rate": 3.844904815229564e-05,
"loss": 2.331,
"step": 6190
},
{
"epoch": 0.6942889137737962,
"grad_norm": 3.392038583755493,
"learning_rate": 3.843038447181784e-05,
"loss": 2.4718,
"step": 6200
},
{
"epoch": 0.6954087346024636,
"grad_norm": 3.956043004989624,
"learning_rate": 3.8411720791340055e-05,
"loss": 2.0328,
"step": 6210
},
{
"epoch": 0.696528555431131,
"grad_norm": 3.05572247505188,
"learning_rate": 3.839305711086226e-05,
"loss": 1.9317,
"step": 6220
},
{
"epoch": 0.6976483762597985,
"grad_norm": 3.7026829719543457,
"learning_rate": 3.837439343038448e-05,
"loss": 2.1722,
"step": 6230
},
{
"epoch": 0.6987681970884658,
"grad_norm": 7.879319190979004,
"learning_rate": 3.835572974990668e-05,
"loss": 1.8841,
"step": 6240
},
{
"epoch": 0.6998880179171333,
"grad_norm": 11.358241081237793,
"learning_rate": 3.8337066069428895e-05,
"loss": 2.5414,
"step": 6250
},
{
"epoch": 0.7010078387458006,
"grad_norm": 2.9824588298797607,
"learning_rate": 3.83184023889511e-05,
"loss": 2.1727,
"step": 6260
},
{
"epoch": 0.7021276595744681,
"grad_norm": 11.302024841308594,
"learning_rate": 3.829973870847332e-05,
"loss": 2.2552,
"step": 6270
},
{
"epoch": 0.7032474804031354,
"grad_norm": 13.782984733581543,
"learning_rate": 3.828107502799552e-05,
"loss": 2.3492,
"step": 6280
},
{
"epoch": 0.7043673012318029,
"grad_norm": 2.993431329727173,
"learning_rate": 3.8262411347517735e-05,
"loss": 1.8148,
"step": 6290
},
{
"epoch": 0.7054871220604704,
"grad_norm": 7.389340400695801,
"learning_rate": 3.824374766703994e-05,
"loss": 1.9859,
"step": 6300
},
{
"epoch": 0.7066069428891377,
"grad_norm": 9.87246036529541,
"learning_rate": 3.822508398656215e-05,
"loss": 1.997,
"step": 6310
},
{
"epoch": 0.7077267637178052,
"grad_norm": 3.7313337326049805,
"learning_rate": 3.820642030608436e-05,
"loss": 2.1025,
"step": 6320
},
{
"epoch": 0.7088465845464725,
"grad_norm": 12.712873458862305,
"learning_rate": 3.818775662560657e-05,
"loss": 2.1316,
"step": 6330
},
{
"epoch": 0.70996640537514,
"grad_norm": 3.423029661178589,
"learning_rate": 3.816909294512878e-05,
"loss": 1.8786,
"step": 6340
},
{
"epoch": 0.7110862262038073,
"grad_norm": 2.487156391143799,
"learning_rate": 3.815042926465099e-05,
"loss": 2.2605,
"step": 6350
},
{
"epoch": 0.7122060470324748,
"grad_norm": 3.7588279247283936,
"learning_rate": 3.81317655841732e-05,
"loss": 2.1991,
"step": 6360
},
{
"epoch": 0.7133258678611423,
"grad_norm": 3.227130174636841,
"learning_rate": 3.811310190369541e-05,
"loss": 2.4743,
"step": 6370
},
{
"epoch": 0.7144456886898096,
"grad_norm": 7.917088985443115,
"learning_rate": 3.809443822321762e-05,
"loss": 2.1551,
"step": 6380
},
{
"epoch": 0.7155655095184771,
"grad_norm": 2.7183947563171387,
"learning_rate": 3.807577454273983e-05,
"loss": 2.188,
"step": 6390
},
{
"epoch": 0.7166853303471444,
"grad_norm": 3.0164196491241455,
"learning_rate": 3.805711086226204e-05,
"loss": 2.1092,
"step": 6400
},
{
"epoch": 0.7178051511758119,
"grad_norm": 3.0733413696289062,
"learning_rate": 3.803844718178425e-05,
"loss": 2.0739,
"step": 6410
},
{
"epoch": 0.7189249720044792,
"grad_norm": 3.4838147163391113,
"learning_rate": 3.801978350130646e-05,
"loss": 2.5713,
"step": 6420
},
{
"epoch": 0.7200447928331467,
"grad_norm": 7.422530174255371,
"learning_rate": 3.8001119820828665e-05,
"loss": 2.1976,
"step": 6430
},
{
"epoch": 0.7211646136618141,
"grad_norm": 3.010465383529663,
"learning_rate": 3.798245614035088e-05,
"loss": 2.1626,
"step": 6440
},
{
"epoch": 0.7222844344904815,
"grad_norm": 2.360297918319702,
"learning_rate": 3.796379245987309e-05,
"loss": 1.8999,
"step": 6450
},
{
"epoch": 0.723404255319149,
"grad_norm": 4.043231964111328,
"learning_rate": 3.79451287793953e-05,
"loss": 2.4137,
"step": 6460
},
{
"epoch": 0.7245240761478163,
"grad_norm": 13.390426635742188,
"learning_rate": 3.7926465098917505e-05,
"loss": 2.3208,
"step": 6470
},
{
"epoch": 0.7256438969764838,
"grad_norm": 3.9308841228485107,
"learning_rate": 3.790780141843972e-05,
"loss": 2.2158,
"step": 6480
},
{
"epoch": 0.7267637178051511,
"grad_norm": 7.398802280426025,
"learning_rate": 3.788913773796193e-05,
"loss": 2.3551,
"step": 6490
},
{
"epoch": 0.7278835386338186,
"grad_norm": 9.143538475036621,
"learning_rate": 3.787047405748414e-05,
"loss": 2.2071,
"step": 6500
},
{
"epoch": 0.729003359462486,
"grad_norm": 3.0637400150299072,
"learning_rate": 3.7851810377006345e-05,
"loss": 2.0304,
"step": 6510
},
{
"epoch": 0.7301231802911534,
"grad_norm": 3.206883668899536,
"learning_rate": 3.7833146696528557e-05,
"loss": 2.2099,
"step": 6520
},
{
"epoch": 0.7312430011198209,
"grad_norm": 10.21704387664795,
"learning_rate": 3.781448301605077e-05,
"loss": 2.4963,
"step": 6530
},
{
"epoch": 0.7323628219484882,
"grad_norm": 11.84740161895752,
"learning_rate": 3.779581933557298e-05,
"loss": 2.342,
"step": 6540
},
{
"epoch": 0.7334826427771557,
"grad_norm": 9.463152885437012,
"learning_rate": 3.7777155655095185e-05,
"loss": 2.2016,
"step": 6550
},
{
"epoch": 0.734602463605823,
"grad_norm": 3.3957138061523438,
"learning_rate": 3.7758491974617397e-05,
"loss": 2.285,
"step": 6560
},
{
"epoch": 0.7357222844344905,
"grad_norm": 3.3638765811920166,
"learning_rate": 3.773982829413961e-05,
"loss": 2.2848,
"step": 6570
},
{
"epoch": 0.7368421052631579,
"grad_norm": 8.35213565826416,
"learning_rate": 3.772116461366181e-05,
"loss": 2.1612,
"step": 6580
},
{
"epoch": 0.7379619260918253,
"grad_norm": 7.612375259399414,
"learning_rate": 3.7702500933184025e-05,
"loss": 2.2899,
"step": 6590
},
{
"epoch": 0.7390817469204927,
"grad_norm": 7.650971412658691,
"learning_rate": 3.7683837252706237e-05,
"loss": 2.0612,
"step": 6600
},
{
"epoch": 0.7402015677491601,
"grad_norm": 3.540432929992676,
"learning_rate": 3.766517357222845e-05,
"loss": 2.0103,
"step": 6610
},
{
"epoch": 0.7413213885778276,
"grad_norm": 8.57761001586914,
"learning_rate": 3.764650989175065e-05,
"loss": 2.3906,
"step": 6620
},
{
"epoch": 0.7424412094064949,
"grad_norm": 11.053874969482422,
"learning_rate": 3.7627846211272865e-05,
"loss": 2.2103,
"step": 6630
},
{
"epoch": 0.7435610302351624,
"grad_norm": 8.416523933410645,
"learning_rate": 3.760918253079507e-05,
"loss": 2.3171,
"step": 6640
},
{
"epoch": 0.7446808510638298,
"grad_norm": 3.172659397125244,
"learning_rate": 3.759051885031729e-05,
"loss": 2.0747,
"step": 6650
},
{
"epoch": 0.7458006718924972,
"grad_norm": 3.0577268600463867,
"learning_rate": 3.757185516983949e-05,
"loss": 1.7781,
"step": 6660
},
{
"epoch": 0.7469204927211646,
"grad_norm": 2.7276387214660645,
"learning_rate": 3.7553191489361705e-05,
"loss": 2.3419,
"step": 6670
},
{
"epoch": 0.748040313549832,
"grad_norm": 6.0162529945373535,
"learning_rate": 3.753452780888391e-05,
"loss": 2.047,
"step": 6680
},
{
"epoch": 0.7491601343784995,
"grad_norm": 8.213367462158203,
"learning_rate": 3.751586412840613e-05,
"loss": 2.1848,
"step": 6690
},
{
"epoch": 0.7502799552071668,
"grad_norm": 2.8683581352233887,
"learning_rate": 3.749720044792833e-05,
"loss": 2.3152,
"step": 6700
},
{
"epoch": 0.7513997760358343,
"grad_norm": 7.736207485198975,
"learning_rate": 3.7478536767450545e-05,
"loss": 2.2199,
"step": 6710
},
{
"epoch": 0.7525195968645016,
"grad_norm": 13.607484817504883,
"learning_rate": 3.745987308697275e-05,
"loss": 2.0759,
"step": 6720
},
{
"epoch": 0.7536394176931691,
"grad_norm": 12.661739349365234,
"learning_rate": 3.744120940649496e-05,
"loss": 2.1309,
"step": 6730
},
{
"epoch": 0.7547592385218365,
"grad_norm": 7.0321364402771,
"learning_rate": 3.742254572601717e-05,
"loss": 2.2709,
"step": 6740
},
{
"epoch": 0.7558790593505039,
"grad_norm": 7.621607780456543,
"learning_rate": 3.7403882045539385e-05,
"loss": 1.9236,
"step": 6750
},
{
"epoch": 0.7569988801791714,
"grad_norm": 9.952698707580566,
"learning_rate": 3.738521836506159e-05,
"loss": 2.2815,
"step": 6760
},
{
"epoch": 0.7581187010078387,
"grad_norm": 3.3789877891540527,
"learning_rate": 3.73665546845838e-05,
"loss": 1.7131,
"step": 6770
},
{
"epoch": 0.7592385218365062,
"grad_norm": 5.496334075927734,
"learning_rate": 3.734789100410601e-05,
"loss": 2.07,
"step": 6780
},
{
"epoch": 0.7603583426651735,
"grad_norm": 6.971884250640869,
"learning_rate": 3.732922732362822e-05,
"loss": 2.0964,
"step": 6790
},
{
"epoch": 0.761478163493841,
"grad_norm": 8.502189636230469,
"learning_rate": 3.731056364315043e-05,
"loss": 2.1573,
"step": 6800
},
{
"epoch": 0.7625979843225084,
"grad_norm": 12.738436698913574,
"learning_rate": 3.729189996267264e-05,
"loss": 2.1589,
"step": 6810
},
{
"epoch": 0.7637178051511758,
"grad_norm": 4.9455790519714355,
"learning_rate": 3.727323628219485e-05,
"loss": 2.1109,
"step": 6820
},
{
"epoch": 0.7648376259798432,
"grad_norm": 4.243088245391846,
"learning_rate": 3.725457260171706e-05,
"loss": 1.96,
"step": 6830
},
{
"epoch": 0.7659574468085106,
"grad_norm": 3.8965704441070557,
"learning_rate": 3.723590892123927e-05,
"loss": 2.0415,
"step": 6840
},
{
"epoch": 0.7670772676371781,
"grad_norm": 3.0561602115631104,
"learning_rate": 3.7217245240761475e-05,
"loss": 1.8753,
"step": 6850
},
{
"epoch": 0.7681970884658454,
"grad_norm": 3.344120979309082,
"learning_rate": 3.719858156028369e-05,
"loss": 2.3332,
"step": 6860
},
{
"epoch": 0.7693169092945129,
"grad_norm": 15.658031463623047,
"learning_rate": 3.71799178798059e-05,
"loss": 1.8573,
"step": 6870
},
{
"epoch": 0.7704367301231803,
"grad_norm": 13.487674713134766,
"learning_rate": 3.716125419932811e-05,
"loss": 2.1022,
"step": 6880
},
{
"epoch": 0.7715565509518477,
"grad_norm": 4.366361141204834,
"learning_rate": 3.7142590518850315e-05,
"loss": 2.1056,
"step": 6890
},
{
"epoch": 0.7726763717805151,
"grad_norm": 6.940586566925049,
"learning_rate": 3.712392683837253e-05,
"loss": 1.7214,
"step": 6900
},
{
"epoch": 0.7737961926091825,
"grad_norm": 3.119396448135376,
"learning_rate": 3.710526315789474e-05,
"loss": 2.4966,
"step": 6910
},
{
"epoch": 0.77491601343785,
"grad_norm": 2.804882287979126,
"learning_rate": 3.708659947741695e-05,
"loss": 1.9811,
"step": 6920
},
{
"epoch": 0.7760358342665173,
"grad_norm": 6.220757484436035,
"learning_rate": 3.7067935796939155e-05,
"loss": 2.0228,
"step": 6930
},
{
"epoch": 0.7771556550951848,
"grad_norm": 7.664346218109131,
"learning_rate": 3.7049272116461367e-05,
"loss": 2.375,
"step": 6940
},
{
"epoch": 0.7782754759238522,
"grad_norm": 3.2381927967071533,
"learning_rate": 3.703060843598358e-05,
"loss": 2.1603,
"step": 6950
},
{
"epoch": 0.7793952967525196,
"grad_norm": 2.67271089553833,
"learning_rate": 3.701194475550579e-05,
"loss": 1.975,
"step": 6960
},
{
"epoch": 0.780515117581187,
"grad_norm": 8.897006034851074,
"learning_rate": 3.6993281075027995e-05,
"loss": 2.1017,
"step": 6970
},
{
"epoch": 0.7816349384098544,
"grad_norm": 4.098658084869385,
"learning_rate": 3.6974617394550207e-05,
"loss": 2.0624,
"step": 6980
},
{
"epoch": 0.7827547592385219,
"grad_norm": 9.428001403808594,
"learning_rate": 3.695595371407242e-05,
"loss": 1.9006,
"step": 6990
},
{
"epoch": 0.7838745800671892,
"grad_norm": 3.8001720905303955,
"learning_rate": 3.693729003359463e-05,
"loss": 2.2506,
"step": 7000
},
{
"epoch": 0.7849944008958567,
"grad_norm": 14.967480659484863,
"learning_rate": 3.6918626353116835e-05,
"loss": 2.4808,
"step": 7010
},
{
"epoch": 0.786114221724524,
"grad_norm": 5.58108377456665,
"learning_rate": 3.6899962672639047e-05,
"loss": 2.5445,
"step": 7020
},
{
"epoch": 0.7872340425531915,
"grad_norm": 4.169144153594971,
"learning_rate": 3.688129899216126e-05,
"loss": 2.2701,
"step": 7030
},
{
"epoch": 0.7883538633818589,
"grad_norm": 3.629635810852051,
"learning_rate": 3.686263531168346e-05,
"loss": 2.0769,
"step": 7040
},
{
"epoch": 0.7894736842105263,
"grad_norm": 3.2318367958068848,
"learning_rate": 3.6843971631205675e-05,
"loss": 1.9304,
"step": 7050
},
{
"epoch": 0.7905935050391937,
"grad_norm": 3.569641590118408,
"learning_rate": 3.682530795072788e-05,
"loss": 1.9362,
"step": 7060
},
{
"epoch": 0.7917133258678611,
"grad_norm": 3.6073529720306396,
"learning_rate": 3.68066442702501e-05,
"loss": 2.1551,
"step": 7070
},
{
"epoch": 0.7928331466965286,
"grad_norm": 2.949209690093994,
"learning_rate": 3.67879805897723e-05,
"loss": 1.7599,
"step": 7080
},
{
"epoch": 0.793952967525196,
"grad_norm": 7.541772842407227,
"learning_rate": 3.6769316909294515e-05,
"loss": 2.2318,
"step": 7090
},
{
"epoch": 0.7950727883538634,
"grad_norm": 3.108989953994751,
"learning_rate": 3.675065322881672e-05,
"loss": 2.1161,
"step": 7100
},
{
"epoch": 0.7961926091825308,
"grad_norm": 2.859032392501831,
"learning_rate": 3.673198954833894e-05,
"loss": 2.2659,
"step": 7110
},
{
"epoch": 0.7973124300111982,
"grad_norm": 4.491294860839844,
"learning_rate": 3.671332586786114e-05,
"loss": 1.4341,
"step": 7120
},
{
"epoch": 0.7984322508398656,
"grad_norm": 8.079992294311523,
"learning_rate": 3.6694662187383355e-05,
"loss": 2.1608,
"step": 7130
},
{
"epoch": 0.799552071668533,
"grad_norm": 3.3629186153411865,
"learning_rate": 3.667599850690556e-05,
"loss": 2.0803,
"step": 7140
},
{
"epoch": 0.8006718924972005,
"grad_norm": 7.034578800201416,
"learning_rate": 3.665733482642777e-05,
"loss": 2.2975,
"step": 7150
},
{
"epoch": 0.8017917133258678,
"grad_norm": 3.3165249824523926,
"learning_rate": 3.663867114594998e-05,
"loss": 2.0453,
"step": 7160
},
{
"epoch": 0.8029115341545353,
"grad_norm": 11.479082107543945,
"learning_rate": 3.6620007465472195e-05,
"loss": 1.9926,
"step": 7170
},
{
"epoch": 0.8040313549832027,
"grad_norm": 2.8620989322662354,
"learning_rate": 3.66013437849944e-05,
"loss": 2.0147,
"step": 7180
},
{
"epoch": 0.8051511758118701,
"grad_norm": 9.503447532653809,
"learning_rate": 3.658268010451661e-05,
"loss": 2.1835,
"step": 7190
},
{
"epoch": 0.8062709966405375,
"grad_norm": 2.9380719661712646,
"learning_rate": 3.656401642403882e-05,
"loss": 1.9345,
"step": 7200
},
{
"epoch": 0.8073908174692049,
"grad_norm": 9.880309104919434,
"learning_rate": 3.6545352743561035e-05,
"loss": 2.1885,
"step": 7210
},
{
"epoch": 0.8085106382978723,
"grad_norm": 8.49301528930664,
"learning_rate": 3.652668906308324e-05,
"loss": 1.7974,
"step": 7220
},
{
"epoch": 0.8096304591265397,
"grad_norm": 7.494529724121094,
"learning_rate": 3.650802538260545e-05,
"loss": 2.1994,
"step": 7230
},
{
"epoch": 0.8107502799552072,
"grad_norm": 2.999682664871216,
"learning_rate": 3.648936170212766e-05,
"loss": 2.2821,
"step": 7240
},
{
"epoch": 0.8118701007838746,
"grad_norm": 2.5797007083892822,
"learning_rate": 3.647069802164987e-05,
"loss": 2.1872,
"step": 7250
},
{
"epoch": 0.812989921612542,
"grad_norm": 9.615920066833496,
"learning_rate": 3.645203434117208e-05,
"loss": 2.3596,
"step": 7260
},
{
"epoch": 0.8141097424412094,
"grad_norm": 8.524604797363281,
"learning_rate": 3.6433370660694285e-05,
"loss": 2.6097,
"step": 7270
},
{
"epoch": 0.8152295632698768,
"grad_norm": 7.994124889373779,
"learning_rate": 3.64147069802165e-05,
"loss": 1.7589,
"step": 7280
},
{
"epoch": 0.8163493840985442,
"grad_norm": 2.902440071105957,
"learning_rate": 3.639604329973871e-05,
"loss": 2.1298,
"step": 7290
},
{
"epoch": 0.8174692049272116,
"grad_norm": 2.9455184936523438,
"learning_rate": 3.637737961926092e-05,
"loss": 2.2527,
"step": 7300
},
{
"epoch": 0.8185890257558791,
"grad_norm": 7.609272003173828,
"learning_rate": 3.6358715938783125e-05,
"loss": 2.2256,
"step": 7310
},
{
"epoch": 0.8197088465845465,
"grad_norm": 3.6720242500305176,
"learning_rate": 3.634005225830534e-05,
"loss": 2.034,
"step": 7320
},
{
"epoch": 0.8208286674132139,
"grad_norm": 6.270810604095459,
"learning_rate": 3.632138857782755e-05,
"loss": 1.9424,
"step": 7330
},
{
"epoch": 0.8219484882418813,
"grad_norm": 9.397404670715332,
"learning_rate": 3.630272489734976e-05,
"loss": 2.0945,
"step": 7340
},
{
"epoch": 0.8230683090705487,
"grad_norm": 3.0468692779541016,
"learning_rate": 3.6284061216871965e-05,
"loss": 2.382,
"step": 7350
},
{
"epoch": 0.8241881298992161,
"grad_norm": 5.612720966339111,
"learning_rate": 3.626539753639418e-05,
"loss": 1.9622,
"step": 7360
},
{
"epoch": 0.8253079507278835,
"grad_norm": 4.7055983543396,
"learning_rate": 3.624673385591639e-05,
"loss": 2.2322,
"step": 7370
},
{
"epoch": 0.826427771556551,
"grad_norm": 4.574550628662109,
"learning_rate": 3.62280701754386e-05,
"loss": 2.0364,
"step": 7380
},
{
"epoch": 0.8275475923852184,
"grad_norm": 9.704349517822266,
"learning_rate": 3.6209406494960805e-05,
"loss": 2.6025,
"step": 7390
},
{
"epoch": 0.8286674132138858,
"grad_norm": 3.6313247680664062,
"learning_rate": 3.6190742814483017e-05,
"loss": 2.0449,
"step": 7400
},
{
"epoch": 0.8297872340425532,
"grad_norm": 5.157100200653076,
"learning_rate": 3.617207913400523e-05,
"loss": 2.0293,
"step": 7410
},
{
"epoch": 0.8309070548712206,
"grad_norm": 11.102890968322754,
"learning_rate": 3.615341545352744e-05,
"loss": 2.3059,
"step": 7420
},
{
"epoch": 0.832026875699888,
"grad_norm": 6.320305347442627,
"learning_rate": 3.6134751773049645e-05,
"loss": 2.1117,
"step": 7430
},
{
"epoch": 0.8331466965285554,
"grad_norm": 9.381714820861816,
"learning_rate": 3.611608809257186e-05,
"loss": 2.5279,
"step": 7440
},
{
"epoch": 0.8342665173572228,
"grad_norm": 3.505153179168701,
"learning_rate": 3.609742441209407e-05,
"loss": 2.176,
"step": 7450
},
{
"epoch": 0.8353863381858903,
"grad_norm": 6.633389472961426,
"learning_rate": 3.607876073161628e-05,
"loss": 2.1151,
"step": 7460
},
{
"epoch": 0.8365061590145577,
"grad_norm": 2.6333770751953125,
"learning_rate": 3.6060097051138485e-05,
"loss": 2.4062,
"step": 7470
},
{
"epoch": 0.8376259798432251,
"grad_norm": 3.540119171142578,
"learning_rate": 3.60414333706607e-05,
"loss": 2.1977,
"step": 7480
},
{
"epoch": 0.8387458006718925,
"grad_norm": 2.524616003036499,
"learning_rate": 3.602276969018291e-05,
"loss": 2.1205,
"step": 7490
},
{
"epoch": 0.8398656215005599,
"grad_norm": 6.62229061126709,
"learning_rate": 3.600410600970511e-05,
"loss": 2.2605,
"step": 7500
},
{
"epoch": 0.8409854423292273,
"grad_norm": 3.3816375732421875,
"learning_rate": 3.5985442329227325e-05,
"loss": 2.0532,
"step": 7510
},
{
"epoch": 0.8421052631578947,
"grad_norm": 2.873293161392212,
"learning_rate": 3.596677864874953e-05,
"loss": 2.251,
"step": 7520
},
{
"epoch": 0.8432250839865622,
"grad_norm": 8.764281272888184,
"learning_rate": 3.594811496827175e-05,
"loss": 2.2559,
"step": 7530
},
{
"epoch": 0.8443449048152296,
"grad_norm": 11.748472213745117,
"learning_rate": 3.592945128779395e-05,
"loss": 1.9883,
"step": 7540
},
{
"epoch": 0.845464725643897,
"grad_norm": 8.871268272399902,
"learning_rate": 3.5910787607316165e-05,
"loss": 2.2952,
"step": 7550
},
{
"epoch": 0.8465845464725644,
"grad_norm": 3.4777164459228516,
"learning_rate": 3.589212392683837e-05,
"loss": 2.1871,
"step": 7560
},
{
"epoch": 0.8477043673012318,
"grad_norm": 13.322107315063477,
"learning_rate": 3.587346024636059e-05,
"loss": 2.2841,
"step": 7570
},
{
"epoch": 0.8488241881298992,
"grad_norm": 3.384903907775879,
"learning_rate": 3.585479656588279e-05,
"loss": 1.7451,
"step": 7580
},
{
"epoch": 0.8499440089585666,
"grad_norm": 3.1881563663482666,
"learning_rate": 3.5836132885405005e-05,
"loss": 2.3855,
"step": 7590
},
{
"epoch": 0.851063829787234,
"grad_norm": 8.006708145141602,
"learning_rate": 3.581746920492721e-05,
"loss": 2.0241,
"step": 7600
},
{
"epoch": 0.8521836506159015,
"grad_norm": 3.468590259552002,
"learning_rate": 3.579880552444942e-05,
"loss": 2.3348,
"step": 7610
},
{
"epoch": 0.8533034714445689,
"grad_norm": 8.839496612548828,
"learning_rate": 3.578014184397163e-05,
"loss": 2.0181,
"step": 7620
},
{
"epoch": 0.8544232922732363,
"grad_norm": 2.8391735553741455,
"learning_rate": 3.5761478163493845e-05,
"loss": 2.2046,
"step": 7630
},
{
"epoch": 0.8555431131019037,
"grad_norm": 2.341062068939209,
"learning_rate": 3.574281448301605e-05,
"loss": 2.509,
"step": 7640
},
{
"epoch": 0.8566629339305711,
"grad_norm": 4.910477161407471,
"learning_rate": 3.572415080253826e-05,
"loss": 2.4332,
"step": 7650
},
{
"epoch": 0.8577827547592385,
"grad_norm": 9.427245140075684,
"learning_rate": 3.570548712206047e-05,
"loss": 2.2668,
"step": 7660
},
{
"epoch": 0.858902575587906,
"grad_norm": 2.9346938133239746,
"learning_rate": 3.5686823441582685e-05,
"loss": 1.8698,
"step": 7670
},
{
"epoch": 0.8600223964165733,
"grad_norm": 3.292447328567505,
"learning_rate": 3.566815976110489e-05,
"loss": 2.1736,
"step": 7680
},
{
"epoch": 0.8611422172452408,
"grad_norm": 3.89292573928833,
"learning_rate": 3.56494960806271e-05,
"loss": 2.4429,
"step": 7690
},
{
"epoch": 0.8622620380739082,
"grad_norm": 8.658332824707031,
"learning_rate": 3.563083240014931e-05,
"loss": 2.1728,
"step": 7700
},
{
"epoch": 0.8633818589025756,
"grad_norm": 8.725335121154785,
"learning_rate": 3.5612168719671525e-05,
"loss": 2.0126,
"step": 7710
},
{
"epoch": 0.864501679731243,
"grad_norm": 2.872495412826538,
"learning_rate": 3.559350503919373e-05,
"loss": 2.4384,
"step": 7720
},
{
"epoch": 0.8656215005599104,
"grad_norm": 17.420711517333984,
"learning_rate": 3.5574841358715935e-05,
"loss": 1.8937,
"step": 7730
},
{
"epoch": 0.8667413213885778,
"grad_norm": 7.1470489501953125,
"learning_rate": 3.555617767823815e-05,
"loss": 1.6198,
"step": 7740
},
{
"epoch": 0.8678611422172452,
"grad_norm": 9.926697731018066,
"learning_rate": 3.553751399776036e-05,
"loss": 1.9396,
"step": 7750
},
{
"epoch": 0.8689809630459127,
"grad_norm": 3.2263200283050537,
"learning_rate": 3.551885031728257e-05,
"loss": 2.3201,
"step": 7760
},
{
"epoch": 0.8701007838745801,
"grad_norm": 7.058889865875244,
"learning_rate": 3.5500186636804775e-05,
"loss": 2.1745,
"step": 7770
},
{
"epoch": 0.8712206047032475,
"grad_norm": 12.746253967285156,
"learning_rate": 3.548152295632699e-05,
"loss": 2.4713,
"step": 7780
},
{
"epoch": 0.8723404255319149,
"grad_norm": 10.826375961303711,
"learning_rate": 3.54628592758492e-05,
"loss": 1.9633,
"step": 7790
},
{
"epoch": 0.8734602463605823,
"grad_norm": 3.0302278995513916,
"learning_rate": 3.544419559537141e-05,
"loss": 2.3689,
"step": 7800
},
{
"epoch": 0.8745800671892497,
"grad_norm": 2.6338868141174316,
"learning_rate": 3.5425531914893615e-05,
"loss": 1.6076,
"step": 7810
},
{
"epoch": 0.8756998880179171,
"grad_norm": 2.724898099899292,
"learning_rate": 3.540686823441583e-05,
"loss": 1.6231,
"step": 7820
},
{
"epoch": 0.8768197088465846,
"grad_norm": 5.752689361572266,
"learning_rate": 3.538820455393804e-05,
"loss": 1.8717,
"step": 7830
},
{
"epoch": 0.8779395296752519,
"grad_norm": 3.7347118854522705,
"learning_rate": 3.536954087346025e-05,
"loss": 2.1681,
"step": 7840
},
{
"epoch": 0.8790593505039194,
"grad_norm": 6.816357612609863,
"learning_rate": 3.5350877192982455e-05,
"loss": 1.6885,
"step": 7850
},
{
"epoch": 0.8801791713325868,
"grad_norm": 10.730504989624023,
"learning_rate": 3.533221351250467e-05,
"loss": 2.3201,
"step": 7860
},
{
"epoch": 0.8812989921612542,
"grad_norm": 10.225847244262695,
"learning_rate": 3.531354983202688e-05,
"loss": 2.307,
"step": 7870
},
{
"epoch": 0.8824188129899216,
"grad_norm": 3.7461752891540527,
"learning_rate": 3.529488615154909e-05,
"loss": 2.4028,
"step": 7880
},
{
"epoch": 0.883538633818589,
"grad_norm": 3.2298667430877686,
"learning_rate": 3.5276222471071295e-05,
"loss": 2.2726,
"step": 7890
},
{
"epoch": 0.8846584546472565,
"grad_norm": 4.180459976196289,
"learning_rate": 3.525755879059351e-05,
"loss": 2.3693,
"step": 7900
},
{
"epoch": 0.8857782754759238,
"grad_norm": 3.8476152420043945,
"learning_rate": 3.523889511011572e-05,
"loss": 1.9672,
"step": 7910
},
{
"epoch": 0.8868980963045913,
"grad_norm": 2.8095366954803467,
"learning_rate": 3.522023142963793e-05,
"loss": 1.5762,
"step": 7920
},
{
"epoch": 0.8880179171332587,
"grad_norm": 11.424389839172363,
"learning_rate": 3.5201567749160135e-05,
"loss": 1.9222,
"step": 7930
},
{
"epoch": 0.8891377379619261,
"grad_norm": 10.916204452514648,
"learning_rate": 3.518290406868235e-05,
"loss": 2.2247,
"step": 7940
},
{
"epoch": 0.8902575587905935,
"grad_norm": 8.619460105895996,
"learning_rate": 3.516424038820456e-05,
"loss": 2.5038,
"step": 7950
},
{
"epoch": 0.8913773796192609,
"grad_norm": 3.383333444595337,
"learning_rate": 3.514557670772676e-05,
"loss": 2.0325,
"step": 7960
},
{
"epoch": 0.8924972004479284,
"grad_norm": 2.8794310092926025,
"learning_rate": 3.5126913027248975e-05,
"loss": 2.2037,
"step": 7970
},
{
"epoch": 0.8936170212765957,
"grad_norm": 3.270104169845581,
"learning_rate": 3.510824934677118e-05,
"loss": 2.5766,
"step": 7980
},
{
"epoch": 0.8947368421052632,
"grad_norm": 5.58250617980957,
"learning_rate": 3.50895856662934e-05,
"loss": 2.0883,
"step": 7990
},
{
"epoch": 0.8958566629339306,
"grad_norm": 4.2217488288879395,
"learning_rate": 3.50709219858156e-05,
"loss": 2.1016,
"step": 8000
},
{
"epoch": 0.896976483762598,
"grad_norm": 5.107589244842529,
"learning_rate": 3.5052258305337815e-05,
"loss": 1.9658,
"step": 8010
},
{
"epoch": 0.8980963045912654,
"grad_norm": 3.2384800910949707,
"learning_rate": 3.503359462486002e-05,
"loss": 1.8828,
"step": 8020
},
{
"epoch": 0.8992161254199328,
"grad_norm": 3.6768581867218018,
"learning_rate": 3.501493094438223e-05,
"loss": 1.6965,
"step": 8030
},
{
"epoch": 0.9003359462486002,
"grad_norm": 3.0174429416656494,
"learning_rate": 3.499626726390444e-05,
"loss": 2.16,
"step": 8040
},
{
"epoch": 0.9014557670772676,
"grad_norm": 13.087141036987305,
"learning_rate": 3.4977603583426655e-05,
"loss": 1.7648,
"step": 8050
},
{
"epoch": 0.9025755879059351,
"grad_norm": 11.361166000366211,
"learning_rate": 3.495893990294886e-05,
"loss": 1.94,
"step": 8060
},
{
"epoch": 0.9036954087346024,
"grad_norm": 3.1637301445007324,
"learning_rate": 3.494027622247107e-05,
"loss": 2.0952,
"step": 8070
},
{
"epoch": 0.9048152295632699,
"grad_norm": 3.610626459121704,
"learning_rate": 3.492161254199328e-05,
"loss": 2.4445,
"step": 8080
},
{
"epoch": 0.9059350503919373,
"grad_norm": 3.5841760635375977,
"learning_rate": 3.4902948861515495e-05,
"loss": 2.3581,
"step": 8090
},
{
"epoch": 0.9070548712206047,
"grad_norm": 8.359783172607422,
"learning_rate": 3.48842851810377e-05,
"loss": 2.0259,
"step": 8100
},
{
"epoch": 0.9081746920492721,
"grad_norm": 3.30151629447937,
"learning_rate": 3.486562150055991e-05,
"loss": 2.3721,
"step": 8110
},
{
"epoch": 0.9092945128779395,
"grad_norm": 3.3997480869293213,
"learning_rate": 3.484695782008212e-05,
"loss": 2.0008,
"step": 8120
},
{
"epoch": 0.910414333706607,
"grad_norm": 4.009676456451416,
"learning_rate": 3.4828294139604335e-05,
"loss": 1.8577,
"step": 8130
},
{
"epoch": 0.9115341545352743,
"grad_norm": 11.38424015045166,
"learning_rate": 3.480963045912654e-05,
"loss": 2.1525,
"step": 8140
},
{
"epoch": 0.9126539753639418,
"grad_norm": 3.4985761642456055,
"learning_rate": 3.479096677864875e-05,
"loss": 2.0185,
"step": 8150
},
{
"epoch": 0.9137737961926092,
"grad_norm": 2.8695907592773438,
"learning_rate": 3.4772303098170963e-05,
"loss": 2.2721,
"step": 8160
},
{
"epoch": 0.9148936170212766,
"grad_norm": 9.850143432617188,
"learning_rate": 3.4753639417693175e-05,
"loss": 2.0199,
"step": 8170
},
{
"epoch": 0.916013437849944,
"grad_norm": 6.895715713500977,
"learning_rate": 3.473497573721538e-05,
"loss": 2.0351,
"step": 8180
},
{
"epoch": 0.9171332586786114,
"grad_norm": 3.088392972946167,
"learning_rate": 3.4716312056737585e-05,
"loss": 2.1426,
"step": 8190
},
{
"epoch": 0.9182530795072789,
"grad_norm": 2.6038413047790527,
"learning_rate": 3.4697648376259803e-05,
"loss": 1.9374,
"step": 8200
},
{
"epoch": 0.9193729003359462,
"grad_norm": 8.73396110534668,
"learning_rate": 3.467898469578201e-05,
"loss": 2.3542,
"step": 8210
},
{
"epoch": 0.9204927211646137,
"grad_norm": 7.529842853546143,
"learning_rate": 3.466032101530422e-05,
"loss": 2.5576,
"step": 8220
},
{
"epoch": 0.921612541993281,
"grad_norm": 11.439668655395508,
"learning_rate": 3.4641657334826425e-05,
"loss": 2.3506,
"step": 8230
},
{
"epoch": 0.9227323628219485,
"grad_norm": 11.114765167236328,
"learning_rate": 3.462299365434864e-05,
"loss": 2.1594,
"step": 8240
},
{
"epoch": 0.9238521836506159,
"grad_norm": 3.631915807723999,
"learning_rate": 3.460432997387085e-05,
"loss": 1.9301,
"step": 8250
},
{
"epoch": 0.9249720044792833,
"grad_norm": 4.813271522521973,
"learning_rate": 3.458566629339306e-05,
"loss": 1.7937,
"step": 8260
},
{
"epoch": 0.9260918253079508,
"grad_norm": 9.251919746398926,
"learning_rate": 3.4567002612915265e-05,
"loss": 2.4907,
"step": 8270
},
{
"epoch": 0.9272116461366181,
"grad_norm": 10.042062759399414,
"learning_rate": 3.454833893243748e-05,
"loss": 2.3766,
"step": 8280
},
{
"epoch": 0.9283314669652856,
"grad_norm": 5.098442554473877,
"learning_rate": 3.452967525195969e-05,
"loss": 1.9331,
"step": 8290
},
{
"epoch": 0.9294512877939529,
"grad_norm": 3.054330348968506,
"learning_rate": 3.45110115714819e-05,
"loss": 1.9556,
"step": 8300
},
{
"epoch": 0.9305711086226204,
"grad_norm": 5.500843524932861,
"learning_rate": 3.4492347891004105e-05,
"loss": 1.8301,
"step": 8310
},
{
"epoch": 0.9316909294512878,
"grad_norm": 11.334184646606445,
"learning_rate": 3.447368421052632e-05,
"loss": 2.3866,
"step": 8320
},
{
"epoch": 0.9328107502799552,
"grad_norm": 9.781439781188965,
"learning_rate": 3.445502053004853e-05,
"loss": 2.3349,
"step": 8330
},
{
"epoch": 0.9339305711086227,
"grad_norm": 5.9633049964904785,
"learning_rate": 3.443635684957074e-05,
"loss": 1.5215,
"step": 8340
},
{
"epoch": 0.93505039193729,
"grad_norm": 9.052412033081055,
"learning_rate": 3.4417693169092945e-05,
"loss": 2.2278,
"step": 8350
},
{
"epoch": 0.9361702127659575,
"grad_norm": 15.582505226135254,
"learning_rate": 3.439902948861516e-05,
"loss": 2.2908,
"step": 8360
},
{
"epoch": 0.9372900335946248,
"grad_norm": 9.649676322937012,
"learning_rate": 3.438036580813737e-05,
"loss": 2.0235,
"step": 8370
},
{
"epoch": 0.9384098544232923,
"grad_norm": 2.680288314819336,
"learning_rate": 3.436170212765958e-05,
"loss": 1.9282,
"step": 8380
},
{
"epoch": 0.9395296752519597,
"grad_norm": 3.08258318901062,
"learning_rate": 3.4343038447181785e-05,
"loss": 2.2415,
"step": 8390
},
{
"epoch": 0.9406494960806271,
"grad_norm": 4.9708380699157715,
"learning_rate": 3.4324374766704e-05,
"loss": 2.3105,
"step": 8400
},
{
"epoch": 0.9417693169092946,
"grad_norm": 2.7266993522644043,
"learning_rate": 3.430571108622621e-05,
"loss": 2.2027,
"step": 8410
},
{
"epoch": 0.9428891377379619,
"grad_norm": 10.68362808227539,
"learning_rate": 3.428704740574841e-05,
"loss": 2.0632,
"step": 8420
},
{
"epoch": 0.9440089585666294,
"grad_norm": 3.0944361686706543,
"learning_rate": 3.4268383725270625e-05,
"loss": 2.2351,
"step": 8430
},
{
"epoch": 0.9451287793952967,
"grad_norm": 3.2292227745056152,
"learning_rate": 3.424972004479283e-05,
"loss": 1.9755,
"step": 8440
},
{
"epoch": 0.9462486002239642,
"grad_norm": 16.302453994750977,
"learning_rate": 3.423105636431504e-05,
"loss": 2.0759,
"step": 8450
},
{
"epoch": 0.9473684210526315,
"grad_norm": 4.625180244445801,
"learning_rate": 3.421239268383725e-05,
"loss": 2.5148,
"step": 8460
},
{
"epoch": 0.948488241881299,
"grad_norm": 8.57646656036377,
"learning_rate": 3.4193729003359465e-05,
"loss": 1.9348,
"step": 8470
},
{
"epoch": 0.9496080627099664,
"grad_norm": 3.611316442489624,
"learning_rate": 3.417506532288167e-05,
"loss": 2.3202,
"step": 8480
},
{
"epoch": 0.9507278835386338,
"grad_norm": 12.828388214111328,
"learning_rate": 3.415640164240388e-05,
"loss": 2.0078,
"step": 8490
},
{
"epoch": 0.9518477043673013,
"grad_norm": 7.542992115020752,
"learning_rate": 3.413773796192609e-05,
"loss": 1.8984,
"step": 8500
},
{
"epoch": 0.9529675251959686,
"grad_norm": 10.747339248657227,
"learning_rate": 3.4119074281448305e-05,
"loss": 2.2511,
"step": 8510
},
{
"epoch": 0.9540873460246361,
"grad_norm": 6.7283453941345215,
"learning_rate": 3.410041060097051e-05,
"loss": 1.9405,
"step": 8520
},
{
"epoch": 0.9552071668533034,
"grad_norm": 2.935981512069702,
"learning_rate": 3.408174692049272e-05,
"loss": 2.1033,
"step": 8530
},
{
"epoch": 0.9563269876819709,
"grad_norm": 3.4737389087677,
"learning_rate": 3.4063083240014933e-05,
"loss": 2.0328,
"step": 8540
},
{
"epoch": 0.9574468085106383,
"grad_norm": 8.525548934936523,
"learning_rate": 3.4044419559537145e-05,
"loss": 1.8537,
"step": 8550
},
{
"epoch": 0.9585666293393057,
"grad_norm": 6.467761516571045,
"learning_rate": 3.402575587905935e-05,
"loss": 2.0973,
"step": 8560
},
{
"epoch": 0.9596864501679732,
"grad_norm": 10.39410400390625,
"learning_rate": 3.400709219858156e-05,
"loss": 2.0941,
"step": 8570
},
{
"epoch": 0.9608062709966405,
"grad_norm": 5.414796829223633,
"learning_rate": 3.3988428518103773e-05,
"loss": 1.6318,
"step": 8580
},
{
"epoch": 0.961926091825308,
"grad_norm": 2.808164119720459,
"learning_rate": 3.3969764837625985e-05,
"loss": 1.8583,
"step": 8590
},
{
"epoch": 0.9630459126539753,
"grad_norm": 2.665485382080078,
"learning_rate": 3.395110115714819e-05,
"loss": 2.1733,
"step": 8600
},
{
"epoch": 0.9641657334826428,
"grad_norm": 3.183068037033081,
"learning_rate": 3.39324374766704e-05,
"loss": 1.7059,
"step": 8610
},
{
"epoch": 0.9652855543113102,
"grad_norm": 2.7500557899475098,
"learning_rate": 3.3913773796192613e-05,
"loss": 2.2814,
"step": 8620
},
{
"epoch": 0.9664053751399776,
"grad_norm": 6.916834831237793,
"learning_rate": 3.3895110115714825e-05,
"loss": 2.0092,
"step": 8630
},
{
"epoch": 0.9675251959686451,
"grad_norm": 11.789180755615234,
"learning_rate": 3.387644643523703e-05,
"loss": 1.744,
"step": 8640
},
{
"epoch": 0.9686450167973124,
"grad_norm": 12.441953659057617,
"learning_rate": 3.3857782754759235e-05,
"loss": 2.2952,
"step": 8650
},
{
"epoch": 0.9697648376259799,
"grad_norm": 4.348373889923096,
"learning_rate": 3.3839119074281453e-05,
"loss": 1.8666,
"step": 8660
},
{
"epoch": 0.9708846584546472,
"grad_norm": 3.6366405487060547,
"learning_rate": 3.382045539380366e-05,
"loss": 2.3619,
"step": 8670
},
{
"epoch": 0.9720044792833147,
"grad_norm": 5.705763816833496,
"learning_rate": 3.380179171332587e-05,
"loss": 1.8297,
"step": 8680
},
{
"epoch": 0.973124300111982,
"grad_norm": 8.548418998718262,
"learning_rate": 3.3783128032848075e-05,
"loss": 1.6297,
"step": 8690
},
{
"epoch": 0.9742441209406495,
"grad_norm": 6.786285877227783,
"learning_rate": 3.376446435237029e-05,
"loss": 1.6739,
"step": 8700
},
{
"epoch": 0.975363941769317,
"grad_norm": 2.983182430267334,
"learning_rate": 3.37458006718925e-05,
"loss": 1.8939,
"step": 8710
},
{
"epoch": 0.9764837625979843,
"grad_norm": 7.78575325012207,
"learning_rate": 3.372713699141471e-05,
"loss": 2.2775,
"step": 8720
},
{
"epoch": 0.9776035834266518,
"grad_norm": 2.9739723205566406,
"learning_rate": 3.3708473310936915e-05,
"loss": 2.3235,
"step": 8730
},
{
"epoch": 0.9787234042553191,
"grad_norm": 13.118427276611328,
"learning_rate": 3.368980963045913e-05,
"loss": 2.2344,
"step": 8740
},
{
"epoch": 0.9798432250839866,
"grad_norm": 3.2696194648742676,
"learning_rate": 3.367114594998134e-05,
"loss": 2.3788,
"step": 8750
},
{
"epoch": 0.9809630459126539,
"grad_norm": 2.9010257720947266,
"learning_rate": 3.365248226950355e-05,
"loss": 2.0307,
"step": 8760
},
{
"epoch": 0.9820828667413214,
"grad_norm": 3.2224440574645996,
"learning_rate": 3.3633818589025755e-05,
"loss": 2.0968,
"step": 8770
},
{
"epoch": 0.9832026875699889,
"grad_norm": 9.395108222961426,
"learning_rate": 3.361515490854797e-05,
"loss": 2.2748,
"step": 8780
},
{
"epoch": 0.9843225083986562,
"grad_norm": 3.0687882900238037,
"learning_rate": 3.359649122807018e-05,
"loss": 1.7044,
"step": 8790
},
{
"epoch": 0.9854423292273237,
"grad_norm": 3.7267823219299316,
"learning_rate": 3.357782754759239e-05,
"loss": 1.6726,
"step": 8800
},
{
"epoch": 0.986562150055991,
"grad_norm": 3.8064417839050293,
"learning_rate": 3.3559163867114595e-05,
"loss": 1.8408,
"step": 8810
},
{
"epoch": 0.9876819708846585,
"grad_norm": 8.669193267822266,
"learning_rate": 3.354050018663681e-05,
"loss": 1.8474,
"step": 8820
},
{
"epoch": 0.9888017917133258,
"grad_norm": 14.256889343261719,
"learning_rate": 3.352183650615902e-05,
"loss": 2.0988,
"step": 8830
},
{
"epoch": 0.9899216125419933,
"grad_norm": 10.54806137084961,
"learning_rate": 3.350317282568123e-05,
"loss": 2.1008,
"step": 8840
},
{
"epoch": 0.9910414333706606,
"grad_norm": 3.6541545391082764,
"learning_rate": 3.3484509145203435e-05,
"loss": 1.7903,
"step": 8850
},
{
"epoch": 0.9921612541993281,
"grad_norm": 3.3884453773498535,
"learning_rate": 3.346584546472565e-05,
"loss": 2.0664,
"step": 8860
},
{
"epoch": 0.9932810750279956,
"grad_norm": 3.597472906112671,
"learning_rate": 3.344718178424786e-05,
"loss": 2.2501,
"step": 8870
},
{
"epoch": 0.9944008958566629,
"grad_norm": 3.326669931411743,
"learning_rate": 3.3428518103770063e-05,
"loss": 2.2494,
"step": 8880
},
{
"epoch": 0.9955207166853304,
"grad_norm": 3.445563316345215,
"learning_rate": 3.3409854423292275e-05,
"loss": 2.4157,
"step": 8890
},
{
"epoch": 0.9966405375139977,
"grad_norm": 3.6265370845794678,
"learning_rate": 3.339119074281448e-05,
"loss": 1.9543,
"step": 8900
},
{
"epoch": 0.9977603583426652,
"grad_norm": 6.9715471267700195,
"learning_rate": 3.337252706233669e-05,
"loss": 1.9576,
"step": 8910
},
{
"epoch": 0.9988801791713325,
"grad_norm": 2.990663528442383,
"learning_rate": 3.3353863381858903e-05,
"loss": 1.7998,
"step": 8920
},
{
"epoch": 1.0,
"grad_norm": 16.68695640563965,
"learning_rate": 3.3335199701381115e-05,
"loss": 2.4764,
"step": 8930
},
{
"epoch": 1.0011198208286674,
"grad_norm": 3.5761334896087646,
"learning_rate": 3.331653602090332e-05,
"loss": 2.0049,
"step": 8940
},
{
"epoch": 1.002239641657335,
"grad_norm": 7.336765289306641,
"learning_rate": 3.329787234042553e-05,
"loss": 1.9596,
"step": 8950
},
{
"epoch": 1.0033594624860023,
"grad_norm": 9.628479957580566,
"learning_rate": 3.3279208659947743e-05,
"loss": 2.1628,
"step": 8960
},
{
"epoch": 1.0044792833146696,
"grad_norm": 6.432254791259766,
"learning_rate": 3.3260544979469955e-05,
"loss": 1.9689,
"step": 8970
},
{
"epoch": 1.005599104143337,
"grad_norm": 3.5775656700134277,
"learning_rate": 3.324188129899216e-05,
"loss": 2.0591,
"step": 8980
},
{
"epoch": 1.0067189249720045,
"grad_norm": 7.444267272949219,
"learning_rate": 3.322321761851437e-05,
"loss": 2.2116,
"step": 8990
},
{
"epoch": 1.007838745800672,
"grad_norm": 9.2912015914917,
"learning_rate": 3.3204553938036583e-05,
"loss": 1.8121,
"step": 9000
},
{
"epoch": 1.0089585666293392,
"grad_norm": 12.298483848571777,
"learning_rate": 3.3185890257558795e-05,
"loss": 2.2219,
"step": 9010
},
{
"epoch": 1.0100783874580068,
"grad_norm": 11.347268104553223,
"learning_rate": 3.3167226577081e-05,
"loss": 1.9998,
"step": 9020
},
{
"epoch": 1.0111982082866742,
"grad_norm": 4.02382755279541,
"learning_rate": 3.314856289660321e-05,
"loss": 2.1333,
"step": 9030
},
{
"epoch": 1.0123180291153415,
"grad_norm": 8.781681060791016,
"learning_rate": 3.3129899216125423e-05,
"loss": 2.0904,
"step": 9040
},
{
"epoch": 1.0134378499440089,
"grad_norm": 7.836172580718994,
"learning_rate": 3.3111235535647635e-05,
"loss": 1.8313,
"step": 9050
},
{
"epoch": 1.0145576707726764,
"grad_norm": 7.975405693054199,
"learning_rate": 3.309257185516984e-05,
"loss": 2.1517,
"step": 9060
},
{
"epoch": 1.0156774916013438,
"grad_norm": 9.539911270141602,
"learning_rate": 3.307390817469205e-05,
"loss": 2.228,
"step": 9070
},
{
"epoch": 1.0167973124300111,
"grad_norm": 4.019872665405273,
"learning_rate": 3.3055244494214263e-05,
"loss": 1.97,
"step": 9080
},
{
"epoch": 1.0179171332586787,
"grad_norm": 3.3895974159240723,
"learning_rate": 3.3036580813736475e-05,
"loss": 1.6972,
"step": 9090
},
{
"epoch": 1.019036954087346,
"grad_norm": 12.689729690551758,
"learning_rate": 3.301791713325868e-05,
"loss": 1.7648,
"step": 9100
},
{
"epoch": 1.0201567749160134,
"grad_norm": 15.403214454650879,
"learning_rate": 3.2999253452780885e-05,
"loss": 2.3582,
"step": 9110
},
{
"epoch": 1.0212765957446808,
"grad_norm": 3.3380067348480225,
"learning_rate": 3.29805897723031e-05,
"loss": 1.9642,
"step": 9120
},
{
"epoch": 1.0223964165733483,
"grad_norm": 3.8533127307891846,
"learning_rate": 3.296192609182531e-05,
"loss": 1.9215,
"step": 9130
},
{
"epoch": 1.0235162374020157,
"grad_norm": 3.532688617706299,
"learning_rate": 3.294326241134752e-05,
"loss": 1.6916,
"step": 9140
},
{
"epoch": 1.024636058230683,
"grad_norm": 2.9172744750976562,
"learning_rate": 3.2924598730869725e-05,
"loss": 2.0278,
"step": 9150
},
{
"epoch": 1.0257558790593504,
"grad_norm": 4.141864776611328,
"learning_rate": 3.290593505039194e-05,
"loss": 2.57,
"step": 9160
},
{
"epoch": 1.026875699888018,
"grad_norm": 15.59328842163086,
"learning_rate": 3.288727136991415e-05,
"loss": 2.3305,
"step": 9170
},
{
"epoch": 1.0279955207166853,
"grad_norm": 8.242568016052246,
"learning_rate": 3.286860768943636e-05,
"loss": 2.2541,
"step": 9180
},
{
"epoch": 1.0291153415453527,
"grad_norm": 7.377376079559326,
"learning_rate": 3.2849944008958565e-05,
"loss": 2.2119,
"step": 9190
},
{
"epoch": 1.0302351623740202,
"grad_norm": 3.7288410663604736,
"learning_rate": 3.283128032848078e-05,
"loss": 1.9549,
"step": 9200
},
{
"epoch": 1.0313549832026876,
"grad_norm": 7.731942653656006,
"learning_rate": 3.281261664800299e-05,
"loss": 2.0964,
"step": 9210
},
{
"epoch": 1.032474804031355,
"grad_norm": 8.507601737976074,
"learning_rate": 3.27939529675252e-05,
"loss": 1.4743,
"step": 9220
},
{
"epoch": 1.0335946248600223,
"grad_norm": 10.14968204498291,
"learning_rate": 3.2775289287047405e-05,
"loss": 2.1759,
"step": 9230
},
{
"epoch": 1.0347144456886899,
"grad_norm": 4.713762283325195,
"learning_rate": 3.275662560656962e-05,
"loss": 2.4896,
"step": 9240
},
{
"epoch": 1.0358342665173572,
"grad_norm": 4.729640483856201,
"learning_rate": 3.273796192609183e-05,
"loss": 2.0363,
"step": 9250
},
{
"epoch": 1.0369540873460246,
"grad_norm": 3.9254088401794434,
"learning_rate": 3.271929824561404e-05,
"loss": 1.82,
"step": 9260
},
{
"epoch": 1.0380739081746921,
"grad_norm": 6.3994622230529785,
"learning_rate": 3.2700634565136245e-05,
"loss": 2.0063,
"step": 9270
},
{
"epoch": 1.0391937290033595,
"grad_norm": 4.113112449645996,
"learning_rate": 3.268197088465846e-05,
"loss": 1.8885,
"step": 9280
},
{
"epoch": 1.0403135498320268,
"grad_norm": 9.683294296264648,
"learning_rate": 3.266330720418067e-05,
"loss": 2.087,
"step": 9290
},
{
"epoch": 1.0414333706606942,
"grad_norm": 3.7569706439971924,
"learning_rate": 3.264464352370288e-05,
"loss": 2.0629,
"step": 9300
},
{
"epoch": 1.0425531914893618,
"grad_norm": 6.442532062530518,
"learning_rate": 3.2625979843225085e-05,
"loss": 2.1135,
"step": 9310
},
{
"epoch": 1.0436730123180291,
"grad_norm": 7.427597522735596,
"learning_rate": 3.26073161627473e-05,
"loss": 1.4265,
"step": 9320
},
{
"epoch": 1.0447928331466965,
"grad_norm": 14.338908195495605,
"learning_rate": 3.25886524822695e-05,
"loss": 1.9124,
"step": 9330
},
{
"epoch": 1.045912653975364,
"grad_norm": 9.877331733703613,
"learning_rate": 3.256998880179172e-05,
"loss": 2.5536,
"step": 9340
},
{
"epoch": 1.0470324748040314,
"grad_norm": 5.434894561767578,
"learning_rate": 3.2551325121313925e-05,
"loss": 2.0743,
"step": 9350
},
{
"epoch": 1.0481522956326987,
"grad_norm": 5.651406764984131,
"learning_rate": 3.253266144083613e-05,
"loss": 1.6426,
"step": 9360
},
{
"epoch": 1.049272116461366,
"grad_norm": 5.694229602813721,
"learning_rate": 3.251399776035834e-05,
"loss": 1.709,
"step": 9370
},
{
"epoch": 1.0503919372900337,
"grad_norm": 8.92438793182373,
"learning_rate": 3.2495334079880553e-05,
"loss": 1.8702,
"step": 9380
},
{
"epoch": 1.051511758118701,
"grad_norm": 6.862886428833008,
"learning_rate": 3.2476670399402765e-05,
"loss": 2.3834,
"step": 9390
},
{
"epoch": 1.0526315789473684,
"grad_norm": 7.55111026763916,
"learning_rate": 3.245800671892497e-05,
"loss": 1.8377,
"step": 9400
},
{
"epoch": 1.053751399776036,
"grad_norm": 4.6407341957092285,
"learning_rate": 3.243934303844718e-05,
"loss": 2.1082,
"step": 9410
},
{
"epoch": 1.0548712206047033,
"grad_norm": 6.718739032745361,
"learning_rate": 3.2420679357969393e-05,
"loss": 2.4031,
"step": 9420
},
{
"epoch": 1.0559910414333706,
"grad_norm": 2.9721930027008057,
"learning_rate": 3.2402015677491605e-05,
"loss": 1.9876,
"step": 9430
},
{
"epoch": 1.057110862262038,
"grad_norm": 2.9498345851898193,
"learning_rate": 3.238335199701381e-05,
"loss": 1.8549,
"step": 9440
},
{
"epoch": 1.0582306830907056,
"grad_norm": 13.339334487915039,
"learning_rate": 3.236468831653602e-05,
"loss": 1.8697,
"step": 9450
},
{
"epoch": 1.059350503919373,
"grad_norm": 4.650289058685303,
"learning_rate": 3.2346024636058234e-05,
"loss": 2.2676,
"step": 9460
},
{
"epoch": 1.0604703247480403,
"grad_norm": 14.234888076782227,
"learning_rate": 3.2327360955580445e-05,
"loss": 2.3395,
"step": 9470
},
{
"epoch": 1.0615901455767078,
"grad_norm": 3.4030983448028564,
"learning_rate": 3.230869727510265e-05,
"loss": 2.2116,
"step": 9480
},
{
"epoch": 1.0627099664053752,
"grad_norm": 4.666158199310303,
"learning_rate": 3.229003359462486e-05,
"loss": 2.3914,
"step": 9490
},
{
"epoch": 1.0638297872340425,
"grad_norm": 9.740036010742188,
"learning_rate": 3.2271369914147074e-05,
"loss": 1.7909,
"step": 9500
},
{
"epoch": 1.0649496080627099,
"grad_norm": 3.0109105110168457,
"learning_rate": 3.2252706233669285e-05,
"loss": 2.4118,
"step": 9510
},
{
"epoch": 1.0660694288913775,
"grad_norm": 3.5755748748779297,
"learning_rate": 3.223404255319149e-05,
"loss": 2.0648,
"step": 9520
},
{
"epoch": 1.0671892497200448,
"grad_norm": 3.826801061630249,
"learning_rate": 3.22153788727137e-05,
"loss": 1.5787,
"step": 9530
},
{
"epoch": 1.0683090705487122,
"grad_norm": 10.07292366027832,
"learning_rate": 3.219671519223591e-05,
"loss": 2.2986,
"step": 9540
},
{
"epoch": 1.0694288913773797,
"grad_norm": 13.892361640930176,
"learning_rate": 3.2178051511758125e-05,
"loss": 1.9787,
"step": 9550
},
{
"epoch": 1.070548712206047,
"grad_norm": 36.16096496582031,
"learning_rate": 3.215938783128033e-05,
"loss": 1.885,
"step": 9560
},
{
"epoch": 1.0716685330347144,
"grad_norm": 2.89412260055542,
"learning_rate": 3.214072415080254e-05,
"loss": 1.745,
"step": 9570
},
{
"epoch": 1.0727883538633818,
"grad_norm": 11.991975784301758,
"learning_rate": 3.212206047032475e-05,
"loss": 1.4454,
"step": 9580
},
{
"epoch": 1.0739081746920494,
"grad_norm": 14.4371337890625,
"learning_rate": 3.210339678984696e-05,
"loss": 1.8037,
"step": 9590
},
{
"epoch": 1.0750279955207167,
"grad_norm": 3.9582924842834473,
"learning_rate": 3.208473310936917e-05,
"loss": 2.414,
"step": 9600
},
{
"epoch": 1.076147816349384,
"grad_norm": 3.2256970405578613,
"learning_rate": 3.2066069428891375e-05,
"loss": 1.905,
"step": 9610
},
{
"epoch": 1.0772676371780516,
"grad_norm": 10.48331356048584,
"learning_rate": 3.204740574841359e-05,
"loss": 1.9094,
"step": 9620
},
{
"epoch": 1.078387458006719,
"grad_norm": 5.042234420776367,
"learning_rate": 3.20287420679358e-05,
"loss": 2.2765,
"step": 9630
},
{
"epoch": 1.0795072788353863,
"grad_norm": 3.3927605152130127,
"learning_rate": 3.201007838745801e-05,
"loss": 1.9585,
"step": 9640
},
{
"epoch": 1.0806270996640537,
"grad_norm": 3.9636199474334717,
"learning_rate": 3.1991414706980215e-05,
"loss": 2.0755,
"step": 9650
},
{
"epoch": 1.0817469204927213,
"grad_norm": 3.8078179359436035,
"learning_rate": 3.197275102650243e-05,
"loss": 1.7727,
"step": 9660
},
{
"epoch": 1.0828667413213886,
"grad_norm": 11.449139595031738,
"learning_rate": 3.195408734602464e-05,
"loss": 2.0101,
"step": 9670
},
{
"epoch": 1.083986562150056,
"grad_norm": 13.973347663879395,
"learning_rate": 3.193542366554685e-05,
"loss": 2.2601,
"step": 9680
},
{
"epoch": 1.0851063829787233,
"grad_norm": 11.239791870117188,
"learning_rate": 3.1916759985069055e-05,
"loss": 2.1264,
"step": 9690
},
{
"epoch": 1.0862262038073909,
"grad_norm": 11.495058059692383,
"learning_rate": 3.189809630459127e-05,
"loss": 1.9445,
"step": 9700
},
{
"epoch": 1.0873460246360582,
"grad_norm": 4.135149002075195,
"learning_rate": 3.187943262411348e-05,
"loss": 1.7415,
"step": 9710
},
{
"epoch": 1.0884658454647256,
"grad_norm": 10.35810375213623,
"learning_rate": 3.186076894363569e-05,
"loss": 2.1196,
"step": 9720
},
{
"epoch": 1.0895856662933932,
"grad_norm": 3.5504679679870605,
"learning_rate": 3.1842105263157895e-05,
"loss": 2.1255,
"step": 9730
},
{
"epoch": 1.0907054871220605,
"grad_norm": 7.433374404907227,
"learning_rate": 3.182344158268011e-05,
"loss": 2.0092,
"step": 9740
},
{
"epoch": 1.0918253079507279,
"grad_norm": 7.075191974639893,
"learning_rate": 3.180477790220231e-05,
"loss": 1.935,
"step": 9750
},
{
"epoch": 1.0929451287793952,
"grad_norm": 5.563907623291016,
"learning_rate": 3.178611422172453e-05,
"loss": 2.0882,
"step": 9760
},
{
"epoch": 1.0940649496080628,
"grad_norm": 3.0820200443267822,
"learning_rate": 3.1767450541246735e-05,
"loss": 1.7695,
"step": 9770
},
{
"epoch": 1.0951847704367301,
"grad_norm": 5.051406383514404,
"learning_rate": 3.174878686076895e-05,
"loss": 2.3345,
"step": 9780
},
{
"epoch": 1.0963045912653975,
"grad_norm": 9.736443519592285,
"learning_rate": 3.173012318029115e-05,
"loss": 1.9699,
"step": 9790
},
{
"epoch": 1.097424412094065,
"grad_norm": 3.930483818054199,
"learning_rate": 3.171145949981337e-05,
"loss": 2.1495,
"step": 9800
},
{
"epoch": 1.0985442329227324,
"grad_norm": 9.857285499572754,
"learning_rate": 3.1692795819335575e-05,
"loss": 2.1356,
"step": 9810
},
{
"epoch": 1.0996640537513998,
"grad_norm": 3.3318631649017334,
"learning_rate": 3.167413213885778e-05,
"loss": 1.9198,
"step": 9820
},
{
"epoch": 1.100783874580067,
"grad_norm": 3.945836067199707,
"learning_rate": 3.165546845837999e-05,
"loss": 2.0346,
"step": 9830
},
{
"epoch": 1.1019036954087347,
"grad_norm": 9.270819664001465,
"learning_rate": 3.1636804777902204e-05,
"loss": 2.1785,
"step": 9840
},
{
"epoch": 1.103023516237402,
"grad_norm": 14.800010681152344,
"learning_rate": 3.1618141097424415e-05,
"loss": 1.9342,
"step": 9850
},
{
"epoch": 1.1041433370660694,
"grad_norm": 8.554313659667969,
"learning_rate": 3.159947741694662e-05,
"loss": 2.0811,
"step": 9860
},
{
"epoch": 1.1052631578947367,
"grad_norm": 3.686922073364258,
"learning_rate": 3.158081373646883e-05,
"loss": 2.1673,
"step": 9870
},
{
"epoch": 1.1063829787234043,
"grad_norm": 2.909205436706543,
"learning_rate": 3.1562150055991044e-05,
"loss": 2.0985,
"step": 9880
},
{
"epoch": 1.1075027995520716,
"grad_norm": 3.2634377479553223,
"learning_rate": 3.1543486375513255e-05,
"loss": 1.7348,
"step": 9890
},
{
"epoch": 1.108622620380739,
"grad_norm": 3.7922704219818115,
"learning_rate": 3.152482269503546e-05,
"loss": 2.0642,
"step": 9900
},
{
"epoch": 1.1097424412094066,
"grad_norm": 3.7772440910339355,
"learning_rate": 3.150615901455767e-05,
"loss": 1.6834,
"step": 9910
},
{
"epoch": 1.110862262038074,
"grad_norm": 6.348939895629883,
"learning_rate": 3.1487495334079884e-05,
"loss": 1.8862,
"step": 9920
},
{
"epoch": 1.1119820828667413,
"grad_norm": 4.5603790283203125,
"learning_rate": 3.1468831653602095e-05,
"loss": 2.0568,
"step": 9930
},
{
"epoch": 1.1131019036954086,
"grad_norm": 11.244080543518066,
"learning_rate": 3.14501679731243e-05,
"loss": 2.015,
"step": 9940
},
{
"epoch": 1.1142217245240762,
"grad_norm": 8.52851390838623,
"learning_rate": 3.143150429264651e-05,
"loss": 2.0168,
"step": 9950
},
{
"epoch": 1.1153415453527435,
"grad_norm": 3.2907376289367676,
"learning_rate": 3.1412840612168724e-05,
"loss": 2.0653,
"step": 9960
},
{
"epoch": 1.116461366181411,
"grad_norm": 3.665787696838379,
"learning_rate": 3.1394176931690935e-05,
"loss": 2.5296,
"step": 9970
},
{
"epoch": 1.1175811870100785,
"grad_norm": 3.52567982673645,
"learning_rate": 3.137551325121314e-05,
"loss": 2.0133,
"step": 9980
},
{
"epoch": 1.1187010078387458,
"grad_norm": 3.8598620891571045,
"learning_rate": 3.135684957073535e-05,
"loss": 2.0675,
"step": 9990
},
{
"epoch": 1.1198208286674132,
"grad_norm": 10.049873352050781,
"learning_rate": 3.133818589025756e-05,
"loss": 2.1145,
"step": 10000
},
{
"epoch": 1.1209406494960805,
"grad_norm": 3.7973287105560303,
"learning_rate": 3.1319522209779775e-05,
"loss": 2.3125,
"step": 10010
},
{
"epoch": 1.122060470324748,
"grad_norm": 9.45921516418457,
"learning_rate": 3.130085852930198e-05,
"loss": 2.2593,
"step": 10020
},
{
"epoch": 1.1231802911534154,
"grad_norm": 3.3235390186309814,
"learning_rate": 3.128219484882419e-05,
"loss": 2.0233,
"step": 10030
},
{
"epoch": 1.1243001119820828,
"grad_norm": 8.556841850280762,
"learning_rate": 3.12635311683464e-05,
"loss": 2.0527,
"step": 10040
},
{
"epoch": 1.1254199328107504,
"grad_norm": 3.7315330505371094,
"learning_rate": 3.124486748786861e-05,
"loss": 1.9582,
"step": 10050
},
{
"epoch": 1.1265397536394177,
"grad_norm": 9.111562728881836,
"learning_rate": 3.122620380739082e-05,
"loss": 1.931,
"step": 10060
},
{
"epoch": 1.127659574468085,
"grad_norm": 13.934300422668457,
"learning_rate": 3.1207540126913025e-05,
"loss": 2.2748,
"step": 10070
},
{
"epoch": 1.1287793952967524,
"grad_norm": 2.9079439640045166,
"learning_rate": 3.118887644643524e-05,
"loss": 2.0733,
"step": 10080
},
{
"epoch": 1.12989921612542,
"grad_norm": 14.349089622497559,
"learning_rate": 3.117021276595745e-05,
"loss": 1.9089,
"step": 10090
},
{
"epoch": 1.1310190369540873,
"grad_norm": 4.357903003692627,
"learning_rate": 3.115154908547966e-05,
"loss": 2.1913,
"step": 10100
},
{
"epoch": 1.1321388577827547,
"grad_norm": 18.6312255859375,
"learning_rate": 3.1132885405001865e-05,
"loss": 2.0145,
"step": 10110
},
{
"epoch": 1.1332586786114223,
"grad_norm": 10.29723834991455,
"learning_rate": 3.111422172452408e-05,
"loss": 2.1836,
"step": 10120
},
{
"epoch": 1.1343784994400896,
"grad_norm": 8.374372482299805,
"learning_rate": 3.109555804404629e-05,
"loss": 1.8299,
"step": 10130
},
{
"epoch": 1.135498320268757,
"grad_norm": 3.970510244369507,
"learning_rate": 3.10768943635685e-05,
"loss": 1.2505,
"step": 10140
},
{
"epoch": 1.1366181410974243,
"grad_norm": 2.9629228115081787,
"learning_rate": 3.1058230683090705e-05,
"loss": 1.8105,
"step": 10150
},
{
"epoch": 1.137737961926092,
"grad_norm": 11.693266868591309,
"learning_rate": 3.103956700261292e-05,
"loss": 2.1849,
"step": 10160
},
{
"epoch": 1.1388577827547592,
"grad_norm": 5.791164398193359,
"learning_rate": 3.102090332213513e-05,
"loss": 2.1892,
"step": 10170
},
{
"epoch": 1.1399776035834266,
"grad_norm": 13.902082443237305,
"learning_rate": 3.100223964165734e-05,
"loss": 2.0516,
"step": 10180
},
{
"epoch": 1.1410974244120942,
"grad_norm": 8.851689338684082,
"learning_rate": 3.0983575961179545e-05,
"loss": 1.8785,
"step": 10190
},
{
"epoch": 1.1422172452407615,
"grad_norm": 7.506970405578613,
"learning_rate": 3.096491228070176e-05,
"loss": 1.77,
"step": 10200
},
{
"epoch": 1.1433370660694289,
"grad_norm": 5.175302028656006,
"learning_rate": 3.094624860022396e-05,
"loss": 1.5816,
"step": 10210
},
{
"epoch": 1.1444568868980962,
"grad_norm": 11.070626258850098,
"learning_rate": 3.092758491974618e-05,
"loss": 2.2113,
"step": 10220
},
{
"epoch": 1.1455767077267638,
"grad_norm": 14.317008018493652,
"learning_rate": 3.0908921239268385e-05,
"loss": 2.4373,
"step": 10230
},
{
"epoch": 1.1466965285554311,
"grad_norm": 3.7413330078125,
"learning_rate": 3.08902575587906e-05,
"loss": 2.0543,
"step": 10240
},
{
"epoch": 1.1478163493840985,
"grad_norm": 8.698836326599121,
"learning_rate": 3.08715938783128e-05,
"loss": 1.8267,
"step": 10250
},
{
"epoch": 1.148936170212766,
"grad_norm": 9.122303009033203,
"learning_rate": 3.085293019783502e-05,
"loss": 2.1165,
"step": 10260
},
{
"epoch": 1.1500559910414334,
"grad_norm": 10.478148460388184,
"learning_rate": 3.0834266517357225e-05,
"loss": 1.7777,
"step": 10270
},
{
"epoch": 1.1511758118701008,
"grad_norm": 7.338038444519043,
"learning_rate": 3.081560283687943e-05,
"loss": 1.5512,
"step": 10280
},
{
"epoch": 1.1522956326987681,
"grad_norm": 5.104288578033447,
"learning_rate": 3.079693915640164e-05,
"loss": 1.5521,
"step": 10290
},
{
"epoch": 1.1534154535274357,
"grad_norm": 6.295915126800537,
"learning_rate": 3.0778275475923854e-05,
"loss": 1.8437,
"step": 10300
},
{
"epoch": 1.154535274356103,
"grad_norm": 3.8285670280456543,
"learning_rate": 3.0759611795446065e-05,
"loss": 2.3621,
"step": 10310
},
{
"epoch": 1.1556550951847704,
"grad_norm": 9.0399751663208,
"learning_rate": 3.074094811496827e-05,
"loss": 1.9887,
"step": 10320
},
{
"epoch": 1.156774916013438,
"grad_norm": 14.121618270874023,
"learning_rate": 3.072228443449048e-05,
"loss": 2.091,
"step": 10330
},
{
"epoch": 1.1578947368421053,
"grad_norm": 3.0184133052825928,
"learning_rate": 3.0703620754012694e-05,
"loss": 2.018,
"step": 10340
},
{
"epoch": 1.1590145576707727,
"grad_norm": 7.774500370025635,
"learning_rate": 3.0684957073534905e-05,
"loss": 1.6259,
"step": 10350
},
{
"epoch": 1.16013437849944,
"grad_norm": 3.3404550552368164,
"learning_rate": 3.066629339305711e-05,
"loss": 1.6987,
"step": 10360
},
{
"epoch": 1.1612541993281076,
"grad_norm": 5.787201881408691,
"learning_rate": 3.064762971257932e-05,
"loss": 1.7056,
"step": 10370
},
{
"epoch": 1.162374020156775,
"grad_norm": 4.292003631591797,
"learning_rate": 3.0628966032101534e-05,
"loss": 2.0171,
"step": 10380
},
{
"epoch": 1.1634938409854423,
"grad_norm": 8.185914993286133,
"learning_rate": 3.0610302351623745e-05,
"loss": 1.75,
"step": 10390
},
{
"epoch": 1.1646136618141099,
"grad_norm": 12.01564884185791,
"learning_rate": 3.059163867114595e-05,
"loss": 1.8298,
"step": 10400
},
{
"epoch": 1.1657334826427772,
"grad_norm": 4.348435878753662,
"learning_rate": 3.057297499066816e-05,
"loss": 1.829,
"step": 10410
},
{
"epoch": 1.1668533034714446,
"grad_norm": 12.32309341430664,
"learning_rate": 3.055431131019037e-05,
"loss": 1.6587,
"step": 10420
},
{
"epoch": 1.167973124300112,
"grad_norm": 8.916961669921875,
"learning_rate": 3.0535647629712585e-05,
"loss": 1.814,
"step": 10430
},
{
"epoch": 1.1690929451287795,
"grad_norm": 6.272485256195068,
"learning_rate": 3.051698394923479e-05,
"loss": 1.8092,
"step": 10440
},
{
"epoch": 1.1702127659574468,
"grad_norm": 12.134109497070312,
"learning_rate": 3.0498320268757002e-05,
"loss": 2.1558,
"step": 10450
},
{
"epoch": 1.1713325867861142,
"grad_norm": 5.216768741607666,
"learning_rate": 3.047965658827921e-05,
"loss": 1.9535,
"step": 10460
},
{
"epoch": 1.1724524076147818,
"grad_norm": 9.889372825622559,
"learning_rate": 3.0460992907801422e-05,
"loss": 1.6845,
"step": 10470
},
{
"epoch": 1.173572228443449,
"grad_norm": 9.93710708618164,
"learning_rate": 3.044232922732363e-05,
"loss": 1.7271,
"step": 10480
},
{
"epoch": 1.1746920492721165,
"grad_norm": 11.036845207214355,
"learning_rate": 3.0423665546845842e-05,
"loss": 1.8613,
"step": 10490
},
{
"epoch": 1.1758118701007838,
"grad_norm": 4.051137924194336,
"learning_rate": 3.040500186636805e-05,
"loss": 1.6322,
"step": 10500
},
{
"epoch": 1.1769316909294514,
"grad_norm": 12.119963645935059,
"learning_rate": 3.0386338185890255e-05,
"loss": 1.9307,
"step": 10510
},
{
"epoch": 1.1780515117581187,
"grad_norm": 3.4073593616485596,
"learning_rate": 3.036767450541247e-05,
"loss": 1.8893,
"step": 10520
},
{
"epoch": 1.179171332586786,
"grad_norm": 4.570372104644775,
"learning_rate": 3.0349010824934675e-05,
"loss": 2.0297,
"step": 10530
},
{
"epoch": 1.1802911534154534,
"grad_norm": 4.415748119354248,
"learning_rate": 3.033034714445689e-05,
"loss": 1.818,
"step": 10540
},
{
"epoch": 1.181410974244121,
"grad_norm": 4.645330429077148,
"learning_rate": 3.0311683463979095e-05,
"loss": 2.1483,
"step": 10550
},
{
"epoch": 1.1825307950727884,
"grad_norm": 5.267803192138672,
"learning_rate": 3.0293019783501307e-05,
"loss": 2.009,
"step": 10560
},
{
"epoch": 1.1836506159014557,
"grad_norm": 4.232731819152832,
"learning_rate": 3.0274356103023515e-05,
"loss": 2.0684,
"step": 10570
},
{
"epoch": 1.184770436730123,
"grad_norm": 4.118504047393799,
"learning_rate": 3.0255692422545727e-05,
"loss": 2.1593,
"step": 10580
},
{
"epoch": 1.1858902575587906,
"grad_norm": 3.479337692260742,
"learning_rate": 3.0237028742067935e-05,
"loss": 2.428,
"step": 10590
},
{
"epoch": 1.187010078387458,
"grad_norm": 9.307233810424805,
"learning_rate": 3.0218365061590147e-05,
"loss": 2.113,
"step": 10600
},
{
"epoch": 1.1881298992161253,
"grad_norm": 7.910929203033447,
"learning_rate": 3.0199701381112355e-05,
"loss": 1.8977,
"step": 10610
},
{
"epoch": 1.189249720044793,
"grad_norm": 14.38522720336914,
"learning_rate": 3.0181037700634567e-05,
"loss": 2.3208,
"step": 10620
},
{
"epoch": 1.1903695408734603,
"grad_norm": 7.7208147048950195,
"learning_rate": 3.0162374020156775e-05,
"loss": 1.9594,
"step": 10630
},
{
"epoch": 1.1914893617021276,
"grad_norm": 3.620098114013672,
"learning_rate": 3.0143710339678987e-05,
"loss": 1.999,
"step": 10640
},
{
"epoch": 1.192609182530795,
"grad_norm": 8.450474739074707,
"learning_rate": 3.0125046659201195e-05,
"loss": 2.0836,
"step": 10650
},
{
"epoch": 1.1937290033594625,
"grad_norm": 5.103601932525635,
"learning_rate": 3.0106382978723407e-05,
"loss": 2.0798,
"step": 10660
},
{
"epoch": 1.1948488241881299,
"grad_norm": 3.6850714683532715,
"learning_rate": 3.0087719298245615e-05,
"loss": 2.0787,
"step": 10670
},
{
"epoch": 1.1959686450167972,
"grad_norm": 10.51152229309082,
"learning_rate": 3.0069055617767827e-05,
"loss": 2.4453,
"step": 10680
},
{
"epoch": 1.1970884658454648,
"grad_norm": 9.488080978393555,
"learning_rate": 3.0050391937290035e-05,
"loss": 2.3139,
"step": 10690
},
{
"epoch": 1.1982082866741322,
"grad_norm": 4.7864274978637695,
"learning_rate": 3.0031728256812247e-05,
"loss": 1.9159,
"step": 10700
},
{
"epoch": 1.1993281075027995,
"grad_norm": 4.440032482147217,
"learning_rate": 3.0013064576334455e-05,
"loss": 2.0064,
"step": 10710
},
{
"epoch": 1.2004479283314669,
"grad_norm": 5.0448503494262695,
"learning_rate": 2.9994400895856667e-05,
"loss": 2.42,
"step": 10720
},
{
"epoch": 1.2015677491601344,
"grad_norm": 3.938079357147217,
"learning_rate": 2.9975737215378875e-05,
"loss": 1.8528,
"step": 10730
},
{
"epoch": 1.2026875699888018,
"grad_norm": 8.685888290405273,
"learning_rate": 2.9957073534901087e-05,
"loss": 1.8489,
"step": 10740
},
{
"epoch": 1.2038073908174691,
"grad_norm": 4.123725891113281,
"learning_rate": 2.9938409854423295e-05,
"loss": 2.4495,
"step": 10750
},
{
"epoch": 1.2049272116461367,
"grad_norm": 7.6613593101501465,
"learning_rate": 2.99197461739455e-05,
"loss": 1.8993,
"step": 10760
},
{
"epoch": 1.206047032474804,
"grad_norm": 4.251744747161865,
"learning_rate": 2.9901082493467715e-05,
"loss": 2.1757,
"step": 10770
},
{
"epoch": 1.2071668533034714,
"grad_norm": 8.871329307556152,
"learning_rate": 2.988241881298992e-05,
"loss": 1.6899,
"step": 10780
},
{
"epoch": 1.2082866741321387,
"grad_norm": 3.407541275024414,
"learning_rate": 2.9863755132512132e-05,
"loss": 2.0731,
"step": 10790
},
{
"epoch": 1.2094064949608063,
"grad_norm": 4.200164794921875,
"learning_rate": 2.984509145203434e-05,
"loss": 2.1467,
"step": 10800
},
{
"epoch": 1.2105263157894737,
"grad_norm": 5.967247009277344,
"learning_rate": 2.9826427771556552e-05,
"loss": 2.3341,
"step": 10810
},
{
"epoch": 1.211646136618141,
"grad_norm": 6.902993202209473,
"learning_rate": 2.980776409107876e-05,
"loss": 2.1097,
"step": 10820
},
{
"epoch": 1.2127659574468086,
"grad_norm": 3.4361205101013184,
"learning_rate": 2.9789100410600972e-05,
"loss": 2.2153,
"step": 10830
},
{
"epoch": 1.213885778275476,
"grad_norm": 3.588088274002075,
"learning_rate": 2.977043673012318e-05,
"loss": 2.4579,
"step": 10840
},
{
"epoch": 1.2150055991041433,
"grad_norm": 5.1023173332214355,
"learning_rate": 2.9751773049645392e-05,
"loss": 1.8074,
"step": 10850
},
{
"epoch": 1.2161254199328106,
"grad_norm": 5.618678092956543,
"learning_rate": 2.97331093691676e-05,
"loss": 1.7553,
"step": 10860
},
{
"epoch": 1.2172452407614782,
"grad_norm": 3.7290029525756836,
"learning_rate": 2.9714445688689812e-05,
"loss": 2.2483,
"step": 10870
},
{
"epoch": 1.2183650615901456,
"grad_norm": 9.374983787536621,
"learning_rate": 2.969578200821202e-05,
"loss": 2.295,
"step": 10880
},
{
"epoch": 1.219484882418813,
"grad_norm": 12.954818725585938,
"learning_rate": 2.9677118327734232e-05,
"loss": 2.1509,
"step": 10890
},
{
"epoch": 1.2206047032474805,
"grad_norm": 5.120643615722656,
"learning_rate": 2.965845464725644e-05,
"loss": 1.6961,
"step": 10900
},
{
"epoch": 1.2217245240761478,
"grad_norm": 6.945920944213867,
"learning_rate": 2.9639790966778652e-05,
"loss": 1.92,
"step": 10910
},
{
"epoch": 1.2228443449048152,
"grad_norm": 4.189951419830322,
"learning_rate": 2.962112728630086e-05,
"loss": 1.8421,
"step": 10920
},
{
"epoch": 1.2239641657334825,
"grad_norm": 13.6853666305542,
"learning_rate": 2.9602463605823072e-05,
"loss": 1.6631,
"step": 10930
},
{
"epoch": 1.2250839865621501,
"grad_norm": 13.50125789642334,
"learning_rate": 2.958379992534528e-05,
"loss": 1.9662,
"step": 10940
},
{
"epoch": 1.2262038073908175,
"grad_norm": 11.182577133178711,
"learning_rate": 2.9565136244867492e-05,
"loss": 2.474,
"step": 10950
},
{
"epoch": 1.2273236282194848,
"grad_norm": 8.855241775512695,
"learning_rate": 2.95464725643897e-05,
"loss": 1.9798,
"step": 10960
},
{
"epoch": 1.2284434490481524,
"grad_norm": 2.290292263031006,
"learning_rate": 2.9527808883911912e-05,
"loss": 1.7049,
"step": 10970
},
{
"epoch": 1.2295632698768197,
"grad_norm": 3.8447682857513428,
"learning_rate": 2.950914520343412e-05,
"loss": 2.0167,
"step": 10980
},
{
"epoch": 1.230683090705487,
"grad_norm": 3.326638698577881,
"learning_rate": 2.9490481522956325e-05,
"loss": 1.8316,
"step": 10990
},
{
"epoch": 1.2318029115341544,
"grad_norm": 9.62152099609375,
"learning_rate": 2.9471817842478537e-05,
"loss": 1.6587,
"step": 11000
},
{
"epoch": 1.232922732362822,
"grad_norm": 4.14316463470459,
"learning_rate": 2.9453154162000745e-05,
"loss": 1.8924,
"step": 11010
},
{
"epoch": 1.2340425531914894,
"grad_norm": 12.48459529876709,
"learning_rate": 2.9434490481522957e-05,
"loss": 1.8707,
"step": 11020
},
{
"epoch": 1.2351623740201567,
"grad_norm": 8.288812637329102,
"learning_rate": 2.9415826801045165e-05,
"loss": 1.8107,
"step": 11030
},
{
"epoch": 1.2362821948488243,
"grad_norm": 14.502120018005371,
"learning_rate": 2.9397163120567377e-05,
"loss": 2.3128,
"step": 11040
},
{
"epoch": 1.2374020156774916,
"grad_norm": 3.264012336730957,
"learning_rate": 2.9378499440089585e-05,
"loss": 2.1651,
"step": 11050
},
{
"epoch": 1.238521836506159,
"grad_norm": 7.62103271484375,
"learning_rate": 2.9359835759611797e-05,
"loss": 2.0078,
"step": 11060
},
{
"epoch": 1.2396416573348263,
"grad_norm": 14.445006370544434,
"learning_rate": 2.9341172079134005e-05,
"loss": 1.9942,
"step": 11070
},
{
"epoch": 1.240761478163494,
"grad_norm": 4.4992899894714355,
"learning_rate": 2.9322508398656217e-05,
"loss": 2.1584,
"step": 11080
},
{
"epoch": 1.2418812989921613,
"grad_norm": 7.469330310821533,
"learning_rate": 2.9303844718178425e-05,
"loss": 1.9318,
"step": 11090
},
{
"epoch": 1.2430011198208286,
"grad_norm": 4.640170574188232,
"learning_rate": 2.9285181037700637e-05,
"loss": 2.0114,
"step": 11100
},
{
"epoch": 1.2441209406494962,
"grad_norm": 5.856334686279297,
"learning_rate": 2.9266517357222845e-05,
"loss": 1.7062,
"step": 11110
},
{
"epoch": 1.2452407614781635,
"grad_norm": 5.0445404052734375,
"learning_rate": 2.9247853676745057e-05,
"loss": 1.8474,
"step": 11120
},
{
"epoch": 1.2463605823068309,
"grad_norm": 11.517007827758789,
"learning_rate": 2.9229189996267265e-05,
"loss": 2.4335,
"step": 11130
},
{
"epoch": 1.2474804031354982,
"grad_norm": 15.464090347290039,
"learning_rate": 2.9210526315789477e-05,
"loss": 2.2518,
"step": 11140
},
{
"epoch": 1.2486002239641658,
"grad_norm": 4.1234025955200195,
"learning_rate": 2.9191862635311685e-05,
"loss": 2.2545,
"step": 11150
},
{
"epoch": 1.2497200447928332,
"grad_norm": 12.045602798461914,
"learning_rate": 2.9173198954833897e-05,
"loss": 1.9101,
"step": 11160
},
{
"epoch": 1.2508398656215005,
"grad_norm": 9.400586128234863,
"learning_rate": 2.9154535274356105e-05,
"loss": 2.2002,
"step": 11170
},
{
"epoch": 1.251959686450168,
"grad_norm": 4.317978382110596,
"learning_rate": 2.9135871593878317e-05,
"loss": 2.008,
"step": 11180
},
{
"epoch": 1.2530795072788354,
"grad_norm": 3.613831043243408,
"learning_rate": 2.9117207913400525e-05,
"loss": 2.2759,
"step": 11190
},
{
"epoch": 1.2541993281075028,
"grad_norm": 9.903818130493164,
"learning_rate": 2.9098544232922737e-05,
"loss": 1.8002,
"step": 11200
},
{
"epoch": 1.2553191489361701,
"grad_norm": 4.823100566864014,
"learning_rate": 2.9079880552444942e-05,
"loss": 2.041,
"step": 11210
},
{
"epoch": 1.2564389697648375,
"grad_norm": 8.871933937072754,
"learning_rate": 2.906121687196715e-05,
"loss": 1.8554,
"step": 11220
},
{
"epoch": 1.257558790593505,
"grad_norm": 3.7882330417633057,
"learning_rate": 2.9042553191489362e-05,
"loss": 2.0572,
"step": 11230
},
{
"epoch": 1.2586786114221724,
"grad_norm": 3.4256579875946045,
"learning_rate": 2.902388951101157e-05,
"loss": 2.0062,
"step": 11240
},
{
"epoch": 1.25979843225084,
"grad_norm": 7.357487678527832,
"learning_rate": 2.9005225830533782e-05,
"loss": 2.0538,
"step": 11250
},
{
"epoch": 1.2609182530795073,
"grad_norm": 8.090987205505371,
"learning_rate": 2.898656215005599e-05,
"loss": 1.7325,
"step": 11260
},
{
"epoch": 1.2620380739081747,
"grad_norm": 7.4141669273376465,
"learning_rate": 2.8967898469578202e-05,
"loss": 2.2278,
"step": 11270
},
{
"epoch": 1.263157894736842,
"grad_norm": 9.293551445007324,
"learning_rate": 2.894923478910041e-05,
"loss": 1.9437,
"step": 11280
},
{
"epoch": 1.2642777155655094,
"grad_norm": 7.823407173156738,
"learning_rate": 2.8930571108622622e-05,
"loss": 1.7969,
"step": 11290
},
{
"epoch": 1.265397536394177,
"grad_norm": 7.021416664123535,
"learning_rate": 2.891190742814483e-05,
"loss": 1.7032,
"step": 11300
},
{
"epoch": 1.2665173572228443,
"grad_norm": 5.103081703186035,
"learning_rate": 2.8893243747667042e-05,
"loss": 1.9367,
"step": 11310
},
{
"epoch": 1.2676371780515119,
"grad_norm": 11.817680358886719,
"learning_rate": 2.887458006718925e-05,
"loss": 1.9144,
"step": 11320
},
{
"epoch": 1.2687569988801792,
"grad_norm": 4.182484149932861,
"learning_rate": 2.8855916386711462e-05,
"loss": 2.2476,
"step": 11330
},
{
"epoch": 1.2698768197088466,
"grad_norm": 12.477089881896973,
"learning_rate": 2.883725270623367e-05,
"loss": 2.1192,
"step": 11340
},
{
"epoch": 1.270996640537514,
"grad_norm": 4.275122165679932,
"learning_rate": 2.8818589025755882e-05,
"loss": 2.4108,
"step": 11350
},
{
"epoch": 1.2721164613661813,
"grad_norm": 15.332164764404297,
"learning_rate": 2.879992534527809e-05,
"loss": 2.3805,
"step": 11360
},
{
"epoch": 1.2732362821948489,
"grad_norm": 10.845608711242676,
"learning_rate": 2.8781261664800302e-05,
"loss": 2.3275,
"step": 11370
},
{
"epoch": 1.2743561030235162,
"grad_norm": 8.792692184448242,
"learning_rate": 2.876259798432251e-05,
"loss": 1.6722,
"step": 11380
},
{
"epoch": 1.2754759238521838,
"grad_norm": 9.559557914733887,
"learning_rate": 2.8743934303844722e-05,
"loss": 2.2257,
"step": 11390
},
{
"epoch": 1.2765957446808511,
"grad_norm": 6.456275463104248,
"learning_rate": 2.872527062336693e-05,
"loss": 1.7045,
"step": 11400
},
{
"epoch": 1.2777155655095185,
"grad_norm": 8.79680347442627,
"learning_rate": 2.8706606942889142e-05,
"loss": 2.2707,
"step": 11410
},
{
"epoch": 1.2788353863381858,
"grad_norm": 4.077367782592773,
"learning_rate": 2.8687943262411347e-05,
"loss": 2.0724,
"step": 11420
},
{
"epoch": 1.2799552071668532,
"grad_norm": 15.875419616699219,
"learning_rate": 2.8669279581933562e-05,
"loss": 2.4802,
"step": 11430
},
{
"epoch": 1.2810750279955208,
"grad_norm": 9.360994338989258,
"learning_rate": 2.8650615901455767e-05,
"loss": 2.561,
"step": 11440
},
{
"epoch": 1.282194848824188,
"grad_norm": 3.356452226638794,
"learning_rate": 2.8631952220977975e-05,
"loss": 1.6649,
"step": 11450
},
{
"epoch": 1.2833146696528557,
"grad_norm": 4.318080425262451,
"learning_rate": 2.8613288540500187e-05,
"loss": 2.1682,
"step": 11460
},
{
"epoch": 1.284434490481523,
"grad_norm": 14.16100025177002,
"learning_rate": 2.8594624860022395e-05,
"loss": 2.245,
"step": 11470
},
{
"epoch": 1.2855543113101904,
"grad_norm": 4.342535495758057,
"learning_rate": 2.8575961179544607e-05,
"loss": 1.9388,
"step": 11480
},
{
"epoch": 1.2866741321388577,
"grad_norm": 4.287493705749512,
"learning_rate": 2.8557297499066815e-05,
"loss": 2.0392,
"step": 11490
},
{
"epoch": 1.287793952967525,
"grad_norm": 4.413599967956543,
"learning_rate": 2.8538633818589027e-05,
"loss": 1.7355,
"step": 11500
},
{
"epoch": 1.2889137737961927,
"grad_norm": 6.577482223510742,
"learning_rate": 2.8519970138111235e-05,
"loss": 2.0267,
"step": 11510
},
{
"epoch": 1.29003359462486,
"grad_norm": 16.563228607177734,
"learning_rate": 2.8501306457633447e-05,
"loss": 1.7356,
"step": 11520
},
{
"epoch": 1.2911534154535274,
"grad_norm": 4.8970255851745605,
"learning_rate": 2.8482642777155655e-05,
"loss": 2.0905,
"step": 11530
},
{
"epoch": 1.292273236282195,
"grad_norm": 3.6036787033081055,
"learning_rate": 2.8463979096677867e-05,
"loss": 1.8632,
"step": 11540
},
{
"epoch": 1.2933930571108623,
"grad_norm": 3.7850587368011475,
"learning_rate": 2.8445315416200075e-05,
"loss": 1.8984,
"step": 11550
},
{
"epoch": 1.2945128779395296,
"grad_norm": 3.808590888977051,
"learning_rate": 2.8426651735722287e-05,
"loss": 2.0321,
"step": 11560
},
{
"epoch": 1.295632698768197,
"grad_norm": 6.799190044403076,
"learning_rate": 2.8407988055244495e-05,
"loss": 2.3606,
"step": 11570
},
{
"epoch": 1.2967525195968646,
"grad_norm": 13.739738464355469,
"learning_rate": 2.8389324374766707e-05,
"loss": 2.4276,
"step": 11580
},
{
"epoch": 1.297872340425532,
"grad_norm": 10.142343521118164,
"learning_rate": 2.8370660694288915e-05,
"loss": 2.1635,
"step": 11590
},
{
"epoch": 1.2989921612541993,
"grad_norm": 7.874263286590576,
"learning_rate": 2.8351997013811127e-05,
"loss": 1.9213,
"step": 11600
},
{
"epoch": 1.3001119820828668,
"grad_norm": 9.147733688354492,
"learning_rate": 2.8333333333333335e-05,
"loss": 1.9669,
"step": 11610
},
{
"epoch": 1.3012318029115342,
"grad_norm": 4.748540878295898,
"learning_rate": 2.8314669652855547e-05,
"loss": 2.1498,
"step": 11620
},
{
"epoch": 1.3023516237402015,
"grad_norm": 3.711635112762451,
"learning_rate": 2.8296005972377755e-05,
"loss": 2.0037,
"step": 11630
},
{
"epoch": 1.3034714445688689,
"grad_norm": 9.331302642822266,
"learning_rate": 2.8277342291899967e-05,
"loss": 2.2264,
"step": 11640
},
{
"epoch": 1.3045912653975364,
"grad_norm": 3.66086483001709,
"learning_rate": 2.8258678611422172e-05,
"loss": 1.9577,
"step": 11650
},
{
"epoch": 1.3057110862262038,
"grad_norm": 3.9760518074035645,
"learning_rate": 2.8240014930944387e-05,
"loss": 2.4175,
"step": 11660
},
{
"epoch": 1.3068309070548711,
"grad_norm": 11.729776382446289,
"learning_rate": 2.8221351250466592e-05,
"loss": 1.8006,
"step": 11670
},
{
"epoch": 1.3079507278835387,
"grad_norm": 5.228203296661377,
"learning_rate": 2.82026875699888e-05,
"loss": 2.1344,
"step": 11680
},
{
"epoch": 1.309070548712206,
"grad_norm": 4.904013633728027,
"learning_rate": 2.8184023889511012e-05,
"loss": 1.3898,
"step": 11690
},
{
"epoch": 1.3101903695408734,
"grad_norm": 10.309314727783203,
"learning_rate": 2.816536020903322e-05,
"loss": 1.6145,
"step": 11700
},
{
"epoch": 1.3113101903695408,
"grad_norm": 6.570261478424072,
"learning_rate": 2.8146696528555432e-05,
"loss": 2.0292,
"step": 11710
},
{
"epoch": 1.3124300111982083,
"grad_norm": 4.200310707092285,
"learning_rate": 2.812803284807764e-05,
"loss": 2.086,
"step": 11720
},
{
"epoch": 1.3135498320268757,
"grad_norm": 10.7840576171875,
"learning_rate": 2.8109369167599852e-05,
"loss": 2.0459,
"step": 11730
},
{
"epoch": 1.314669652855543,
"grad_norm": 3.7457571029663086,
"learning_rate": 2.809070548712206e-05,
"loss": 1.6617,
"step": 11740
},
{
"epoch": 1.3157894736842106,
"grad_norm": 4.625324726104736,
"learning_rate": 2.8072041806644272e-05,
"loss": 1.5736,
"step": 11750
},
{
"epoch": 1.316909294512878,
"grad_norm": 9.565194129943848,
"learning_rate": 2.805337812616648e-05,
"loss": 2.4976,
"step": 11760
},
{
"epoch": 1.3180291153415453,
"grad_norm": 10.894997596740723,
"learning_rate": 2.8034714445688692e-05,
"loss": 1.9974,
"step": 11770
},
{
"epoch": 1.3191489361702127,
"grad_norm": 4.6816725730896,
"learning_rate": 2.80160507652109e-05,
"loss": 2.1758,
"step": 11780
},
{
"epoch": 1.3202687569988802,
"grad_norm": 3.4345543384552,
"learning_rate": 2.7997387084733112e-05,
"loss": 2.0768,
"step": 11790
},
{
"epoch": 1.3213885778275476,
"grad_norm": 11.61649227142334,
"learning_rate": 2.797872340425532e-05,
"loss": 2.135,
"step": 11800
},
{
"epoch": 1.322508398656215,
"grad_norm": 3.6992645263671875,
"learning_rate": 2.7960059723777532e-05,
"loss": 2.0889,
"step": 11810
},
{
"epoch": 1.3236282194848825,
"grad_norm": 3.465416193008423,
"learning_rate": 2.794139604329974e-05,
"loss": 2.0867,
"step": 11820
},
{
"epoch": 1.3247480403135499,
"grad_norm": 7.6769795417785645,
"learning_rate": 2.7922732362821952e-05,
"loss": 1.9802,
"step": 11830
},
{
"epoch": 1.3258678611422172,
"grad_norm": 4.010658264160156,
"learning_rate": 2.790406868234416e-05,
"loss": 2.2954,
"step": 11840
},
{
"epoch": 1.3269876819708846,
"grad_norm": 5.182217597961426,
"learning_rate": 2.7885405001866372e-05,
"loss": 2.3104,
"step": 11850
},
{
"epoch": 1.3281075027995521,
"grad_norm": 9.429098129272461,
"learning_rate": 2.7866741321388577e-05,
"loss": 1.7616,
"step": 11860
},
{
"epoch": 1.3292273236282195,
"grad_norm": 3.6495516300201416,
"learning_rate": 2.7848077640910792e-05,
"loss": 1.7191,
"step": 11870
},
{
"epoch": 1.3303471444568868,
"grad_norm": 3.693429470062256,
"learning_rate": 2.7829413960432997e-05,
"loss": 2.2813,
"step": 11880
},
{
"epoch": 1.3314669652855544,
"grad_norm": 3.9754602909088135,
"learning_rate": 2.7810750279955212e-05,
"loss": 1.9402,
"step": 11890
},
{
"epoch": 1.3325867861142218,
"grad_norm": 8.377337455749512,
"learning_rate": 2.7792086599477417e-05,
"loss": 2.0752,
"step": 11900
},
{
"epoch": 1.3337066069428891,
"grad_norm": 7.4295220375061035,
"learning_rate": 2.7773422918999625e-05,
"loss": 1.6284,
"step": 11910
},
{
"epoch": 1.3348264277715565,
"grad_norm": 3.935297966003418,
"learning_rate": 2.7754759238521837e-05,
"loss": 2.2115,
"step": 11920
},
{
"epoch": 1.335946248600224,
"grad_norm": 9.240707397460938,
"learning_rate": 2.7736095558044045e-05,
"loss": 1.8889,
"step": 11930
},
{
"epoch": 1.3370660694288914,
"grad_norm": 11.241867065429688,
"learning_rate": 2.7717431877566257e-05,
"loss": 1.7422,
"step": 11940
},
{
"epoch": 1.3381858902575587,
"grad_norm": 5.747684478759766,
"learning_rate": 2.7698768197088465e-05,
"loss": 1.8776,
"step": 11950
},
{
"epoch": 1.3393057110862263,
"grad_norm": 8.002111434936523,
"learning_rate": 2.7680104516610677e-05,
"loss": 2.0645,
"step": 11960
},
{
"epoch": 1.3404255319148937,
"grad_norm": 6.661399841308594,
"learning_rate": 2.7661440836132885e-05,
"loss": 1.914,
"step": 11970
},
{
"epoch": 1.341545352743561,
"grad_norm": 4.897961616516113,
"learning_rate": 2.7642777155655097e-05,
"loss": 2.0699,
"step": 11980
},
{
"epoch": 1.3426651735722284,
"grad_norm": 8.352503776550293,
"learning_rate": 2.7624113475177305e-05,
"loss": 2.3203,
"step": 11990
},
{
"epoch": 1.343784994400896,
"grad_norm": 5.313516616821289,
"learning_rate": 2.7605449794699517e-05,
"loss": 1.4091,
"step": 12000
},
{
"epoch": 1.3449048152295633,
"grad_norm": 8.284523010253906,
"learning_rate": 2.7586786114221725e-05,
"loss": 2.2698,
"step": 12010
},
{
"epoch": 1.3460246360582306,
"grad_norm": 4.834831237792969,
"learning_rate": 2.7568122433743937e-05,
"loss": 1.9009,
"step": 12020
},
{
"epoch": 1.3471444568868982,
"grad_norm": 14.059358596801758,
"learning_rate": 2.7549458753266145e-05,
"loss": 2.1775,
"step": 12030
},
{
"epoch": 1.3482642777155656,
"grad_norm": 8.378131866455078,
"learning_rate": 2.7530795072788357e-05,
"loss": 2.1616,
"step": 12040
},
{
"epoch": 1.349384098544233,
"grad_norm": 3.4480719566345215,
"learning_rate": 2.7512131392310565e-05,
"loss": 2.2743,
"step": 12050
},
{
"epoch": 1.3505039193729003,
"grad_norm": 4.051682472229004,
"learning_rate": 2.7493467711832777e-05,
"loss": 1.7351,
"step": 12060
},
{
"epoch": 1.3516237402015676,
"grad_norm": 15.0495023727417,
"learning_rate": 2.7474804031354982e-05,
"loss": 2.3378,
"step": 12070
},
{
"epoch": 1.3527435610302352,
"grad_norm": 12.420659065246582,
"learning_rate": 2.7456140350877197e-05,
"loss": 2.0799,
"step": 12080
},
{
"epoch": 1.3538633818589025,
"grad_norm": 3.576589345932007,
"learning_rate": 2.7437476670399402e-05,
"loss": 1.7943,
"step": 12090
},
{
"epoch": 1.35498320268757,
"grad_norm": 9.293567657470703,
"learning_rate": 2.7418812989921617e-05,
"loss": 2.0307,
"step": 12100
},
{
"epoch": 1.3561030235162375,
"grad_norm": 4.058133125305176,
"learning_rate": 2.7400149309443822e-05,
"loss": 1.6863,
"step": 12110
},
{
"epoch": 1.3572228443449048,
"grad_norm": 3.99945330619812,
"learning_rate": 2.7381485628966037e-05,
"loss": 2.2194,
"step": 12120
},
{
"epoch": 1.3583426651735722,
"grad_norm": 10.465315818786621,
"learning_rate": 2.7362821948488242e-05,
"loss": 1.7545,
"step": 12130
},
{
"epoch": 1.3594624860022395,
"grad_norm": 5.384920120239258,
"learning_rate": 2.734415826801045e-05,
"loss": 1.6758,
"step": 12140
},
{
"epoch": 1.360582306830907,
"grad_norm": 3.6617019176483154,
"learning_rate": 2.7325494587532662e-05,
"loss": 1.8267,
"step": 12150
},
{
"epoch": 1.3617021276595744,
"grad_norm": 5.872734069824219,
"learning_rate": 2.730683090705487e-05,
"loss": 1.7598,
"step": 12160
},
{
"epoch": 1.362821948488242,
"grad_norm": 3.7369675636291504,
"learning_rate": 2.7288167226577082e-05,
"loss": 1.8391,
"step": 12170
},
{
"epoch": 1.3639417693169094,
"grad_norm": 4.193478107452393,
"learning_rate": 2.726950354609929e-05,
"loss": 1.9595,
"step": 12180
},
{
"epoch": 1.3650615901455767,
"grad_norm": 11.2186918258667,
"learning_rate": 2.7250839865621502e-05,
"loss": 2.3907,
"step": 12190
},
{
"epoch": 1.366181410974244,
"grad_norm": 10.962636947631836,
"learning_rate": 2.723217618514371e-05,
"loss": 1.8998,
"step": 12200
},
{
"epoch": 1.3673012318029114,
"grad_norm": 6.624661922454834,
"learning_rate": 2.7213512504665922e-05,
"loss": 2.1747,
"step": 12210
},
{
"epoch": 1.368421052631579,
"grad_norm": 3.9043078422546387,
"learning_rate": 2.719484882418813e-05,
"loss": 1.9947,
"step": 12220
},
{
"epoch": 1.3695408734602463,
"grad_norm": 13.550288200378418,
"learning_rate": 2.7176185143710342e-05,
"loss": 1.7701,
"step": 12230
},
{
"epoch": 1.370660694288914,
"grad_norm": 3.8633484840393066,
"learning_rate": 2.715752146323255e-05,
"loss": 1.701,
"step": 12240
},
{
"epoch": 1.3717805151175813,
"grad_norm": 15.98534870147705,
"learning_rate": 2.7138857782754762e-05,
"loss": 2.2531,
"step": 12250
},
{
"epoch": 1.3729003359462486,
"grad_norm": 3.9303388595581055,
"learning_rate": 2.712019410227697e-05,
"loss": 2.0902,
"step": 12260
},
{
"epoch": 1.374020156774916,
"grad_norm": 5.423203945159912,
"learning_rate": 2.7101530421799182e-05,
"loss": 2.3482,
"step": 12270
},
{
"epoch": 1.3751399776035833,
"grad_norm": 12.370367050170898,
"learning_rate": 2.708286674132139e-05,
"loss": 1.9292,
"step": 12280
},
{
"epoch": 1.3762597984322509,
"grad_norm": 5.228443622589111,
"learning_rate": 2.7064203060843602e-05,
"loss": 2.1979,
"step": 12290
},
{
"epoch": 1.3773796192609182,
"grad_norm": 5.5691423416137695,
"learning_rate": 2.7045539380365807e-05,
"loss": 2.3052,
"step": 12300
},
{
"epoch": 1.3784994400895858,
"grad_norm": 4.302522659301758,
"learning_rate": 2.7026875699888022e-05,
"loss": 2.2908,
"step": 12310
},
{
"epoch": 1.3796192609182532,
"grad_norm": 5.334700584411621,
"learning_rate": 2.7008212019410227e-05,
"loss": 1.704,
"step": 12320
},
{
"epoch": 1.3807390817469205,
"grad_norm": 6.528292655944824,
"learning_rate": 2.6989548338932442e-05,
"loss": 2.0202,
"step": 12330
},
{
"epoch": 1.3818589025755879,
"grad_norm": 8.879626274108887,
"learning_rate": 2.6970884658454647e-05,
"loss": 1.9973,
"step": 12340
},
{
"epoch": 1.3829787234042552,
"grad_norm": 12.133624076843262,
"learning_rate": 2.6952220977976862e-05,
"loss": 2.0525,
"step": 12350
},
{
"epoch": 1.3840985442329228,
"grad_norm": 3.8778038024902344,
"learning_rate": 2.6933557297499067e-05,
"loss": 2.7127,
"step": 12360
},
{
"epoch": 1.3852183650615901,
"grad_norm": 12.094010353088379,
"learning_rate": 2.6914893617021282e-05,
"loss": 2.2501,
"step": 12370
},
{
"epoch": 1.3863381858902575,
"grad_norm": 14.439510345458984,
"learning_rate": 2.6896229936543487e-05,
"loss": 2.4553,
"step": 12380
},
{
"epoch": 1.387458006718925,
"grad_norm": 13.809215545654297,
"learning_rate": 2.6877566256065695e-05,
"loss": 1.7811,
"step": 12390
},
{
"epoch": 1.3885778275475924,
"grad_norm": 3.6743390560150146,
"learning_rate": 2.6858902575587907e-05,
"loss": 1.5438,
"step": 12400
},
{
"epoch": 1.3896976483762598,
"grad_norm": 4.393309116363525,
"learning_rate": 2.6840238895110115e-05,
"loss": 1.4149,
"step": 12410
},
{
"epoch": 1.390817469204927,
"grad_norm": 8.418529510498047,
"learning_rate": 2.6821575214632327e-05,
"loss": 1.8214,
"step": 12420
},
{
"epoch": 1.3919372900335947,
"grad_norm": 11.139238357543945,
"learning_rate": 2.6802911534154535e-05,
"loss": 2.2626,
"step": 12430
},
{
"epoch": 1.393057110862262,
"grad_norm": 7.604578495025635,
"learning_rate": 2.6784247853676747e-05,
"loss": 2.5468,
"step": 12440
},
{
"epoch": 1.3941769316909294,
"grad_norm": 4.145791053771973,
"learning_rate": 2.6765584173198955e-05,
"loss": 1.6144,
"step": 12450
},
{
"epoch": 1.395296752519597,
"grad_norm": 6.010091781616211,
"learning_rate": 2.6746920492721167e-05,
"loss": 2.0923,
"step": 12460
},
{
"epoch": 1.3964165733482643,
"grad_norm": 10.133779525756836,
"learning_rate": 2.6728256812243375e-05,
"loss": 1.9483,
"step": 12470
},
{
"epoch": 1.3975363941769317,
"grad_norm": 3.6997570991516113,
"learning_rate": 2.6709593131765587e-05,
"loss": 1.701,
"step": 12480
},
{
"epoch": 1.398656215005599,
"grad_norm": 10.962636947631836,
"learning_rate": 2.6690929451287795e-05,
"loss": 2.384,
"step": 12490
},
{
"epoch": 1.3997760358342666,
"grad_norm": 3.6743199825286865,
"learning_rate": 2.6672265770810007e-05,
"loss": 1.9178,
"step": 12500
},
{
"epoch": 1.400895856662934,
"grad_norm": 12.958653450012207,
"learning_rate": 2.6653602090332212e-05,
"loss": 2.594,
"step": 12510
},
{
"epoch": 1.4020156774916013,
"grad_norm": 11.249685287475586,
"learning_rate": 2.6634938409854427e-05,
"loss": 1.6907,
"step": 12520
},
{
"epoch": 1.4031354983202688,
"grad_norm": 4.222360134124756,
"learning_rate": 2.6616274729376632e-05,
"loss": 2.1065,
"step": 12530
},
{
"epoch": 1.4042553191489362,
"grad_norm": 11.4049654006958,
"learning_rate": 2.6597611048898847e-05,
"loss": 2.0968,
"step": 12540
},
{
"epoch": 1.4053751399776035,
"grad_norm": 4.947079181671143,
"learning_rate": 2.6578947368421052e-05,
"loss": 1.942,
"step": 12550
},
{
"epoch": 1.406494960806271,
"grad_norm": 11.592204093933105,
"learning_rate": 2.6560283687943267e-05,
"loss": 1.9633,
"step": 12560
},
{
"epoch": 1.4076147816349385,
"grad_norm": 5.250865459442139,
"learning_rate": 2.6541620007465472e-05,
"loss": 1.9282,
"step": 12570
},
{
"epoch": 1.4087346024636058,
"grad_norm": 12.925929069519043,
"learning_rate": 2.6522956326987687e-05,
"loss": 2.3242,
"step": 12580
},
{
"epoch": 1.4098544232922732,
"grad_norm": 13.145225524902344,
"learning_rate": 2.6504292646509892e-05,
"loss": 2.0153,
"step": 12590
},
{
"epoch": 1.4109742441209407,
"grad_norm": 5.17841100692749,
"learning_rate": 2.6485628966032107e-05,
"loss": 1.7325,
"step": 12600
},
{
"epoch": 1.412094064949608,
"grad_norm": 4.1737213134765625,
"learning_rate": 2.6466965285554312e-05,
"loss": 1.8959,
"step": 12610
},
{
"epoch": 1.4132138857782754,
"grad_norm": 14.844003677368164,
"learning_rate": 2.644830160507652e-05,
"loss": 2.1662,
"step": 12620
},
{
"epoch": 1.4143337066069428,
"grad_norm": 3.8433620929718018,
"learning_rate": 2.6429637924598732e-05,
"loss": 1.7181,
"step": 12630
},
{
"epoch": 1.4154535274356104,
"grad_norm": 7.718703746795654,
"learning_rate": 2.641097424412094e-05,
"loss": 1.9043,
"step": 12640
},
{
"epoch": 1.4165733482642777,
"grad_norm": 9.38231086730957,
"learning_rate": 2.6392310563643152e-05,
"loss": 1.8313,
"step": 12650
},
{
"epoch": 1.417693169092945,
"grad_norm": 13.019353866577148,
"learning_rate": 2.637364688316536e-05,
"loss": 2.2505,
"step": 12660
},
{
"epoch": 1.4188129899216126,
"grad_norm": 14.78768253326416,
"learning_rate": 2.6354983202687572e-05,
"loss": 1.8465,
"step": 12670
},
{
"epoch": 1.41993281075028,
"grad_norm": 12.229498863220215,
"learning_rate": 2.633631952220978e-05,
"loss": 2.4691,
"step": 12680
},
{
"epoch": 1.4210526315789473,
"grad_norm": 4.5396294593811035,
"learning_rate": 2.6317655841731992e-05,
"loss": 2.0822,
"step": 12690
},
{
"epoch": 1.4221724524076147,
"grad_norm": 11.260706901550293,
"learning_rate": 2.62989921612542e-05,
"loss": 1.8299,
"step": 12700
},
{
"epoch": 1.4232922732362823,
"grad_norm": 7.562645435333252,
"learning_rate": 2.6280328480776412e-05,
"loss": 1.9251,
"step": 12710
},
{
"epoch": 1.4244120940649496,
"grad_norm": 12.989692687988281,
"learning_rate": 2.6261664800298617e-05,
"loss": 1.8278,
"step": 12720
},
{
"epoch": 1.425531914893617,
"grad_norm": 15.355886459350586,
"learning_rate": 2.6243001119820832e-05,
"loss": 2.2122,
"step": 12730
},
{
"epoch": 1.4266517357222845,
"grad_norm": 4.491844654083252,
"learning_rate": 2.6224337439343037e-05,
"loss": 1.8286,
"step": 12740
},
{
"epoch": 1.427771556550952,
"grad_norm": 11.244644165039062,
"learning_rate": 2.6205673758865252e-05,
"loss": 2.2198,
"step": 12750
},
{
"epoch": 1.4288913773796192,
"grad_norm": 4.543248176574707,
"learning_rate": 2.6187010078387457e-05,
"loss": 2.1489,
"step": 12760
},
{
"epoch": 1.4300111982082866,
"grad_norm": 5.585264205932617,
"learning_rate": 2.6168346397909672e-05,
"loss": 1.9485,
"step": 12770
},
{
"epoch": 1.4311310190369542,
"grad_norm": 16.626436233520508,
"learning_rate": 2.6149682717431877e-05,
"loss": 2.0467,
"step": 12780
},
{
"epoch": 1.4322508398656215,
"grad_norm": 5.619150161743164,
"learning_rate": 2.6131019036954092e-05,
"loss": 2.0983,
"step": 12790
},
{
"epoch": 1.4333706606942889,
"grad_norm": 3.807325839996338,
"learning_rate": 2.6112355356476297e-05,
"loss": 1.736,
"step": 12800
},
{
"epoch": 1.4344904815229564,
"grad_norm": 16.317922592163086,
"learning_rate": 2.6093691675998512e-05,
"loss": 2.26,
"step": 12810
},
{
"epoch": 1.4356103023516238,
"grad_norm": 4.438934326171875,
"learning_rate": 2.6075027995520717e-05,
"loss": 2.2066,
"step": 12820
},
{
"epoch": 1.4367301231802911,
"grad_norm": 15.27676010131836,
"learning_rate": 2.605636431504293e-05,
"loss": 2.0826,
"step": 12830
},
{
"epoch": 1.4378499440089585,
"grad_norm": 7.73093843460083,
"learning_rate": 2.6037700634565137e-05,
"loss": 1.9085,
"step": 12840
},
{
"epoch": 1.4389697648376258,
"grad_norm": 12.442554473876953,
"learning_rate": 2.6019036954087345e-05,
"loss": 2.1945,
"step": 12850
},
{
"epoch": 1.4400895856662934,
"grad_norm": 21.156641006469727,
"learning_rate": 2.6000373273609557e-05,
"loss": 2.3811,
"step": 12860
},
{
"epoch": 1.4412094064949608,
"grad_norm": 4.013643741607666,
"learning_rate": 2.5981709593131765e-05,
"loss": 2.0067,
"step": 12870
},
{
"epoch": 1.4423292273236283,
"grad_norm": 4.505249977111816,
"learning_rate": 2.5963045912653977e-05,
"loss": 2.1332,
"step": 12880
},
{
"epoch": 1.4434490481522957,
"grad_norm": 4.283412456512451,
"learning_rate": 2.5944382232176185e-05,
"loss": 2.2716,
"step": 12890
},
{
"epoch": 1.444568868980963,
"grad_norm": 9.626873016357422,
"learning_rate": 2.5925718551698397e-05,
"loss": 2.2156,
"step": 12900
},
{
"epoch": 1.4456886898096304,
"grad_norm": 10.732905387878418,
"learning_rate": 2.5907054871220605e-05,
"loss": 2.1807,
"step": 12910
},
{
"epoch": 1.4468085106382977,
"grad_norm": 7.605788707733154,
"learning_rate": 2.5888391190742817e-05,
"loss": 1.9077,
"step": 12920
},
{
"epoch": 1.4479283314669653,
"grad_norm": 3.839841604232788,
"learning_rate": 2.5869727510265025e-05,
"loss": 1.8521,
"step": 12930
},
{
"epoch": 1.4490481522956327,
"grad_norm": 3.6968777179718018,
"learning_rate": 2.5851063829787237e-05,
"loss": 1.7962,
"step": 12940
},
{
"epoch": 1.4501679731243002,
"grad_norm": 8.658880233764648,
"learning_rate": 2.5832400149309442e-05,
"loss": 1.8974,
"step": 12950
},
{
"epoch": 1.4512877939529676,
"grad_norm": 3.764810085296631,
"learning_rate": 2.5813736468831657e-05,
"loss": 2.2889,
"step": 12960
},
{
"epoch": 1.452407614781635,
"grad_norm": 7.589803218841553,
"learning_rate": 2.5795072788353862e-05,
"loss": 1.701,
"step": 12970
},
{
"epoch": 1.4535274356103023,
"grad_norm": 15.206584930419922,
"learning_rate": 2.5776409107876077e-05,
"loss": 2.196,
"step": 12980
},
{
"epoch": 1.4546472564389696,
"grad_norm": 13.450560569763184,
"learning_rate": 2.5757745427398282e-05,
"loss": 1.7454,
"step": 12990
},
{
"epoch": 1.4557670772676372,
"grad_norm": 9.42431926727295,
"learning_rate": 2.5739081746920497e-05,
"loss": 2.2713,
"step": 13000
},
{
"epoch": 1.4568868980963046,
"grad_norm": 6.517592906951904,
"learning_rate": 2.5720418066442702e-05,
"loss": 1.9129,
"step": 13010
},
{
"epoch": 1.4580067189249721,
"grad_norm": 3.8636746406555176,
"learning_rate": 2.5701754385964917e-05,
"loss": 1.8176,
"step": 13020
},
{
"epoch": 1.4591265397536395,
"grad_norm": 4.5231523513793945,
"learning_rate": 2.5683090705487122e-05,
"loss": 2.1806,
"step": 13030
},
{
"epoch": 1.4602463605823068,
"grad_norm": 4.154500484466553,
"learning_rate": 2.5664427025009334e-05,
"loss": 2.2852,
"step": 13040
},
{
"epoch": 1.4613661814109742,
"grad_norm": 4.697875022888184,
"learning_rate": 2.5645763344531542e-05,
"loss": 2.0672,
"step": 13050
},
{
"epoch": 1.4624860022396415,
"grad_norm": 5.5377702713012695,
"learning_rate": 2.5627099664053754e-05,
"loss": 1.9248,
"step": 13060
},
{
"epoch": 1.463605823068309,
"grad_norm": 3.9264962673187256,
"learning_rate": 2.5608435983575962e-05,
"loss": 2.226,
"step": 13070
},
{
"epoch": 1.4647256438969765,
"grad_norm": 4.375810146331787,
"learning_rate": 2.558977230309817e-05,
"loss": 1.8459,
"step": 13080
},
{
"epoch": 1.465845464725644,
"grad_norm": 9.115229606628418,
"learning_rate": 2.5571108622620382e-05,
"loss": 2.0442,
"step": 13090
},
{
"epoch": 1.4669652855543114,
"grad_norm": 11.976322174072266,
"learning_rate": 2.555244494214259e-05,
"loss": 1.6853,
"step": 13100
},
{
"epoch": 1.4680851063829787,
"grad_norm": 4.857337951660156,
"learning_rate": 2.5533781261664802e-05,
"loss": 2.2852,
"step": 13110
},
{
"epoch": 1.469204927211646,
"grad_norm": 15.375901222229004,
"learning_rate": 2.551511758118701e-05,
"loss": 2.0554,
"step": 13120
},
{
"epoch": 1.4703247480403134,
"grad_norm": 10.337723731994629,
"learning_rate": 2.5496453900709222e-05,
"loss": 2.0575,
"step": 13130
},
{
"epoch": 1.471444568868981,
"grad_norm": 9.989140510559082,
"learning_rate": 2.547779022023143e-05,
"loss": 2.2119,
"step": 13140
},
{
"epoch": 1.4725643896976484,
"grad_norm": 8.458128929138184,
"learning_rate": 2.5459126539753642e-05,
"loss": 2.3751,
"step": 13150
},
{
"epoch": 1.4736842105263157,
"grad_norm": 10.176783561706543,
"learning_rate": 2.5440462859275847e-05,
"loss": 1.8748,
"step": 13160
},
{
"epoch": 1.4748040313549833,
"grad_norm": 8.326006889343262,
"learning_rate": 2.5421799178798062e-05,
"loss": 2.2096,
"step": 13170
},
{
"epoch": 1.4759238521836506,
"grad_norm": 4.975625514984131,
"learning_rate": 2.5403135498320267e-05,
"loss": 2.3334,
"step": 13180
},
{
"epoch": 1.477043673012318,
"grad_norm": 3.7235939502716064,
"learning_rate": 2.5384471817842482e-05,
"loss": 1.8743,
"step": 13190
},
{
"epoch": 1.4781634938409853,
"grad_norm": 4.65376615524292,
"learning_rate": 2.5365808137364687e-05,
"loss": 1.8204,
"step": 13200
},
{
"epoch": 1.479283314669653,
"grad_norm": 5.088535308837891,
"learning_rate": 2.5347144456886902e-05,
"loss": 1.9513,
"step": 13210
},
{
"epoch": 1.4804031354983203,
"grad_norm": 13.776253700256348,
"learning_rate": 2.5328480776409107e-05,
"loss": 1.7841,
"step": 13220
},
{
"epoch": 1.4815229563269876,
"grad_norm": 13.486886024475098,
"learning_rate": 2.5309817095931322e-05,
"loss": 2.1629,
"step": 13230
},
{
"epoch": 1.4826427771556552,
"grad_norm": 3.3172719478607178,
"learning_rate": 2.5291153415453527e-05,
"loss": 1.7235,
"step": 13240
},
{
"epoch": 1.4837625979843225,
"grad_norm": 3.367083787918091,
"learning_rate": 2.5272489734975742e-05,
"loss": 2.4903,
"step": 13250
},
{
"epoch": 1.4848824188129899,
"grad_norm": 13.519879341125488,
"learning_rate": 2.5253826054497947e-05,
"loss": 1.8464,
"step": 13260
},
{
"epoch": 1.4860022396416572,
"grad_norm": 6.741097450256348,
"learning_rate": 2.523516237402016e-05,
"loss": 1.8458,
"step": 13270
},
{
"epoch": 1.4871220604703248,
"grad_norm": 2.4831910133361816,
"learning_rate": 2.5216498693542367e-05,
"loss": 1.9829,
"step": 13280
},
{
"epoch": 1.4882418812989922,
"grad_norm": 14.098200798034668,
"learning_rate": 2.519783501306458e-05,
"loss": 2.2151,
"step": 13290
},
{
"epoch": 1.4893617021276595,
"grad_norm": 9.473960876464844,
"learning_rate": 2.5179171332586787e-05,
"loss": 2.3018,
"step": 13300
},
{
"epoch": 1.490481522956327,
"grad_norm": 10.849329948425293,
"learning_rate": 2.5160507652108995e-05,
"loss": 2.3845,
"step": 13310
},
{
"epoch": 1.4916013437849944,
"grad_norm": 5.084878921508789,
"learning_rate": 2.5141843971631207e-05,
"loss": 1.9048,
"step": 13320
},
{
"epoch": 1.4927211646136618,
"grad_norm": 3.708717107772827,
"learning_rate": 2.5123180291153416e-05,
"loss": 2.0126,
"step": 13330
},
{
"epoch": 1.4938409854423291,
"grad_norm": 4.616156578063965,
"learning_rate": 2.5104516610675627e-05,
"loss": 2.2239,
"step": 13340
},
{
"epoch": 1.4949608062709967,
"grad_norm": 4.087489604949951,
"learning_rate": 2.5085852930197836e-05,
"loss": 2.3669,
"step": 13350
},
{
"epoch": 1.496080627099664,
"grad_norm": 5.101741790771484,
"learning_rate": 2.5067189249720047e-05,
"loss": 2.1289,
"step": 13360
},
{
"epoch": 1.4972004479283314,
"grad_norm": 4.274835109710693,
"learning_rate": 2.5048525569242252e-05,
"loss": 2.194,
"step": 13370
},
{
"epoch": 1.498320268756999,
"grad_norm": 8.17119026184082,
"learning_rate": 2.5029861888764467e-05,
"loss": 1.924,
"step": 13380
},
{
"epoch": 1.4994400895856663,
"grad_norm": 7.936726093292236,
"learning_rate": 2.5011198208286672e-05,
"loss": 2.0178,
"step": 13390
},
{
"epoch": 1.5005599104143337,
"grad_norm": 4.710428714752197,
"learning_rate": 2.4992534527808887e-05,
"loss": 2.1567,
"step": 13400
},
{
"epoch": 1.501679731243001,
"grad_norm": 17.4936580657959,
"learning_rate": 2.4973870847331096e-05,
"loss": 2.1754,
"step": 13410
},
{
"epoch": 1.5027995520716684,
"grad_norm": 3.8005337715148926,
"learning_rate": 2.4955207166853304e-05,
"loss": 1.3665,
"step": 13420
},
{
"epoch": 1.503919372900336,
"grad_norm": 5.918506145477295,
"learning_rate": 2.4936543486375512e-05,
"loss": 1.8023,
"step": 13430
},
{
"epoch": 1.5050391937290035,
"grad_norm": 3.53682541847229,
"learning_rate": 2.4917879805897724e-05,
"loss": 2.1256,
"step": 13440
},
{
"epoch": 1.5061590145576709,
"grad_norm": 7.532880783081055,
"learning_rate": 2.4899216125419932e-05,
"loss": 1.6287,
"step": 13450
},
{
"epoch": 1.5072788353863382,
"grad_norm": 5.1376800537109375,
"learning_rate": 2.4880552444942144e-05,
"loss": 2.2407,
"step": 13460
},
{
"epoch": 1.5083986562150056,
"grad_norm": 3.9931938648223877,
"learning_rate": 2.4861888764464352e-05,
"loss": 2.172,
"step": 13470
},
{
"epoch": 1.509518477043673,
"grad_norm": 6.361204147338867,
"learning_rate": 2.4843225083986564e-05,
"loss": 1.7437,
"step": 13480
},
{
"epoch": 1.5106382978723403,
"grad_norm": 7.249978542327881,
"learning_rate": 2.4824561403508772e-05,
"loss": 1.8938,
"step": 13490
},
{
"epoch": 1.5117581187010078,
"grad_norm": 1.6755268573760986,
"learning_rate": 2.4805897723030984e-05,
"loss": 1.9075,
"step": 13500
},
{
"epoch": 1.5128779395296752,
"grad_norm": 3.8444862365722656,
"learning_rate": 2.4787234042553192e-05,
"loss": 2.2699,
"step": 13510
},
{
"epoch": 1.5139977603583428,
"grad_norm": 5.975175380706787,
"learning_rate": 2.4768570362075404e-05,
"loss": 1.9212,
"step": 13520
},
{
"epoch": 1.5151175811870101,
"grad_norm": 8.798042297363281,
"learning_rate": 2.4749906681597612e-05,
"loss": 2.1584,
"step": 13530
},
{
"epoch": 1.5162374020156775,
"grad_norm": 12.09782600402832,
"learning_rate": 2.473124300111982e-05,
"loss": 2.3702,
"step": 13540
},
{
"epoch": 1.5173572228443448,
"grad_norm": 6.850448131561279,
"learning_rate": 2.4712579320642032e-05,
"loss": 1.9944,
"step": 13550
},
{
"epoch": 1.5184770436730122,
"grad_norm": 9.481196403503418,
"learning_rate": 2.469391564016424e-05,
"loss": 1.9876,
"step": 13560
},
{
"epoch": 1.5195968645016797,
"grad_norm": 3.648925304412842,
"learning_rate": 2.4675251959686452e-05,
"loss": 2.1083,
"step": 13570
},
{
"epoch": 1.520716685330347,
"grad_norm": 4.310310363769531,
"learning_rate": 2.465658827920866e-05,
"loss": 1.7789,
"step": 13580
},
{
"epoch": 1.5218365061590147,
"grad_norm": 6.024844646453857,
"learning_rate": 2.4637924598730872e-05,
"loss": 2.0254,
"step": 13590
},
{
"epoch": 1.522956326987682,
"grad_norm": 9.881074905395508,
"learning_rate": 2.461926091825308e-05,
"loss": 2.0021,
"step": 13600
},
{
"epoch": 1.5240761478163494,
"grad_norm": 17.45454216003418,
"learning_rate": 2.4600597237775292e-05,
"loss": 1.8438,
"step": 13610
},
{
"epoch": 1.5251959686450167,
"grad_norm": 11.487627029418945,
"learning_rate": 2.45819335572975e-05,
"loss": 2.1005,
"step": 13620
},
{
"epoch": 1.526315789473684,
"grad_norm": 4.326568126678467,
"learning_rate": 2.4563269876819712e-05,
"loss": 2.3081,
"step": 13630
},
{
"epoch": 1.5274356103023516,
"grad_norm": 4.558961868286133,
"learning_rate": 2.454460619634192e-05,
"loss": 1.6342,
"step": 13640
},
{
"epoch": 1.528555431131019,
"grad_norm": 5.035207748413086,
"learning_rate": 2.4525942515864132e-05,
"loss": 1.785,
"step": 13650
},
{
"epoch": 1.5296752519596866,
"grad_norm": 13.78886604309082,
"learning_rate": 2.4507278835386337e-05,
"loss": 2.0897,
"step": 13660
},
{
"epoch": 1.530795072788354,
"grad_norm": 9.688088417053223,
"learning_rate": 2.448861515490855e-05,
"loss": 1.8179,
"step": 13670
},
{
"epoch": 1.5319148936170213,
"grad_norm": 5.47663688659668,
"learning_rate": 2.4469951474430757e-05,
"loss": 2.2184,
"step": 13680
},
{
"epoch": 1.5330347144456886,
"grad_norm": 16.983068466186523,
"learning_rate": 2.445128779395297e-05,
"loss": 2.1049,
"step": 13690
},
{
"epoch": 1.534154535274356,
"grad_norm": 4.000819683074951,
"learning_rate": 2.4432624113475177e-05,
"loss": 1.8282,
"step": 13700
},
{
"epoch": 1.5352743561030235,
"grad_norm": 8.065638542175293,
"learning_rate": 2.441396043299739e-05,
"loss": 1.8497,
"step": 13710
},
{
"epoch": 1.536394176931691,
"grad_norm": 7.793942451477051,
"learning_rate": 2.4395296752519597e-05,
"loss": 1.6021,
"step": 13720
},
{
"epoch": 1.5375139977603585,
"grad_norm": 4.013867378234863,
"learning_rate": 2.437663307204181e-05,
"loss": 1.585,
"step": 13730
},
{
"epoch": 1.5386338185890258,
"grad_norm": 5.563870429992676,
"learning_rate": 2.4357969391564017e-05,
"loss": 1.7468,
"step": 13740
},
{
"epoch": 1.5397536394176932,
"grad_norm": 3.9095380306243896,
"learning_rate": 2.4339305711086226e-05,
"loss": 1.8195,
"step": 13750
},
{
"epoch": 1.5408734602463605,
"grad_norm": 3.52274489402771,
"learning_rate": 2.4320642030608437e-05,
"loss": 2.2316,
"step": 13760
},
{
"epoch": 1.5419932810750279,
"grad_norm": 8.85901927947998,
"learning_rate": 2.4301978350130646e-05,
"loss": 1.933,
"step": 13770
},
{
"epoch": 1.5431131019036954,
"grad_norm": 6.702213764190674,
"learning_rate": 2.4283314669652857e-05,
"loss": 1.9826,
"step": 13780
},
{
"epoch": 1.5442329227323628,
"grad_norm": 16.482467651367188,
"learning_rate": 2.4264650989175066e-05,
"loss": 1.6521,
"step": 13790
},
{
"epoch": 1.5453527435610304,
"grad_norm": 9.306038856506348,
"learning_rate": 2.4245987308697277e-05,
"loss": 1.6685,
"step": 13800
},
{
"epoch": 1.5464725643896977,
"grad_norm": 15.01627254486084,
"learning_rate": 2.4227323628219486e-05,
"loss": 1.9639,
"step": 13810
},
{
"epoch": 1.547592385218365,
"grad_norm": 6.116465091705322,
"learning_rate": 2.4208659947741697e-05,
"loss": 1.8788,
"step": 13820
},
{
"epoch": 1.5487122060470324,
"grad_norm": 8.37788200378418,
"learning_rate": 2.4189996267263906e-05,
"loss": 2.2028,
"step": 13830
},
{
"epoch": 1.5498320268756998,
"grad_norm": 11.314355850219727,
"learning_rate": 2.4171332586786117e-05,
"loss": 2.0903,
"step": 13840
},
{
"epoch": 1.5509518477043673,
"grad_norm": 4.621204853057861,
"learning_rate": 2.4152668906308326e-05,
"loss": 1.9054,
"step": 13850
},
{
"epoch": 1.5520716685330347,
"grad_norm": 5.006560325622559,
"learning_rate": 2.4134005225830537e-05,
"loss": 1.8487,
"step": 13860
},
{
"epoch": 1.5531914893617023,
"grad_norm": 6.626319885253906,
"learning_rate": 2.4115341545352746e-05,
"loss": 1.9409,
"step": 13870
},
{
"epoch": 1.5543113101903696,
"grad_norm": 2.9766428470611572,
"learning_rate": 2.4096677864874957e-05,
"loss": 1.5523,
"step": 13880
},
{
"epoch": 1.555431131019037,
"grad_norm": 15.116016387939453,
"learning_rate": 2.4078014184397162e-05,
"loss": 2.1559,
"step": 13890
},
{
"epoch": 1.5565509518477043,
"grad_norm": 4.559194564819336,
"learning_rate": 2.4059350503919374e-05,
"loss": 1.7988,
"step": 13900
},
{
"epoch": 1.5576707726763717,
"grad_norm": 8.903999328613281,
"learning_rate": 2.4040686823441582e-05,
"loss": 2.2537,
"step": 13910
},
{
"epoch": 1.5587905935050392,
"grad_norm": 17.303340911865234,
"learning_rate": 2.4022023142963794e-05,
"loss": 1.9898,
"step": 13920
},
{
"epoch": 1.5599104143337066,
"grad_norm": 5.961864948272705,
"learning_rate": 2.4003359462486002e-05,
"loss": 2.0574,
"step": 13930
},
{
"epoch": 1.5610302351623742,
"grad_norm": 14.988414764404297,
"learning_rate": 2.3984695782008214e-05,
"loss": 2.1711,
"step": 13940
},
{
"epoch": 1.5621500559910415,
"grad_norm": 4.600130081176758,
"learning_rate": 2.3966032101530422e-05,
"loss": 2.644,
"step": 13950
},
{
"epoch": 1.5632698768197089,
"grad_norm": 4.028290271759033,
"learning_rate": 2.394736842105263e-05,
"loss": 2.007,
"step": 13960
},
{
"epoch": 1.5643896976483762,
"grad_norm": 16.717845916748047,
"learning_rate": 2.3928704740574842e-05,
"loss": 2.1179,
"step": 13970
},
{
"epoch": 1.5655095184770436,
"grad_norm": 13.442608833312988,
"learning_rate": 2.391004106009705e-05,
"loss": 2.061,
"step": 13980
},
{
"epoch": 1.5666293393057111,
"grad_norm": 4.753323078155518,
"learning_rate": 2.3891377379619262e-05,
"loss": 2.1708,
"step": 13990
},
{
"epoch": 1.5677491601343785,
"grad_norm": 4.6569600105285645,
"learning_rate": 2.387271369914147e-05,
"loss": 1.9684,
"step": 14000
},
{
"epoch": 1.568868980963046,
"grad_norm": 3.8971803188323975,
"learning_rate": 2.3854050018663682e-05,
"loss": 2.4244,
"step": 14010
},
{
"epoch": 1.5699888017917134,
"grad_norm": 4.851494789123535,
"learning_rate": 2.383538633818589e-05,
"loss": 2.0636,
"step": 14020
},
{
"epoch": 1.5711086226203808,
"grad_norm": 3.54333758354187,
"learning_rate": 2.3816722657708102e-05,
"loss": 1.9378,
"step": 14030
},
{
"epoch": 1.572228443449048,
"grad_norm": 6.895486831665039,
"learning_rate": 2.379805897723031e-05,
"loss": 1.7275,
"step": 14040
},
{
"epoch": 1.5733482642777155,
"grad_norm": 4.1524739265441895,
"learning_rate": 2.3779395296752522e-05,
"loss": 2.2593,
"step": 14050
},
{
"epoch": 1.574468085106383,
"grad_norm": 4.263177394866943,
"learning_rate": 2.376073161627473e-05,
"loss": 1.8631,
"step": 14060
},
{
"epoch": 1.5755879059350504,
"grad_norm": 14.865904808044434,
"learning_rate": 2.3742067935796942e-05,
"loss": 1.6639,
"step": 14070
},
{
"epoch": 1.576707726763718,
"grad_norm": 4.426602840423584,
"learning_rate": 2.372340425531915e-05,
"loss": 1.987,
"step": 14080
},
{
"epoch": 1.5778275475923853,
"grad_norm": 4.001722812652588,
"learning_rate": 2.3704740574841362e-05,
"loss": 2.0681,
"step": 14090
},
{
"epoch": 1.5789473684210527,
"grad_norm": 3.0097544193267822,
"learning_rate": 2.368607689436357e-05,
"loss": 2.3647,
"step": 14100
},
{
"epoch": 1.58006718924972,
"grad_norm": 3.5997202396392822,
"learning_rate": 2.3667413213885782e-05,
"loss": 2.1803,
"step": 14110
},
{
"epoch": 1.5811870100783874,
"grad_norm": 4.4173784255981445,
"learning_rate": 2.3648749533407987e-05,
"loss": 1.9483,
"step": 14120
},
{
"epoch": 1.5823068309070547,
"grad_norm": 10.679607391357422,
"learning_rate": 2.36300858529302e-05,
"loss": 2.2298,
"step": 14130
},
{
"epoch": 1.5834266517357223,
"grad_norm": 11.639561653137207,
"learning_rate": 2.3611422172452407e-05,
"loss": 1.6313,
"step": 14140
},
{
"epoch": 1.5845464725643899,
"grad_norm": 7.233905792236328,
"learning_rate": 2.359275849197462e-05,
"loss": 2.2515,
"step": 14150
},
{
"epoch": 1.5856662933930572,
"grad_norm": 9.425190925598145,
"learning_rate": 2.3574094811496827e-05,
"loss": 1.7644,
"step": 14160
},
{
"epoch": 1.5867861142217246,
"grad_norm": 13.162445068359375,
"learning_rate": 2.355543113101904e-05,
"loss": 1.937,
"step": 14170
},
{
"epoch": 1.587905935050392,
"grad_norm": 5.631926536560059,
"learning_rate": 2.3536767450541247e-05,
"loss": 1.7986,
"step": 14180
},
{
"epoch": 1.5890257558790593,
"grad_norm": 14.373632431030273,
"learning_rate": 2.3518103770063456e-05,
"loss": 1.9261,
"step": 14190
},
{
"epoch": 1.5901455767077266,
"grad_norm": 10.449909210205078,
"learning_rate": 2.3499440089585667e-05,
"loss": 2.1721,
"step": 14200
},
{
"epoch": 1.5912653975363942,
"grad_norm": 3.89623761177063,
"learning_rate": 2.3480776409107876e-05,
"loss": 2.0745,
"step": 14210
},
{
"epoch": 1.5923852183650617,
"grad_norm": 4.33968448638916,
"learning_rate": 2.3462112728630087e-05,
"loss": 1.9008,
"step": 14220
},
{
"epoch": 1.593505039193729,
"grad_norm": 9.331836700439453,
"learning_rate": 2.3443449048152296e-05,
"loss": 2.0979,
"step": 14230
},
{
"epoch": 1.5946248600223965,
"grad_norm": 10.781360626220703,
"learning_rate": 2.3424785367674507e-05,
"loss": 2.2364,
"step": 14240
},
{
"epoch": 1.5957446808510638,
"grad_norm": 5.107909679412842,
"learning_rate": 2.3406121687196716e-05,
"loss": 2.0156,
"step": 14250
},
{
"epoch": 1.5968645016797312,
"grad_norm": 3.5812559127807617,
"learning_rate": 2.3387458006718927e-05,
"loss": 2.2581,
"step": 14260
},
{
"epoch": 1.5979843225083985,
"grad_norm": 13.384634017944336,
"learning_rate": 2.3368794326241136e-05,
"loss": 2.1169,
"step": 14270
},
{
"epoch": 1.599104143337066,
"grad_norm": 5.330173969268799,
"learning_rate": 2.3350130645763347e-05,
"loss": 1.7373,
"step": 14280
},
{
"epoch": 1.6002239641657336,
"grad_norm": 7.670846462249756,
"learning_rate": 2.3331466965285556e-05,
"loss": 1.9934,
"step": 14290
},
{
"epoch": 1.601343784994401,
"grad_norm": 4.610497951507568,
"learning_rate": 2.3312803284807767e-05,
"loss": 1.5182,
"step": 14300
},
{
"epoch": 1.6024636058230683,
"grad_norm": 7.437980651855469,
"learning_rate": 2.3294139604329976e-05,
"loss": 1.7472,
"step": 14310
},
{
"epoch": 1.6035834266517357,
"grad_norm": 6.804001808166504,
"learning_rate": 2.3275475923852187e-05,
"loss": 1.5694,
"step": 14320
},
{
"epoch": 1.604703247480403,
"grad_norm": 11.887704849243164,
"learning_rate": 2.3256812243374396e-05,
"loss": 1.7139,
"step": 14330
},
{
"epoch": 1.6058230683090704,
"grad_norm": 3.467298746109009,
"learning_rate": 2.3238148562896604e-05,
"loss": 2.1763,
"step": 14340
},
{
"epoch": 1.606942889137738,
"grad_norm": 8.796690940856934,
"learning_rate": 2.3219484882418816e-05,
"loss": 1.9841,
"step": 14350
},
{
"epoch": 1.6080627099664053,
"grad_norm": 4.704202651977539,
"learning_rate": 2.3200821201941024e-05,
"loss": 2.0089,
"step": 14360
},
{
"epoch": 1.609182530795073,
"grad_norm": 7.0840163230896,
"learning_rate": 2.3182157521463232e-05,
"loss": 1.9829,
"step": 14370
},
{
"epoch": 1.6103023516237402,
"grad_norm": 4.396951198577881,
"learning_rate": 2.3163493840985444e-05,
"loss": 1.8063,
"step": 14380
},
{
"epoch": 1.6114221724524076,
"grad_norm": 15.083405494689941,
"learning_rate": 2.3144830160507652e-05,
"loss": 1.6894,
"step": 14390
},
{
"epoch": 1.612541993281075,
"grad_norm": 4.376276969909668,
"learning_rate": 2.312616648002986e-05,
"loss": 1.8177,
"step": 14400
},
{
"epoch": 1.6136618141097423,
"grad_norm": 5.626823902130127,
"learning_rate": 2.3107502799552072e-05,
"loss": 2.002,
"step": 14410
},
{
"epoch": 1.6147816349384099,
"grad_norm": 6.14142370223999,
"learning_rate": 2.308883911907428e-05,
"loss": 1.798,
"step": 14420
},
{
"epoch": 1.6159014557670772,
"grad_norm": 4.125568866729736,
"learning_rate": 2.3070175438596492e-05,
"loss": 1.9081,
"step": 14430
},
{
"epoch": 1.6170212765957448,
"grad_norm": 7.834494590759277,
"learning_rate": 2.30515117581187e-05,
"loss": 2.014,
"step": 14440
},
{
"epoch": 1.6181410974244121,
"grad_norm": 14.719797134399414,
"learning_rate": 2.3032848077640912e-05,
"loss": 2.1659,
"step": 14450
},
{
"epoch": 1.6192609182530795,
"grad_norm": 10.502878189086914,
"learning_rate": 2.301418439716312e-05,
"loss": 2.0006,
"step": 14460
},
{
"epoch": 1.6203807390817468,
"grad_norm": 4.505667686462402,
"learning_rate": 2.2995520716685332e-05,
"loss": 2.0919,
"step": 14470
},
{
"epoch": 1.6215005599104142,
"grad_norm": 8.213534355163574,
"learning_rate": 2.297685703620754e-05,
"loss": 1.8744,
"step": 14480
},
{
"epoch": 1.6226203807390818,
"grad_norm": 3.4308199882507324,
"learning_rate": 2.2958193355729752e-05,
"loss": 2.0321,
"step": 14490
},
{
"epoch": 1.6237402015677491,
"grad_norm": 8.835275650024414,
"learning_rate": 2.293952967525196e-05,
"loss": 2.0342,
"step": 14500
},
{
"epoch": 1.6248600223964167,
"grad_norm": 13.789974212646484,
"learning_rate": 2.2920865994774172e-05,
"loss": 2.1533,
"step": 14510
},
{
"epoch": 1.625979843225084,
"grad_norm": 4.554170608520508,
"learning_rate": 2.290220231429638e-05,
"loss": 1.8698,
"step": 14520
},
{
"epoch": 1.6270996640537514,
"grad_norm": 4.612897872924805,
"learning_rate": 2.2883538633818592e-05,
"loss": 1.8866,
"step": 14530
},
{
"epoch": 1.6282194848824187,
"grad_norm": 3.9619531631469727,
"learning_rate": 2.28648749533408e-05,
"loss": 1.795,
"step": 14540
},
{
"epoch": 1.629339305711086,
"grad_norm": 13.929571151733398,
"learning_rate": 2.2846211272863012e-05,
"loss": 1.7292,
"step": 14550
},
{
"epoch": 1.6304591265397537,
"grad_norm": 3.8777716159820557,
"learning_rate": 2.282754759238522e-05,
"loss": 2.1625,
"step": 14560
},
{
"epoch": 1.631578947368421,
"grad_norm": 10.221095085144043,
"learning_rate": 2.280888391190743e-05,
"loss": 1.9096,
"step": 14570
},
{
"epoch": 1.6326987681970886,
"grad_norm": 11.300918579101562,
"learning_rate": 2.279022023142964e-05,
"loss": 2.0733,
"step": 14580
},
{
"epoch": 1.633818589025756,
"grad_norm": 3.6058292388916016,
"learning_rate": 2.277155655095185e-05,
"loss": 1.7012,
"step": 14590
},
{
"epoch": 1.6349384098544233,
"grad_norm": 8.574664115905762,
"learning_rate": 2.2752892870474057e-05,
"loss": 1.7154,
"step": 14600
},
{
"epoch": 1.6360582306830906,
"grad_norm": 7.9404096603393555,
"learning_rate": 2.2734229189996266e-05,
"loss": 2.0302,
"step": 14610
},
{
"epoch": 1.637178051511758,
"grad_norm": 4.232813358306885,
"learning_rate": 2.2715565509518477e-05,
"loss": 2.2532,
"step": 14620
},
{
"epoch": 1.6382978723404256,
"grad_norm": 4.7303643226623535,
"learning_rate": 2.2696901829040686e-05,
"loss": 1.6629,
"step": 14630
},
{
"epoch": 1.639417693169093,
"grad_norm": 10.848748207092285,
"learning_rate": 2.2678238148562897e-05,
"loss": 2.2253,
"step": 14640
},
{
"epoch": 1.6405375139977605,
"grad_norm": 13.379515647888184,
"learning_rate": 2.2659574468085106e-05,
"loss": 1.9358,
"step": 14650
},
{
"epoch": 1.6416573348264278,
"grad_norm": 3.6262221336364746,
"learning_rate": 2.2640910787607317e-05,
"loss": 1.698,
"step": 14660
},
{
"epoch": 1.6427771556550952,
"grad_norm": 4.654207229614258,
"learning_rate": 2.2622247107129526e-05,
"loss": 1.6241,
"step": 14670
},
{
"epoch": 1.6438969764837625,
"grad_norm": 18.657798767089844,
"learning_rate": 2.2603583426651737e-05,
"loss": 2.1006,
"step": 14680
},
{
"epoch": 1.64501679731243,
"grad_norm": 11.848627090454102,
"learning_rate": 2.2584919746173946e-05,
"loss": 2.1113,
"step": 14690
},
{
"epoch": 1.6461366181410975,
"grad_norm": 9.06197738647461,
"learning_rate": 2.2566256065696157e-05,
"loss": 1.6563,
"step": 14700
},
{
"epoch": 1.6472564389697648,
"grad_norm": 13.184223175048828,
"learning_rate": 2.2547592385218366e-05,
"loss": 2.3761,
"step": 14710
},
{
"epoch": 1.6483762597984324,
"grad_norm": 11.905593872070312,
"learning_rate": 2.2528928704740577e-05,
"loss": 2.0665,
"step": 14720
},
{
"epoch": 1.6494960806270997,
"grad_norm": 3.726668357849121,
"learning_rate": 2.2510265024262786e-05,
"loss": 1.7046,
"step": 14730
},
{
"epoch": 1.650615901455767,
"grad_norm": 8.543424606323242,
"learning_rate": 2.2491601343784997e-05,
"loss": 1.8803,
"step": 14740
},
{
"epoch": 1.6517357222844344,
"grad_norm": 7.099303245544434,
"learning_rate": 2.2472937663307206e-05,
"loss": 1.9246,
"step": 14750
},
{
"epoch": 1.6528555431131018,
"grad_norm": 11.42622184753418,
"learning_rate": 2.2454273982829417e-05,
"loss": 1.7691,
"step": 14760
},
{
"epoch": 1.6539753639417694,
"grad_norm": 11.167017936706543,
"learning_rate": 2.2435610302351626e-05,
"loss": 2.0076,
"step": 14770
},
{
"epoch": 1.6550951847704367,
"grad_norm": 3.956162214279175,
"learning_rate": 2.2416946621873834e-05,
"loss": 2.3021,
"step": 14780
},
{
"epoch": 1.6562150055991043,
"grad_norm": 7.405086994171143,
"learning_rate": 2.2398282941396046e-05,
"loss": 2.2505,
"step": 14790
},
{
"epoch": 1.6573348264277716,
"grad_norm": 6.949024200439453,
"learning_rate": 2.2379619260918254e-05,
"loss": 1.6625,
"step": 14800
},
{
"epoch": 1.658454647256439,
"grad_norm": 5.116047382354736,
"learning_rate": 2.2360955580440466e-05,
"loss": 2.0589,
"step": 14810
},
{
"epoch": 1.6595744680851063,
"grad_norm": 4.537695407867432,
"learning_rate": 2.2342291899962674e-05,
"loss": 1.7413,
"step": 14820
},
{
"epoch": 1.6606942889137737,
"grad_norm": 4.4663004875183105,
"learning_rate": 2.2323628219484882e-05,
"loss": 1.8535,
"step": 14830
},
{
"epoch": 1.6618141097424413,
"grad_norm": 4.609202861785889,
"learning_rate": 2.230496453900709e-05,
"loss": 2.2902,
"step": 14840
},
{
"epoch": 1.6629339305711086,
"grad_norm": 4.477583885192871,
"learning_rate": 2.2286300858529302e-05,
"loss": 2.2012,
"step": 14850
},
{
"epoch": 1.6640537513997762,
"grad_norm": 19.314029693603516,
"learning_rate": 2.226763717805151e-05,
"loss": 2.3641,
"step": 14860
},
{
"epoch": 1.6651735722284435,
"grad_norm": 7.962518692016602,
"learning_rate": 2.2248973497573722e-05,
"loss": 2.1222,
"step": 14870
},
{
"epoch": 1.6662933930571109,
"grad_norm": 4.907433986663818,
"learning_rate": 2.223030981709593e-05,
"loss": 1.8145,
"step": 14880
},
{
"epoch": 1.6674132138857782,
"grad_norm": 8.89202880859375,
"learning_rate": 2.2211646136618142e-05,
"loss": 2.2891,
"step": 14890
},
{
"epoch": 1.6685330347144456,
"grad_norm": 9.831536293029785,
"learning_rate": 2.219298245614035e-05,
"loss": 2.0534,
"step": 14900
},
{
"epoch": 1.6696528555431132,
"grad_norm": 3.9551281929016113,
"learning_rate": 2.2174318775662562e-05,
"loss": 1.6471,
"step": 14910
},
{
"epoch": 1.6707726763717805,
"grad_norm": 4.45933723449707,
"learning_rate": 2.215565509518477e-05,
"loss": 2.2105,
"step": 14920
},
{
"epoch": 1.671892497200448,
"grad_norm": 4.2659783363342285,
"learning_rate": 2.2136991414706982e-05,
"loss": 1.9115,
"step": 14930
},
{
"epoch": 1.6730123180291154,
"grad_norm": 5.429946422576904,
"learning_rate": 2.211832773422919e-05,
"loss": 1.9213,
"step": 14940
},
{
"epoch": 1.6741321388577828,
"grad_norm": 12.490592956542969,
"learning_rate": 2.2099664053751402e-05,
"loss": 2.1818,
"step": 14950
},
{
"epoch": 1.6752519596864501,
"grad_norm": 5.016933917999268,
"learning_rate": 2.208100037327361e-05,
"loss": 2.3414,
"step": 14960
},
{
"epoch": 1.6763717805151175,
"grad_norm": 11.83879566192627,
"learning_rate": 2.2062336692795822e-05,
"loss": 1.5455,
"step": 14970
},
{
"epoch": 1.6774916013437848,
"grad_norm": 5.847216606140137,
"learning_rate": 2.204367301231803e-05,
"loss": 1.9266,
"step": 14980
},
{
"epoch": 1.6786114221724524,
"grad_norm": 5.979493141174316,
"learning_rate": 2.202500933184024e-05,
"loss": 1.5915,
"step": 14990
},
{
"epoch": 1.67973124300112,
"grad_norm": 10.372356414794922,
"learning_rate": 2.200634565136245e-05,
"loss": 1.5493,
"step": 15000
},
{
"epoch": 1.6808510638297873,
"grad_norm": 5.963084697723389,
"learning_rate": 2.198768197088466e-05,
"loss": 1.9619,
"step": 15010
},
{
"epoch": 1.6819708846584547,
"grad_norm": 10.619939804077148,
"learning_rate": 2.196901829040687e-05,
"loss": 2.351,
"step": 15020
},
{
"epoch": 1.683090705487122,
"grad_norm": 4.406311511993408,
"learning_rate": 2.195035460992908e-05,
"loss": 1.7913,
"step": 15030
},
{
"epoch": 1.6842105263157894,
"grad_norm": 4.74340295791626,
"learning_rate": 2.193169092945129e-05,
"loss": 1.6961,
"step": 15040
},
{
"epoch": 1.6853303471444567,
"grad_norm": 10.785073280334473,
"learning_rate": 2.1913027248973496e-05,
"loss": 1.6512,
"step": 15050
},
{
"epoch": 1.6864501679731243,
"grad_norm": 7.105363368988037,
"learning_rate": 2.1894363568495707e-05,
"loss": 1.8099,
"step": 15060
},
{
"epoch": 1.6875699888017919,
"grad_norm": 4.944157123565674,
"learning_rate": 2.1875699888017916e-05,
"loss": 1.9102,
"step": 15070
},
{
"epoch": 1.6886898096304592,
"grad_norm": 4.357661724090576,
"learning_rate": 2.1857036207540127e-05,
"loss": 2.1368,
"step": 15080
},
{
"epoch": 1.6898096304591266,
"grad_norm": 4.5606207847595215,
"learning_rate": 2.1838372527062336e-05,
"loss": 1.9584,
"step": 15090
},
{
"epoch": 1.690929451287794,
"grad_norm": 9.327258110046387,
"learning_rate": 2.1819708846584547e-05,
"loss": 1.9882,
"step": 15100
},
{
"epoch": 1.6920492721164613,
"grad_norm": 4.126927375793457,
"learning_rate": 2.1801045166106756e-05,
"loss": 2.4206,
"step": 15110
},
{
"epoch": 1.6931690929451286,
"grad_norm": 16.299972534179688,
"learning_rate": 2.1782381485628967e-05,
"loss": 2.2734,
"step": 15120
},
{
"epoch": 1.6942889137737962,
"grad_norm": 7.43623685836792,
"learning_rate": 2.1763717805151176e-05,
"loss": 1.7911,
"step": 15130
},
{
"epoch": 1.6954087346024636,
"grad_norm": 4.106508255004883,
"learning_rate": 2.1745054124673387e-05,
"loss": 2.1128,
"step": 15140
},
{
"epoch": 1.6965285554311311,
"grad_norm": 13.86871337890625,
"learning_rate": 2.1726390444195596e-05,
"loss": 1.7943,
"step": 15150
},
{
"epoch": 1.6976483762597985,
"grad_norm": 6.735291957855225,
"learning_rate": 2.1707726763717807e-05,
"loss": 1.5294,
"step": 15160
},
{
"epoch": 1.6987681970884658,
"grad_norm": 5.739629745483398,
"learning_rate": 2.1689063083240016e-05,
"loss": 2.0369,
"step": 15170
},
{
"epoch": 1.6998880179171332,
"grad_norm": 5.946849822998047,
"learning_rate": 2.1670399402762227e-05,
"loss": 2.0375,
"step": 15180
},
{
"epoch": 1.7010078387458005,
"grad_norm": 4.547854423522949,
"learning_rate": 2.1651735722284436e-05,
"loss": 2.0474,
"step": 15190
},
{
"epoch": 1.702127659574468,
"grad_norm": 6.0930070877075195,
"learning_rate": 2.1633072041806644e-05,
"loss": 1.7421,
"step": 15200
},
{
"epoch": 1.7032474804031354,
"grad_norm": 4.635743141174316,
"learning_rate": 2.1614408361328856e-05,
"loss": 2.3968,
"step": 15210
},
{
"epoch": 1.704367301231803,
"grad_norm": 2.2271034717559814,
"learning_rate": 2.1595744680851064e-05,
"loss": 1.8041,
"step": 15220
},
{
"epoch": 1.7054871220604704,
"grad_norm": 6.688762664794922,
"learning_rate": 2.1577081000373276e-05,
"loss": 1.8699,
"step": 15230
},
{
"epoch": 1.7066069428891377,
"grad_norm": 4.520251274108887,
"learning_rate": 2.1558417319895484e-05,
"loss": 1.7934,
"step": 15240
},
{
"epoch": 1.707726763717805,
"grad_norm": 5.595422744750977,
"learning_rate": 2.1539753639417696e-05,
"loss": 1.8382,
"step": 15250
},
{
"epoch": 1.7088465845464724,
"grad_norm": 10.029720306396484,
"learning_rate": 2.1521089958939904e-05,
"loss": 1.9193,
"step": 15260
},
{
"epoch": 1.70996640537514,
"grad_norm": 5.297349452972412,
"learning_rate": 2.1502426278462116e-05,
"loss": 2.0587,
"step": 15270
},
{
"epoch": 1.7110862262038073,
"grad_norm": 16.516834259033203,
"learning_rate": 2.1483762597984324e-05,
"loss": 2.0523,
"step": 15280
},
{
"epoch": 1.712206047032475,
"grad_norm": 3.686732292175293,
"learning_rate": 2.1465098917506532e-05,
"loss": 1.8393,
"step": 15290
},
{
"epoch": 1.7133258678611423,
"grad_norm": 8.316386222839355,
"learning_rate": 2.144643523702874e-05,
"loss": 2.1096,
"step": 15300
},
{
"epoch": 1.7144456886898096,
"grad_norm": 14.509235382080078,
"learning_rate": 2.1427771556550952e-05,
"loss": 2.2452,
"step": 15310
},
{
"epoch": 1.715565509518477,
"grad_norm": 12.271526336669922,
"learning_rate": 2.140910787607316e-05,
"loss": 1.8003,
"step": 15320
},
{
"epoch": 1.7166853303471443,
"grad_norm": 16.485271453857422,
"learning_rate": 2.1390444195595372e-05,
"loss": 1.8001,
"step": 15330
},
{
"epoch": 1.717805151175812,
"grad_norm": 4.867336273193359,
"learning_rate": 2.137178051511758e-05,
"loss": 1.8425,
"step": 15340
},
{
"epoch": 1.7189249720044792,
"grad_norm": 3.5718979835510254,
"learning_rate": 2.1353116834639792e-05,
"loss": 1.6484,
"step": 15350
},
{
"epoch": 1.7200447928331468,
"grad_norm": 26.389127731323242,
"learning_rate": 2.1334453154162e-05,
"loss": 1.7185,
"step": 15360
},
{
"epoch": 1.7211646136618142,
"grad_norm": 4.237075328826904,
"learning_rate": 2.1315789473684212e-05,
"loss": 1.8523,
"step": 15370
},
{
"epoch": 1.7222844344904815,
"grad_norm": 11.237632751464844,
"learning_rate": 2.129712579320642e-05,
"loss": 2.3631,
"step": 15380
},
{
"epoch": 1.7234042553191489,
"grad_norm": 4.580799579620361,
"learning_rate": 2.1278462112728632e-05,
"loss": 2.2933,
"step": 15390
},
{
"epoch": 1.7245240761478162,
"grad_norm": 5.851457118988037,
"learning_rate": 2.125979843225084e-05,
"loss": 1.9909,
"step": 15400
},
{
"epoch": 1.7256438969764838,
"grad_norm": 4.036518573760986,
"learning_rate": 2.1241134751773052e-05,
"loss": 2.1115,
"step": 15410
},
{
"epoch": 1.7267637178051511,
"grad_norm": 10.545909881591797,
"learning_rate": 2.122247107129526e-05,
"loss": 1.7327,
"step": 15420
},
{
"epoch": 1.7278835386338187,
"grad_norm": 1.6649363040924072,
"learning_rate": 2.120380739081747e-05,
"loss": 1.7729,
"step": 15430
},
{
"epoch": 1.729003359462486,
"grad_norm": 10.285140991210938,
"learning_rate": 2.118514371033968e-05,
"loss": 1.9684,
"step": 15440
},
{
"epoch": 1.7301231802911534,
"grad_norm": 10.789081573486328,
"learning_rate": 2.116648002986189e-05,
"loss": 1.9455,
"step": 15450
},
{
"epoch": 1.7312430011198208,
"grad_norm": 12.79870891571045,
"learning_rate": 2.11478163493841e-05,
"loss": 2.0917,
"step": 15460
},
{
"epoch": 1.7323628219484881,
"grad_norm": 7.7222065925598145,
"learning_rate": 2.112915266890631e-05,
"loss": 2.064,
"step": 15470
},
{
"epoch": 1.7334826427771557,
"grad_norm": 4.771847724914551,
"learning_rate": 2.111048898842852e-05,
"loss": 1.9891,
"step": 15480
},
{
"epoch": 1.734602463605823,
"grad_norm": 3.90159273147583,
"learning_rate": 2.109182530795073e-05,
"loss": 2.0179,
"step": 15490
},
{
"epoch": 1.7357222844344906,
"grad_norm": 7.232120037078857,
"learning_rate": 2.107316162747294e-05,
"loss": 2.3234,
"step": 15500
},
{
"epoch": 1.736842105263158,
"grad_norm": 5.076690196990967,
"learning_rate": 2.105449794699515e-05,
"loss": 2.2486,
"step": 15510
},
{
"epoch": 1.7379619260918253,
"grad_norm": 12.581092834472656,
"learning_rate": 2.1035834266517357e-05,
"loss": 2.1017,
"step": 15520
},
{
"epoch": 1.7390817469204927,
"grad_norm": 7.462939262390137,
"learning_rate": 2.1017170586039566e-05,
"loss": 1.8854,
"step": 15530
},
{
"epoch": 1.74020156774916,
"grad_norm": 5.599474906921387,
"learning_rate": 2.0998506905561777e-05,
"loss": 1.987,
"step": 15540
},
{
"epoch": 1.7413213885778276,
"grad_norm": 2.1986734867095947,
"learning_rate": 2.0979843225083986e-05,
"loss": 1.3435,
"step": 15550
},
{
"epoch": 1.742441209406495,
"grad_norm": 10.124311447143555,
"learning_rate": 2.0961179544606197e-05,
"loss": 2.2367,
"step": 15560
},
{
"epoch": 1.7435610302351625,
"grad_norm": 11.939183235168457,
"learning_rate": 2.0942515864128406e-05,
"loss": 2.5315,
"step": 15570
},
{
"epoch": 1.7446808510638299,
"grad_norm": 10.486783027648926,
"learning_rate": 2.0923852183650617e-05,
"loss": 2.1949,
"step": 15580
},
{
"epoch": 1.7458006718924972,
"grad_norm": 10.252884864807129,
"learning_rate": 2.0905188503172826e-05,
"loss": 1.7758,
"step": 15590
},
{
"epoch": 1.7469204927211646,
"grad_norm": 3.4020817279815674,
"learning_rate": 2.0886524822695037e-05,
"loss": 1.7837,
"step": 15600
},
{
"epoch": 1.748040313549832,
"grad_norm": 7.330861568450928,
"learning_rate": 2.0867861142217246e-05,
"loss": 1.4382,
"step": 15610
},
{
"epoch": 1.7491601343784995,
"grad_norm": 8.330341339111328,
"learning_rate": 2.0849197461739457e-05,
"loss": 2.185,
"step": 15620
},
{
"epoch": 1.7502799552071668,
"grad_norm": 4.57420015335083,
"learning_rate": 2.0830533781261666e-05,
"loss": 2.1382,
"step": 15630
},
{
"epoch": 1.7513997760358344,
"grad_norm": 14.915903091430664,
"learning_rate": 2.0811870100783874e-05,
"loss": 1.8498,
"step": 15640
},
{
"epoch": 1.7525195968645018,
"grad_norm": 4.439641952514648,
"learning_rate": 2.0793206420306086e-05,
"loss": 1.9074,
"step": 15650
},
{
"epoch": 1.7536394176931691,
"grad_norm": 4.21160364151001,
"learning_rate": 2.0774542739828294e-05,
"loss": 2.0153,
"step": 15660
},
{
"epoch": 1.7547592385218365,
"grad_norm": 3.9740211963653564,
"learning_rate": 2.0755879059350506e-05,
"loss": 2.1055,
"step": 15670
},
{
"epoch": 1.7558790593505038,
"grad_norm": 8.004166603088379,
"learning_rate": 2.0737215378872714e-05,
"loss": 1.9654,
"step": 15680
},
{
"epoch": 1.7569988801791714,
"grad_norm": 12.65368938446045,
"learning_rate": 2.0718551698394926e-05,
"loss": 2.3388,
"step": 15690
},
{
"epoch": 1.7581187010078387,
"grad_norm": 7.4648566246032715,
"learning_rate": 2.0699888017917134e-05,
"loss": 1.9942,
"step": 15700
},
{
"epoch": 1.7592385218365063,
"grad_norm": 3.306600570678711,
"learning_rate": 2.0681224337439346e-05,
"loss": 1.7987,
"step": 15710
},
{
"epoch": 1.7603583426651737,
"grad_norm": 4.179432392120361,
"learning_rate": 2.0662560656961554e-05,
"loss": 2.2372,
"step": 15720
},
{
"epoch": 1.761478163493841,
"grad_norm": 13.356534004211426,
"learning_rate": 2.0643896976483766e-05,
"loss": 1.5609,
"step": 15730
},
{
"epoch": 1.7625979843225084,
"grad_norm": 9.077022552490234,
"learning_rate": 2.0625233296005974e-05,
"loss": 1.9327,
"step": 15740
},
{
"epoch": 1.7637178051511757,
"grad_norm": 3.596141815185547,
"learning_rate": 2.0606569615528182e-05,
"loss": 1.971,
"step": 15750
},
{
"epoch": 1.764837625979843,
"grad_norm": 3.860454559326172,
"learning_rate": 2.058790593505039e-05,
"loss": 2.3288,
"step": 15760
},
{
"epoch": 1.7659574468085106,
"grad_norm": 12.444572448730469,
"learning_rate": 2.0569242254572602e-05,
"loss": 1.8277,
"step": 15770
},
{
"epoch": 1.7670772676371782,
"grad_norm": 10.383987426757812,
"learning_rate": 2.055057857409481e-05,
"loss": 2.2227,
"step": 15780
},
{
"epoch": 1.7681970884658456,
"grad_norm": 9.582054138183594,
"learning_rate": 2.0531914893617022e-05,
"loss": 1.7383,
"step": 15790
},
{
"epoch": 1.769316909294513,
"grad_norm": 12.529754638671875,
"learning_rate": 2.051325121313923e-05,
"loss": 1.9381,
"step": 15800
},
{
"epoch": 1.7704367301231803,
"grad_norm": 8.915084838867188,
"learning_rate": 2.0494587532661442e-05,
"loss": 1.668,
"step": 15810
},
{
"epoch": 1.7715565509518476,
"grad_norm": 13.440780639648438,
"learning_rate": 2.047592385218365e-05,
"loss": 2.1798,
"step": 15820
},
{
"epoch": 1.772676371780515,
"grad_norm": 7.045945167541504,
"learning_rate": 2.0457260171705862e-05,
"loss": 1.9319,
"step": 15830
},
{
"epoch": 1.7737961926091825,
"grad_norm": 2.6684410572052,
"learning_rate": 2.043859649122807e-05,
"loss": 1.7845,
"step": 15840
},
{
"epoch": 1.77491601343785,
"grad_norm": 11.95478343963623,
"learning_rate": 2.041993281075028e-05,
"loss": 2.3651,
"step": 15850
},
{
"epoch": 1.7760358342665175,
"grad_norm": 4.880320072174072,
"learning_rate": 2.040126913027249e-05,
"loss": 1.9852,
"step": 15860
},
{
"epoch": 1.7771556550951848,
"grad_norm": 12.231099128723145,
"learning_rate": 2.03826054497947e-05,
"loss": 2.0981,
"step": 15870
},
{
"epoch": 1.7782754759238522,
"grad_norm": 7.029375076293945,
"learning_rate": 2.036394176931691e-05,
"loss": 2.1145,
"step": 15880
},
{
"epoch": 1.7793952967525195,
"grad_norm": 6.398838043212891,
"learning_rate": 2.034527808883912e-05,
"loss": 2.0601,
"step": 15890
},
{
"epoch": 1.7805151175811869,
"grad_norm": 4.79886531829834,
"learning_rate": 2.032661440836133e-05,
"loss": 1.7714,
"step": 15900
},
{
"epoch": 1.7816349384098544,
"grad_norm": 5.065080642700195,
"learning_rate": 2.030795072788354e-05,
"loss": 1.5375,
"step": 15910
},
{
"epoch": 1.782754759238522,
"grad_norm": 4.638917446136475,
"learning_rate": 2.028928704740575e-05,
"loss": 2.0297,
"step": 15920
},
{
"epoch": 1.7838745800671894,
"grad_norm": 8.476948738098145,
"learning_rate": 2.027062336692796e-05,
"loss": 2.2116,
"step": 15930
},
{
"epoch": 1.7849944008958567,
"grad_norm": 14.950053215026855,
"learning_rate": 2.025195968645017e-05,
"loss": 2.2304,
"step": 15940
},
{
"epoch": 1.786114221724524,
"grad_norm": 4.521371841430664,
"learning_rate": 2.023329600597238e-05,
"loss": 2.205,
"step": 15950
},
{
"epoch": 1.7872340425531914,
"grad_norm": 4.019099712371826,
"learning_rate": 2.021463232549459e-05,
"loss": 1.9879,
"step": 15960
},
{
"epoch": 1.7883538633818588,
"grad_norm": 10.061615943908691,
"learning_rate": 2.01959686450168e-05,
"loss": 1.6731,
"step": 15970
},
{
"epoch": 1.7894736842105263,
"grad_norm": 8.204621315002441,
"learning_rate": 2.017730496453901e-05,
"loss": 1.9748,
"step": 15980
},
{
"epoch": 1.7905935050391937,
"grad_norm": 5.247344493865967,
"learning_rate": 2.0158641284061216e-05,
"loss": 1.6542,
"step": 15990
},
{
"epoch": 1.7917133258678613,
"grad_norm": 4.7308735847473145,
"learning_rate": 2.0139977603583427e-05,
"loss": 1.4543,
"step": 16000
},
{
"epoch": 1.7928331466965286,
"grad_norm": 3.514563798904419,
"learning_rate": 2.0121313923105636e-05,
"loss": 2.0833,
"step": 16010
},
{
"epoch": 1.793952967525196,
"grad_norm": 4.816470623016357,
"learning_rate": 2.0102650242627847e-05,
"loss": 2.283,
"step": 16020
},
{
"epoch": 1.7950727883538633,
"grad_norm": 11.659377098083496,
"learning_rate": 2.0083986562150056e-05,
"loss": 1.8348,
"step": 16030
},
{
"epoch": 1.7961926091825307,
"grad_norm": 5.277092933654785,
"learning_rate": 2.0065322881672267e-05,
"loss": 1.8455,
"step": 16040
},
{
"epoch": 1.7973124300111982,
"grad_norm": 10.653385162353516,
"learning_rate": 2.0046659201194476e-05,
"loss": 1.8004,
"step": 16050
},
{
"epoch": 1.7984322508398656,
"grad_norm": 5.165909767150879,
"learning_rate": 2.0027995520716687e-05,
"loss": 1.9219,
"step": 16060
},
{
"epoch": 1.7995520716685331,
"grad_norm": 12.408156394958496,
"learning_rate": 2.0009331840238896e-05,
"loss": 2.0149,
"step": 16070
},
{
"epoch": 1.8006718924972005,
"grad_norm": 3.793848752975464,
"learning_rate": 1.9990668159761104e-05,
"loss": 2.1401,
"step": 16080
},
{
"epoch": 1.8017917133258678,
"grad_norm": 4.723913192749023,
"learning_rate": 1.9972004479283316e-05,
"loss": 1.5437,
"step": 16090
},
{
"epoch": 1.8029115341545352,
"grad_norm": 5.787063121795654,
"learning_rate": 1.9953340798805524e-05,
"loss": 1.93,
"step": 16100
},
{
"epoch": 1.8040313549832026,
"grad_norm": 6.674378395080566,
"learning_rate": 1.9934677118327736e-05,
"loss": 2.3135,
"step": 16110
},
{
"epoch": 1.8051511758118701,
"grad_norm": 14.730244636535645,
"learning_rate": 1.9916013437849944e-05,
"loss": 2.1795,
"step": 16120
},
{
"epoch": 1.8062709966405375,
"grad_norm": 4.993513584136963,
"learning_rate": 1.9897349757372156e-05,
"loss": 1.7507,
"step": 16130
},
{
"epoch": 1.807390817469205,
"grad_norm": 14.580843925476074,
"learning_rate": 1.9878686076894364e-05,
"loss": 1.7252,
"step": 16140
},
{
"epoch": 1.8085106382978724,
"grad_norm": 14.65086841583252,
"learning_rate": 1.9860022396416576e-05,
"loss": 1.6178,
"step": 16150
},
{
"epoch": 1.8096304591265397,
"grad_norm": 15.6979398727417,
"learning_rate": 1.9841358715938784e-05,
"loss": 2.3194,
"step": 16160
},
{
"epoch": 1.810750279955207,
"grad_norm": 5.819782733917236,
"learning_rate": 1.9822695035460996e-05,
"loss": 1.572,
"step": 16170
},
{
"epoch": 1.8118701007838744,
"grad_norm": 4.418210983276367,
"learning_rate": 1.9804031354983204e-05,
"loss": 2.247,
"step": 16180
},
{
"epoch": 1.812989921612542,
"grad_norm": 5.038919925689697,
"learning_rate": 1.9785367674505416e-05,
"loss": 1.8143,
"step": 16190
},
{
"epoch": 1.8141097424412094,
"grad_norm": 5.138890743255615,
"learning_rate": 1.9766703994027624e-05,
"loss": 1.8542,
"step": 16200
},
{
"epoch": 1.815229563269877,
"grad_norm": 12.318073272705078,
"learning_rate": 1.9748040313549836e-05,
"loss": 2.1731,
"step": 16210
},
{
"epoch": 1.8163493840985443,
"grad_norm": 4.275629043579102,
"learning_rate": 1.972937663307204e-05,
"loss": 2.4228,
"step": 16220
},
{
"epoch": 1.8174692049272116,
"grad_norm": 4.5237016677856445,
"learning_rate": 1.9710712952594252e-05,
"loss": 1.9218,
"step": 16230
},
{
"epoch": 1.818589025755879,
"grad_norm": 7.575822353363037,
"learning_rate": 1.969204927211646e-05,
"loss": 1.7327,
"step": 16240
},
{
"epoch": 1.8197088465845463,
"grad_norm": 12.654701232910156,
"learning_rate": 1.9673385591638672e-05,
"loss": 1.993,
"step": 16250
},
{
"epoch": 1.820828667413214,
"grad_norm": 8.574930191040039,
"learning_rate": 1.965472191116088e-05,
"loss": 1.8782,
"step": 16260
},
{
"epoch": 1.8219484882418813,
"grad_norm": 4.255867958068848,
"learning_rate": 1.9636058230683092e-05,
"loss": 1.8501,
"step": 16270
},
{
"epoch": 1.8230683090705488,
"grad_norm": 6.834265232086182,
"learning_rate": 1.96173945502053e-05,
"loss": 2.3436,
"step": 16280
},
{
"epoch": 1.8241881298992162,
"grad_norm": 3.866483688354492,
"learning_rate": 1.959873086972751e-05,
"loss": 2.3195,
"step": 16290
},
{
"epoch": 1.8253079507278835,
"grad_norm": 10.645752906799316,
"learning_rate": 1.958006718924972e-05,
"loss": 1.7716,
"step": 16300
},
{
"epoch": 1.826427771556551,
"grad_norm": 15.50953197479248,
"learning_rate": 1.956140350877193e-05,
"loss": 2.5357,
"step": 16310
},
{
"epoch": 1.8275475923852182,
"grad_norm": 8.97745418548584,
"learning_rate": 1.954273982829414e-05,
"loss": 1.8631,
"step": 16320
},
{
"epoch": 1.8286674132138858,
"grad_norm": 10.974065780639648,
"learning_rate": 1.952407614781635e-05,
"loss": 1.6453,
"step": 16330
},
{
"epoch": 1.8297872340425532,
"grad_norm": 14.380806922912598,
"learning_rate": 1.950541246733856e-05,
"loss": 2.1817,
"step": 16340
},
{
"epoch": 1.8309070548712207,
"grad_norm": 3.8893136978149414,
"learning_rate": 1.948674878686077e-05,
"loss": 2.1023,
"step": 16350
},
{
"epoch": 1.832026875699888,
"grad_norm": 3.2880914211273193,
"learning_rate": 1.946808510638298e-05,
"loss": 1.466,
"step": 16360
},
{
"epoch": 1.8331466965285554,
"grad_norm": 9.581578254699707,
"learning_rate": 1.944942142590519e-05,
"loss": 1.8245,
"step": 16370
},
{
"epoch": 1.8342665173572228,
"grad_norm": 15.423023223876953,
"learning_rate": 1.94307577454274e-05,
"loss": 2.1899,
"step": 16380
},
{
"epoch": 1.8353863381858901,
"grad_norm": 5.308213233947754,
"learning_rate": 1.941209406494961e-05,
"loss": 1.6693,
"step": 16390
},
{
"epoch": 1.8365061590145577,
"grad_norm": 13.718766212463379,
"learning_rate": 1.939343038447182e-05,
"loss": 1.6922,
"step": 16400
},
{
"epoch": 1.837625979843225,
"grad_norm": 5.901851177215576,
"learning_rate": 1.937476670399403e-05,
"loss": 1.9234,
"step": 16410
},
{
"epoch": 1.8387458006718926,
"grad_norm": 4.218606948852539,
"learning_rate": 1.935610302351624e-05,
"loss": 1.7346,
"step": 16420
},
{
"epoch": 1.83986562150056,
"grad_norm": 3.545685291290283,
"learning_rate": 1.933743934303845e-05,
"loss": 1.5436,
"step": 16430
},
{
"epoch": 1.8409854423292273,
"grad_norm": 3.544178009033203,
"learning_rate": 1.931877566256066e-05,
"loss": 2.0027,
"step": 16440
},
{
"epoch": 1.8421052631578947,
"grad_norm": 16.046741485595703,
"learning_rate": 1.9300111982082866e-05,
"loss": 1.8394,
"step": 16450
},
{
"epoch": 1.843225083986562,
"grad_norm": 3.808443546295166,
"learning_rate": 1.9281448301605077e-05,
"loss": 1.5258,
"step": 16460
},
{
"epoch": 1.8443449048152296,
"grad_norm": 16.202293395996094,
"learning_rate": 1.9262784621127286e-05,
"loss": 1.8592,
"step": 16470
},
{
"epoch": 1.845464725643897,
"grad_norm": 12.9262056350708,
"learning_rate": 1.9244120940649498e-05,
"loss": 2.35,
"step": 16480
},
{
"epoch": 1.8465845464725645,
"grad_norm": 9.115686416625977,
"learning_rate": 1.9225457260171706e-05,
"loss": 2.0023,
"step": 16490
},
{
"epoch": 1.8477043673012319,
"grad_norm": 4.362748622894287,
"learning_rate": 1.9206793579693914e-05,
"loss": 2.2523,
"step": 16500
},
{
"epoch": 1.8488241881298992,
"grad_norm": 6.081763744354248,
"learning_rate": 1.9188129899216126e-05,
"loss": 1.9744,
"step": 16510
},
{
"epoch": 1.8499440089585666,
"grad_norm": 13.35545539855957,
"learning_rate": 1.9169466218738334e-05,
"loss": 2.0795,
"step": 16520
},
{
"epoch": 1.851063829787234,
"grad_norm": 4.248141765594482,
"learning_rate": 1.9150802538260546e-05,
"loss": 2.4488,
"step": 16530
},
{
"epoch": 1.8521836506159015,
"grad_norm": 10.578146934509277,
"learning_rate": 1.9132138857782754e-05,
"loss": 1.8938,
"step": 16540
},
{
"epoch": 1.8533034714445689,
"grad_norm": 8.211806297302246,
"learning_rate": 1.9113475177304966e-05,
"loss": 1.939,
"step": 16550
},
{
"epoch": 1.8544232922732364,
"grad_norm": 11.0032320022583,
"learning_rate": 1.9094811496827174e-05,
"loss": 2.0445,
"step": 16560
},
{
"epoch": 1.8555431131019038,
"grad_norm": 15.884440422058105,
"learning_rate": 1.9076147816349386e-05,
"loss": 2.1312,
"step": 16570
},
{
"epoch": 1.8566629339305711,
"grad_norm": 17.182661056518555,
"learning_rate": 1.9057484135871594e-05,
"loss": 1.8968,
"step": 16580
},
{
"epoch": 1.8577827547592385,
"grad_norm": 12.066494941711426,
"learning_rate": 1.9038820455393806e-05,
"loss": 2.1683,
"step": 16590
},
{
"epoch": 1.8589025755879058,
"grad_norm": 5.713685989379883,
"learning_rate": 1.9020156774916014e-05,
"loss": 1.8316,
"step": 16600
},
{
"epoch": 1.8600223964165732,
"grad_norm": 3.7835745811462402,
"learning_rate": 1.9001493094438226e-05,
"loss": 1.7352,
"step": 16610
},
{
"epoch": 1.8611422172452408,
"grad_norm": 5.586095809936523,
"learning_rate": 1.8982829413960434e-05,
"loss": 1.9586,
"step": 16620
},
{
"epoch": 1.8622620380739083,
"grad_norm": 10.472651481628418,
"learning_rate": 1.8964165733482646e-05,
"loss": 1.8188,
"step": 16630
},
{
"epoch": 1.8633818589025757,
"grad_norm": 8.586959838867188,
"learning_rate": 1.8945502053004854e-05,
"loss": 1.7482,
"step": 16640
},
{
"epoch": 1.864501679731243,
"grad_norm": 14.469319343566895,
"learning_rate": 1.8926838372527066e-05,
"loss": 1.9509,
"step": 16650
},
{
"epoch": 1.8656215005599104,
"grad_norm": 12.987029075622559,
"learning_rate": 1.8908174692049274e-05,
"loss": 1.505,
"step": 16660
},
{
"epoch": 1.8667413213885777,
"grad_norm": 4.787947654724121,
"learning_rate": 1.8889511011571483e-05,
"loss": 1.7582,
"step": 16670
},
{
"epoch": 1.867861142217245,
"grad_norm": 4.350035667419434,
"learning_rate": 1.8870847331093694e-05,
"loss": 2.4025,
"step": 16680
},
{
"epoch": 1.8689809630459127,
"grad_norm": 4.656111717224121,
"learning_rate": 1.8852183650615903e-05,
"loss": 1.558,
"step": 16690
},
{
"epoch": 1.8701007838745802,
"grad_norm": 5.183754920959473,
"learning_rate": 1.883351997013811e-05,
"loss": 1.6411,
"step": 16700
},
{
"epoch": 1.8712206047032476,
"grad_norm": 13.324991226196289,
"learning_rate": 1.8814856289660323e-05,
"loss": 2.254,
"step": 16710
},
{
"epoch": 1.872340425531915,
"grad_norm": 15.952241897583008,
"learning_rate": 1.879619260918253e-05,
"loss": 1.7153,
"step": 16720
},
{
"epoch": 1.8734602463605823,
"grad_norm": 8.312430381774902,
"learning_rate": 1.877752892870474e-05,
"loss": 2.2136,
"step": 16730
},
{
"epoch": 1.8745800671892496,
"grad_norm": 7.2421393394470215,
"learning_rate": 1.875886524822695e-05,
"loss": 1.8395,
"step": 16740
},
{
"epoch": 1.875699888017917,
"grad_norm": 9.180643081665039,
"learning_rate": 1.874020156774916e-05,
"loss": 2.1119,
"step": 16750
},
{
"epoch": 1.8768197088465846,
"grad_norm": 15.752584457397461,
"learning_rate": 1.872153788727137e-05,
"loss": 2.3431,
"step": 16760
},
{
"epoch": 1.877939529675252,
"grad_norm": 15.961100578308105,
"learning_rate": 1.870287420679358e-05,
"loss": 2.2094,
"step": 16770
},
{
"epoch": 1.8790593505039195,
"grad_norm": 4.183115482330322,
"learning_rate": 1.868421052631579e-05,
"loss": 1.9611,
"step": 16780
},
{
"epoch": 1.8801791713325868,
"grad_norm": 15.471096992492676,
"learning_rate": 1.8665546845838e-05,
"loss": 2.2645,
"step": 16790
},
{
"epoch": 1.8812989921612542,
"grad_norm": 15.710405349731445,
"learning_rate": 1.864688316536021e-05,
"loss": 2.1026,
"step": 16800
},
{
"epoch": 1.8824188129899215,
"grad_norm": 7.765809535980225,
"learning_rate": 1.862821948488242e-05,
"loss": 1.8401,
"step": 16810
},
{
"epoch": 1.8835386338185889,
"grad_norm": 6.538113117218018,
"learning_rate": 1.860955580440463e-05,
"loss": 1.7096,
"step": 16820
},
{
"epoch": 1.8846584546472565,
"grad_norm": 16.50730323791504,
"learning_rate": 1.859089212392684e-05,
"loss": 2.0152,
"step": 16830
},
{
"epoch": 1.8857782754759238,
"grad_norm": 3.642190933227539,
"learning_rate": 1.857222844344905e-05,
"loss": 2.0935,
"step": 16840
},
{
"epoch": 1.8868980963045914,
"grad_norm": 5.2225518226623535,
"learning_rate": 1.855356476297126e-05,
"loss": 1.7445,
"step": 16850
},
{
"epoch": 1.8880179171332587,
"grad_norm": 3.3426289558410645,
"learning_rate": 1.853490108249347e-05,
"loss": 2.3258,
"step": 16860
},
{
"epoch": 1.889137737961926,
"grad_norm": 8.263337135314941,
"learning_rate": 1.851623740201568e-05,
"loss": 1.6674,
"step": 16870
},
{
"epoch": 1.8902575587905934,
"grad_norm": 4.517258167266846,
"learning_rate": 1.8497573721537888e-05,
"loss": 2.0328,
"step": 16880
},
{
"epoch": 1.8913773796192608,
"grad_norm": 5.429361820220947,
"learning_rate": 1.84789100410601e-05,
"loss": 2.3338,
"step": 16890
},
{
"epoch": 1.8924972004479284,
"grad_norm": 11.747203826904297,
"learning_rate": 1.8460246360582308e-05,
"loss": 1.9876,
"step": 16900
},
{
"epoch": 1.8936170212765957,
"grad_norm": 14.812180519104004,
"learning_rate": 1.844158268010452e-05,
"loss": 1.8193,
"step": 16910
},
{
"epoch": 1.8947368421052633,
"grad_norm": 4.837928771972656,
"learning_rate": 1.8422918999626728e-05,
"loss": 1.8644,
"step": 16920
},
{
"epoch": 1.8958566629339306,
"grad_norm": 9.403674125671387,
"learning_rate": 1.8404255319148936e-05,
"loss": 1.7544,
"step": 16930
},
{
"epoch": 1.896976483762598,
"grad_norm": 5.102954387664795,
"learning_rate": 1.8385591638671144e-05,
"loss": 1.8039,
"step": 16940
},
{
"epoch": 1.8980963045912653,
"grad_norm": 13.829090118408203,
"learning_rate": 1.8366927958193356e-05,
"loss": 2.3956,
"step": 16950
},
{
"epoch": 1.8992161254199327,
"grad_norm": 14.055281639099121,
"learning_rate": 1.8348264277715564e-05,
"loss": 1.952,
"step": 16960
},
{
"epoch": 1.9003359462486002,
"grad_norm": 4.856631278991699,
"learning_rate": 1.8329600597237776e-05,
"loss": 1.8025,
"step": 16970
},
{
"epoch": 1.9014557670772676,
"grad_norm": 5.615917205810547,
"learning_rate": 1.8310936916759984e-05,
"loss": 2.4083,
"step": 16980
},
{
"epoch": 1.9025755879059352,
"grad_norm": 4.638927459716797,
"learning_rate": 1.8292273236282196e-05,
"loss": 1.7921,
"step": 16990
},
{
"epoch": 1.9036954087346025,
"grad_norm": 3.5314502716064453,
"learning_rate": 1.8273609555804404e-05,
"loss": 1.8788,
"step": 17000
},
{
"epoch": 1.9048152295632699,
"grad_norm": 6.3414506912231445,
"learning_rate": 1.8254945875326616e-05,
"loss": 1.8657,
"step": 17010
},
{
"epoch": 1.9059350503919372,
"grad_norm": 8.888124465942383,
"learning_rate": 1.8236282194848824e-05,
"loss": 2.4341,
"step": 17020
},
{
"epoch": 1.9070548712206046,
"grad_norm": 5.071857929229736,
"learning_rate": 1.8217618514371036e-05,
"loss": 2.0112,
"step": 17030
},
{
"epoch": 1.9081746920492721,
"grad_norm": 3.5548458099365234,
"learning_rate": 1.8198954833893244e-05,
"loss": 2.0355,
"step": 17040
},
{
"epoch": 1.9092945128779395,
"grad_norm": 13.80466079711914,
"learning_rate": 1.8180291153415456e-05,
"loss": 2.532,
"step": 17050
},
{
"epoch": 1.910414333706607,
"grad_norm": 4.249703407287598,
"learning_rate": 1.8161627472937664e-05,
"loss": 1.9727,
"step": 17060
},
{
"epoch": 1.9115341545352744,
"grad_norm": 4.494642734527588,
"learning_rate": 1.8142963792459876e-05,
"loss": 2.0125,
"step": 17070
},
{
"epoch": 1.9126539753639418,
"grad_norm": 5.063194274902344,
"learning_rate": 1.8124300111982084e-05,
"loss": 1.647,
"step": 17080
},
{
"epoch": 1.9137737961926091,
"grad_norm": 9.803994178771973,
"learning_rate": 1.8105636431504293e-05,
"loss": 2.1401,
"step": 17090
},
{
"epoch": 1.9148936170212765,
"grad_norm": 7.283653736114502,
"learning_rate": 1.8086972751026504e-05,
"loss": 1.9052,
"step": 17100
},
{
"epoch": 1.916013437849944,
"grad_norm": 11.359768867492676,
"learning_rate": 1.8068309070548713e-05,
"loss": 2.0145,
"step": 17110
},
{
"epoch": 1.9171332586786114,
"grad_norm": 10.177249908447266,
"learning_rate": 1.8049645390070924e-05,
"loss": 1.7666,
"step": 17120
},
{
"epoch": 1.918253079507279,
"grad_norm": 5.568352699279785,
"learning_rate": 1.8030981709593133e-05,
"loss": 1.7245,
"step": 17130
},
{
"epoch": 1.9193729003359463,
"grad_norm": 19.79357147216797,
"learning_rate": 1.8012318029115344e-05,
"loss": 2.4217,
"step": 17140
},
{
"epoch": 1.9204927211646137,
"grad_norm": 10.292594909667969,
"learning_rate": 1.799365434863755e-05,
"loss": 1.4904,
"step": 17150
},
{
"epoch": 1.921612541993281,
"grad_norm": 3.906355381011963,
"learning_rate": 1.797499066815976e-05,
"loss": 2.4061,
"step": 17160
},
{
"epoch": 1.9227323628219484,
"grad_norm": 10.06027889251709,
"learning_rate": 1.795632698768197e-05,
"loss": 1.826,
"step": 17170
},
{
"epoch": 1.923852183650616,
"grad_norm": 3.928687572479248,
"learning_rate": 1.793766330720418e-05,
"loss": 1.9778,
"step": 17180
},
{
"epoch": 1.9249720044792833,
"grad_norm": 11.147214889526367,
"learning_rate": 1.791899962672639e-05,
"loss": 1.6609,
"step": 17190
},
{
"epoch": 1.9260918253079509,
"grad_norm": 5.292778968811035,
"learning_rate": 1.79003359462486e-05,
"loss": 1.581,
"step": 17200
},
{
"epoch": 1.9272116461366182,
"grad_norm": 5.773550987243652,
"learning_rate": 1.788167226577081e-05,
"loss": 1.5817,
"step": 17210
},
{
"epoch": 1.9283314669652856,
"grad_norm": 14.817527770996094,
"learning_rate": 1.786300858529302e-05,
"loss": 1.9826,
"step": 17220
},
{
"epoch": 1.929451287793953,
"grad_norm": 6.223337173461914,
"learning_rate": 1.784434490481523e-05,
"loss": 2.1578,
"step": 17230
},
{
"epoch": 1.9305711086226203,
"grad_norm": 4.402294158935547,
"learning_rate": 1.782568122433744e-05,
"loss": 2.2318,
"step": 17240
},
{
"epoch": 1.9316909294512878,
"grad_norm": 7.321905136108398,
"learning_rate": 1.780701754385965e-05,
"loss": 1.6848,
"step": 17250
},
{
"epoch": 1.9328107502799552,
"grad_norm": 14.152067184448242,
"learning_rate": 1.778835386338186e-05,
"loss": 2.5409,
"step": 17260
},
{
"epoch": 1.9339305711086228,
"grad_norm": 12.283940315246582,
"learning_rate": 1.776969018290407e-05,
"loss": 1.9965,
"step": 17270
},
{
"epoch": 1.9350503919372901,
"grad_norm": 8.56460189819336,
"learning_rate": 1.775102650242628e-05,
"loss": 1.6021,
"step": 17280
},
{
"epoch": 1.9361702127659575,
"grad_norm": 4.710309982299805,
"learning_rate": 1.773236282194849e-05,
"loss": 2.2814,
"step": 17290
},
{
"epoch": 1.9372900335946248,
"grad_norm": 12.966391563415527,
"learning_rate": 1.77136991414707e-05,
"loss": 2.172,
"step": 17300
},
{
"epoch": 1.9384098544232922,
"grad_norm": 16.88652229309082,
"learning_rate": 1.769503546099291e-05,
"loss": 2.029,
"step": 17310
},
{
"epoch": 1.9395296752519597,
"grad_norm": 8.579212188720703,
"learning_rate": 1.7676371780515118e-05,
"loss": 1.47,
"step": 17320
},
{
"epoch": 1.940649496080627,
"grad_norm": 14.837044715881348,
"learning_rate": 1.765770810003733e-05,
"loss": 1.9022,
"step": 17330
},
{
"epoch": 1.9417693169092947,
"grad_norm": 14.231435775756836,
"learning_rate": 1.7639044419559538e-05,
"loss": 2.4319,
"step": 17340
},
{
"epoch": 1.942889137737962,
"grad_norm": 6.342057704925537,
"learning_rate": 1.762038073908175e-05,
"loss": 1.8036,
"step": 17350
},
{
"epoch": 1.9440089585666294,
"grad_norm": 4.24338436126709,
"learning_rate": 1.7601717058603958e-05,
"loss": 1.9694,
"step": 17360
},
{
"epoch": 1.9451287793952967,
"grad_norm": 5.161984920501709,
"learning_rate": 1.758305337812617e-05,
"loss": 1.7682,
"step": 17370
},
{
"epoch": 1.946248600223964,
"grad_norm": 7.184517860412598,
"learning_rate": 1.7564389697648374e-05,
"loss": 1.5724,
"step": 17380
},
{
"epoch": 1.9473684210526314,
"grad_norm": 7.037195682525635,
"learning_rate": 1.7545726017170586e-05,
"loss": 1.6202,
"step": 17390
},
{
"epoch": 1.948488241881299,
"grad_norm": 7.23237419128418,
"learning_rate": 1.7527062336692794e-05,
"loss": 1.7879,
"step": 17400
},
{
"epoch": 1.9496080627099666,
"grad_norm": 4.513615131378174,
"learning_rate": 1.7508398656215006e-05,
"loss": 2.1071,
"step": 17410
},
{
"epoch": 1.950727883538634,
"grad_norm": 14.149372100830078,
"learning_rate": 1.7489734975737214e-05,
"loss": 1.8393,
"step": 17420
},
{
"epoch": 1.9518477043673013,
"grad_norm": 3.9815926551818848,
"learning_rate": 1.7471071295259426e-05,
"loss": 1.9888,
"step": 17430
},
{
"epoch": 1.9529675251959686,
"grad_norm": 15.270926475524902,
"learning_rate": 1.7452407614781634e-05,
"loss": 2.2696,
"step": 17440
},
{
"epoch": 1.954087346024636,
"grad_norm": 7.519197940826416,
"learning_rate": 1.7433743934303846e-05,
"loss": 2.0431,
"step": 17450
},
{
"epoch": 1.9552071668533033,
"grad_norm": 4.564593315124512,
"learning_rate": 1.7415080253826054e-05,
"loss": 1.7656,
"step": 17460
},
{
"epoch": 1.9563269876819709,
"grad_norm": 9.020241737365723,
"learning_rate": 1.7396416573348266e-05,
"loss": 1.9072,
"step": 17470
},
{
"epoch": 1.9574468085106385,
"grad_norm": 10.36052131652832,
"learning_rate": 1.7377752892870474e-05,
"loss": 1.7954,
"step": 17480
},
{
"epoch": 1.9585666293393058,
"grad_norm": 5.499046802520752,
"learning_rate": 1.7359089212392686e-05,
"loss": 2.2032,
"step": 17490
},
{
"epoch": 1.9596864501679732,
"grad_norm": 10.584261894226074,
"learning_rate": 1.7340425531914894e-05,
"loss": 2.2121,
"step": 17500
},
{
"epoch": 1.9608062709966405,
"grad_norm": 4.816810131072998,
"learning_rate": 1.7321761851437106e-05,
"loss": 1.9277,
"step": 17510
},
{
"epoch": 1.9619260918253079,
"grad_norm": 5.484105110168457,
"learning_rate": 1.7303098170959314e-05,
"loss": 1.7758,
"step": 17520
},
{
"epoch": 1.9630459126539752,
"grad_norm": 12.183406829833984,
"learning_rate": 1.7284434490481523e-05,
"loss": 2.0495,
"step": 17530
},
{
"epoch": 1.9641657334826428,
"grad_norm": 5.112043380737305,
"learning_rate": 1.7265770810003734e-05,
"loss": 1.9576,
"step": 17540
},
{
"epoch": 1.9652855543113104,
"grad_norm": 4.796443939208984,
"learning_rate": 1.7247107129525943e-05,
"loss": 2.0039,
"step": 17550
},
{
"epoch": 1.9664053751399777,
"grad_norm": 4.247778415679932,
"learning_rate": 1.7228443449048154e-05,
"loss": 1.7593,
"step": 17560
},
{
"epoch": 1.967525195968645,
"grad_norm": 4.498353481292725,
"learning_rate": 1.7209779768570363e-05,
"loss": 1.904,
"step": 17570
},
{
"epoch": 1.9686450167973124,
"grad_norm": 12.006962776184082,
"learning_rate": 1.7191116088092574e-05,
"loss": 1.6231,
"step": 17580
},
{
"epoch": 1.9697648376259798,
"grad_norm": 4.842081069946289,
"learning_rate": 1.7172452407614783e-05,
"loss": 2.0985,
"step": 17590
},
{
"epoch": 1.970884658454647,
"grad_norm": 15.521842956542969,
"learning_rate": 1.7153788727136994e-05,
"loss": 2.1864,
"step": 17600
},
{
"epoch": 1.9720044792833147,
"grad_norm": 9.48452091217041,
"learning_rate": 1.7135125046659203e-05,
"loss": 1.8124,
"step": 17610
},
{
"epoch": 1.973124300111982,
"grad_norm": 4.017796993255615,
"learning_rate": 1.711646136618141e-05,
"loss": 2.0427,
"step": 17620
},
{
"epoch": 1.9742441209406496,
"grad_norm": 3.9394009113311768,
"learning_rate": 1.709779768570362e-05,
"loss": 2.0485,
"step": 17630
},
{
"epoch": 1.975363941769317,
"grad_norm": 14.145578384399414,
"learning_rate": 1.707913400522583e-05,
"loss": 1.5402,
"step": 17640
},
{
"epoch": 1.9764837625979843,
"grad_norm": 4.282801628112793,
"learning_rate": 1.706047032474804e-05,
"loss": 1.8805,
"step": 17650
},
{
"epoch": 1.9776035834266517,
"grad_norm": 4.898009300231934,
"learning_rate": 1.704180664427025e-05,
"loss": 2.1731,
"step": 17660
},
{
"epoch": 1.978723404255319,
"grad_norm": 4.910828590393066,
"learning_rate": 1.702314296379246e-05,
"loss": 2.0025,
"step": 17670
},
{
"epoch": 1.9798432250839866,
"grad_norm": 15.03659725189209,
"learning_rate": 1.700447928331467e-05,
"loss": 2.3384,
"step": 17680
},
{
"epoch": 1.980963045912654,
"grad_norm": 10.689837455749512,
"learning_rate": 1.698581560283688e-05,
"loss": 1.7127,
"step": 17690
},
{
"epoch": 1.9820828667413215,
"grad_norm": 10.339581489562988,
"learning_rate": 1.696715192235909e-05,
"loss": 2.0183,
"step": 17700
},
{
"epoch": 1.9832026875699889,
"grad_norm": 7.037674903869629,
"learning_rate": 1.69484882418813e-05,
"loss": 1.9685,
"step": 17710
},
{
"epoch": 1.9843225083986562,
"grad_norm": 14.190945625305176,
"learning_rate": 1.692982456140351e-05,
"loss": 1.9507,
"step": 17720
},
{
"epoch": 1.9854423292273236,
"grad_norm": 4.3056416511535645,
"learning_rate": 1.691116088092572e-05,
"loss": 1.9554,
"step": 17730
},
{
"epoch": 1.986562150055991,
"grad_norm": 14.68007755279541,
"learning_rate": 1.6892497200447928e-05,
"loss": 2.2227,
"step": 17740
},
{
"epoch": 1.9876819708846585,
"grad_norm": 4.058879852294922,
"learning_rate": 1.687383351997014e-05,
"loss": 1.9667,
"step": 17750
},
{
"epoch": 1.9888017917133258,
"grad_norm": 8.660399436950684,
"learning_rate": 1.6855169839492348e-05,
"loss": 2.0649,
"step": 17760
},
{
"epoch": 1.9899216125419934,
"grad_norm": 11.349140167236328,
"learning_rate": 1.683650615901456e-05,
"loss": 1.438,
"step": 17770
},
{
"epoch": 1.9910414333706608,
"grad_norm": 4.842729568481445,
"learning_rate": 1.6817842478536768e-05,
"loss": 1.9431,
"step": 17780
},
{
"epoch": 1.992161254199328,
"grad_norm": 4.284554958343506,
"learning_rate": 1.679917879805898e-05,
"loss": 1.8363,
"step": 17790
},
{
"epoch": 1.9932810750279955,
"grad_norm": 6.62599515914917,
"learning_rate": 1.6780515117581188e-05,
"loss": 1.6479,
"step": 17800
},
{
"epoch": 1.9944008958566628,
"grad_norm": 12.138463973999023,
"learning_rate": 1.67618514371034e-05,
"loss": 1.9355,
"step": 17810
},
{
"epoch": 1.9955207166853304,
"grad_norm": 9.465065002441406,
"learning_rate": 1.6743187756625608e-05,
"loss": 2.1428,
"step": 17820
},
{
"epoch": 1.9966405375139977,
"grad_norm": 12.322503089904785,
"learning_rate": 1.672452407614782e-05,
"loss": 2.1444,
"step": 17830
},
{
"epoch": 1.9977603583426653,
"grad_norm": 9.275611877441406,
"learning_rate": 1.6705860395670028e-05,
"loss": 1.6226,
"step": 17840
},
{
"epoch": 1.9988801791713326,
"grad_norm": 4.5713982582092285,
"learning_rate": 1.6687196715192236e-05,
"loss": 1.9358,
"step": 17850
},
{
"epoch": 2.0,
"grad_norm": 4.424788951873779,
"learning_rate": 1.6668533034714444e-05,
"loss": 1.604,
"step": 17860
},
{
"epoch": 2.0011198208286674,
"grad_norm": 15.465615272521973,
"learning_rate": 1.6649869354236656e-05,
"loss": 1.7584,
"step": 17870
},
{
"epoch": 2.0022396416573347,
"grad_norm": 5.3801116943359375,
"learning_rate": 1.6631205673758864e-05,
"loss": 2.4112,
"step": 17880
},
{
"epoch": 2.003359462486002,
"grad_norm": 9.771553993225098,
"learning_rate": 1.6612541993281076e-05,
"loss": 2.3079,
"step": 17890
},
{
"epoch": 2.00447928331467,
"grad_norm": 6.370817184448242,
"learning_rate": 1.6593878312803284e-05,
"loss": 1.4866,
"step": 17900
},
{
"epoch": 2.005599104143337,
"grad_norm": 5.146578311920166,
"learning_rate": 1.6575214632325496e-05,
"loss": 1.9364,
"step": 17910
},
{
"epoch": 2.0067189249720045,
"grad_norm": 6.970976829528809,
"learning_rate": 1.6556550951847704e-05,
"loss": 1.6884,
"step": 17920
},
{
"epoch": 2.007838745800672,
"grad_norm": 9.017516136169434,
"learning_rate": 1.6537887271369916e-05,
"loss": 1.4814,
"step": 17930
},
{
"epoch": 2.0089585666293392,
"grad_norm": 5.169244289398193,
"learning_rate": 1.6519223590892124e-05,
"loss": 1.9879,
"step": 17940
},
{
"epoch": 2.0100783874580066,
"grad_norm": 3.8840739727020264,
"learning_rate": 1.6500559910414336e-05,
"loss": 1.4718,
"step": 17950
},
{
"epoch": 2.011198208286674,
"grad_norm": 9.54129409790039,
"learning_rate": 1.6481896229936544e-05,
"loss": 1.709,
"step": 17960
},
{
"epoch": 2.0123180291153417,
"grad_norm": 4.5542192459106445,
"learning_rate": 1.6463232549458753e-05,
"loss": 2.0193,
"step": 17970
},
{
"epoch": 2.013437849944009,
"grad_norm": 4.2427144050598145,
"learning_rate": 1.6444568868980964e-05,
"loss": 2.2357,
"step": 17980
},
{
"epoch": 2.0145576707726764,
"grad_norm": 8.179814338684082,
"learning_rate": 1.6425905188503173e-05,
"loss": 1.8587,
"step": 17990
},
{
"epoch": 2.015677491601344,
"grad_norm": 14.023432731628418,
"learning_rate": 1.6407241508025384e-05,
"loss": 1.9654,
"step": 18000
},
{
"epoch": 2.016797312430011,
"grad_norm": 4.784511566162109,
"learning_rate": 1.6388577827547593e-05,
"loss": 1.9038,
"step": 18010
},
{
"epoch": 2.0179171332586785,
"grad_norm": 9.668645858764648,
"learning_rate": 1.6369914147069804e-05,
"loss": 2.2853,
"step": 18020
},
{
"epoch": 2.019036954087346,
"grad_norm": 5.6623005867004395,
"learning_rate": 1.6351250466592013e-05,
"loss": 2.1568,
"step": 18030
},
{
"epoch": 2.0201567749160136,
"grad_norm": 12.06014347076416,
"learning_rate": 1.6332586786114224e-05,
"loss": 1.9534,
"step": 18040
},
{
"epoch": 2.021276595744681,
"grad_norm": 12.4910249710083,
"learning_rate": 1.6313923105636433e-05,
"loss": 1.6823,
"step": 18050
},
{
"epoch": 2.0223964165733483,
"grad_norm": 12.419768333435059,
"learning_rate": 1.6295259425158644e-05,
"loss": 2.3436,
"step": 18060
},
{
"epoch": 2.0235162374020157,
"grad_norm": 4.12880802154541,
"learning_rate": 1.6276595744680853e-05,
"loss": 2.1389,
"step": 18070
},
{
"epoch": 2.024636058230683,
"grad_norm": 6.19962739944458,
"learning_rate": 1.625793206420306e-05,
"loss": 2.0253,
"step": 18080
},
{
"epoch": 2.0257558790593504,
"grad_norm": 4.155970573425293,
"learning_rate": 1.623926838372527e-05,
"loss": 1.5464,
"step": 18090
},
{
"epoch": 2.0268756998880177,
"grad_norm": 2.5858302116394043,
"learning_rate": 1.622060470324748e-05,
"loss": 1.8217,
"step": 18100
},
{
"epoch": 2.0279955207166855,
"grad_norm": 3.9286646842956543,
"learning_rate": 1.620194102276969e-05,
"loss": 1.3943,
"step": 18110
},
{
"epoch": 2.029115341545353,
"grad_norm": 12.073657035827637,
"learning_rate": 1.61832773422919e-05,
"loss": 2.0707,
"step": 18120
},
{
"epoch": 2.0302351623740202,
"grad_norm": 6.261038780212402,
"learning_rate": 1.616461366181411e-05,
"loss": 1.9036,
"step": 18130
},
{
"epoch": 2.0313549832026876,
"grad_norm": 3.7651288509368896,
"learning_rate": 1.614594998133632e-05,
"loss": 1.3601,
"step": 18140
},
{
"epoch": 2.032474804031355,
"grad_norm": 5.616112232208252,
"learning_rate": 1.612728630085853e-05,
"loss": 1.9249,
"step": 18150
},
{
"epoch": 2.0335946248600223,
"grad_norm": 14.19587230682373,
"learning_rate": 1.610862262038074e-05,
"loss": 1.9037,
"step": 18160
},
{
"epoch": 2.0347144456886896,
"grad_norm": 14.49325942993164,
"learning_rate": 1.608995893990295e-05,
"loss": 1.9606,
"step": 18170
},
{
"epoch": 2.0358342665173574,
"grad_norm": 5.4950270652771,
"learning_rate": 1.6071295259425158e-05,
"loss": 2.1976,
"step": 18180
},
{
"epoch": 2.036954087346025,
"grad_norm": 4.107669830322266,
"learning_rate": 1.605263157894737e-05,
"loss": 1.6963,
"step": 18190
},
{
"epoch": 2.038073908174692,
"grad_norm": 5.567134857177734,
"learning_rate": 1.6033967898469578e-05,
"loss": 1.639,
"step": 18200
},
{
"epoch": 2.0391937290033595,
"grad_norm": 18.579816818237305,
"learning_rate": 1.601530421799179e-05,
"loss": 1.9576,
"step": 18210
},
{
"epoch": 2.040313549832027,
"grad_norm": 11.057695388793945,
"learning_rate": 1.5996640537513998e-05,
"loss": 1.8883,
"step": 18220
},
{
"epoch": 2.041433370660694,
"grad_norm": 6.482846260070801,
"learning_rate": 1.597797685703621e-05,
"loss": 1.812,
"step": 18230
},
{
"epoch": 2.0425531914893615,
"grad_norm": 12.868412017822266,
"learning_rate": 1.5959313176558418e-05,
"loss": 2.1451,
"step": 18240
},
{
"epoch": 2.0436730123180293,
"grad_norm": 4.0791401863098145,
"learning_rate": 1.594064949608063e-05,
"loss": 1.974,
"step": 18250
},
{
"epoch": 2.0447928331466967,
"grad_norm": 6.537319660186768,
"learning_rate": 1.5921985815602838e-05,
"loss": 1.8334,
"step": 18260
},
{
"epoch": 2.045912653975364,
"grad_norm": 8.384710311889648,
"learning_rate": 1.590332213512505e-05,
"loss": 2.1852,
"step": 18270
},
{
"epoch": 2.0470324748040314,
"grad_norm": 11.995549201965332,
"learning_rate": 1.5884658454647258e-05,
"loss": 1.9999,
"step": 18280
},
{
"epoch": 2.0481522956326987,
"grad_norm": 11.57607650756836,
"learning_rate": 1.586599477416947e-05,
"loss": 2.0097,
"step": 18290
},
{
"epoch": 2.049272116461366,
"grad_norm": 21.388427734375,
"learning_rate": 1.5847331093691678e-05,
"loss": 2.0354,
"step": 18300
},
{
"epoch": 2.0503919372900334,
"grad_norm": 4.375351428985596,
"learning_rate": 1.582866741321389e-05,
"loss": 1.8486,
"step": 18310
},
{
"epoch": 2.051511758118701,
"grad_norm": 7.059999942779541,
"learning_rate": 1.5810003732736094e-05,
"loss": 1.7516,
"step": 18320
},
{
"epoch": 2.0526315789473686,
"grad_norm": 4.776148796081543,
"learning_rate": 1.5791340052258306e-05,
"loss": 1.8479,
"step": 18330
},
{
"epoch": 2.053751399776036,
"grad_norm": 13.596695899963379,
"learning_rate": 1.5772676371780514e-05,
"loss": 1.9489,
"step": 18340
},
{
"epoch": 2.0548712206047033,
"grad_norm": 15.971503257751465,
"learning_rate": 1.5754012691302726e-05,
"loss": 2.1302,
"step": 18350
},
{
"epoch": 2.0559910414333706,
"grad_norm": 5.559121131896973,
"learning_rate": 1.5735349010824934e-05,
"loss": 1.8906,
"step": 18360
},
{
"epoch": 2.057110862262038,
"grad_norm": 11.740134239196777,
"learning_rate": 1.5716685330347146e-05,
"loss": 1.831,
"step": 18370
},
{
"epoch": 2.0582306830907053,
"grad_norm": 5.161749362945557,
"learning_rate": 1.5698021649869354e-05,
"loss": 2.0162,
"step": 18380
},
{
"epoch": 2.0593505039193727,
"grad_norm": 13.416109085083008,
"learning_rate": 1.5679357969391563e-05,
"loss": 2.0551,
"step": 18390
},
{
"epoch": 2.0604703247480405,
"grad_norm": 5.6357269287109375,
"learning_rate": 1.5660694288913774e-05,
"loss": 1.6993,
"step": 18400
},
{
"epoch": 2.061590145576708,
"grad_norm": 10.636037826538086,
"learning_rate": 1.5642030608435983e-05,
"loss": 1.8842,
"step": 18410
},
{
"epoch": 2.062709966405375,
"grad_norm": 14.341257095336914,
"learning_rate": 1.5623366927958194e-05,
"loss": 1.7778,
"step": 18420
},
{
"epoch": 2.0638297872340425,
"grad_norm": 7.4988322257995605,
"learning_rate": 1.5604703247480403e-05,
"loss": 1.7679,
"step": 18430
},
{
"epoch": 2.06494960806271,
"grad_norm": 12.500404357910156,
"learning_rate": 1.5586039567002614e-05,
"loss": 2.2651,
"step": 18440
},
{
"epoch": 2.0660694288913772,
"grad_norm": 5.027773380279541,
"learning_rate": 1.5567375886524823e-05,
"loss": 2.0421,
"step": 18450
},
{
"epoch": 2.0671892497200446,
"grad_norm": 10.962523460388184,
"learning_rate": 1.5548712206047034e-05,
"loss": 2.0656,
"step": 18460
},
{
"epoch": 2.0683090705487124,
"grad_norm": 2.7904582023620605,
"learning_rate": 1.5530048525569243e-05,
"loss": 1.9249,
"step": 18470
},
{
"epoch": 2.0694288913773797,
"grad_norm": 7.586933135986328,
"learning_rate": 1.5511384845091454e-05,
"loss": 2.0239,
"step": 18480
},
{
"epoch": 2.070548712206047,
"grad_norm": 4.160824775695801,
"learning_rate": 1.5492721164613663e-05,
"loss": 1.9334,
"step": 18490
},
{
"epoch": 2.0716685330347144,
"grad_norm": 7.0400471687316895,
"learning_rate": 1.5474057484135874e-05,
"loss": 1.8884,
"step": 18500
},
{
"epoch": 2.072788353863382,
"grad_norm": 7.399810314178467,
"learning_rate": 1.5455393803658083e-05,
"loss": 1.6476,
"step": 18510
},
{
"epoch": 2.073908174692049,
"grad_norm": 6.349668979644775,
"learning_rate": 1.5436730123180294e-05,
"loss": 1.8177,
"step": 18520
},
{
"epoch": 2.0750279955207165,
"grad_norm": 4.084234714508057,
"learning_rate": 1.5418066442702503e-05,
"loss": 1.4257,
"step": 18530
},
{
"epoch": 2.0761478163493843,
"grad_norm": 14.362452507019043,
"learning_rate": 1.5399402762224714e-05,
"loss": 2.0043,
"step": 18540
},
{
"epoch": 2.0772676371780516,
"grad_norm": 8.189460754394531,
"learning_rate": 1.538073908174692e-05,
"loss": 1.5747,
"step": 18550
},
{
"epoch": 2.078387458006719,
"grad_norm": 9.600176811218262,
"learning_rate": 1.536207540126913e-05,
"loss": 1.9165,
"step": 18560
},
{
"epoch": 2.0795072788353863,
"grad_norm": 8.519039154052734,
"learning_rate": 1.534341172079134e-05,
"loss": 1.8993,
"step": 18570
},
{
"epoch": 2.0806270996640537,
"grad_norm": 14.394335746765137,
"learning_rate": 1.532474804031355e-05,
"loss": 2.0525,
"step": 18580
},
{
"epoch": 2.081746920492721,
"grad_norm": 4.982779502868652,
"learning_rate": 1.530608435983576e-05,
"loss": 2.0925,
"step": 18590
},
{
"epoch": 2.0828667413213884,
"grad_norm": 15.897424697875977,
"learning_rate": 1.528742067935797e-05,
"loss": 1.7299,
"step": 18600
},
{
"epoch": 2.083986562150056,
"grad_norm": 12.037178993225098,
"learning_rate": 1.526875699888018e-05,
"loss": 1.7714,
"step": 18610
},
{
"epoch": 2.0851063829787235,
"grad_norm": 4.796445846557617,
"learning_rate": 1.525009331840239e-05,
"loss": 2.0635,
"step": 18620
},
{
"epoch": 2.086226203807391,
"grad_norm": 5.05470085144043,
"learning_rate": 1.52314296379246e-05,
"loss": 2.302,
"step": 18630
},
{
"epoch": 2.0873460246360582,
"grad_norm": 6.144739627838135,
"learning_rate": 1.521276595744681e-05,
"loss": 1.8878,
"step": 18640
},
{
"epoch": 2.0884658454647256,
"grad_norm": 5.468743801116943,
"learning_rate": 1.519410227696902e-05,
"loss": 2.1304,
"step": 18650
},
{
"epoch": 2.089585666293393,
"grad_norm": 6.490882873535156,
"learning_rate": 1.517543859649123e-05,
"loss": 1.913,
"step": 18660
},
{
"epoch": 2.0907054871220603,
"grad_norm": 5.277439117431641,
"learning_rate": 1.515677491601344e-05,
"loss": 1.9014,
"step": 18670
},
{
"epoch": 2.091825307950728,
"grad_norm": 5.904261112213135,
"learning_rate": 1.513811123553565e-05,
"loss": 1.963,
"step": 18680
},
{
"epoch": 2.0929451287793954,
"grad_norm": 10.203513145446777,
"learning_rate": 1.5119447555057858e-05,
"loss": 1.9816,
"step": 18690
},
{
"epoch": 2.0940649496080628,
"grad_norm": 13.511499404907227,
"learning_rate": 1.5100783874580068e-05,
"loss": 1.9453,
"step": 18700
},
{
"epoch": 2.09518477043673,
"grad_norm": 4.540700435638428,
"learning_rate": 1.5082120194102278e-05,
"loss": 1.7912,
"step": 18710
},
{
"epoch": 2.0963045912653975,
"grad_norm": 4.4151201248168945,
"learning_rate": 1.5063456513624488e-05,
"loss": 2.1123,
"step": 18720
},
{
"epoch": 2.097424412094065,
"grad_norm": 10.17835807800293,
"learning_rate": 1.5044792833146698e-05,
"loss": 2.0146,
"step": 18730
},
{
"epoch": 2.098544232922732,
"grad_norm": 5.04526424407959,
"learning_rate": 1.5026129152668908e-05,
"loss": 1.6177,
"step": 18740
},
{
"epoch": 2.0996640537514,
"grad_norm": 10.27774429321289,
"learning_rate": 1.5007465472191118e-05,
"loss": 2.0393,
"step": 18750
},
{
"epoch": 2.1007838745800673,
"grad_norm": 5.038769721984863,
"learning_rate": 1.4988801791713328e-05,
"loss": 1.741,
"step": 18760
},
{
"epoch": 2.1019036954087347,
"grad_norm": 9.592277526855469,
"learning_rate": 1.4970138111235538e-05,
"loss": 1.7934,
"step": 18770
},
{
"epoch": 2.103023516237402,
"grad_norm": 9.235641479492188,
"learning_rate": 1.4951474430757744e-05,
"loss": 1.7898,
"step": 18780
},
{
"epoch": 2.1041433370660694,
"grad_norm": 4.7292327880859375,
"learning_rate": 1.4932810750279954e-05,
"loss": 1.5251,
"step": 18790
},
{
"epoch": 2.1052631578947367,
"grad_norm": 9.268404006958008,
"learning_rate": 1.4914147069802164e-05,
"loss": 1.7425,
"step": 18800
},
{
"epoch": 2.106382978723404,
"grad_norm": 4.396312236785889,
"learning_rate": 1.4895483389324374e-05,
"loss": 1.9284,
"step": 18810
},
{
"epoch": 2.107502799552072,
"grad_norm": 5.53659725189209,
"learning_rate": 1.4876819708846584e-05,
"loss": 1.5367,
"step": 18820
},
{
"epoch": 2.108622620380739,
"grad_norm": 6.703355312347412,
"learning_rate": 1.4858156028368794e-05,
"loss": 1.808,
"step": 18830
},
{
"epoch": 2.1097424412094066,
"grad_norm": 13.882152557373047,
"learning_rate": 1.4839492347891004e-05,
"loss": 2.1221,
"step": 18840
},
{
"epoch": 2.110862262038074,
"grad_norm": 4.497895240783691,
"learning_rate": 1.4820828667413214e-05,
"loss": 1.5897,
"step": 18850
},
{
"epoch": 2.1119820828667413,
"grad_norm": 9.912936210632324,
"learning_rate": 1.4802164986935424e-05,
"loss": 1.9576,
"step": 18860
},
{
"epoch": 2.1131019036954086,
"grad_norm": 14.399587631225586,
"learning_rate": 1.4783501306457634e-05,
"loss": 1.9239,
"step": 18870
},
{
"epoch": 2.114221724524076,
"grad_norm": 17.03645133972168,
"learning_rate": 1.4764837625979844e-05,
"loss": 1.9142,
"step": 18880
},
{
"epoch": 2.1153415453527438,
"grad_norm": 12.911978721618652,
"learning_rate": 1.4746173945502054e-05,
"loss": 2.1077,
"step": 18890
},
{
"epoch": 2.116461366181411,
"grad_norm": 6.310555458068848,
"learning_rate": 1.4727510265024263e-05,
"loss": 1.6921,
"step": 18900
},
{
"epoch": 2.1175811870100785,
"grad_norm": 5.524637699127197,
"learning_rate": 1.4708846584546473e-05,
"loss": 1.7968,
"step": 18910
},
{
"epoch": 2.118701007838746,
"grad_norm": 12.02706527709961,
"learning_rate": 1.4690182904068683e-05,
"loss": 1.982,
"step": 18920
},
{
"epoch": 2.119820828667413,
"grad_norm": 16.045839309692383,
"learning_rate": 1.4671519223590893e-05,
"loss": 1.5663,
"step": 18930
},
{
"epoch": 2.1209406494960805,
"grad_norm": 5.100281715393066,
"learning_rate": 1.4652855543113103e-05,
"loss": 1.9206,
"step": 18940
},
{
"epoch": 2.122060470324748,
"grad_norm": 8.30729866027832,
"learning_rate": 1.4634191862635313e-05,
"loss": 1.9549,
"step": 18950
},
{
"epoch": 2.1231802911534157,
"grad_norm": 9.724970817565918,
"learning_rate": 1.4615528182157523e-05,
"loss": 1.8996,
"step": 18960
},
{
"epoch": 2.124300111982083,
"grad_norm": 9.640581130981445,
"learning_rate": 1.4596864501679733e-05,
"loss": 1.6712,
"step": 18970
},
{
"epoch": 2.1254199328107504,
"grad_norm": 7.71252965927124,
"learning_rate": 1.4578200821201943e-05,
"loss": 1.692,
"step": 18980
},
{
"epoch": 2.1265397536394177,
"grad_norm": 6.05610466003418,
"learning_rate": 1.4559537140724153e-05,
"loss": 1.7808,
"step": 18990
},
{
"epoch": 2.127659574468085,
"grad_norm": 12.274239540100098,
"learning_rate": 1.4540873460246363e-05,
"loss": 1.8396,
"step": 19000
},
{
"epoch": 2.1287793952967524,
"grad_norm": 5.31697416305542,
"learning_rate": 1.4522209779768573e-05,
"loss": 1.7804,
"step": 19010
},
{
"epoch": 2.1298992161254198,
"grad_norm": 19.778165817260742,
"learning_rate": 1.450354609929078e-05,
"loss": 1.6239,
"step": 19020
},
{
"epoch": 2.131019036954087,
"grad_norm": 4.198515892028809,
"learning_rate": 1.448488241881299e-05,
"loss": 1.7749,
"step": 19030
},
{
"epoch": 2.132138857782755,
"grad_norm": 5.769347667694092,
"learning_rate": 1.44662187383352e-05,
"loss": 1.7057,
"step": 19040
},
{
"epoch": 2.1332586786114223,
"grad_norm": 4.867179870605469,
"learning_rate": 1.444755505785741e-05,
"loss": 1.5719,
"step": 19050
},
{
"epoch": 2.1343784994400896,
"grad_norm": 4.64288854598999,
"learning_rate": 1.442889137737962e-05,
"loss": 1.8134,
"step": 19060
},
{
"epoch": 2.135498320268757,
"grad_norm": 5.441596031188965,
"learning_rate": 1.441022769690183e-05,
"loss": 2.0065,
"step": 19070
},
{
"epoch": 2.1366181410974243,
"grad_norm": 15.793349266052246,
"learning_rate": 1.439156401642404e-05,
"loss": 1.5416,
"step": 19080
},
{
"epoch": 2.1377379619260917,
"grad_norm": 9.388581275939941,
"learning_rate": 1.437290033594625e-05,
"loss": 1.9277,
"step": 19090
},
{
"epoch": 2.1388577827547595,
"grad_norm": 3.7332279682159424,
"learning_rate": 1.435423665546846e-05,
"loss": 1.7587,
"step": 19100
},
{
"epoch": 2.139977603583427,
"grad_norm": 4.11780309677124,
"learning_rate": 1.433557297499067e-05,
"loss": 2.2728,
"step": 19110
},
{
"epoch": 2.141097424412094,
"grad_norm": 4.731024742126465,
"learning_rate": 1.4316909294512878e-05,
"loss": 1.7996,
"step": 19120
},
{
"epoch": 2.1422172452407615,
"grad_norm": 20.14070701599121,
"learning_rate": 1.4298245614035088e-05,
"loss": 1.6665,
"step": 19130
},
{
"epoch": 2.143337066069429,
"grad_norm": 5.043517589569092,
"learning_rate": 1.4279581933557298e-05,
"loss": 1.8023,
"step": 19140
},
{
"epoch": 2.144456886898096,
"grad_norm": 15.140097618103027,
"learning_rate": 1.4260918253079508e-05,
"loss": 1.9381,
"step": 19150
},
{
"epoch": 2.1455767077267636,
"grad_norm": 5.910915374755859,
"learning_rate": 1.4242254572601718e-05,
"loss": 1.5582,
"step": 19160
},
{
"epoch": 2.146696528555431,
"grad_norm": 4.935706615447998,
"learning_rate": 1.4223590892123928e-05,
"loss": 1.6278,
"step": 19170
},
{
"epoch": 2.1478163493840987,
"grad_norm": 7.664555549621582,
"learning_rate": 1.4204927211646138e-05,
"loss": 2.1267,
"step": 19180
},
{
"epoch": 2.148936170212766,
"grad_norm": 5.317629337310791,
"learning_rate": 1.4186263531168348e-05,
"loss": 1.6991,
"step": 19190
},
{
"epoch": 2.1500559910414334,
"grad_norm": 8.390786170959473,
"learning_rate": 1.4167599850690558e-05,
"loss": 1.666,
"step": 19200
},
{
"epoch": 2.1511758118701008,
"grad_norm": 4.987608432769775,
"learning_rate": 1.4148936170212768e-05,
"loss": 1.9875,
"step": 19210
},
{
"epoch": 2.152295632698768,
"grad_norm": 13.22926139831543,
"learning_rate": 1.4130272489734978e-05,
"loss": 2.5654,
"step": 19220
},
{
"epoch": 2.1534154535274355,
"grad_norm": 5.794547080993652,
"learning_rate": 1.4111608809257188e-05,
"loss": 1.5586,
"step": 19230
},
{
"epoch": 2.1545352743561033,
"grad_norm": 18.272071838378906,
"learning_rate": 1.4092945128779398e-05,
"loss": 1.9604,
"step": 19240
},
{
"epoch": 2.1556550951847706,
"grad_norm": 8.715819358825684,
"learning_rate": 1.4074281448301604e-05,
"loss": 2.167,
"step": 19250
},
{
"epoch": 2.156774916013438,
"grad_norm": 5.621458530426025,
"learning_rate": 1.4055617767823814e-05,
"loss": 1.7484,
"step": 19260
},
{
"epoch": 2.1578947368421053,
"grad_norm": 5.077019214630127,
"learning_rate": 1.4036954087346024e-05,
"loss": 1.7654,
"step": 19270
},
{
"epoch": 2.1590145576707727,
"grad_norm": 4.050748825073242,
"learning_rate": 1.4018290406868234e-05,
"loss": 1.594,
"step": 19280
},
{
"epoch": 2.16013437849944,
"grad_norm": 8.234112739562988,
"learning_rate": 1.3999626726390444e-05,
"loss": 2.0443,
"step": 19290
},
{
"epoch": 2.1612541993281074,
"grad_norm": 9.419720649719238,
"learning_rate": 1.3980963045912654e-05,
"loss": 2.0494,
"step": 19300
},
{
"epoch": 2.1623740201567747,
"grad_norm": 5.222434997558594,
"learning_rate": 1.3962299365434864e-05,
"loss": 2.1207,
"step": 19310
},
{
"epoch": 2.1634938409854425,
"grad_norm": 4.949707508087158,
"learning_rate": 1.3943635684957074e-05,
"loss": 1.9951,
"step": 19320
},
{
"epoch": 2.16461366181411,
"grad_norm": 5.496902942657471,
"learning_rate": 1.3924972004479284e-05,
"loss": 2.0267,
"step": 19330
},
{
"epoch": 2.165733482642777,
"grad_norm": 21.034757614135742,
"learning_rate": 1.3906308324001493e-05,
"loss": 1.8192,
"step": 19340
},
{
"epoch": 2.1668533034714446,
"grad_norm": 16.238187789916992,
"learning_rate": 1.3887644643523703e-05,
"loss": 1.9281,
"step": 19350
},
{
"epoch": 2.167973124300112,
"grad_norm": 5.808258056640625,
"learning_rate": 1.3868980963045913e-05,
"loss": 1.7075,
"step": 19360
},
{
"epoch": 2.1690929451287793,
"grad_norm": 4.748766899108887,
"learning_rate": 1.3850317282568123e-05,
"loss": 2.0612,
"step": 19370
},
{
"epoch": 2.1702127659574466,
"grad_norm": 6.410683631896973,
"learning_rate": 1.3831653602090333e-05,
"loss": 1.8451,
"step": 19380
},
{
"epoch": 2.1713325867861144,
"grad_norm": 9.00479507446289,
"learning_rate": 1.3812989921612543e-05,
"loss": 1.8231,
"step": 19390
},
{
"epoch": 2.1724524076147818,
"grad_norm": 10.912854194641113,
"learning_rate": 1.3794326241134753e-05,
"loss": 1.75,
"step": 19400
},
{
"epoch": 2.173572228443449,
"grad_norm": 4.568667888641357,
"learning_rate": 1.3775662560656963e-05,
"loss": 1.8237,
"step": 19410
},
{
"epoch": 2.1746920492721165,
"grad_norm": 6.0548930168151855,
"learning_rate": 1.3756998880179173e-05,
"loss": 2.0656,
"step": 19420
},
{
"epoch": 2.175811870100784,
"grad_norm": 6.4514031410217285,
"learning_rate": 1.3738335199701383e-05,
"loss": 1.8537,
"step": 19430
},
{
"epoch": 2.176931690929451,
"grad_norm": 7.510464668273926,
"learning_rate": 1.3719671519223593e-05,
"loss": 2.1201,
"step": 19440
},
{
"epoch": 2.1780515117581185,
"grad_norm": 6.162042617797852,
"learning_rate": 1.3701007838745803e-05,
"loss": 1.9171,
"step": 19450
},
{
"epoch": 2.1791713325867863,
"grad_norm": 4.513441562652588,
"learning_rate": 1.3682344158268013e-05,
"loss": 1.6582,
"step": 19460
},
{
"epoch": 2.1802911534154537,
"grad_norm": 5.428256988525391,
"learning_rate": 1.3663680477790223e-05,
"loss": 1.807,
"step": 19470
},
{
"epoch": 2.181410974244121,
"grad_norm": 4.469424247741699,
"learning_rate": 1.364501679731243e-05,
"loss": 1.7969,
"step": 19480
},
{
"epoch": 2.1825307950727884,
"grad_norm": 18.87086296081543,
"learning_rate": 1.362635311683464e-05,
"loss": 2.0421,
"step": 19490
},
{
"epoch": 2.1836506159014557,
"grad_norm": 14.870500564575195,
"learning_rate": 1.360768943635685e-05,
"loss": 2.0896,
"step": 19500
},
{
"epoch": 2.184770436730123,
"grad_norm": 4.639315605163574,
"learning_rate": 1.358902575587906e-05,
"loss": 1.6858,
"step": 19510
},
{
"epoch": 2.1858902575587904,
"grad_norm": 5.512360572814941,
"learning_rate": 1.357036207540127e-05,
"loss": 1.7873,
"step": 19520
},
{
"epoch": 2.187010078387458,
"grad_norm": 4.680398464202881,
"learning_rate": 1.355169839492348e-05,
"loss": 1.7172,
"step": 19530
},
{
"epoch": 2.1881298992161256,
"grad_norm": 6.576661586761475,
"learning_rate": 1.353303471444569e-05,
"loss": 1.4995,
"step": 19540
},
{
"epoch": 2.189249720044793,
"grad_norm": 5.627395153045654,
"learning_rate": 1.3514371033967898e-05,
"loss": 2.0323,
"step": 19550
},
{
"epoch": 2.1903695408734603,
"grad_norm": 9.551543235778809,
"learning_rate": 1.3495707353490108e-05,
"loss": 1.9394,
"step": 19560
},
{
"epoch": 2.1914893617021276,
"grad_norm": 3.9927797317504883,
"learning_rate": 1.3477043673012318e-05,
"loss": 1.7925,
"step": 19570
},
{
"epoch": 2.192609182530795,
"grad_norm": 5.432565212249756,
"learning_rate": 1.3458379992534528e-05,
"loss": 1.9283,
"step": 19580
},
{
"epoch": 2.1937290033594623,
"grad_norm": 16.03640365600586,
"learning_rate": 1.3439716312056738e-05,
"loss": 1.554,
"step": 19590
},
{
"epoch": 2.19484882418813,
"grad_norm": 9.58271598815918,
"learning_rate": 1.3421052631578948e-05,
"loss": 2.2815,
"step": 19600
},
{
"epoch": 2.1959686450167974,
"grad_norm": 5.797165393829346,
"learning_rate": 1.3402388951101158e-05,
"loss": 2.107,
"step": 19610
},
{
"epoch": 2.197088465845465,
"grad_norm": 11.725902557373047,
"learning_rate": 1.3383725270623368e-05,
"loss": 1.8659,
"step": 19620
},
{
"epoch": 2.198208286674132,
"grad_norm": 16.76238441467285,
"learning_rate": 1.3365061590145578e-05,
"loss": 1.9411,
"step": 19630
},
{
"epoch": 2.1993281075027995,
"grad_norm": 4.064399242401123,
"learning_rate": 1.3346397909667788e-05,
"loss": 2.0996,
"step": 19640
},
{
"epoch": 2.200447928331467,
"grad_norm": 12.260157585144043,
"learning_rate": 1.3327734229189998e-05,
"loss": 1.8016,
"step": 19650
},
{
"epoch": 2.201567749160134,
"grad_norm": 4.968259811401367,
"learning_rate": 1.3309070548712208e-05,
"loss": 2.5253,
"step": 19660
},
{
"epoch": 2.202687569988802,
"grad_norm": 15.491079330444336,
"learning_rate": 1.3290406868234418e-05,
"loss": 1.9006,
"step": 19670
},
{
"epoch": 2.2038073908174693,
"grad_norm": 16.073698043823242,
"learning_rate": 1.3271743187756628e-05,
"loss": 2.1384,
"step": 19680
},
{
"epoch": 2.2049272116461367,
"grad_norm": 4.668467998504639,
"learning_rate": 1.3253079507278838e-05,
"loss": 1.7092,
"step": 19690
},
{
"epoch": 2.206047032474804,
"grad_norm": 16.72428321838379,
"learning_rate": 1.3234415826801048e-05,
"loss": 2.2848,
"step": 19700
},
{
"epoch": 2.2071668533034714,
"grad_norm": 16.04388999938965,
"learning_rate": 1.3215752146323254e-05,
"loss": 1.7384,
"step": 19710
},
{
"epoch": 2.2082866741321387,
"grad_norm": 7.498695373535156,
"learning_rate": 1.3197088465845464e-05,
"loss": 1.7484,
"step": 19720
},
{
"epoch": 2.209406494960806,
"grad_norm": 4.43148136138916,
"learning_rate": 1.3178424785367674e-05,
"loss": 1.845,
"step": 19730
},
{
"epoch": 2.2105263157894735,
"grad_norm": 3.5520262718200684,
"learning_rate": 1.3159761104889884e-05,
"loss": 2.0745,
"step": 19740
},
{
"epoch": 2.2116461366181412,
"grad_norm": 8.417689323425293,
"learning_rate": 1.3141097424412094e-05,
"loss": 2.0317,
"step": 19750
},
{
"epoch": 2.2127659574468086,
"grad_norm": 6.288638114929199,
"learning_rate": 1.3122433743934304e-05,
"loss": 1.9475,
"step": 19760
},
{
"epoch": 2.213885778275476,
"grad_norm": 9.5358304977417,
"learning_rate": 1.3103770063456513e-05,
"loss": 1.5572,
"step": 19770
},
{
"epoch": 2.2150055991041433,
"grad_norm": 6.784647464752197,
"learning_rate": 1.3085106382978723e-05,
"loss": 1.6474,
"step": 19780
},
{
"epoch": 2.2161254199328106,
"grad_norm": 6.584368705749512,
"learning_rate": 1.3066442702500933e-05,
"loss": 1.625,
"step": 19790
},
{
"epoch": 2.217245240761478,
"grad_norm": 10.06530475616455,
"learning_rate": 1.3047779022023143e-05,
"loss": 2.4165,
"step": 19800
},
{
"epoch": 2.218365061590146,
"grad_norm": 8.728497505187988,
"learning_rate": 1.3029115341545353e-05,
"loss": 2.192,
"step": 19810
},
{
"epoch": 2.219484882418813,
"grad_norm": 4.747127532958984,
"learning_rate": 1.3010451661067563e-05,
"loss": 1.8832,
"step": 19820
},
{
"epoch": 2.2206047032474805,
"grad_norm": 4.508890628814697,
"learning_rate": 1.2991787980589773e-05,
"loss": 1.9551,
"step": 19830
},
{
"epoch": 2.221724524076148,
"grad_norm": 9.029202461242676,
"learning_rate": 1.2973124300111983e-05,
"loss": 1.9141,
"step": 19840
},
{
"epoch": 2.222844344904815,
"grad_norm": 4.136125087738037,
"learning_rate": 1.2954460619634193e-05,
"loss": 1.9544,
"step": 19850
},
{
"epoch": 2.2239641657334825,
"grad_norm": 4.724370002746582,
"learning_rate": 1.2935796939156403e-05,
"loss": 1.5162,
"step": 19860
},
{
"epoch": 2.22508398656215,
"grad_norm": 5.846231937408447,
"learning_rate": 1.2917133258678613e-05,
"loss": 2.1023,
"step": 19870
},
{
"epoch": 2.2262038073908172,
"grad_norm": 5.567933082580566,
"learning_rate": 1.2898469578200823e-05,
"loss": 1.5832,
"step": 19880
},
{
"epoch": 2.227323628219485,
"grad_norm": 13.980506896972656,
"learning_rate": 1.2879805897723033e-05,
"loss": 1.5861,
"step": 19890
},
{
"epoch": 2.2284434490481524,
"grad_norm": 14.191877365112305,
"learning_rate": 1.2861142217245243e-05,
"loss": 1.6714,
"step": 19900
},
{
"epoch": 2.2295632698768197,
"grad_norm": 10.855998992919922,
"learning_rate": 1.2842478536767453e-05,
"loss": 1.9644,
"step": 19910
},
{
"epoch": 2.230683090705487,
"grad_norm": 5.852384090423584,
"learning_rate": 1.2823814856289663e-05,
"loss": 1.8953,
"step": 19920
},
{
"epoch": 2.2318029115341544,
"grad_norm": 5.915239334106445,
"learning_rate": 1.2805151175811871e-05,
"loss": 1.5698,
"step": 19930
},
{
"epoch": 2.232922732362822,
"grad_norm": 5.294043064117432,
"learning_rate": 1.2786487495334081e-05,
"loss": 2.0655,
"step": 19940
},
{
"epoch": 2.2340425531914896,
"grad_norm": 8.937568664550781,
"learning_rate": 1.276782381485629e-05,
"loss": 1.6406,
"step": 19950
},
{
"epoch": 2.235162374020157,
"grad_norm": 3.592744827270508,
"learning_rate": 1.27491601343785e-05,
"loss": 1.5429,
"step": 19960
},
{
"epoch": 2.2362821948488243,
"grad_norm": 5.134018898010254,
"learning_rate": 1.273049645390071e-05,
"loss": 2.1943,
"step": 19970
},
{
"epoch": 2.2374020156774916,
"grad_norm": 4.749664306640625,
"learning_rate": 1.271183277342292e-05,
"loss": 1.8952,
"step": 19980
},
{
"epoch": 2.238521836506159,
"grad_norm": 14.125395774841309,
"learning_rate": 1.2693169092945128e-05,
"loss": 1.9117,
"step": 19990
},
{
"epoch": 2.2396416573348263,
"grad_norm": 5.6524858474731445,
"learning_rate": 1.2674505412467338e-05,
"loss": 1.8199,
"step": 20000
},
{
"epoch": 2.2407614781634937,
"grad_norm": 9.836930274963379,
"learning_rate": 1.2655841731989548e-05,
"loss": 1.6642,
"step": 20010
},
{
"epoch": 2.241881298992161,
"grad_norm": 19.449764251708984,
"learning_rate": 1.2637178051511758e-05,
"loss": 1.9992,
"step": 20020
},
{
"epoch": 2.243001119820829,
"grad_norm": 6.832662105560303,
"learning_rate": 1.2618514371033968e-05,
"loss": 1.5874,
"step": 20030
},
{
"epoch": 2.244120940649496,
"grad_norm": 17.8643856048584,
"learning_rate": 1.2599850690556178e-05,
"loss": 2.1804,
"step": 20040
},
{
"epoch": 2.2452407614781635,
"grad_norm": 5.4305620193481445,
"learning_rate": 1.2581187010078388e-05,
"loss": 1.7595,
"step": 20050
},
{
"epoch": 2.246360582306831,
"grad_norm": 5.813434600830078,
"learning_rate": 1.2562523329600598e-05,
"loss": 1.8339,
"step": 20060
},
{
"epoch": 2.2474804031354982,
"grad_norm": 22.452621459960938,
"learning_rate": 1.2543859649122808e-05,
"loss": 2.0927,
"step": 20070
},
{
"epoch": 2.2486002239641656,
"grad_norm": 5.384066104888916,
"learning_rate": 1.2525195968645018e-05,
"loss": 1.8251,
"step": 20080
},
{
"epoch": 2.249720044792833,
"grad_norm": 16.19381332397461,
"learning_rate": 1.2506532288167228e-05,
"loss": 1.9519,
"step": 20090
},
{
"epoch": 2.2508398656215007,
"grad_norm": 5.359135627746582,
"learning_rate": 1.2487868607689438e-05,
"loss": 1.7556,
"step": 20100
},
{
"epoch": 2.251959686450168,
"grad_norm": 8.93488597869873,
"learning_rate": 1.2469204927211648e-05,
"loss": 2.0605,
"step": 20110
},
{
"epoch": 2.2530795072788354,
"grad_norm": 7.26114559173584,
"learning_rate": 1.2450541246733856e-05,
"loss": 1.9192,
"step": 20120
},
{
"epoch": 2.254199328107503,
"grad_norm": 10.906415939331055,
"learning_rate": 1.2431877566256066e-05,
"loss": 1.653,
"step": 20130
},
{
"epoch": 2.25531914893617,
"grad_norm": 5.915148735046387,
"learning_rate": 1.2413213885778276e-05,
"loss": 2.0476,
"step": 20140
},
{
"epoch": 2.2564389697648375,
"grad_norm": 10.197397232055664,
"learning_rate": 1.2394550205300486e-05,
"loss": 1.9027,
"step": 20150
},
{
"epoch": 2.257558790593505,
"grad_norm": 14.30677318572998,
"learning_rate": 1.2375886524822696e-05,
"loss": 1.4843,
"step": 20160
},
{
"epoch": 2.2586786114221726,
"grad_norm": 4.197308540344238,
"learning_rate": 1.2357222844344905e-05,
"loss": 1.9333,
"step": 20170
},
{
"epoch": 2.25979843225084,
"grad_norm": 6.416319847106934,
"learning_rate": 1.2338559163867115e-05,
"loss": 2.047,
"step": 20180
},
{
"epoch": 2.2609182530795073,
"grad_norm": 3.727569818496704,
"learning_rate": 1.2319895483389325e-05,
"loss": 2.0547,
"step": 20190
},
{
"epoch": 2.2620380739081747,
"grad_norm": 4.975082874298096,
"learning_rate": 1.2301231802911535e-05,
"loss": 1.9228,
"step": 20200
},
{
"epoch": 2.263157894736842,
"grad_norm": 10.25108528137207,
"learning_rate": 1.2282568122433745e-05,
"loss": 1.7067,
"step": 20210
},
{
"epoch": 2.2642777155655094,
"grad_norm": 6.914962291717529,
"learning_rate": 1.2263904441955955e-05,
"loss": 1.9841,
"step": 20220
},
{
"epoch": 2.265397536394177,
"grad_norm": 5.214871883392334,
"learning_rate": 1.2245240761478165e-05,
"loss": 1.6495,
"step": 20230
},
{
"epoch": 2.2665173572228445,
"grad_norm": 11.081496238708496,
"learning_rate": 1.2226577081000373e-05,
"loss": 1.9833,
"step": 20240
},
{
"epoch": 2.267637178051512,
"grad_norm": 7.861496448516846,
"learning_rate": 1.2207913400522583e-05,
"loss": 2.2553,
"step": 20250
},
{
"epoch": 2.2687569988801792,
"grad_norm": 4.18981409072876,
"learning_rate": 1.2189249720044793e-05,
"loss": 1.9001,
"step": 20260
},
{
"epoch": 2.2698768197088466,
"grad_norm": 15.897494316101074,
"learning_rate": 1.2170586039567003e-05,
"loss": 1.943,
"step": 20270
},
{
"epoch": 2.270996640537514,
"grad_norm": 14.775899887084961,
"learning_rate": 1.2151922359089213e-05,
"loss": 2.1118,
"step": 20280
},
{
"epoch": 2.2721164613661813,
"grad_norm": 10.079516410827637,
"learning_rate": 1.2133258678611423e-05,
"loss": 1.9167,
"step": 20290
},
{
"epoch": 2.2732362821948486,
"grad_norm": 4.894615173339844,
"learning_rate": 1.2114594998133633e-05,
"loss": 2.1881,
"step": 20300
},
{
"epoch": 2.2743561030235164,
"grad_norm": 16.707927703857422,
"learning_rate": 1.2095931317655843e-05,
"loss": 2.2987,
"step": 20310
},
{
"epoch": 2.275475923852184,
"grad_norm": 7.284656524658203,
"learning_rate": 1.2077267637178053e-05,
"loss": 1.7505,
"step": 20320
},
{
"epoch": 2.276595744680851,
"grad_norm": 5.649649143218994,
"learning_rate": 1.2058603956700263e-05,
"loss": 2.2449,
"step": 20330
},
{
"epoch": 2.2777155655095185,
"grad_norm": 5.512756824493408,
"learning_rate": 1.2039940276222473e-05,
"loss": 1.8765,
"step": 20340
},
{
"epoch": 2.278835386338186,
"grad_norm": 15.581269264221191,
"learning_rate": 1.2021276595744681e-05,
"loss": 2.1552,
"step": 20350
},
{
"epoch": 2.279955207166853,
"grad_norm": 13.498668670654297,
"learning_rate": 1.2002612915266891e-05,
"loss": 2.213,
"step": 20360
},
{
"epoch": 2.2810750279955205,
"grad_norm": 9.995055198669434,
"learning_rate": 1.1983949234789101e-05,
"loss": 2.079,
"step": 20370
},
{
"epoch": 2.2821948488241883,
"grad_norm": 3.790062189102173,
"learning_rate": 1.1965285554311311e-05,
"loss": 1.6197,
"step": 20380
},
{
"epoch": 2.2833146696528557,
"grad_norm": 4.261139392852783,
"learning_rate": 1.194662187383352e-05,
"loss": 2.0012,
"step": 20390
},
{
"epoch": 2.284434490481523,
"grad_norm": 3.16943621635437,
"learning_rate": 1.192795819335573e-05,
"loss": 1.9648,
"step": 20400
},
{
"epoch": 2.2855543113101904,
"grad_norm": 5.687836647033691,
"learning_rate": 1.190929451287794e-05,
"loss": 1.6828,
"step": 20410
},
{
"epoch": 2.2866741321388577,
"grad_norm": 5.451729774475098,
"learning_rate": 1.189063083240015e-05,
"loss": 2.2748,
"step": 20420
},
{
"epoch": 2.287793952967525,
"grad_norm": 10.777783393859863,
"learning_rate": 1.187196715192236e-05,
"loss": 2.0554,
"step": 20430
},
{
"epoch": 2.2889137737961924,
"grad_norm": 5.5777459144592285,
"learning_rate": 1.185330347144457e-05,
"loss": 1.9674,
"step": 20440
},
{
"epoch": 2.29003359462486,
"grad_norm": 11.55379581451416,
"learning_rate": 1.183463979096678e-05,
"loss": 1.8068,
"step": 20450
},
{
"epoch": 2.2911534154535276,
"grad_norm": 4.206369876861572,
"learning_rate": 1.181597611048899e-05,
"loss": 2.1127,
"step": 20460
},
{
"epoch": 2.292273236282195,
"grad_norm": 6.190250873565674,
"learning_rate": 1.1797312430011198e-05,
"loss": 2.1131,
"step": 20470
},
{
"epoch": 2.2933930571108623,
"grad_norm": 4.842706203460693,
"learning_rate": 1.1778648749533408e-05,
"loss": 1.7894,
"step": 20480
},
{
"epoch": 2.2945128779395296,
"grad_norm": 9.449223518371582,
"learning_rate": 1.1759985069055618e-05,
"loss": 1.7278,
"step": 20490
},
{
"epoch": 2.295632698768197,
"grad_norm": 11.598458290100098,
"learning_rate": 1.1741321388577828e-05,
"loss": 1.9876,
"step": 20500
},
{
"epoch": 2.2967525195968643,
"grad_norm": 4.6162238121032715,
"learning_rate": 1.1722657708100038e-05,
"loss": 1.4452,
"step": 20510
},
{
"epoch": 2.297872340425532,
"grad_norm": 17.614953994750977,
"learning_rate": 1.1703994027622248e-05,
"loss": 2.0197,
"step": 20520
},
{
"epoch": 2.2989921612541995,
"grad_norm": 13.69670295715332,
"learning_rate": 1.1685330347144458e-05,
"loss": 1.661,
"step": 20530
},
{
"epoch": 2.300111982082867,
"grad_norm": 3.7186923027038574,
"learning_rate": 1.1666666666666668e-05,
"loss": 2.0852,
"step": 20540
},
{
"epoch": 2.301231802911534,
"grad_norm": 7.386007308959961,
"learning_rate": 1.1648002986188878e-05,
"loss": 1.8018,
"step": 20550
},
{
"epoch": 2.3023516237402015,
"grad_norm": 4.978532791137695,
"learning_rate": 1.1629339305711088e-05,
"loss": 1.8555,
"step": 20560
},
{
"epoch": 2.303471444568869,
"grad_norm": 11.135411262512207,
"learning_rate": 1.1610675625233298e-05,
"loss": 2.0469,
"step": 20570
},
{
"epoch": 2.3045912653975362,
"grad_norm": 4.908180236816406,
"learning_rate": 1.1592011944755506e-05,
"loss": 1.5782,
"step": 20580
},
{
"epoch": 2.3057110862262036,
"grad_norm": 9.538016319274902,
"learning_rate": 1.1573348264277716e-05,
"loss": 2.1164,
"step": 20590
},
{
"epoch": 2.3068309070548714,
"grad_norm": 10.626128196716309,
"learning_rate": 1.1554684583799926e-05,
"loss": 2.0624,
"step": 20600
},
{
"epoch": 2.3079507278835387,
"grad_norm": 6.66682243347168,
"learning_rate": 1.1536020903322135e-05,
"loss": 1.7405,
"step": 20610
},
{
"epoch": 2.309070548712206,
"grad_norm": 5.158255100250244,
"learning_rate": 1.1517357222844345e-05,
"loss": 1.9986,
"step": 20620
},
{
"epoch": 2.3101903695408734,
"grad_norm": 12.696172714233398,
"learning_rate": 1.1498693542366555e-05,
"loss": 1.9899,
"step": 20630
},
{
"epoch": 2.3113101903695408,
"grad_norm": 10.744148254394531,
"learning_rate": 1.1480029861888765e-05,
"loss": 2.0481,
"step": 20640
},
{
"epoch": 2.312430011198208,
"grad_norm": 10.337085723876953,
"learning_rate": 1.1461366181410975e-05,
"loss": 2.08,
"step": 20650
},
{
"epoch": 2.313549832026876,
"grad_norm": 9.263678550720215,
"learning_rate": 1.1442702500933185e-05,
"loss": 1.7892,
"step": 20660
},
{
"epoch": 2.3146696528555433,
"grad_norm": 12.324502944946289,
"learning_rate": 1.1424038820455395e-05,
"loss": 1.9377,
"step": 20670
},
{
"epoch": 2.3157894736842106,
"grad_norm": 6.701484203338623,
"learning_rate": 1.1405375139977605e-05,
"loss": 1.8667,
"step": 20680
},
{
"epoch": 2.316909294512878,
"grad_norm": 7.743568420410156,
"learning_rate": 1.1386711459499815e-05,
"loss": 1.5235,
"step": 20690
},
{
"epoch": 2.3180291153415453,
"grad_norm": 4.805120468139648,
"learning_rate": 1.1368047779022023e-05,
"loss": 1.8663,
"step": 20700
},
{
"epoch": 2.3191489361702127,
"grad_norm": 6.219134330749512,
"learning_rate": 1.1349384098544233e-05,
"loss": 1.5116,
"step": 20710
},
{
"epoch": 2.32026875699888,
"grad_norm": 12.677251815795898,
"learning_rate": 1.1330720418066443e-05,
"loss": 1.8188,
"step": 20720
},
{
"epoch": 2.3213885778275474,
"grad_norm": 17.74958038330078,
"learning_rate": 1.1312056737588653e-05,
"loss": 2.178,
"step": 20730
},
{
"epoch": 2.322508398656215,
"grad_norm": 11.15487289428711,
"learning_rate": 1.1293393057110863e-05,
"loss": 2.0488,
"step": 20740
},
{
"epoch": 2.3236282194848825,
"grad_norm": 15.36052417755127,
"learning_rate": 1.1274729376633073e-05,
"loss": 1.7865,
"step": 20750
},
{
"epoch": 2.32474804031355,
"grad_norm": 14.987112998962402,
"learning_rate": 1.1256065696155283e-05,
"loss": 1.6118,
"step": 20760
},
{
"epoch": 2.325867861142217,
"grad_norm": 4.916079998016357,
"learning_rate": 1.1237402015677493e-05,
"loss": 2.0262,
"step": 20770
},
{
"epoch": 2.3269876819708846,
"grad_norm": 20.49549102783203,
"learning_rate": 1.1218738335199703e-05,
"loss": 2.1032,
"step": 20780
},
{
"epoch": 2.328107502799552,
"grad_norm": 16.82073402404785,
"learning_rate": 1.1200074654721911e-05,
"loss": 2.0666,
"step": 20790
},
{
"epoch": 2.3292273236282197,
"grad_norm": 4.178780555725098,
"learning_rate": 1.1181410974244121e-05,
"loss": 1.8655,
"step": 20800
},
{
"epoch": 2.330347144456887,
"grad_norm": 16.733497619628906,
"learning_rate": 1.1162747293766331e-05,
"loss": 1.7731,
"step": 20810
},
{
"epoch": 2.3314669652855544,
"grad_norm": 5.161161422729492,
"learning_rate": 1.114408361328854e-05,
"loss": 1.6687,
"step": 20820
},
{
"epoch": 2.3325867861142218,
"grad_norm": 4.8293304443359375,
"learning_rate": 1.112541993281075e-05,
"loss": 2.1226,
"step": 20830
},
{
"epoch": 2.333706606942889,
"grad_norm": 9.590071678161621,
"learning_rate": 1.110675625233296e-05,
"loss": 1.528,
"step": 20840
},
{
"epoch": 2.3348264277715565,
"grad_norm": 6.294408321380615,
"learning_rate": 1.108809257185517e-05,
"loss": 1.73,
"step": 20850
},
{
"epoch": 2.335946248600224,
"grad_norm": 10.485013008117676,
"learning_rate": 1.106942889137738e-05,
"loss": 2.159,
"step": 20860
},
{
"epoch": 2.337066069428891,
"grad_norm": 4.454178333282471,
"learning_rate": 1.105076521089959e-05,
"loss": 1.6548,
"step": 20870
},
{
"epoch": 2.338185890257559,
"grad_norm": 5.956608772277832,
"learning_rate": 1.10321015304218e-05,
"loss": 1.5492,
"step": 20880
},
{
"epoch": 2.3393057110862263,
"grad_norm": 7.09451150894165,
"learning_rate": 1.101343784994401e-05,
"loss": 1.6076,
"step": 20890
},
{
"epoch": 2.3404255319148937,
"grad_norm": 13.640632629394531,
"learning_rate": 1.099477416946622e-05,
"loss": 2.6322,
"step": 20900
},
{
"epoch": 2.341545352743561,
"grad_norm": 13.958121299743652,
"learning_rate": 1.097611048898843e-05,
"loss": 2.269,
"step": 20910
},
{
"epoch": 2.3426651735722284,
"grad_norm": 4.459420680999756,
"learning_rate": 1.095744680851064e-05,
"loss": 2.1053,
"step": 20920
},
{
"epoch": 2.3437849944008957,
"grad_norm": 6.627596855163574,
"learning_rate": 1.093878312803285e-05,
"loss": 1.8687,
"step": 20930
},
{
"epoch": 2.3449048152295635,
"grad_norm": 11.237924575805664,
"learning_rate": 1.0920119447555058e-05,
"loss": 1.8006,
"step": 20940
},
{
"epoch": 2.346024636058231,
"grad_norm": 6.611232757568359,
"learning_rate": 1.0901455767077268e-05,
"loss": 1.7491,
"step": 20950
},
{
"epoch": 2.347144456886898,
"grad_norm": 4.241340160369873,
"learning_rate": 1.0882792086599478e-05,
"loss": 1.9278,
"step": 20960
},
{
"epoch": 2.3482642777155656,
"grad_norm": 5.34893274307251,
"learning_rate": 1.0864128406121688e-05,
"loss": 1.9011,
"step": 20970
},
{
"epoch": 2.349384098544233,
"grad_norm": 15.109663009643555,
"learning_rate": 1.0845464725643898e-05,
"loss": 1.8797,
"step": 20980
},
{
"epoch": 2.3505039193729003,
"grad_norm": 6.220200061798096,
"learning_rate": 1.0826801045166108e-05,
"loss": 2.1459,
"step": 20990
},
{
"epoch": 2.3516237402015676,
"grad_norm": 4.657541751861572,
"learning_rate": 1.0808137364688318e-05,
"loss": 1.7789,
"step": 21000
},
{
"epoch": 2.352743561030235,
"grad_norm": 3.8326404094696045,
"learning_rate": 1.0789473684210526e-05,
"loss": 2.0583,
"step": 21010
},
{
"epoch": 2.3538633818589028,
"grad_norm": 6.379500865936279,
"learning_rate": 1.0770810003732736e-05,
"loss": 1.9574,
"step": 21020
},
{
"epoch": 2.35498320268757,
"grad_norm": 8.482017517089844,
"learning_rate": 1.0752146323254946e-05,
"loss": 1.7585,
"step": 21030
},
{
"epoch": 2.3561030235162375,
"grad_norm": 10.17912769317627,
"learning_rate": 1.0733482642777156e-05,
"loss": 2.1129,
"step": 21040
},
{
"epoch": 2.357222844344905,
"grad_norm": 5.909788131713867,
"learning_rate": 1.0714818962299365e-05,
"loss": 1.8167,
"step": 21050
},
{
"epoch": 2.358342665173572,
"grad_norm": 4.550489902496338,
"learning_rate": 1.0696155281821575e-05,
"loss": 1.4302,
"step": 21060
},
{
"epoch": 2.3594624860022395,
"grad_norm": 9.305154800415039,
"learning_rate": 1.0677491601343785e-05,
"loss": 1.5549,
"step": 21070
},
{
"epoch": 2.360582306830907,
"grad_norm": 6.617506504058838,
"learning_rate": 1.0658827920865995e-05,
"loss": 1.8111,
"step": 21080
},
{
"epoch": 2.3617021276595747,
"grad_norm": 4.230848789215088,
"learning_rate": 1.0640164240388205e-05,
"loss": 1.604,
"step": 21090
},
{
"epoch": 2.362821948488242,
"grad_norm": 5.817328929901123,
"learning_rate": 1.0621500559910415e-05,
"loss": 2.2684,
"step": 21100
},
{
"epoch": 2.3639417693169094,
"grad_norm": 11.449498176574707,
"learning_rate": 1.0602836879432625e-05,
"loss": 1.7141,
"step": 21110
},
{
"epoch": 2.3650615901455767,
"grad_norm": 11.167801856994629,
"learning_rate": 1.0584173198954835e-05,
"loss": 2.0209,
"step": 21120
},
{
"epoch": 2.366181410974244,
"grad_norm": 10.6549711227417,
"learning_rate": 1.0565509518477045e-05,
"loss": 1.5182,
"step": 21130
},
{
"epoch": 2.3673012318029114,
"grad_norm": 7.9820122718811035,
"learning_rate": 1.0546845837999255e-05,
"loss": 1.5622,
"step": 21140
},
{
"epoch": 2.3684210526315788,
"grad_norm": 3.4148659706115723,
"learning_rate": 1.0528182157521465e-05,
"loss": 2.0534,
"step": 21150
},
{
"epoch": 2.369540873460246,
"grad_norm": 11.469594955444336,
"learning_rate": 1.0509518477043675e-05,
"loss": 2.0005,
"step": 21160
},
{
"epoch": 2.370660694288914,
"grad_norm": 6.6395697593688965,
"learning_rate": 1.0490854796565883e-05,
"loss": 2.0595,
"step": 21170
},
{
"epoch": 2.3717805151175813,
"grad_norm": 6.698371410369873,
"learning_rate": 1.0472191116088093e-05,
"loss": 2.0216,
"step": 21180
},
{
"epoch": 2.3729003359462486,
"grad_norm": 12.63893985748291,
"learning_rate": 1.0453527435610303e-05,
"loss": 2.1768,
"step": 21190
},
{
"epoch": 2.374020156774916,
"grad_norm": 18.96299171447754,
"learning_rate": 1.0434863755132513e-05,
"loss": 1.8743,
"step": 21200
},
{
"epoch": 2.3751399776035833,
"grad_norm": 5.689150810241699,
"learning_rate": 1.0416200074654723e-05,
"loss": 1.7398,
"step": 21210
},
{
"epoch": 2.3762597984322507,
"grad_norm": 7.150450229644775,
"learning_rate": 1.0397536394176933e-05,
"loss": 1.4524,
"step": 21220
},
{
"epoch": 2.3773796192609185,
"grad_norm": 11.126862525939941,
"learning_rate": 1.0378872713699141e-05,
"loss": 2.1519,
"step": 21230
},
{
"epoch": 2.378499440089586,
"grad_norm": 5.954022407531738,
"learning_rate": 1.0360209033221351e-05,
"loss": 2.1018,
"step": 21240
},
{
"epoch": 2.379619260918253,
"grad_norm": 13.803711891174316,
"learning_rate": 1.0341545352743561e-05,
"loss": 1.5974,
"step": 21250
},
{
"epoch": 2.3807390817469205,
"grad_norm": 8.766247749328613,
"learning_rate": 1.0322881672265771e-05,
"loss": 2.0202,
"step": 21260
},
{
"epoch": 2.381858902575588,
"grad_norm": 10.347888946533203,
"learning_rate": 1.0304217991787981e-05,
"loss": 1.5816,
"step": 21270
},
{
"epoch": 2.382978723404255,
"grad_norm": 14.29037094116211,
"learning_rate": 1.0285554311310191e-05,
"loss": 1.8299,
"step": 21280
},
{
"epoch": 2.3840985442329226,
"grad_norm": 11.084178924560547,
"learning_rate": 1.02668906308324e-05,
"loss": 2.0029,
"step": 21290
},
{
"epoch": 2.38521836506159,
"grad_norm": 4.837276935577393,
"learning_rate": 1.024822695035461e-05,
"loss": 1.931,
"step": 21300
},
{
"epoch": 2.3863381858902577,
"grad_norm": 13.397496223449707,
"learning_rate": 1.022956326987682e-05,
"loss": 1.6302,
"step": 21310
},
{
"epoch": 2.387458006718925,
"grad_norm": 14.555484771728516,
"learning_rate": 1.021089958939903e-05,
"loss": 1.988,
"step": 21320
},
{
"epoch": 2.3885778275475924,
"grad_norm": 15.178104400634766,
"learning_rate": 1.019223590892124e-05,
"loss": 2.0671,
"step": 21330
},
{
"epoch": 2.3896976483762598,
"grad_norm": 19.128128051757812,
"learning_rate": 1.017357222844345e-05,
"loss": 1.8196,
"step": 21340
},
{
"epoch": 2.390817469204927,
"grad_norm": 5.763383865356445,
"learning_rate": 1.015490854796566e-05,
"loss": 1.6087,
"step": 21350
},
{
"epoch": 2.3919372900335945,
"grad_norm": 8.197660446166992,
"learning_rate": 1.013624486748787e-05,
"loss": 1.7122,
"step": 21360
},
{
"epoch": 2.3930571108622622,
"grad_norm": 14.964632987976074,
"learning_rate": 1.011758118701008e-05,
"loss": 1.8517,
"step": 21370
},
{
"epoch": 2.3941769316909296,
"grad_norm": 4.898643970489502,
"learning_rate": 1.009891750653229e-05,
"loss": 1.7062,
"step": 21380
},
{
"epoch": 2.395296752519597,
"grad_norm": 6.609580039978027,
"learning_rate": 1.00802538260545e-05,
"loss": 1.8959,
"step": 21390
},
{
"epoch": 2.3964165733482643,
"grad_norm": 5.1216912269592285,
"learning_rate": 1.0061590145576708e-05,
"loss": 2.305,
"step": 21400
},
{
"epoch": 2.3975363941769317,
"grad_norm": 12.660892486572266,
"learning_rate": 1.0042926465098918e-05,
"loss": 1.8053,
"step": 21410
},
{
"epoch": 2.398656215005599,
"grad_norm": 14.176844596862793,
"learning_rate": 1.0024262784621128e-05,
"loss": 1.9157,
"step": 21420
},
{
"epoch": 2.3997760358342664,
"grad_norm": 3.574338674545288,
"learning_rate": 1.0005599104143338e-05,
"loss": 2.1178,
"step": 21430
},
{
"epoch": 2.4008958566629337,
"grad_norm": 7.545993804931641,
"learning_rate": 9.986935423665546e-06,
"loss": 1.9478,
"step": 21440
},
{
"epoch": 2.4020156774916015,
"grad_norm": 5.757676601409912,
"learning_rate": 9.968271743187756e-06,
"loss": 1.6439,
"step": 21450
},
{
"epoch": 2.403135498320269,
"grad_norm": 8.382559776306152,
"learning_rate": 9.949608062709966e-06,
"loss": 2.1037,
"step": 21460
},
{
"epoch": 2.404255319148936,
"grad_norm": 20.870803833007812,
"learning_rate": 9.930944382232176e-06,
"loss": 2.1286,
"step": 21470
},
{
"epoch": 2.4053751399776035,
"grad_norm": 10.343074798583984,
"learning_rate": 9.912280701754386e-06,
"loss": 1.7321,
"step": 21480
},
{
"epoch": 2.406494960806271,
"grad_norm": 11.975387573242188,
"learning_rate": 9.893617021276596e-06,
"loss": 1.8359,
"step": 21490
},
{
"epoch": 2.4076147816349383,
"grad_norm": 9.322503089904785,
"learning_rate": 9.874953340798806e-06,
"loss": 1.7441,
"step": 21500
},
{
"epoch": 2.408734602463606,
"grad_norm": 5.631967544555664,
"learning_rate": 9.856289660321016e-06,
"loss": 1.8915,
"step": 21510
},
{
"epoch": 2.4098544232922734,
"grad_norm": 11.873062133789062,
"learning_rate": 9.837625979843225e-06,
"loss": 1.924,
"step": 21520
},
{
"epoch": 2.4109742441209407,
"grad_norm": 5.398472785949707,
"learning_rate": 9.818962299365435e-06,
"loss": 1.8141,
"step": 21530
},
{
"epoch": 2.412094064949608,
"grad_norm": 5.431132793426514,
"learning_rate": 9.800298618887645e-06,
"loss": 2.0006,
"step": 21540
},
{
"epoch": 2.4132138857782754,
"grad_norm": 6.424263000488281,
"learning_rate": 9.781634938409855e-06,
"loss": 2.2687,
"step": 21550
},
{
"epoch": 2.414333706606943,
"grad_norm": 15.934911727905273,
"learning_rate": 9.762971257932065e-06,
"loss": 1.5339,
"step": 21560
},
{
"epoch": 2.41545352743561,
"grad_norm": 8.640837669372559,
"learning_rate": 9.744307577454275e-06,
"loss": 1.9506,
"step": 21570
},
{
"epoch": 2.4165733482642775,
"grad_norm": 10.464945793151855,
"learning_rate": 9.725643896976485e-06,
"loss": 2.1268,
"step": 21580
},
{
"epoch": 2.4176931690929453,
"grad_norm": 10.642047882080078,
"learning_rate": 9.706980216498695e-06,
"loss": 1.7478,
"step": 21590
},
{
"epoch": 2.4188129899216126,
"grad_norm": 10.365204811096191,
"learning_rate": 9.688316536020905e-06,
"loss": 2.1777,
"step": 21600
},
{
"epoch": 2.41993281075028,
"grad_norm": 13.834842681884766,
"learning_rate": 9.669652855543115e-06,
"loss": 2.0466,
"step": 21610
},
{
"epoch": 2.4210526315789473,
"grad_norm": 11.793619155883789,
"learning_rate": 9.650989175065325e-06,
"loss": 1.707,
"step": 21620
},
{
"epoch": 2.4221724524076147,
"grad_norm": 12.311315536499023,
"learning_rate": 9.632325494587533e-06,
"loss": 1.8893,
"step": 21630
},
{
"epoch": 2.423292273236282,
"grad_norm": 11.055502891540527,
"learning_rate": 9.613661814109743e-06,
"loss": 1.9288,
"step": 21640
},
{
"epoch": 2.42441209406495,
"grad_norm": 4.9164934158325195,
"learning_rate": 9.594998133631953e-06,
"loss": 2.414,
"step": 21650
},
{
"epoch": 2.425531914893617,
"grad_norm": 15.42969036102295,
"learning_rate": 9.576334453154161e-06,
"loss": 1.9903,
"step": 21660
},
{
"epoch": 2.4266517357222845,
"grad_norm": 10.449507713317871,
"learning_rate": 9.557670772676371e-06,
"loss": 1.6622,
"step": 21670
},
{
"epoch": 2.427771556550952,
"grad_norm": 14.963556289672852,
"learning_rate": 9.539007092198581e-06,
"loss": 1.6082,
"step": 21680
},
{
"epoch": 2.4288913773796192,
"grad_norm": 13.123733520507812,
"learning_rate": 9.520343411720791e-06,
"loss": 1.9141,
"step": 21690
},
{
"epoch": 2.4300111982082866,
"grad_norm": 6.848084449768066,
"learning_rate": 9.501679731243001e-06,
"loss": 1.83,
"step": 21700
},
{
"epoch": 2.431131019036954,
"grad_norm": 5.545853137969971,
"learning_rate": 9.483016050765211e-06,
"loss": 2.2289,
"step": 21710
},
{
"epoch": 2.4322508398656213,
"grad_norm": 11.716573715209961,
"learning_rate": 9.464352370287421e-06,
"loss": 1.954,
"step": 21720
},
{
"epoch": 2.433370660694289,
"grad_norm": 7.332011699676514,
"learning_rate": 9.445688689809631e-06,
"loss": 1.6223,
"step": 21730
},
{
"epoch": 2.4344904815229564,
"grad_norm": 8.037787437438965,
"learning_rate": 9.427025009331841e-06,
"loss": 1.8996,
"step": 21740
},
{
"epoch": 2.435610302351624,
"grad_norm": 5.458945274353027,
"learning_rate": 9.40836132885405e-06,
"loss": 1.9408,
"step": 21750
},
{
"epoch": 2.436730123180291,
"grad_norm": 16.545921325683594,
"learning_rate": 9.38969764837626e-06,
"loss": 1.7479,
"step": 21760
},
{
"epoch": 2.4378499440089585,
"grad_norm": 12.776642799377441,
"learning_rate": 9.37103396789847e-06,
"loss": 1.8733,
"step": 21770
},
{
"epoch": 2.438969764837626,
"grad_norm": 3.9929423332214355,
"learning_rate": 9.35237028742068e-06,
"loss": 1.4235,
"step": 21780
},
{
"epoch": 2.4400895856662936,
"grad_norm": 12.087785720825195,
"learning_rate": 9.33370660694289e-06,
"loss": 1.8805,
"step": 21790
},
{
"epoch": 2.441209406494961,
"grad_norm": 4.026576519012451,
"learning_rate": 9.3150429264651e-06,
"loss": 2.21,
"step": 21800
},
{
"epoch": 2.4423292273236283,
"grad_norm": 5.352035999298096,
"learning_rate": 9.29637924598731e-06,
"loss": 1.8218,
"step": 21810
},
{
"epoch": 2.4434490481522957,
"grad_norm": 9.776129722595215,
"learning_rate": 9.27771556550952e-06,
"loss": 2.0464,
"step": 21820
},
{
"epoch": 2.444568868980963,
"grad_norm": 14.493003845214844,
"learning_rate": 9.25905188503173e-06,
"loss": 1.6871,
"step": 21830
},
{
"epoch": 2.4456886898096304,
"grad_norm": 17.30157470703125,
"learning_rate": 9.24038820455394e-06,
"loss": 2.5444,
"step": 21840
},
{
"epoch": 2.4468085106382977,
"grad_norm": 3.3516924381256104,
"learning_rate": 9.221724524076148e-06,
"loss": 1.6969,
"step": 21850
},
{
"epoch": 2.447928331466965,
"grad_norm": 12.714744567871094,
"learning_rate": 9.203060843598358e-06,
"loss": 2.1882,
"step": 21860
},
{
"epoch": 2.449048152295633,
"grad_norm": 5.864091396331787,
"learning_rate": 9.184397163120568e-06,
"loss": 1.9062,
"step": 21870
},
{
"epoch": 2.4501679731243002,
"grad_norm": 14.587536811828613,
"learning_rate": 9.165733482642776e-06,
"loss": 1.6267,
"step": 21880
},
{
"epoch": 2.4512877939529676,
"grad_norm": 9.073355674743652,
"learning_rate": 9.147069802164986e-06,
"loss": 1.619,
"step": 21890
},
{
"epoch": 2.452407614781635,
"grad_norm": 13.970871925354004,
"learning_rate": 9.128406121687196e-06,
"loss": 1.7811,
"step": 21900
},
{
"epoch": 2.4535274356103023,
"grad_norm": 13.218069076538086,
"learning_rate": 9.109742441209406e-06,
"loss": 2.1777,
"step": 21910
},
{
"epoch": 2.4546472564389696,
"grad_norm": 7.0698981285095215,
"learning_rate": 9.091078760731616e-06,
"loss": 1.672,
"step": 21920
},
{
"epoch": 2.455767077267637,
"grad_norm": 6.958558082580566,
"learning_rate": 9.072415080253826e-06,
"loss": 1.8189,
"step": 21930
},
{
"epoch": 2.456886898096305,
"grad_norm": 11.154871940612793,
"learning_rate": 9.053751399776036e-06,
"loss": 1.9092,
"step": 21940
},
{
"epoch": 2.458006718924972,
"grad_norm": 8.34592056274414,
"learning_rate": 9.035087719298246e-06,
"loss": 2.0183,
"step": 21950
},
{
"epoch": 2.4591265397536395,
"grad_norm": 17.590225219726562,
"learning_rate": 9.016424038820456e-06,
"loss": 2.0918,
"step": 21960
},
{
"epoch": 2.460246360582307,
"grad_norm": 16.22553825378418,
"learning_rate": 8.997760358342666e-06,
"loss": 1.9656,
"step": 21970
},
{
"epoch": 2.461366181410974,
"grad_norm": 18.10919952392578,
"learning_rate": 8.979096677864876e-06,
"loss": 1.6443,
"step": 21980
},
{
"epoch": 2.4624860022396415,
"grad_norm": 5.232290267944336,
"learning_rate": 8.960432997387085e-06,
"loss": 1.6849,
"step": 21990
},
{
"epoch": 2.463605823068309,
"grad_norm": 9.676012992858887,
"learning_rate": 8.941769316909295e-06,
"loss": 1.4555,
"step": 22000
},
{
"epoch": 2.4647256438969762,
"grad_norm": 6.006662845611572,
"learning_rate": 8.923105636431505e-06,
"loss": 1.5401,
"step": 22010
},
{
"epoch": 2.465845464725644,
"grad_norm": 17.258005142211914,
"learning_rate": 8.904441955953715e-06,
"loss": 2.4357,
"step": 22020
},
{
"epoch": 2.4669652855543114,
"grad_norm": 6.448751926422119,
"learning_rate": 8.885778275475925e-06,
"loss": 2.1087,
"step": 22030
},
{
"epoch": 2.4680851063829787,
"grad_norm": 5.431372165679932,
"learning_rate": 8.867114594998135e-06,
"loss": 1.9074,
"step": 22040
},
{
"epoch": 2.469204927211646,
"grad_norm": 12.926970481872559,
"learning_rate": 8.848450914520345e-06,
"loss": 1.9923,
"step": 22050
},
{
"epoch": 2.4703247480403134,
"grad_norm": 5.959377288818359,
"learning_rate": 8.829787234042553e-06,
"loss": 1.672,
"step": 22060
},
{
"epoch": 2.471444568868981,
"grad_norm": 16.465486526489258,
"learning_rate": 8.811123553564763e-06,
"loss": 1.8695,
"step": 22070
},
{
"epoch": 2.4725643896976486,
"grad_norm": 5.742716312408447,
"learning_rate": 8.792459873086973e-06,
"loss": 1.8781,
"step": 22080
},
{
"epoch": 2.473684210526316,
"grad_norm": 10.750136375427246,
"learning_rate": 8.773796192609183e-06,
"loss": 1.9953,
"step": 22090
},
{
"epoch": 2.4748040313549833,
"grad_norm": 5.631348609924316,
"learning_rate": 8.755132512131391e-06,
"loss": 1.6181,
"step": 22100
},
{
"epoch": 2.4759238521836506,
"grad_norm": 9.884969711303711,
"learning_rate": 8.736468831653601e-06,
"loss": 1.5844,
"step": 22110
},
{
"epoch": 2.477043673012318,
"grad_norm": 4.6717376708984375,
"learning_rate": 8.717805151175811e-06,
"loss": 1.8376,
"step": 22120
},
{
"epoch": 2.4781634938409853,
"grad_norm": 4.163270473480225,
"learning_rate": 8.699141470698021e-06,
"loss": 2.1804,
"step": 22130
},
{
"epoch": 2.4792833146696527,
"grad_norm": 7.534999847412109,
"learning_rate": 8.680477790220231e-06,
"loss": 1.8526,
"step": 22140
},
{
"epoch": 2.48040313549832,
"grad_norm": 9.90027904510498,
"learning_rate": 8.661814109742441e-06,
"loss": 1.8137,
"step": 22150
},
{
"epoch": 2.481522956326988,
"grad_norm": 19.237462997436523,
"learning_rate": 8.643150429264651e-06,
"loss": 2.6685,
"step": 22160
},
{
"epoch": 2.482642777155655,
"grad_norm": 9.682941436767578,
"learning_rate": 8.624486748786861e-06,
"loss": 1.826,
"step": 22170
},
{
"epoch": 2.4837625979843225,
"grad_norm": 3.538151264190674,
"learning_rate": 8.605823068309071e-06,
"loss": 1.801,
"step": 22180
},
{
"epoch": 2.48488241881299,
"grad_norm": 5.478328704833984,
"learning_rate": 8.587159387831281e-06,
"loss": 1.8762,
"step": 22190
},
{
"epoch": 2.4860022396416572,
"grad_norm": 7.476826190948486,
"learning_rate": 8.568495707353491e-06,
"loss": 2.1028,
"step": 22200
},
{
"epoch": 2.4871220604703246,
"grad_norm": 5.20843505859375,
"learning_rate": 8.549832026875701e-06,
"loss": 1.6034,
"step": 22210
},
{
"epoch": 2.4882418812989924,
"grad_norm": 5.28059720993042,
"learning_rate": 8.53116834639791e-06,
"loss": 1.7654,
"step": 22220
},
{
"epoch": 2.4893617021276597,
"grad_norm": 6.444010257720947,
"learning_rate": 8.51250466592012e-06,
"loss": 1.8655,
"step": 22230
},
{
"epoch": 2.490481522956327,
"grad_norm": 6.940224647521973,
"learning_rate": 8.49384098544233e-06,
"loss": 1.7356,
"step": 22240
},
{
"epoch": 2.4916013437849944,
"grad_norm": 23.465511322021484,
"learning_rate": 8.47517730496454e-06,
"loss": 1.846,
"step": 22250
},
{
"epoch": 2.4927211646136618,
"grad_norm": 8.295751571655273,
"learning_rate": 8.45651362448675e-06,
"loss": 1.9118,
"step": 22260
},
{
"epoch": 2.493840985442329,
"grad_norm": 12.125283241271973,
"learning_rate": 8.43784994400896e-06,
"loss": 2.0026,
"step": 22270
},
{
"epoch": 2.4949608062709965,
"grad_norm": 5.106156349182129,
"learning_rate": 8.419186263531168e-06,
"loss": 1.8797,
"step": 22280
},
{
"epoch": 2.496080627099664,
"grad_norm": 5.151834011077881,
"learning_rate": 8.400522583053378e-06,
"loss": 2.063,
"step": 22290
},
{
"epoch": 2.4972004479283316,
"grad_norm": 13.763957023620605,
"learning_rate": 8.381858902575588e-06,
"loss": 1.6182,
"step": 22300
},
{
"epoch": 2.498320268756999,
"grad_norm": 3.533198356628418,
"learning_rate": 8.363195222097798e-06,
"loss": 1.6865,
"step": 22310
},
{
"epoch": 2.4994400895856663,
"grad_norm": 5.562738418579102,
"learning_rate": 8.344531541620008e-06,
"loss": 1.6556,
"step": 22320
},
{
"epoch": 2.5005599104143337,
"grad_norm": 4.3271613121032715,
"learning_rate": 8.325867861142216e-06,
"loss": 2.088,
"step": 22330
},
{
"epoch": 2.501679731243001,
"grad_norm": 12.457752227783203,
"learning_rate": 8.307204180664426e-06,
"loss": 2.2114,
"step": 22340
},
{
"epoch": 2.5027995520716684,
"grad_norm": 5.431798934936523,
"learning_rate": 8.288540500186636e-06,
"loss": 1.5092,
"step": 22350
},
{
"epoch": 2.503919372900336,
"grad_norm": 13.543461799621582,
"learning_rate": 8.269876819708846e-06,
"loss": 1.9257,
"step": 22360
},
{
"epoch": 2.5050391937290035,
"grad_norm": 8.819217681884766,
"learning_rate": 8.251213139231056e-06,
"loss": 1.817,
"step": 22370
},
{
"epoch": 2.506159014557671,
"grad_norm": 7.270272254943848,
"learning_rate": 8.232549458753266e-06,
"loss": 1.7905,
"step": 22380
},
{
"epoch": 2.5072788353863382,
"grad_norm": 7.694066524505615,
"learning_rate": 8.213885778275476e-06,
"loss": 2.2374,
"step": 22390
},
{
"epoch": 2.5083986562150056,
"grad_norm": 7.0074286460876465,
"learning_rate": 8.195222097797686e-06,
"loss": 1.8355,
"step": 22400
},
{
"epoch": 2.509518477043673,
"grad_norm": 9.260007858276367,
"learning_rate": 8.176558417319896e-06,
"loss": 1.9881,
"step": 22410
},
{
"epoch": 2.5106382978723403,
"grad_norm": 11.184020042419434,
"learning_rate": 8.157894736842106e-06,
"loss": 2.0734,
"step": 22420
},
{
"epoch": 2.5117581187010076,
"grad_norm": 4.612696647644043,
"learning_rate": 8.139231056364316e-06,
"loss": 2.1045,
"step": 22430
},
{
"epoch": 2.512877939529675,
"grad_norm": 4.042170524597168,
"learning_rate": 8.120567375886526e-06,
"loss": 2.1674,
"step": 22440
},
{
"epoch": 2.5139977603583428,
"grad_norm": 6.870834827423096,
"learning_rate": 8.101903695408735e-06,
"loss": 1.5094,
"step": 22450
},
{
"epoch": 2.51511758118701,
"grad_norm": 11.569465637207031,
"learning_rate": 8.083240014930945e-06,
"loss": 1.7942,
"step": 22460
},
{
"epoch": 2.5162374020156775,
"grad_norm": 9.036044120788574,
"learning_rate": 8.064576334453155e-06,
"loss": 1.8934,
"step": 22470
},
{
"epoch": 2.517357222844345,
"grad_norm": 16.669771194458008,
"learning_rate": 8.045912653975365e-06,
"loss": 2.087,
"step": 22480
},
{
"epoch": 2.518477043673012,
"grad_norm": 11.910612106323242,
"learning_rate": 8.027248973497575e-06,
"loss": 2.067,
"step": 22490
},
{
"epoch": 2.51959686450168,
"grad_norm": 8.747892379760742,
"learning_rate": 8.008585293019783e-06,
"loss": 2.064,
"step": 22500
},
{
"epoch": 2.5207166853303473,
"grad_norm": 8.751014709472656,
"learning_rate": 7.989921612541993e-06,
"loss": 1.9492,
"step": 22510
},
{
"epoch": 2.5218365061590147,
"grad_norm": 16.253923416137695,
"learning_rate": 7.971257932064203e-06,
"loss": 2.2059,
"step": 22520
},
{
"epoch": 2.522956326987682,
"grad_norm": 5.416139125823975,
"learning_rate": 7.952594251586413e-06,
"loss": 2.3393,
"step": 22530
},
{
"epoch": 2.5240761478163494,
"grad_norm": 5.805497169494629,
"learning_rate": 7.933930571108623e-06,
"loss": 2.3075,
"step": 22540
},
{
"epoch": 2.5251959686450167,
"grad_norm": 14.180325508117676,
"learning_rate": 7.915266890630833e-06,
"loss": 1.3238,
"step": 22550
},
{
"epoch": 2.526315789473684,
"grad_norm": 18.959636688232422,
"learning_rate": 7.896603210153043e-06,
"loss": 2.4995,
"step": 22560
},
{
"epoch": 2.5274356103023514,
"grad_norm": 15.236656188964844,
"learning_rate": 7.877939529675251e-06,
"loss": 1.979,
"step": 22570
},
{
"epoch": 2.5285554311310188,
"grad_norm": 11.582307815551758,
"learning_rate": 7.859275849197461e-06,
"loss": 1.8858,
"step": 22580
},
{
"epoch": 2.5296752519596866,
"grad_norm": 4.920597553253174,
"learning_rate": 7.840612168719671e-06,
"loss": 1.9993,
"step": 22590
},
{
"epoch": 2.530795072788354,
"grad_norm": 16.651355743408203,
"learning_rate": 7.821948488241881e-06,
"loss": 1.8391,
"step": 22600
},
{
"epoch": 2.5319148936170213,
"grad_norm": 4.262025356292725,
"learning_rate": 7.803284807764091e-06,
"loss": 1.8714,
"step": 22610
},
{
"epoch": 2.5330347144456886,
"grad_norm": 16.481779098510742,
"learning_rate": 7.784621127286301e-06,
"loss": 1.8893,
"step": 22620
},
{
"epoch": 2.534154535274356,
"grad_norm": 6.779279708862305,
"learning_rate": 7.765957446808511e-06,
"loss": 1.7062,
"step": 22630
},
{
"epoch": 2.5352743561030238,
"grad_norm": 11.93194580078125,
"learning_rate": 7.747293766330721e-06,
"loss": 1.9667,
"step": 22640
},
{
"epoch": 2.536394176931691,
"grad_norm": 8.4479341506958,
"learning_rate": 7.728630085852931e-06,
"loss": 1.6635,
"step": 22650
},
{
"epoch": 2.5375139977603585,
"grad_norm": 9.052682876586914,
"learning_rate": 7.709966405375141e-06,
"loss": 1.7176,
"step": 22660
},
{
"epoch": 2.538633818589026,
"grad_norm": 17.69319725036621,
"learning_rate": 7.691302724897351e-06,
"loss": 2.0191,
"step": 22670
},
{
"epoch": 2.539753639417693,
"grad_norm": 8.785430908203125,
"learning_rate": 7.67263904441956e-06,
"loss": 1.7666,
"step": 22680
},
{
"epoch": 2.5408734602463605,
"grad_norm": 8.51176929473877,
"learning_rate": 7.65397536394177e-06,
"loss": 1.8087,
"step": 22690
},
{
"epoch": 2.541993281075028,
"grad_norm": 5.72242546081543,
"learning_rate": 7.63531168346398e-06,
"loss": 2.0391,
"step": 22700
},
{
"epoch": 2.543113101903695,
"grad_norm": 4.739030838012695,
"learning_rate": 7.616648002986189e-06,
"loss": 2.0501,
"step": 22710
},
{
"epoch": 2.5442329227323626,
"grad_norm": 7.8822736740112305,
"learning_rate": 7.597984322508399e-06,
"loss": 1.7549,
"step": 22720
},
{
"epoch": 2.5453527435610304,
"grad_norm": 14.290916442871094,
"learning_rate": 7.579320642030609e-06,
"loss": 1.606,
"step": 22730
},
{
"epoch": 2.5464725643896977,
"grad_norm": 13.635068893432617,
"learning_rate": 7.560656961552819e-06,
"loss": 1.795,
"step": 22740
},
{
"epoch": 2.547592385218365,
"grad_norm": 5.094437599182129,
"learning_rate": 7.541993281075028e-06,
"loss": 1.9598,
"step": 22750
},
{
"epoch": 2.5487122060470324,
"grad_norm": 5.03619384765625,
"learning_rate": 7.523329600597238e-06,
"loss": 2.3806,
"step": 22760
},
{
"epoch": 2.5498320268756998,
"grad_norm": 9.20356273651123,
"learning_rate": 7.504665920119448e-06,
"loss": 1.8016,
"step": 22770
},
{
"epoch": 2.5509518477043676,
"grad_norm": 7.040286540985107,
"learning_rate": 7.486002239641658e-06,
"loss": 1.924,
"step": 22780
},
{
"epoch": 2.552071668533035,
"grad_norm": 6.913671970367432,
"learning_rate": 7.467338559163868e-06,
"loss": 1.9914,
"step": 22790
},
{
"epoch": 2.5531914893617023,
"grad_norm": 6.331127643585205,
"learning_rate": 7.4486748786860764e-06,
"loss": 1.7911,
"step": 22800
},
{
"epoch": 2.5543113101903696,
"grad_norm": 6.453745365142822,
"learning_rate": 7.4300111982082864e-06,
"loss": 1.985,
"step": 22810
},
{
"epoch": 2.555431131019037,
"grad_norm": 15.195472717285156,
"learning_rate": 7.4113475177304964e-06,
"loss": 2.2203,
"step": 22820
},
{
"epoch": 2.5565509518477043,
"grad_norm": 13.667654037475586,
"learning_rate": 7.3926838372527064e-06,
"loss": 1.6591,
"step": 22830
},
{
"epoch": 2.5576707726763717,
"grad_norm": 4.465586185455322,
"learning_rate": 7.3740201567749165e-06,
"loss": 2.0768,
"step": 22840
},
{
"epoch": 2.558790593505039,
"grad_norm": 15.293925285339355,
"learning_rate": 7.3553564762971265e-06,
"loss": 1.9697,
"step": 22850
},
{
"epoch": 2.5599104143337064,
"grad_norm": 4.840792655944824,
"learning_rate": 7.336692795819336e-06,
"loss": 1.5074,
"step": 22860
},
{
"epoch": 2.561030235162374,
"grad_norm": 8.356353759765625,
"learning_rate": 7.318029115341546e-06,
"loss": 1.9765,
"step": 22870
},
{
"epoch": 2.5621500559910415,
"grad_norm": 6.289432525634766,
"learning_rate": 7.299365434863756e-06,
"loss": 1.7277,
"step": 22880
},
{
"epoch": 2.563269876819709,
"grad_norm": 14.32654094696045,
"learning_rate": 7.280701754385966e-06,
"loss": 2.1023,
"step": 22890
},
{
"epoch": 2.564389697648376,
"grad_norm": 7.886980056762695,
"learning_rate": 7.262038073908176e-06,
"loss": 1.8874,
"step": 22900
},
{
"epoch": 2.5655095184770436,
"grad_norm": 11.411437034606934,
"learning_rate": 7.243374393430386e-06,
"loss": 2.1147,
"step": 22910
},
{
"epoch": 2.5666293393057114,
"grad_norm": 7.815008640289307,
"learning_rate": 7.224710712952594e-06,
"loss": 2.302,
"step": 22920
},
{
"epoch": 2.5677491601343787,
"grad_norm": 13.516090393066406,
"learning_rate": 7.206047032474804e-06,
"loss": 1.6051,
"step": 22930
},
{
"epoch": 2.568868980963046,
"grad_norm": 5.94198751449585,
"learning_rate": 7.187383351997014e-06,
"loss": 1.8294,
"step": 22940
},
{
"epoch": 2.5699888017917134,
"grad_norm": 10.252525329589844,
"learning_rate": 7.168719671519224e-06,
"loss": 1.4627,
"step": 22950
},
{
"epoch": 2.5711086226203808,
"grad_norm": 8.05044174194336,
"learning_rate": 7.150055991041434e-06,
"loss": 1.7721,
"step": 22960
},
{
"epoch": 2.572228443449048,
"grad_norm": 5.872049331665039,
"learning_rate": 7.131392310563643e-06,
"loss": 2.0512,
"step": 22970
},
{
"epoch": 2.5733482642777155,
"grad_norm": 12.562164306640625,
"learning_rate": 7.112728630085853e-06,
"loss": 2.3197,
"step": 22980
},
{
"epoch": 2.574468085106383,
"grad_norm": 7.482940196990967,
"learning_rate": 7.094064949608063e-06,
"loss": 2.022,
"step": 22990
},
{
"epoch": 2.57558790593505,
"grad_norm": 11.764483451843262,
"learning_rate": 7.075401269130273e-06,
"loss": 1.9567,
"step": 23000
},
{
"epoch": 2.576707726763718,
"grad_norm": 10.071126937866211,
"learning_rate": 7.056737588652483e-06,
"loss": 1.8271,
"step": 23010
},
{
"epoch": 2.5778275475923853,
"grad_norm": 10.216212272644043,
"learning_rate": 7.038073908174693e-06,
"loss": 1.789,
"step": 23020
},
{
"epoch": 2.5789473684210527,
"grad_norm": 8.066825866699219,
"learning_rate": 7.0194102276969015e-06,
"loss": 1.6432,
"step": 23030
},
{
"epoch": 2.58006718924972,
"grad_norm": 16.877470016479492,
"learning_rate": 7.0007465472191115e-06,
"loss": 1.7376,
"step": 23040
},
{
"epoch": 2.5811870100783874,
"grad_norm": 5.387278079986572,
"learning_rate": 6.9820828667413215e-06,
"loss": 1.5875,
"step": 23050
},
{
"epoch": 2.5823068309070547,
"grad_norm": 15.250850677490234,
"learning_rate": 6.9634191862635315e-06,
"loss": 1.6844,
"step": 23060
},
{
"epoch": 2.5834266517357225,
"grad_norm": 7.900568962097168,
"learning_rate": 6.9447555057857415e-06,
"loss": 1.8616,
"step": 23070
},
{
"epoch": 2.58454647256439,
"grad_norm": 6.394863128662109,
"learning_rate": 6.926091825307951e-06,
"loss": 1.4066,
"step": 23080
},
{
"epoch": 2.585666293393057,
"grad_norm": 5.246687412261963,
"learning_rate": 6.907428144830161e-06,
"loss": 2.0123,
"step": 23090
},
{
"epoch": 2.5867861142217246,
"grad_norm": 13.717708587646484,
"learning_rate": 6.888764464352371e-06,
"loss": 1.8026,
"step": 23100
},
{
"epoch": 2.587905935050392,
"grad_norm": 5.991678237915039,
"learning_rate": 6.870100783874581e-06,
"loss": 1.9593,
"step": 23110
},
{
"epoch": 2.5890257558790593,
"grad_norm": 6.467216491699219,
"learning_rate": 6.851437103396791e-06,
"loss": 1.8865,
"step": 23120
},
{
"epoch": 2.5901455767077266,
"grad_norm": 11.342000961303711,
"learning_rate": 6.832773422919001e-06,
"loss": 2.0429,
"step": 23130
},
{
"epoch": 2.591265397536394,
"grad_norm": 6.355552673339844,
"learning_rate": 6.814109742441211e-06,
"loss": 1.5285,
"step": 23140
},
{
"epoch": 2.5923852183650617,
"grad_norm": 4.930696964263916,
"learning_rate": 6.795446061963419e-06,
"loss": 2.0815,
"step": 23150
},
{
"epoch": 2.593505039193729,
"grad_norm": 6.06488037109375,
"learning_rate": 6.776782381485629e-06,
"loss": 1.6929,
"step": 23160
},
{
"epoch": 2.5946248600223965,
"grad_norm": 17.244834899902344,
"learning_rate": 6.758118701007839e-06,
"loss": 2.1941,
"step": 23170
},
{
"epoch": 2.595744680851064,
"grad_norm": 6.777196407318115,
"learning_rate": 6.739455020530049e-06,
"loss": 1.6116,
"step": 23180
},
{
"epoch": 2.596864501679731,
"grad_norm": 10.06576156616211,
"learning_rate": 6.720791340052258e-06,
"loss": 1.6001,
"step": 23190
},
{
"epoch": 2.5979843225083985,
"grad_norm": 17.110258102416992,
"learning_rate": 6.702127659574468e-06,
"loss": 1.9066,
"step": 23200
},
{
"epoch": 2.5991041433370663,
"grad_norm": 10.628520011901855,
"learning_rate": 6.683463979096678e-06,
"loss": 1.9434,
"step": 23210
},
{
"epoch": 2.6002239641657336,
"grad_norm": 6.576961040496826,
"learning_rate": 6.664800298618888e-06,
"loss": 1.2507,
"step": 23220
},
{
"epoch": 2.601343784994401,
"grad_norm": 5.920810699462891,
"learning_rate": 6.646136618141098e-06,
"loss": 1.9261,
"step": 23230
},
{
"epoch": 2.6024636058230683,
"grad_norm": 4.780271530151367,
"learning_rate": 6.627472937663308e-06,
"loss": 1.9078,
"step": 23240
},
{
"epoch": 2.6035834266517357,
"grad_norm": 3.30251145362854,
"learning_rate": 6.608809257185518e-06,
"loss": 2.2392,
"step": 23250
},
{
"epoch": 2.604703247480403,
"grad_norm": 6.361575126647949,
"learning_rate": 6.590145576707728e-06,
"loss": 1.5298,
"step": 23260
},
{
"epoch": 2.6058230683090704,
"grad_norm": 5.133968830108643,
"learning_rate": 6.5714818962299365e-06,
"loss": 2.0936,
"step": 23270
},
{
"epoch": 2.6069428891377378,
"grad_norm": 11.35536003112793,
"learning_rate": 6.5528182157521465e-06,
"loss": 2.1213,
"step": 23280
},
{
"epoch": 2.608062709966405,
"grad_norm": 14.275880813598633,
"learning_rate": 6.534154535274356e-06,
"loss": 1.5543,
"step": 23290
},
{
"epoch": 2.609182530795073,
"grad_norm": 11.91288948059082,
"learning_rate": 6.515490854796566e-06,
"loss": 1.8959,
"step": 23300
},
{
"epoch": 2.6103023516237402,
"grad_norm": 7.54849910736084,
"learning_rate": 6.496827174318776e-06,
"loss": 1.9089,
"step": 23310
},
{
"epoch": 2.6114221724524076,
"grad_norm": 4.366549968719482,
"learning_rate": 6.478163493840986e-06,
"loss": 1.6129,
"step": 23320
},
{
"epoch": 2.612541993281075,
"grad_norm": 4.882798671722412,
"learning_rate": 6.459499813363196e-06,
"loss": 1.857,
"step": 23330
},
{
"epoch": 2.6136618141097423,
"grad_norm": 18.530513763427734,
"learning_rate": 6.440836132885406e-06,
"loss": 2.0854,
"step": 23340
},
{
"epoch": 2.61478163493841,
"grad_norm": 7.46307897567749,
"learning_rate": 6.422172452407616e-06,
"loss": 1.5743,
"step": 23350
},
{
"epoch": 2.6159014557670774,
"grad_norm": 16.533849716186523,
"learning_rate": 6.403508771929826e-06,
"loss": 1.9256,
"step": 23360
},
{
"epoch": 2.617021276595745,
"grad_norm": 10.886700630187988,
"learning_rate": 6.384845091452035e-06,
"loss": 1.8017,
"step": 23370
},
{
"epoch": 2.618141097424412,
"grad_norm": 12.989828109741211,
"learning_rate": 6.366181410974244e-06,
"loss": 1.6722,
"step": 23380
},
{
"epoch": 2.6192609182530795,
"grad_norm": 6.532835960388184,
"learning_rate": 6.347517730496454e-06,
"loss": 1.4352,
"step": 23390
},
{
"epoch": 2.620380739081747,
"grad_norm": 4.241251468658447,
"learning_rate": 6.328854050018663e-06,
"loss": 1.3917,
"step": 23400
},
{
"epoch": 2.621500559910414,
"grad_norm": 14.074196815490723,
"learning_rate": 6.310190369540873e-06,
"loss": 1.6827,
"step": 23410
},
{
"epoch": 2.6226203807390815,
"grad_norm": 12.457422256469727,
"learning_rate": 6.291526689063083e-06,
"loss": 1.9047,
"step": 23420
},
{
"epoch": 2.623740201567749,
"grad_norm": 5.373779296875,
"learning_rate": 6.272863008585293e-06,
"loss": 1.9629,
"step": 23430
},
{
"epoch": 2.6248600223964167,
"grad_norm": 9.897968292236328,
"learning_rate": 6.254199328107503e-06,
"loss": 2.0395,
"step": 23440
},
{
"epoch": 2.625979843225084,
"grad_norm": 8.989608764648438,
"learning_rate": 6.235535647629713e-06,
"loss": 2.2198,
"step": 23450
},
{
"epoch": 2.6270996640537514,
"grad_norm": 4.570735931396484,
"learning_rate": 6.216871967151923e-06,
"loss": 1.5039,
"step": 23460
},
{
"epoch": 2.6282194848824187,
"grad_norm": 5.221905708312988,
"learning_rate": 6.198208286674132e-06,
"loss": 1.9416,
"step": 23470
},
{
"epoch": 2.629339305711086,
"grad_norm": 12.316040992736816,
"learning_rate": 6.179544606196342e-06,
"loss": 1.5188,
"step": 23480
},
{
"epoch": 2.630459126539754,
"grad_norm": 7.457785606384277,
"learning_rate": 6.160880925718552e-06,
"loss": 1.8277,
"step": 23490
},
{
"epoch": 2.6315789473684212,
"grad_norm": 2.610050916671753,
"learning_rate": 6.1422172452407615e-06,
"loss": 1.8489,
"step": 23500
},
{
"epoch": 2.6326987681970886,
"grad_norm": 18.59623908996582,
"learning_rate": 6.1235535647629715e-06,
"loss": 2.0608,
"step": 23510
},
{
"epoch": 2.633818589025756,
"grad_norm": 12.009276390075684,
"learning_rate": 6.1048898842851815e-06,
"loss": 1.4504,
"step": 23520
},
{
"epoch": 2.6349384098544233,
"grad_norm": 5.837096214294434,
"learning_rate": 6.086226203807391e-06,
"loss": 1.832,
"step": 23530
},
{
"epoch": 2.6360582306830906,
"grad_norm": 7.26812744140625,
"learning_rate": 6.067562523329601e-06,
"loss": 1.9916,
"step": 23540
},
{
"epoch": 2.637178051511758,
"grad_norm": 12.086437225341797,
"learning_rate": 6.048898842851811e-06,
"loss": 2.0041,
"step": 23550
},
{
"epoch": 2.6382978723404253,
"grad_norm": 13.357325553894043,
"learning_rate": 6.030235162374021e-06,
"loss": 1.9994,
"step": 23560
},
{
"epoch": 2.6394176931690927,
"grad_norm": 5.371031284332275,
"learning_rate": 6.011571481896231e-06,
"loss": 2.0408,
"step": 23570
},
{
"epoch": 2.6405375139977605,
"grad_norm": 5.823469638824463,
"learning_rate": 5.992907801418441e-06,
"loss": 1.8143,
"step": 23580
},
{
"epoch": 2.641657334826428,
"grad_norm": 14.469114303588867,
"learning_rate": 5.97424412094065e-06,
"loss": 1.7717,
"step": 23590
},
{
"epoch": 2.642777155655095,
"grad_norm": 5.268034934997559,
"learning_rate": 5.955580440462859e-06,
"loss": 1.4735,
"step": 23600
},
{
"epoch": 2.6438969764837625,
"grad_norm": 9.155729293823242,
"learning_rate": 5.936916759985069e-06,
"loss": 1.6729,
"step": 23610
},
{
"epoch": 2.64501679731243,
"grad_norm": 6.211864471435547,
"learning_rate": 5.918253079507279e-06,
"loss": 1.8493,
"step": 23620
},
{
"epoch": 2.6461366181410977,
"grad_norm": 6.068511962890625,
"learning_rate": 5.899589399029489e-06,
"loss": 1.5881,
"step": 23630
},
{
"epoch": 2.647256438969765,
"grad_norm": 9.574570655822754,
"learning_rate": 5.880925718551699e-06,
"loss": 2.2456,
"step": 23640
},
{
"epoch": 2.6483762597984324,
"grad_norm": 17.160005569458008,
"learning_rate": 5.862262038073908e-06,
"loss": 2.0387,
"step": 23650
},
{
"epoch": 2.6494960806270997,
"grad_norm": 9.527430534362793,
"learning_rate": 5.843598357596118e-06,
"loss": 1.9547,
"step": 23660
},
{
"epoch": 2.650615901455767,
"grad_norm": 4.618621349334717,
"learning_rate": 5.824934677118328e-06,
"loss": 2.0931,
"step": 23670
},
{
"epoch": 2.6517357222844344,
"grad_norm": 8.613204002380371,
"learning_rate": 5.806270996640538e-06,
"loss": 1.9036,
"step": 23680
},
{
"epoch": 2.652855543113102,
"grad_norm": 6.867215156555176,
"learning_rate": 5.787607316162748e-06,
"loss": 1.6778,
"step": 23690
},
{
"epoch": 2.653975363941769,
"grad_norm": 7.841908931732178,
"learning_rate": 5.768943635684957e-06,
"loss": 1.8852,
"step": 23700
},
{
"epoch": 2.6550951847704365,
"grad_norm": 16.442686080932617,
"learning_rate": 5.7502799552071665e-06,
"loss": 2.0865,
"step": 23710
},
{
"epoch": 2.6562150055991043,
"grad_norm": 6.369174957275391,
"learning_rate": 5.7316162747293765e-06,
"loss": 2.0048,
"step": 23720
},
{
"epoch": 2.6573348264277716,
"grad_norm": 9.311386108398438,
"learning_rate": 5.7129525942515865e-06,
"loss": 2.0094,
"step": 23730
},
{
"epoch": 2.658454647256439,
"grad_norm": 20.149980545043945,
"learning_rate": 5.6942889137737965e-06,
"loss": 2.0632,
"step": 23740
},
{
"epoch": 2.6595744680851063,
"grad_norm": 6.561110496520996,
"learning_rate": 5.6756252332960065e-06,
"loss": 1.6642,
"step": 23750
},
{
"epoch": 2.6606942889137737,
"grad_norm": 12.71684455871582,
"learning_rate": 5.656961552818216e-06,
"loss": 1.8664,
"step": 23760
},
{
"epoch": 2.6618141097424415,
"grad_norm": 11.952552795410156,
"learning_rate": 5.638297872340426e-06,
"loss": 2.1605,
"step": 23770
},
{
"epoch": 2.662933930571109,
"grad_norm": 6.831346035003662,
"learning_rate": 5.619634191862636e-06,
"loss": 2.0142,
"step": 23780
},
{
"epoch": 2.664053751399776,
"grad_norm": 6.235137939453125,
"learning_rate": 5.600970511384846e-06,
"loss": 1.8274,
"step": 23790
},
{
"epoch": 2.6651735722284435,
"grad_norm": 6.040173530578613,
"learning_rate": 5.582306830907056e-06,
"loss": 1.8131,
"step": 23800
},
{
"epoch": 2.666293393057111,
"grad_norm": 5.556991100311279,
"learning_rate": 5.563643150429265e-06,
"loss": 1.8855,
"step": 23810
},
{
"epoch": 2.6674132138857782,
"grad_norm": 12.537089347839355,
"learning_rate": 5.544979469951474e-06,
"loss": 2.0187,
"step": 23820
},
{
"epoch": 2.6685330347144456,
"grad_norm": 6.158973693847656,
"learning_rate": 5.526315789473684e-06,
"loss": 1.9405,
"step": 23830
},
{
"epoch": 2.669652855543113,
"grad_norm": 7.678884029388428,
"learning_rate": 5.507652108995894e-06,
"loss": 1.6122,
"step": 23840
},
{
"epoch": 2.6707726763717803,
"grad_norm": 7.427279472351074,
"learning_rate": 5.488988428518104e-06,
"loss": 2.3574,
"step": 23850
},
{
"epoch": 2.671892497200448,
"grad_norm": 7.1111369132995605,
"learning_rate": 5.470324748040314e-06,
"loss": 2.1574,
"step": 23860
},
{
"epoch": 2.6730123180291154,
"grad_norm": 6.275892734527588,
"learning_rate": 5.451661067562524e-06,
"loss": 2.0668,
"step": 23870
},
{
"epoch": 2.674132138857783,
"grad_norm": 6.145971775054932,
"learning_rate": 5.432997387084733e-06,
"loss": 1.7284,
"step": 23880
},
{
"epoch": 2.67525195968645,
"grad_norm": 15.736746788024902,
"learning_rate": 5.414333706606943e-06,
"loss": 1.9066,
"step": 23890
},
{
"epoch": 2.6763717805151175,
"grad_norm": 15.421698570251465,
"learning_rate": 5.395670026129153e-06,
"loss": 1.8727,
"step": 23900
},
{
"epoch": 2.677491601343785,
"grad_norm": 8.590100288391113,
"learning_rate": 5.377006345651362e-06,
"loss": 2.0051,
"step": 23910
},
{
"epoch": 2.6786114221724526,
"grad_norm": 5.847489356994629,
"learning_rate": 5.358342665173572e-06,
"loss": 1.8494,
"step": 23920
},
{
"epoch": 2.67973124300112,
"grad_norm": 7.782309055328369,
"learning_rate": 5.339678984695782e-06,
"loss": 1.5807,
"step": 23930
},
{
"epoch": 2.6808510638297873,
"grad_norm": 12.278881072998047,
"learning_rate": 5.3210153042179915e-06,
"loss": 1.7016,
"step": 23940
},
{
"epoch": 2.6819708846584547,
"grad_norm": 8.289608001708984,
"learning_rate": 5.3023516237402016e-06,
"loss": 1.9277,
"step": 23950
},
{
"epoch": 2.683090705487122,
"grad_norm": 10.736886024475098,
"learning_rate": 5.2836879432624116e-06,
"loss": 1.6973,
"step": 23960
},
{
"epoch": 2.6842105263157894,
"grad_norm": 10.661380767822266,
"learning_rate": 5.2650242627846216e-06,
"loss": 1.8526,
"step": 23970
},
{
"epoch": 2.6853303471444567,
"grad_norm": 13.474076271057129,
"learning_rate": 5.2463605823068316e-06,
"loss": 1.901,
"step": 23980
},
{
"epoch": 2.686450167973124,
"grad_norm": 18.428157806396484,
"learning_rate": 5.227696901829041e-06,
"loss": 1.6495,
"step": 23990
},
{
"epoch": 2.687569988801792,
"grad_norm": 9.425440788269043,
"learning_rate": 5.209033221351251e-06,
"loss": 1.563,
"step": 24000
},
{
"epoch": 2.6886898096304592,
"grad_norm": 6.908605575561523,
"learning_rate": 5.190369540873461e-06,
"loss": 1.6201,
"step": 24010
},
{
"epoch": 2.6898096304591266,
"grad_norm": 14.704828262329102,
"learning_rate": 5.17170586039567e-06,
"loss": 1.9403,
"step": 24020
},
{
"epoch": 2.690929451287794,
"grad_norm": 6.244283676147461,
"learning_rate": 5.15304217991788e-06,
"loss": 1.8803,
"step": 24030
},
{
"epoch": 2.6920492721164613,
"grad_norm": 5.735403537750244,
"learning_rate": 5.13437849944009e-06,
"loss": 1.7747,
"step": 24040
},
{
"epoch": 2.6931690929451286,
"grad_norm": 10.276829719543457,
"learning_rate": 5.115714818962299e-06,
"loss": 1.4198,
"step": 24050
},
{
"epoch": 2.6942889137737964,
"grad_norm": 12.467009544372559,
"learning_rate": 5.097051138484509e-06,
"loss": 1.7507,
"step": 24060
},
{
"epoch": 2.6954087346024638,
"grad_norm": 5.5821685791015625,
"learning_rate": 5.078387458006719e-06,
"loss": 1.9415,
"step": 24070
},
{
"epoch": 2.696528555431131,
"grad_norm": 5.4152092933654785,
"learning_rate": 5.059723777528929e-06,
"loss": 1.8896,
"step": 24080
},
{
"epoch": 2.6976483762597985,
"grad_norm": 4.517449855804443,
"learning_rate": 5.041060097051139e-06,
"loss": 1.4663,
"step": 24090
},
{
"epoch": 2.698768197088466,
"grad_norm": 4.125208377838135,
"learning_rate": 5.022396416573349e-06,
"loss": 1.7891,
"step": 24100
},
{
"epoch": 2.699888017917133,
"grad_norm": 9.702006340026855,
"learning_rate": 5.003732736095558e-06,
"loss": 1.8673,
"step": 24110
},
{
"epoch": 2.7010078387458005,
"grad_norm": 6.5520524978637695,
"learning_rate": 4.985069055617768e-06,
"loss": 1.216,
"step": 24120
},
{
"epoch": 2.702127659574468,
"grad_norm": 6.257602214813232,
"learning_rate": 4.966405375139977e-06,
"loss": 1.8753,
"step": 24130
},
{
"epoch": 2.7032474804031352,
"grad_norm": 14.424186706542969,
"learning_rate": 4.947741694662187e-06,
"loss": 1.7425,
"step": 24140
},
{
"epoch": 2.704367301231803,
"grad_norm": 8.422646522521973,
"learning_rate": 4.929078014184397e-06,
"loss": 1.8206,
"step": 24150
},
{
"epoch": 2.7054871220604704,
"grad_norm": 12.657279968261719,
"learning_rate": 4.910414333706607e-06,
"loss": 1.8706,
"step": 24160
},
{
"epoch": 2.7066069428891377,
"grad_norm": 11.817652702331543,
"learning_rate": 4.8917506532288166e-06,
"loss": 1.5599,
"step": 24170
},
{
"epoch": 2.707726763717805,
"grad_norm": 10.890949249267578,
"learning_rate": 4.873086972751027e-06,
"loss": 1.8615,
"step": 24180
},
{
"epoch": 2.7088465845464724,
"grad_norm": 23.385997772216797,
"learning_rate": 4.854423292273237e-06,
"loss": 2.2412,
"step": 24190
},
{
"epoch": 2.70996640537514,
"grad_norm": 5.702988147735596,
"learning_rate": 4.835759611795447e-06,
"loss": 2.1498,
"step": 24200
},
{
"epoch": 2.7110862262038076,
"grad_norm": 14.075940132141113,
"learning_rate": 4.817095931317657e-06,
"loss": 2.0775,
"step": 24210
},
{
"epoch": 2.712206047032475,
"grad_norm": 7.462947845458984,
"learning_rate": 4.798432250839866e-06,
"loss": 1.7159,
"step": 24220
},
{
"epoch": 2.7133258678611423,
"grad_norm": 7.062658786773682,
"learning_rate": 4.779768570362076e-06,
"loss": 2.0002,
"step": 24230
},
{
"epoch": 2.7144456886898096,
"grad_norm": 4.55973482131958,
"learning_rate": 4.761104889884285e-06,
"loss": 1.6569,
"step": 24240
},
{
"epoch": 2.715565509518477,
"grad_norm": 5.43080997467041,
"learning_rate": 4.742441209406495e-06,
"loss": 1.8489,
"step": 24250
},
{
"epoch": 2.7166853303471443,
"grad_norm": 2.3447320461273193,
"learning_rate": 4.723777528928705e-06,
"loss": 1.8083,
"step": 24260
},
{
"epoch": 2.7178051511758117,
"grad_norm": 5.452301979064941,
"learning_rate": 4.705113848450915e-06,
"loss": 1.2504,
"step": 24270
},
{
"epoch": 2.718924972004479,
"grad_norm": 5.787363052368164,
"learning_rate": 4.686450167973125e-06,
"loss": 1.8861,
"step": 24280
},
{
"epoch": 2.720044792833147,
"grad_norm": 10.703194618225098,
"learning_rate": 4.667786487495334e-06,
"loss": 1.8052,
"step": 24290
},
{
"epoch": 2.721164613661814,
"grad_norm": 4.210628032684326,
"learning_rate": 4.649122807017544e-06,
"loss": 1.3951,
"step": 24300
},
{
"epoch": 2.7222844344904815,
"grad_norm": 10.037901878356934,
"learning_rate": 4.630459126539754e-06,
"loss": 1.4718,
"step": 24310
},
{
"epoch": 2.723404255319149,
"grad_norm": 8.047080993652344,
"learning_rate": 4.611795446061964e-06,
"loss": 1.9238,
"step": 24320
},
{
"epoch": 2.724524076147816,
"grad_norm": 7.7797980308532715,
"learning_rate": 4.593131765584173e-06,
"loss": 1.3415,
"step": 24330
},
{
"epoch": 2.725643896976484,
"grad_norm": 10.15149211883545,
"learning_rate": 4.574468085106383e-06,
"loss": 1.6493,
"step": 24340
},
{
"epoch": 2.7267637178051514,
"grad_norm": 9.764996528625488,
"learning_rate": 4.555804404628592e-06,
"loss": 1.9219,
"step": 24350
},
{
"epoch": 2.7278835386338187,
"grad_norm": 5.193393230438232,
"learning_rate": 4.5371407241508024e-06,
"loss": 1.9921,
"step": 24360
},
{
"epoch": 2.729003359462486,
"grad_norm": 6.264823913574219,
"learning_rate": 4.5184770436730124e-06,
"loss": 1.8523,
"step": 24370
},
{
"epoch": 2.7301231802911534,
"grad_norm": 7.00139045715332,
"learning_rate": 4.4998133631952224e-06,
"loss": 1.6758,
"step": 24380
},
{
"epoch": 2.7312430011198208,
"grad_norm": 12.917388916015625,
"learning_rate": 4.4811496827174324e-06,
"loss": 1.4985,
"step": 24390
},
{
"epoch": 2.732362821948488,
"grad_norm": 8.675110816955566,
"learning_rate": 4.462486002239642e-06,
"loss": 1.711,
"step": 24400
},
{
"epoch": 2.7334826427771555,
"grad_norm": 19.992246627807617,
"learning_rate": 4.443822321761852e-06,
"loss": 1.9475,
"step": 24410
},
{
"epoch": 2.734602463605823,
"grad_norm": 11.201859474182129,
"learning_rate": 4.425158641284062e-06,
"loss": 1.5635,
"step": 24420
},
{
"epoch": 2.7357222844344906,
"grad_norm": 16.30078887939453,
"learning_rate": 4.406494960806272e-06,
"loss": 1.5639,
"step": 24430
},
{
"epoch": 2.736842105263158,
"grad_norm": 6.638637542724609,
"learning_rate": 4.387831280328481e-06,
"loss": 1.5362,
"step": 24440
},
{
"epoch": 2.7379619260918253,
"grad_norm": 4.872713088989258,
"learning_rate": 4.369167599850691e-06,
"loss": 1.8645,
"step": 24450
},
{
"epoch": 2.7390817469204927,
"grad_norm": 7.324185848236084,
"learning_rate": 4.3505039193729e-06,
"loss": 1.8918,
"step": 24460
},
{
"epoch": 2.74020156774916,
"grad_norm": 4.039488792419434,
"learning_rate": 4.33184023889511e-06,
"loss": 1.6727,
"step": 24470
},
{
"epoch": 2.741321388577828,
"grad_norm": 11.2632417678833,
"learning_rate": 4.31317655841732e-06,
"loss": 1.7408,
"step": 24480
},
{
"epoch": 2.742441209406495,
"grad_norm": 5.795408725738525,
"learning_rate": 4.29451287793953e-06,
"loss": 1.7775,
"step": 24490
},
{
"epoch": 2.7435610302351625,
"grad_norm": 11.392952919006348,
"learning_rate": 4.27584919746174e-06,
"loss": 2.0474,
"step": 24500
},
{
"epoch": 2.74468085106383,
"grad_norm": 13.791424751281738,
"learning_rate": 4.25718551698395e-06,
"loss": 1.8472,
"step": 24510
},
{
"epoch": 2.745800671892497,
"grad_norm": 5.293121337890625,
"learning_rate": 4.238521836506159e-06,
"loss": 1.7227,
"step": 24520
},
{
"epoch": 2.7469204927211646,
"grad_norm": 12.450265884399414,
"learning_rate": 4.219858156028369e-06,
"loss": 1.8579,
"step": 24530
},
{
"epoch": 2.748040313549832,
"grad_norm": 15.723026275634766,
"learning_rate": 4.201194475550579e-06,
"loss": 1.8304,
"step": 24540
},
{
"epoch": 2.7491601343784993,
"grad_norm": 9.901751518249512,
"learning_rate": 4.182530795072788e-06,
"loss": 1.7486,
"step": 24550
},
{
"epoch": 2.7502799552071666,
"grad_norm": 4.100079536437988,
"learning_rate": 4.163867114594998e-06,
"loss": 1.9,
"step": 24560
},
{
"epoch": 2.7513997760358344,
"grad_norm": 12.591753005981445,
"learning_rate": 4.145203434117208e-06,
"loss": 1.8743,
"step": 24570
},
{
"epoch": 2.7525195968645018,
"grad_norm": 15.76544189453125,
"learning_rate": 4.1265397536394174e-06,
"loss": 1.8585,
"step": 24580
},
{
"epoch": 2.753639417693169,
"grad_norm": 16.583786010742188,
"learning_rate": 4.1078760731616274e-06,
"loss": 2.0335,
"step": 24590
},
{
"epoch": 2.7547592385218365,
"grad_norm": 4.9236741065979,
"learning_rate": 4.0892123926838375e-06,
"loss": 1.7419,
"step": 24600
},
{
"epoch": 2.755879059350504,
"grad_norm": 11.055110931396484,
"learning_rate": 4.0705487122060475e-06,
"loss": 1.8933,
"step": 24610
},
{
"epoch": 2.7569988801791716,
"grad_norm": 5.0752153396606445,
"learning_rate": 4.0518850317282575e-06,
"loss": 2.1183,
"step": 24620
},
{
"epoch": 2.758118701007839,
"grad_norm": 16.526071548461914,
"learning_rate": 4.0332213512504675e-06,
"loss": 2.0595,
"step": 24630
},
{
"epoch": 2.7592385218365063,
"grad_norm": 9.256998062133789,
"learning_rate": 4.014557670772677e-06,
"loss": 2.0778,
"step": 24640
},
{
"epoch": 2.7603583426651737,
"grad_norm": 5.081698894500732,
"learning_rate": 3.995893990294887e-06,
"loss": 2.3567,
"step": 24650
},
{
"epoch": 2.761478163493841,
"grad_norm": 6.34022855758667,
"learning_rate": 3.977230309817096e-06,
"loss": 1.7605,
"step": 24660
},
{
"epoch": 2.7625979843225084,
"grad_norm": 13.629969596862793,
"learning_rate": 3.958566629339306e-06,
"loss": 1.7667,
"step": 24670
},
{
"epoch": 2.7637178051511757,
"grad_norm": 2.7139463424682617,
"learning_rate": 3.939902948861516e-06,
"loss": 1.9269,
"step": 24680
},
{
"epoch": 2.764837625979843,
"grad_norm": 8.121241569519043,
"learning_rate": 3.921239268383725e-06,
"loss": 2.0188,
"step": 24690
},
{
"epoch": 2.7659574468085104,
"grad_norm": 8.049278259277344,
"learning_rate": 3.902575587905935e-06,
"loss": 1.6694,
"step": 24700
},
{
"epoch": 2.767077267637178,
"grad_norm": 11.107040405273438,
"learning_rate": 3.883911907428145e-06,
"loss": 2.3154,
"step": 24710
},
{
"epoch": 2.7681970884658456,
"grad_norm": 6.505083084106445,
"learning_rate": 3.865248226950355e-06,
"loss": 1.8864,
"step": 24720
},
{
"epoch": 2.769316909294513,
"grad_norm": 10.971221923828125,
"learning_rate": 3.846584546472565e-06,
"loss": 1.9134,
"step": 24730
},
{
"epoch": 2.7704367301231803,
"grad_norm": 4.81821870803833,
"learning_rate": 3.827920865994775e-06,
"loss": 1.8286,
"step": 24740
},
{
"epoch": 2.7715565509518476,
"grad_norm": 11.385892868041992,
"learning_rate": 3.8092571855169837e-06,
"loss": 2.0084,
"step": 24750
},
{
"epoch": 2.772676371780515,
"grad_norm": 5.368199825286865,
"learning_rate": 3.7905935050391937e-06,
"loss": 1.6912,
"step": 24760
},
{
"epoch": 2.7737961926091828,
"grad_norm": 8.81826400756836,
"learning_rate": 3.7719298245614037e-06,
"loss": 1.9957,
"step": 24770
},
{
"epoch": 2.77491601343785,
"grad_norm": 11.901360511779785,
"learning_rate": 3.7532661440836137e-06,
"loss": 2.0144,
"step": 24780
},
{
"epoch": 2.7760358342665175,
"grad_norm": 4.292434215545654,
"learning_rate": 3.7346024636058233e-06,
"loss": 1.7567,
"step": 24790
},
{
"epoch": 2.777155655095185,
"grad_norm": 11.836398124694824,
"learning_rate": 3.7159387831280333e-06,
"loss": 2.2476,
"step": 24800
},
{
"epoch": 2.778275475923852,
"grad_norm": 5.869718551635742,
"learning_rate": 3.6972751026502425e-06,
"loss": 1.7245,
"step": 24810
},
{
"epoch": 2.7793952967525195,
"grad_norm": 17.10307502746582,
"learning_rate": 3.6786114221724525e-06,
"loss": 1.8758,
"step": 24820
},
{
"epoch": 2.780515117581187,
"grad_norm": 12.18902587890625,
"learning_rate": 3.6599477416946625e-06,
"loss": 1.7381,
"step": 24830
},
{
"epoch": 2.781634938409854,
"grad_norm": 6.835455894470215,
"learning_rate": 3.641284061216872e-06,
"loss": 1.7884,
"step": 24840
},
{
"epoch": 2.782754759238522,
"grad_norm": 4.9272541999816895,
"learning_rate": 3.622620380739082e-06,
"loss": 1.7644,
"step": 24850
},
{
"epoch": 2.7838745800671894,
"grad_norm": 8.834504127502441,
"learning_rate": 3.603956700261292e-06,
"loss": 1.7862,
"step": 24860
},
{
"epoch": 2.7849944008958567,
"grad_norm": 4.071360111236572,
"learning_rate": 3.5852930197835012e-06,
"loss": 1.8609,
"step": 24870
},
{
"epoch": 2.786114221724524,
"grad_norm": 10.676229476928711,
"learning_rate": 3.5666293393057112e-06,
"loss": 2.1334,
"step": 24880
},
{
"epoch": 2.7872340425531914,
"grad_norm": 15.198076248168945,
"learning_rate": 3.547965658827921e-06,
"loss": 2.2799,
"step": 24890
},
{
"epoch": 2.7883538633818588,
"grad_norm": 6.402156829833984,
"learning_rate": 3.529301978350131e-06,
"loss": 2.0125,
"step": 24900
},
{
"epoch": 2.7894736842105265,
"grad_norm": 6.411627292633057,
"learning_rate": 3.510638297872341e-06,
"loss": 1.7944,
"step": 24910
},
{
"epoch": 2.790593505039194,
"grad_norm": 8.75882625579834,
"learning_rate": 3.491974617394551e-06,
"loss": 1.9815,
"step": 24920
},
{
"epoch": 2.7917133258678613,
"grad_norm": 7.310870170593262,
"learning_rate": 3.47331093691676e-06,
"loss": 1.8766,
"step": 24930
},
{
"epoch": 2.7928331466965286,
"grad_norm": 10.259556770324707,
"learning_rate": 3.45464725643897e-06,
"loss": 1.7763,
"step": 24940
},
{
"epoch": 2.793952967525196,
"grad_norm": 16.690889358520508,
"learning_rate": 3.4359835759611796e-06,
"loss": 1.8072,
"step": 24950
},
{
"epoch": 2.7950727883538633,
"grad_norm": 3.8095781803131104,
"learning_rate": 3.4173198954833896e-06,
"loss": 1.7118,
"step": 24960
},
{
"epoch": 2.7961926091825307,
"grad_norm": 14.5068941116333,
"learning_rate": 3.3986562150055996e-06,
"loss": 1.7467,
"step": 24970
},
{
"epoch": 2.797312430011198,
"grad_norm": 6.723850250244141,
"learning_rate": 3.3799925345278087e-06,
"loss": 1.9952,
"step": 24980
},
{
"epoch": 2.7984322508398654,
"grad_norm": 11.487224578857422,
"learning_rate": 3.3613288540500187e-06,
"loss": 2.0564,
"step": 24990
},
{
"epoch": 2.799552071668533,
"grad_norm": 8.214585304260254,
"learning_rate": 3.3426651735722283e-06,
"loss": 1.7448,
"step": 25000
},
{
"epoch": 2.8006718924972005,
"grad_norm": 17.024662017822266,
"learning_rate": 3.3240014930944383e-06,
"loss": 1.5265,
"step": 25010
},
{
"epoch": 2.801791713325868,
"grad_norm": 8.459342002868652,
"learning_rate": 3.3053378126166483e-06,
"loss": 1.9152,
"step": 25020
},
{
"epoch": 2.802911534154535,
"grad_norm": 8.775456428527832,
"learning_rate": 3.2866741321388583e-06,
"loss": 2.038,
"step": 25030
},
{
"epoch": 2.8040313549832026,
"grad_norm": 5.435427665710449,
"learning_rate": 3.2680104516610675e-06,
"loss": 1.9425,
"step": 25040
},
{
"epoch": 2.8051511758118703,
"grad_norm": 11.797080993652344,
"learning_rate": 3.2493467711832775e-06,
"loss": 2.1394,
"step": 25050
},
{
"epoch": 2.8062709966405377,
"grad_norm": 15.878313064575195,
"learning_rate": 3.230683090705487e-06,
"loss": 2.2649,
"step": 25060
},
{
"epoch": 2.807390817469205,
"grad_norm": 11.612027168273926,
"learning_rate": 3.212019410227697e-06,
"loss": 1.4949,
"step": 25070
},
{
"epoch": 2.8085106382978724,
"grad_norm": 14.034370422363281,
"learning_rate": 3.193355729749907e-06,
"loss": 1.2324,
"step": 25080
},
{
"epoch": 2.8096304591265397,
"grad_norm": 19.293161392211914,
"learning_rate": 3.174692049272117e-06,
"loss": 2.2584,
"step": 25090
},
{
"epoch": 2.810750279955207,
"grad_norm": 6.628214359283447,
"learning_rate": 3.1560283687943263e-06,
"loss": 1.5751,
"step": 25100
},
{
"epoch": 2.8118701007838744,
"grad_norm": 16.124217987060547,
"learning_rate": 3.137364688316536e-06,
"loss": 2.0243,
"step": 25110
},
{
"epoch": 2.812989921612542,
"grad_norm": 6.106673717498779,
"learning_rate": 3.118701007838746e-06,
"loss": 1.9384,
"step": 25120
},
{
"epoch": 2.814109742441209,
"grad_norm": 8.48365306854248,
"learning_rate": 3.100037327360956e-06,
"loss": 1.7521,
"step": 25130
},
{
"epoch": 2.815229563269877,
"grad_norm": 6.083116054534912,
"learning_rate": 3.0813736468831654e-06,
"loss": 1.5675,
"step": 25140
},
{
"epoch": 2.8163493840985443,
"grad_norm": 16.414230346679688,
"learning_rate": 3.0627099664053754e-06,
"loss": 1.8823,
"step": 25150
},
{
"epoch": 2.8174692049272116,
"grad_norm": 9.585153579711914,
"learning_rate": 3.044046285927585e-06,
"loss": 2.0315,
"step": 25160
},
{
"epoch": 2.818589025755879,
"grad_norm": 10.281465530395508,
"learning_rate": 3.0253826054497946e-06,
"loss": 1.8152,
"step": 25170
},
{
"epoch": 2.8197088465845463,
"grad_norm": 4.486020565032959,
"learning_rate": 3.0067189249720046e-06,
"loss": 1.5388,
"step": 25180
},
{
"epoch": 2.820828667413214,
"grad_norm": 9.82872200012207,
"learning_rate": 2.9880552444942146e-06,
"loss": 2.1895,
"step": 25190
},
{
"epoch": 2.8219484882418815,
"grad_norm": 13.074673652648926,
"learning_rate": 2.969391564016424e-06,
"loss": 2.0497,
"step": 25200
},
{
"epoch": 2.823068309070549,
"grad_norm": 14.259294509887695,
"learning_rate": 2.9507278835386338e-06,
"loss": 2.1442,
"step": 25210
},
{
"epoch": 2.824188129899216,
"grad_norm": 4.93138313293457,
"learning_rate": 2.9320642030608438e-06,
"loss": 1.7481,
"step": 25220
},
{
"epoch": 2.8253079507278835,
"grad_norm": 6.078362941741943,
"learning_rate": 2.9134005225830533e-06,
"loss": 1.7641,
"step": 25230
},
{
"epoch": 2.826427771556551,
"grad_norm": 12.861568450927734,
"learning_rate": 2.8947368421052634e-06,
"loss": 1.8018,
"step": 25240
},
{
"epoch": 2.8275475923852182,
"grad_norm": 8.707850456237793,
"learning_rate": 2.8760731616274734e-06,
"loss": 1.8521,
"step": 25250
},
{
"epoch": 2.8286674132138856,
"grad_norm": 12.562678337097168,
"learning_rate": 2.8574094811496825e-06,
"loss": 1.7405,
"step": 25260
},
{
"epoch": 2.829787234042553,
"grad_norm": 18.912614822387695,
"learning_rate": 2.8387458006718925e-06,
"loss": 1.7982,
"step": 25270
},
{
"epoch": 2.8309070548712207,
"grad_norm": 6.430877208709717,
"learning_rate": 2.8200821201941025e-06,
"loss": 1.924,
"step": 25280
},
{
"epoch": 2.832026875699888,
"grad_norm": 5.357717990875244,
"learning_rate": 2.801418439716312e-06,
"loss": 1.9311,
"step": 25290
},
{
"epoch": 2.8331466965285554,
"grad_norm": 13.666546821594238,
"learning_rate": 2.782754759238522e-06,
"loss": 1.6254,
"step": 25300
},
{
"epoch": 2.834266517357223,
"grad_norm": 3.9082486629486084,
"learning_rate": 2.7640910787607317e-06,
"loss": 1.9293,
"step": 25310
},
{
"epoch": 2.83538633818589,
"grad_norm": 5.5125732421875,
"learning_rate": 2.7454273982829413e-06,
"loss": 2.0829,
"step": 25320
},
{
"epoch": 2.836506159014558,
"grad_norm": 5.76453971862793,
"learning_rate": 2.7267637178051513e-06,
"loss": 1.9197,
"step": 25330
},
{
"epoch": 2.8376259798432253,
"grad_norm": 7.775246620178223,
"learning_rate": 2.7081000373273613e-06,
"loss": 1.8856,
"step": 25340
},
{
"epoch": 2.8387458006718926,
"grad_norm": 8.506511688232422,
"learning_rate": 2.689436356849571e-06,
"loss": 1.5753,
"step": 25350
},
{
"epoch": 2.83986562150056,
"grad_norm": 15.149114608764648,
"learning_rate": 2.670772676371781e-06,
"loss": 1.8317,
"step": 25360
},
{
"epoch": 2.8409854423292273,
"grad_norm": 11.696993827819824,
"learning_rate": 2.6521089958939904e-06,
"loss": 1.6111,
"step": 25370
},
{
"epoch": 2.8421052631578947,
"grad_norm": 4.9371209144592285,
"learning_rate": 2.6334453154162e-06,
"loss": 1.7954,
"step": 25380
},
{
"epoch": 2.843225083986562,
"grad_norm": 18.963680267333984,
"learning_rate": 2.61478163493841e-06,
"loss": 2.4235,
"step": 25390
},
{
"epoch": 2.8443449048152294,
"grad_norm": 11.938720703125,
"learning_rate": 2.59611795446062e-06,
"loss": 1.667,
"step": 25400
},
{
"epoch": 2.8454647256438967,
"grad_norm": 8.081610679626465,
"learning_rate": 2.5774542739828296e-06,
"loss": 1.9183,
"step": 25410
},
{
"epoch": 2.8465845464725645,
"grad_norm": 6.90621280670166,
"learning_rate": 2.558790593505039e-06,
"loss": 1.9125,
"step": 25420
},
{
"epoch": 2.847704367301232,
"grad_norm": 11.308591842651367,
"learning_rate": 2.540126913027249e-06,
"loss": 2.2834,
"step": 25430
},
{
"epoch": 2.8488241881298992,
"grad_norm": 12.649473190307617,
"learning_rate": 2.5214632325494588e-06,
"loss": 2.0221,
"step": 25440
},
{
"epoch": 2.8499440089585666,
"grad_norm": 2.6453304290771484,
"learning_rate": 2.502799552071669e-06,
"loss": 1.7468,
"step": 25450
},
{
"epoch": 2.851063829787234,
"grad_norm": 10.066814422607422,
"learning_rate": 2.484135871593879e-06,
"loss": 1.706,
"step": 25460
},
{
"epoch": 2.8521836506159017,
"grad_norm": 11.059213638305664,
"learning_rate": 2.465472191116088e-06,
"loss": 2.4896,
"step": 25470
},
{
"epoch": 2.853303471444569,
"grad_norm": 5.4998016357421875,
"learning_rate": 2.446808510638298e-06,
"loss": 1.8076,
"step": 25480
},
{
"epoch": 2.8544232922732364,
"grad_norm": 5.691712379455566,
"learning_rate": 2.4281448301605075e-06,
"loss": 1.9473,
"step": 25490
},
{
"epoch": 2.855543113101904,
"grad_norm": 15.464776992797852,
"learning_rate": 2.4094811496827175e-06,
"loss": 2.2398,
"step": 25500
},
{
"epoch": 2.856662933930571,
"grad_norm": 3.6245803833007812,
"learning_rate": 2.3908174692049275e-06,
"loss": 1.9342,
"step": 25510
},
{
"epoch": 2.8577827547592385,
"grad_norm": 8.159955978393555,
"learning_rate": 2.372153788727137e-06,
"loss": 1.6683,
"step": 25520
},
{
"epoch": 2.858902575587906,
"grad_norm": 11.253711700439453,
"learning_rate": 2.3534901082493467e-06,
"loss": 1.6143,
"step": 25530
},
{
"epoch": 2.860022396416573,
"grad_norm": 14.729676246643066,
"learning_rate": 2.3348264277715567e-06,
"loss": 1.8251,
"step": 25540
},
{
"epoch": 2.8611422172452405,
"grad_norm": 8.878775596618652,
"learning_rate": 2.3161627472937663e-06,
"loss": 1.8646,
"step": 25550
},
{
"epoch": 2.8622620380739083,
"grad_norm": 14.987347602844238,
"learning_rate": 2.2974990668159763e-06,
"loss": 1.9119,
"step": 25560
},
{
"epoch": 2.8633818589025757,
"grad_norm": 5.109477519989014,
"learning_rate": 2.2788353863381863e-06,
"loss": 2.2621,
"step": 25570
},
{
"epoch": 2.864501679731243,
"grad_norm": 15.59926700592041,
"learning_rate": 2.2601717058603955e-06,
"loss": 1.9845,
"step": 25580
},
{
"epoch": 2.8656215005599104,
"grad_norm": 4.6907057762146,
"learning_rate": 2.2415080253826055e-06,
"loss": 2.0773,
"step": 25590
},
{
"epoch": 2.8667413213885777,
"grad_norm": 6.02996826171875,
"learning_rate": 2.2228443449048155e-06,
"loss": 2.0783,
"step": 25600
},
{
"epoch": 2.867861142217245,
"grad_norm": 14.389623641967773,
"learning_rate": 2.204180664427025e-06,
"loss": 1.9496,
"step": 25610
},
{
"epoch": 2.868980963045913,
"grad_norm": 5.188795566558838,
"learning_rate": 2.185516983949235e-06,
"loss": 1.5794,
"step": 25620
},
{
"epoch": 2.8701007838745802,
"grad_norm": 11.492018699645996,
"learning_rate": 2.1668533034714446e-06,
"loss": 1.5701,
"step": 25630
},
{
"epoch": 2.8712206047032476,
"grad_norm": 7.7545366287231445,
"learning_rate": 2.1481896229936542e-06,
"loss": 1.9405,
"step": 25640
},
{
"epoch": 2.872340425531915,
"grad_norm": 6.0428314208984375,
"learning_rate": 2.1295259425158642e-06,
"loss": 1.6884,
"step": 25650
},
{
"epoch": 2.8734602463605823,
"grad_norm": 15.78642749786377,
"learning_rate": 2.1108622620380742e-06,
"loss": 1.9008,
"step": 25660
},
{
"epoch": 2.8745800671892496,
"grad_norm": 19.360076904296875,
"learning_rate": 2.092198581560284e-06,
"loss": 2.1805,
"step": 25670
},
{
"epoch": 2.875699888017917,
"grad_norm": 10.965484619140625,
"learning_rate": 2.0735349010824934e-06,
"loss": 1.8339,
"step": 25680
},
{
"epoch": 2.8768197088465843,
"grad_norm": 13.197468757629395,
"learning_rate": 2.0548712206047034e-06,
"loss": 1.7847,
"step": 25690
},
{
"epoch": 2.8779395296752517,
"grad_norm": 8.53710651397705,
"learning_rate": 2.036207540126913e-06,
"loss": 1.6177,
"step": 25700
},
{
"epoch": 2.8790593505039195,
"grad_norm": 5.556687355041504,
"learning_rate": 2.017543859649123e-06,
"loss": 1.8142,
"step": 25710
},
{
"epoch": 2.880179171332587,
"grad_norm": 8.486677169799805,
"learning_rate": 1.998880179171333e-06,
"loss": 2.013,
"step": 25720
},
{
"epoch": 2.881298992161254,
"grad_norm": 12.919852256774902,
"learning_rate": 1.9802164986935426e-06,
"loss": 1.7,
"step": 25730
},
{
"epoch": 2.8824188129899215,
"grad_norm": 9.491839408874512,
"learning_rate": 1.961552818215752e-06,
"loss": 1.881,
"step": 25740
},
{
"epoch": 2.883538633818589,
"grad_norm": 15.983736991882324,
"learning_rate": 1.942889137737962e-06,
"loss": 2.1679,
"step": 25750
},
{
"epoch": 2.8846584546472567,
"grad_norm": 5.595208644866943,
"learning_rate": 1.9242254572601717e-06,
"loss": 1.8901,
"step": 25760
},
{
"epoch": 2.885778275475924,
"grad_norm": 10.111395835876465,
"learning_rate": 1.9055617767823815e-06,
"loss": 2.3119,
"step": 25770
},
{
"epoch": 2.8868980963045914,
"grad_norm": 18.772340774536133,
"learning_rate": 1.8868980963045915e-06,
"loss": 1.9794,
"step": 25780
},
{
"epoch": 2.8880179171332587,
"grad_norm": 4.584385395050049,
"learning_rate": 1.8682344158268011e-06,
"loss": 1.7576,
"step": 25790
},
{
"epoch": 2.889137737961926,
"grad_norm": 17.171106338500977,
"learning_rate": 1.849570735349011e-06,
"loss": 2.2186,
"step": 25800
},
{
"epoch": 2.8902575587905934,
"grad_norm": 18.548585891723633,
"learning_rate": 1.8309070548712205e-06,
"loss": 2.2678,
"step": 25810
},
{
"epoch": 2.891377379619261,
"grad_norm": 4.9082417488098145,
"learning_rate": 1.8122433743934305e-06,
"loss": 1.9042,
"step": 25820
},
{
"epoch": 2.892497200447928,
"grad_norm": 15.952796936035156,
"learning_rate": 1.7935796939156403e-06,
"loss": 1.7798,
"step": 25830
},
{
"epoch": 2.8936170212765955,
"grad_norm": 12.888788223266602,
"learning_rate": 1.7749160134378499e-06,
"loss": 1.648,
"step": 25840
},
{
"epoch": 2.8947368421052633,
"grad_norm": 5.010805606842041,
"learning_rate": 1.7562523329600599e-06,
"loss": 1.8376,
"step": 25850
},
{
"epoch": 2.8958566629339306,
"grad_norm": 9.813081741333008,
"learning_rate": 1.7375886524822697e-06,
"loss": 1.846,
"step": 25860
},
{
"epoch": 2.896976483762598,
"grad_norm": 11.167447090148926,
"learning_rate": 1.7189249720044792e-06,
"loss": 1.8414,
"step": 25870
},
{
"epoch": 2.8980963045912653,
"grad_norm": 5.086580276489258,
"learning_rate": 1.700261291526689e-06,
"loss": 1.4907,
"step": 25880
},
{
"epoch": 2.8992161254199327,
"grad_norm": 12.3839693069458,
"learning_rate": 1.681597611048899e-06,
"loss": 1.8576,
"step": 25890
},
{
"epoch": 2.9003359462486005,
"grad_norm": 6.351990222930908,
"learning_rate": 1.6629339305711086e-06,
"loss": 1.8742,
"step": 25900
},
{
"epoch": 2.901455767077268,
"grad_norm": 5.9348978996276855,
"learning_rate": 1.6442702500933184e-06,
"loss": 2.1135,
"step": 25910
},
{
"epoch": 2.902575587905935,
"grad_norm": 6.191033363342285,
"learning_rate": 1.6256065696155284e-06,
"loss": 1.6505,
"step": 25920
},
{
"epoch": 2.9036954087346025,
"grad_norm": 6.510402202606201,
"learning_rate": 1.606942889137738e-06,
"loss": 1.8007,
"step": 25930
},
{
"epoch": 2.90481522956327,
"grad_norm": 7.332479953765869,
"learning_rate": 1.5882792086599478e-06,
"loss": 1.9839,
"step": 25940
},
{
"epoch": 2.9059350503919372,
"grad_norm": 3.8477463722229004,
"learning_rate": 1.5696155281821578e-06,
"loss": 2.1467,
"step": 25950
},
{
"epoch": 2.9070548712206046,
"grad_norm": 11.45783805847168,
"learning_rate": 1.5509518477043674e-06,
"loss": 1.9373,
"step": 25960
},
{
"epoch": 2.908174692049272,
"grad_norm": 18.16033363342285,
"learning_rate": 1.5322881672265772e-06,
"loss": 1.9102,
"step": 25970
},
{
"epoch": 2.9092945128779393,
"grad_norm": 14.75328254699707,
"learning_rate": 1.513624486748787e-06,
"loss": 1.5849,
"step": 25980
},
{
"epoch": 2.910414333706607,
"grad_norm": 7.447336196899414,
"learning_rate": 1.4949608062709968e-06,
"loss": 1.187,
"step": 25990
},
{
"epoch": 2.9115341545352744,
"grad_norm": 6.396903038024902,
"learning_rate": 1.4762971257932065e-06,
"loss": 1.6073,
"step": 26000
},
{
"epoch": 2.9126539753639418,
"grad_norm": 13.001298904418945,
"learning_rate": 1.4576334453154161e-06,
"loss": 2.1491,
"step": 26010
},
{
"epoch": 2.913773796192609,
"grad_norm": 12.818767547607422,
"learning_rate": 1.4389697648376261e-06,
"loss": 1.6316,
"step": 26020
},
{
"epoch": 2.9148936170212765,
"grad_norm": 11.24215030670166,
"learning_rate": 1.420306084359836e-06,
"loss": 2.058,
"step": 26030
},
{
"epoch": 2.9160134378499443,
"grad_norm": 5.04683780670166,
"learning_rate": 1.4016424038820455e-06,
"loss": 1.6477,
"step": 26040
},
{
"epoch": 2.9171332586786116,
"grad_norm": 8.69908618927002,
"learning_rate": 1.3829787234042553e-06,
"loss": 1.7106,
"step": 26050
},
{
"epoch": 2.918253079507279,
"grad_norm": 10.91391372680664,
"learning_rate": 1.364315042926465e-06,
"loss": 1.878,
"step": 26060
},
{
"epoch": 2.9193729003359463,
"grad_norm": 15.811108589172363,
"learning_rate": 1.3456513624486749e-06,
"loss": 2.1953,
"step": 26070
},
{
"epoch": 2.9204927211646137,
"grad_norm": 12.007214546203613,
"learning_rate": 1.3269876819708847e-06,
"loss": 2.0814,
"step": 26080
},
{
"epoch": 2.921612541993281,
"grad_norm": 11.011650085449219,
"learning_rate": 1.3083240014930945e-06,
"loss": 1.5842,
"step": 26090
},
{
"epoch": 2.9227323628219484,
"grad_norm": 8.247806549072266,
"learning_rate": 1.2896603210153043e-06,
"loss": 1.6496,
"step": 26100
},
{
"epoch": 2.9238521836506157,
"grad_norm": 8.519516944885254,
"learning_rate": 1.270996640537514e-06,
"loss": 2.2153,
"step": 26110
},
{
"epoch": 2.924972004479283,
"grad_norm": 8.969647407531738,
"learning_rate": 1.2523329600597239e-06,
"loss": 2.1363,
"step": 26120
},
{
"epoch": 2.926091825307951,
"grad_norm": 11.865436553955078,
"learning_rate": 1.2336692795819336e-06,
"loss": 2.0425,
"step": 26130
},
{
"epoch": 2.927211646136618,
"grad_norm": 20.383333206176758,
"learning_rate": 1.2150055991041434e-06,
"loss": 2.0145,
"step": 26140
},
{
"epoch": 2.9283314669652856,
"grad_norm": 5.632237434387207,
"learning_rate": 1.1963419186263532e-06,
"loss": 1.9828,
"step": 26150
},
{
"epoch": 2.929451287793953,
"grad_norm": 7.68386697769165,
"learning_rate": 1.177678238148563e-06,
"loss": 2.4198,
"step": 26160
},
{
"epoch": 2.9305711086226203,
"grad_norm": 5.8563151359558105,
"learning_rate": 1.1590145576707726e-06,
"loss": 1.6861,
"step": 26170
},
{
"epoch": 2.931690929451288,
"grad_norm": 20.610515594482422,
"learning_rate": 1.1403508771929826e-06,
"loss": 2.0284,
"step": 26180
},
{
"epoch": 2.9328107502799554,
"grad_norm": 5.223932266235352,
"learning_rate": 1.1216871967151924e-06,
"loss": 2.0917,
"step": 26190
},
{
"epoch": 2.9339305711086228,
"grad_norm": 5.224584579467773,
"learning_rate": 1.103023516237402e-06,
"loss": 1.6706,
"step": 26200
},
{
"epoch": 2.93505039193729,
"grad_norm": 6.829593181610107,
"learning_rate": 1.084359835759612e-06,
"loss": 2.0043,
"step": 26210
},
{
"epoch": 2.9361702127659575,
"grad_norm": 5.992856979370117,
"learning_rate": 1.0656961552818216e-06,
"loss": 1.4484,
"step": 26220
},
{
"epoch": 2.937290033594625,
"grad_norm": 9.6854829788208,
"learning_rate": 1.0470324748040314e-06,
"loss": 2.1745,
"step": 26230
},
{
"epoch": 2.938409854423292,
"grad_norm": 5.901656627655029,
"learning_rate": 1.0283687943262412e-06,
"loss": 2.2163,
"step": 26240
},
{
"epoch": 2.9395296752519595,
"grad_norm": 11.751072883605957,
"learning_rate": 1.009705113848451e-06,
"loss": 2.2047,
"step": 26250
},
{
"epoch": 2.940649496080627,
"grad_norm": 6.053452491760254,
"learning_rate": 9.910414333706607e-07,
"loss": 1.5099,
"step": 26260
},
{
"epoch": 2.9417693169092947,
"grad_norm": 9.445327758789062,
"learning_rate": 9.723777528928705e-07,
"loss": 2.0437,
"step": 26270
},
{
"epoch": 2.942889137737962,
"grad_norm": 12.053526878356934,
"learning_rate": 9.537140724150803e-07,
"loss": 2.1465,
"step": 26280
},
{
"epoch": 2.9440089585666294,
"grad_norm": 10.680662155151367,
"learning_rate": 9.350503919372901e-07,
"loss": 1.8593,
"step": 26290
},
{
"epoch": 2.9451287793952967,
"grad_norm": 13.565116882324219,
"learning_rate": 9.163867114594998e-07,
"loss": 2.0493,
"step": 26300
},
{
"epoch": 2.946248600223964,
"grad_norm": 6.383924961090088,
"learning_rate": 8.977230309817097e-07,
"loss": 2.2825,
"step": 26310
},
{
"epoch": 2.9473684210526314,
"grad_norm": 19.03361701965332,
"learning_rate": 8.790593505039194e-07,
"loss": 1.7299,
"step": 26320
},
{
"epoch": 2.948488241881299,
"grad_norm": 10.854887962341309,
"learning_rate": 8.603956700261292e-07,
"loss": 1.7987,
"step": 26330
},
{
"epoch": 2.9496080627099666,
"grad_norm": 11.263670921325684,
"learning_rate": 8.417319895483391e-07,
"loss": 1.9531,
"step": 26340
},
{
"epoch": 2.950727883538634,
"grad_norm": 6.675166606903076,
"learning_rate": 8.230683090705488e-07,
"loss": 2.1583,
"step": 26350
},
{
"epoch": 2.9518477043673013,
"grad_norm": 10.624512672424316,
"learning_rate": 8.044046285927585e-07,
"loss": 1.9506,
"step": 26360
},
{
"epoch": 2.9529675251959686,
"grad_norm": 8.63731575012207,
"learning_rate": 7.857409481149684e-07,
"loss": 1.802,
"step": 26370
},
{
"epoch": 2.954087346024636,
"grad_norm": 8.122550964355469,
"learning_rate": 7.670772676371781e-07,
"loss": 1.5223,
"step": 26380
},
{
"epoch": 2.9552071668533033,
"grad_norm": 16.51909065246582,
"learning_rate": 7.484135871593878e-07,
"loss": 1.8571,
"step": 26390
},
{
"epoch": 2.9563269876819707,
"grad_norm": 7.450239658355713,
"learning_rate": 7.297499066815976e-07,
"loss": 1.5009,
"step": 26400
},
{
"epoch": 2.9574468085106385,
"grad_norm": 14.5098237991333,
"learning_rate": 7.110862262038074e-07,
"loss": 1.9467,
"step": 26410
},
{
"epoch": 2.958566629339306,
"grad_norm": 6.5906782150268555,
"learning_rate": 6.924225457260172e-07,
"loss": 1.8331,
"step": 26420
},
{
"epoch": 2.959686450167973,
"grad_norm": 5.737934112548828,
"learning_rate": 6.73758865248227e-07,
"loss": 2.0944,
"step": 26430
},
{
"epoch": 2.9608062709966405,
"grad_norm": 11.81939697265625,
"learning_rate": 6.550951847704367e-07,
"loss": 1.6853,
"step": 26440
},
{
"epoch": 2.961926091825308,
"grad_norm": 7.474795341491699,
"learning_rate": 6.364315042926465e-07,
"loss": 1.4531,
"step": 26450
},
{
"epoch": 2.963045912653975,
"grad_norm": 10.875497817993164,
"learning_rate": 6.177678238148564e-07,
"loss": 2.3634,
"step": 26460
},
{
"epoch": 2.964165733482643,
"grad_norm": 7.887001991271973,
"learning_rate": 5.991041433370661e-07,
"loss": 1.9128,
"step": 26470
},
{
"epoch": 2.9652855543113104,
"grad_norm": 6.30941915512085,
"learning_rate": 5.804404628592759e-07,
"loss": 2.0812,
"step": 26480
},
{
"epoch": 2.9664053751399777,
"grad_norm": 6.238934516906738,
"learning_rate": 5.617767823814857e-07,
"loss": 1.7906,
"step": 26490
},
{
"epoch": 2.967525195968645,
"grad_norm": 7.524406909942627,
"learning_rate": 5.431131019036955e-07,
"loss": 1.9674,
"step": 26500
},
{
"epoch": 2.9686450167973124,
"grad_norm": 7.257613658905029,
"learning_rate": 5.244494214259052e-07,
"loss": 1.9621,
"step": 26510
},
{
"epoch": 2.9697648376259798,
"grad_norm": 16.3148136138916,
"learning_rate": 5.057857409481149e-07,
"loss": 1.6907,
"step": 26520
},
{
"epoch": 2.970884658454647,
"grad_norm": 5.2545952796936035,
"learning_rate": 4.871220604703247e-07,
"loss": 1.7305,
"step": 26530
},
{
"epoch": 2.9720044792833145,
"grad_norm": 16.13141632080078,
"learning_rate": 4.6845837999253457e-07,
"loss": 1.9891,
"step": 26540
},
{
"epoch": 2.973124300111982,
"grad_norm": 9.418984413146973,
"learning_rate": 4.497946995147443e-07,
"loss": 1.7309,
"step": 26550
},
{
"epoch": 2.9742441209406496,
"grad_norm": 3.6016457080841064,
"learning_rate": 4.311310190369541e-07,
"loss": 1.6292,
"step": 26560
},
{
"epoch": 2.975363941769317,
"grad_norm": 7.472093105316162,
"learning_rate": 4.1246733855916395e-07,
"loss": 1.8515,
"step": 26570
},
{
"epoch": 2.9764837625979843,
"grad_norm": 6.936214923858643,
"learning_rate": 3.9380365808137364e-07,
"loss": 1.7409,
"step": 26580
},
{
"epoch": 2.9776035834266517,
"grad_norm": 6.333293437957764,
"learning_rate": 3.751399776035835e-07,
"loss": 1.8553,
"step": 26590
},
{
"epoch": 2.978723404255319,
"grad_norm": 5.685304164886475,
"learning_rate": 3.564762971257932e-07,
"loss": 1.7563,
"step": 26600
},
{
"epoch": 2.979843225083987,
"grad_norm": 7.443325996398926,
"learning_rate": 3.37812616648003e-07,
"loss": 2.0484,
"step": 26610
},
{
"epoch": 2.980963045912654,
"grad_norm": 6.894618034362793,
"learning_rate": 3.1914893617021275e-07,
"loss": 1.6829,
"step": 26620
},
{
"epoch": 2.9820828667413215,
"grad_norm": 18.16074562072754,
"learning_rate": 3.004852556924226e-07,
"loss": 2.1276,
"step": 26630
},
{
"epoch": 2.983202687569989,
"grad_norm": 13.232165336608887,
"learning_rate": 2.8182157521463234e-07,
"loss": 1.8867,
"step": 26640
},
{
"epoch": 2.984322508398656,
"grad_norm": 3.8035480976104736,
"learning_rate": 2.6315789473684213e-07,
"loss": 1.9104,
"step": 26650
},
{
"epoch": 2.9854423292273236,
"grad_norm": 6.058941841125488,
"learning_rate": 2.4449421425905187e-07,
"loss": 1.6504,
"step": 26660
},
{
"epoch": 2.986562150055991,
"grad_norm": 8.38434886932373,
"learning_rate": 2.258305337812617e-07,
"loss": 1.4973,
"step": 26670
},
{
"epoch": 2.9876819708846583,
"grad_norm": 16.152746200561523,
"learning_rate": 2.0716685330347146e-07,
"loss": 1.6986,
"step": 26680
},
{
"epoch": 2.9888017917133256,
"grad_norm": 5.802217960357666,
"learning_rate": 1.8850317282568122e-07,
"loss": 1.6346,
"step": 26690
},
{
"epoch": 2.9899216125419934,
"grad_norm": 18.607810974121094,
"learning_rate": 1.6983949234789102e-07,
"loss": 1.9916,
"step": 26700
},
{
"epoch": 2.9910414333706608,
"grad_norm": 5.028476715087891,
"learning_rate": 1.5117581187010078e-07,
"loss": 1.7811,
"step": 26710
},
{
"epoch": 2.992161254199328,
"grad_norm": 6.0024824142456055,
"learning_rate": 1.3251213139231058e-07,
"loss": 2.2338,
"step": 26720
},
{
"epoch": 2.9932810750279955,
"grad_norm": 5.444699287414551,
"learning_rate": 1.1384845091452034e-07,
"loss": 2.1604,
"step": 26730
},
{
"epoch": 2.994400895856663,
"grad_norm": 12.872075080871582,
"learning_rate": 9.518477043673014e-08,
"loss": 1.2952,
"step": 26740
},
{
"epoch": 2.9955207166853306,
"grad_norm": 10.56808853149414,
"learning_rate": 7.652108995893992e-08,
"loss": 1.8126,
"step": 26750
},
{
"epoch": 2.996640537513998,
"grad_norm": 5.550817966461182,
"learning_rate": 5.785740948114969e-08,
"loss": 2.0958,
"step": 26760
},
{
"epoch": 2.9977603583426653,
"grad_norm": 6.287519454956055,
"learning_rate": 3.919372900335946e-08,
"loss": 1.8855,
"step": 26770
},
{
"epoch": 2.9988801791713326,
"grad_norm": 8.283456802368164,
"learning_rate": 2.0530048525569244e-08,
"loss": 1.916,
"step": 26780
},
{
"epoch": 3.0,
"grad_norm": 7.1067328453063965,
"learning_rate": 1.866368047779022e-09,
"loss": 1.6709,
"step": 26790
}
],
"logging_steps": 10,
"max_steps": 26790,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.457535008768e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}