diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/last-checkpoint/trainer_state.json"
@@ -0,0 +1,15245 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9456264775413712,
+  "eval_steps": 20,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0,
+      "eval_accuracy": 0.7339246119733924,
+      "eval_f1": 0.24528301886792453,
+      "eval_loss": 0.6025775074958801,
+      "eval_precision": 0.6,
+      "eval_recall": 0.1541501976284585,
+      "eval_runtime": 48.7601,
+      "eval_samples_per_second": 5.66,
+      "eval_steps_per_second": 0.185,
+      "step": 0
+    },
+    {
+      "epoch": 0.00047281323877068556,
+      "grad_norm": 2.2663159370422363,
+      "learning_rate": 9.433962264150944e-08,
+      "loss": 0.6749,
+      "step": 1
+    },
+    {
+      "epoch": 0.0009456264775413711,
+      "grad_norm": 2.2706336975097656,
+      "learning_rate": 1.886792452830189e-07,
+      "loss": 0.6176,
+      "step": 2
+    },
+    {
+      "epoch": 0.0014184397163120568,
+      "grad_norm": 3.000195026397705,
+      "learning_rate": 2.8301886792452833e-07,
+      "loss": 0.5503,
+      "step": 3
+    },
+    {
+      "epoch": 0.0018912529550827422,
+      "grad_norm": 1.9788570404052734,
+      "learning_rate": 3.773584905660378e-07,
+      "loss": 0.5419,
+      "step": 4
+    },
+    {
+      "epoch": 0.002364066193853428,
+      "grad_norm": 2.908334255218506,
+      "learning_rate": 4.716981132075472e-07,
+      "loss": 0.6287,
+      "step": 5
+    },
+    {
+      "epoch": 0.0028368794326241137,
+      "grad_norm": 2.66474986076355,
+      "learning_rate": 5.660377358490567e-07,
+      "loss": 0.6961,
+      "step": 6
+    },
+    {
+      "epoch": 0.003309692671394799,
+      "grad_norm": 2.1652109622955322,
+      "learning_rate": 6.603773584905661e-07,
+      "loss": 0.6128,
+      "step": 7
+    },
+    {
+      "epoch": 0.0037825059101654845,
+      "grad_norm": 2.8184926509857178,
+      "learning_rate": 7.547169811320755e-07,
+      "loss": 0.6289,
+      "step": 8
+    },
+    {
+      "epoch": 0.00425531914893617,
+      "grad_norm": 2.4191572666168213,
+      "learning_rate": 8.490566037735849e-07,
+      "loss": 0.6109,
+      "step": 9
+    },
+    {
+      "epoch": 0.004728132387706856,
+      "grad_norm": 2.1350443363189697,
+      "learning_rate": 9.433962264150944e-07,
+      "loss": 0.6027,
+      "step": 10
+    },
+    {
+      "epoch": 0.005200945626477541,
+      "grad_norm": 2.3789725303649902,
+      "learning_rate": 1.037735849056604e-06,
+      "loss": 0.6009,
+      "step": 11
+    },
+    {
+      "epoch": 0.005673758865248227,
+      "grad_norm": 2.0987956523895264,
+      "learning_rate": 1.1320754716981133e-06,
+      "loss": 0.5696,
+      "step": 12
+    },
+    {
+      "epoch": 0.006146572104018913,
+      "grad_norm": 2.1385180950164795,
+      "learning_rate": 1.2264150943396227e-06,
+      "loss": 0.6081,
+      "step": 13
+    },
+    {
+      "epoch": 0.006619385342789598,
+      "grad_norm": 2.550551652908325,
+      "learning_rate": 1.3207547169811322e-06,
+      "loss": 0.6091,
+      "step": 14
+    },
+    {
+      "epoch": 0.0070921985815602835,
+      "grad_norm": 2.2968673706054688,
+      "learning_rate": 1.4150943396226415e-06,
+      "loss": 0.6279,
+      "step": 15
+    },
+    {
+      "epoch": 0.007565011820330969,
+      "grad_norm": 2.0703189373016357,
+      "learning_rate": 1.509433962264151e-06,
+      "loss": 0.5747,
+      "step": 16
+    },
+    {
+      "epoch": 0.008037825059101654,
+      "grad_norm": 3.0899605751037598,
+      "learning_rate": 1.6037735849056604e-06,
+      "loss": 0.6645,
+      "step": 17
+    },
+    {
+      "epoch": 0.00851063829787234,
+      "grad_norm": 1.9014838933944702,
+      "learning_rate": 1.6981132075471698e-06,
+      "loss": 0.5237,
+      "step": 18
+    },
+    {
+      "epoch": 0.008983451536643027,
+      "grad_norm": 2.2065110206604004,
+      "learning_rate": 1.7924528301886793e-06,
+      "loss": 0.5515,
+      "step": 19
+    },
+    {
+      "epoch": 0.009456264775413711,
+      "grad_norm": 1.8808679580688477,
+      "learning_rate": 1.8867924528301889e-06,
+      "loss": 0.5256,
+      "step": 20
+    },
+    {
+      "epoch": 0.009456264775413711,
+      "eval_accuracy": 0.7350332594235033,
+      "eval_f1": 0.2507836990595611,
+      "eval_loss": 0.6015310287475586,
+      "eval_precision": 0.6060606060606061,
+      "eval_recall": 0.15810276679841898,
+      "eval_runtime": 49.4054,
+      "eval_samples_per_second": 5.586,
+      "eval_steps_per_second": 0.182,
+      "step": 20
+    },
+    {
+      "epoch": 0.009929078014184398,
+      "grad_norm": 1.7238119840621948,
+      "learning_rate": 1.981132075471698e-06,
+      "loss": 0.5206,
+      "step": 21
+    },
+    {
+      "epoch": 0.010401891252955082,
+      "grad_norm": 2.4662513732910156,
+      "learning_rate": 2.075471698113208e-06,
+      "loss": 0.6127,
+      "step": 22
+    },
+    {
+      "epoch": 0.010874704491725768,
+      "grad_norm": 2.3498942852020264,
+      "learning_rate": 2.1698113207547173e-06,
+      "loss": 0.6588,
+      "step": 23
+    },
+    {
+      "epoch": 0.011347517730496455,
+      "grad_norm": 2.9931042194366455,
+      "learning_rate": 2.2641509433962266e-06,
+      "loss": 0.5891,
+      "step": 24
+    },
+    {
+      "epoch": 0.01182033096926714,
+      "grad_norm": 2.32051944732666,
+      "learning_rate": 2.358490566037736e-06,
+      "loss": 0.599,
+      "step": 25
+    },
+    {
+      "epoch": 0.012293144208037825,
+      "grad_norm": 3.134979486465454,
+      "learning_rate": 2.4528301886792453e-06,
+      "loss": 0.6473,
+      "step": 26
+    },
+    {
+      "epoch": 0.01276595744680851,
+      "grad_norm": 2.371523857116699,
+      "learning_rate": 2.547169811320755e-06,
+      "loss": 0.6037,
+      "step": 27
+    },
+    {
+      "epoch": 0.013238770685579196,
+      "grad_norm": 2.681360960006714,
+      "learning_rate": 2.6415094339622644e-06,
+      "loss": 0.5573,
+      "step": 28
+    },
+    {
+      "epoch": 0.013711583924349883,
+      "grad_norm": 3.202848434448242,
+      "learning_rate": 2.7358490566037738e-06,
+      "loss": 0.6697,
+      "step": 29
+    },
+    {
+      "epoch": 0.014184397163120567,
+      "grad_norm": 2.109222888946533,
+      "learning_rate": 2.830188679245283e-06,
+      "loss": 0.6073,
+      "step": 30
+    },
+    {
+      "epoch": 0.014657210401891253,
+      "grad_norm": 2.031996250152588,
+      "learning_rate": 2.9245283018867924e-06,
+      "loss": 0.5569,
+      "step": 31
+    },
+    {
+      "epoch": 0.015130023640661938,
+      "grad_norm": 2.5829803943634033,
+      "learning_rate": 3.018867924528302e-06,
+      "loss": 0.5503,
+      "step": 32
+    },
+    {
+      "epoch": 0.015602836879432624,
+      "grad_norm": 3.7826120853424072,
+      "learning_rate": 3.1132075471698115e-06,
+      "loss": 0.4966,
+      "step": 33
+    },
+    {
+      "epoch": 0.01607565011820331,
+      "grad_norm": 1.3336539268493652,
+      "learning_rate": 3.207547169811321e-06,
+      "loss": 0.4965,
+      "step": 34
+    },
+    {
+      "epoch": 0.016548463356973995,
+      "grad_norm": 2.4206223487854004,
+      "learning_rate": 3.30188679245283e-06,
+      "loss": 0.6583,
+      "step": 35
+    },
+    {
+      "epoch": 0.01702127659574468,
+      "grad_norm": 2.437887191772461,
+      "learning_rate": 3.3962264150943395e-06,
+      "loss": 0.5355,
+      "step": 36
+    },
+    {
+      "epoch": 0.017494089834515367,
+      "grad_norm": 2.00358510017395,
+      "learning_rate": 3.4905660377358493e-06,
+      "loss": 0.5854,
+      "step": 37
+    },
+    {
+      "epoch": 0.017966903073286054,
+      "grad_norm": 2.635550022125244,
+      "learning_rate": 3.5849056603773586e-06,
+      "loss": 0.6226,
+      "step": 38
+    },
+    {
+      "epoch": 0.018439716312056736,
+      "grad_norm": 1.9979338645935059,
+      "learning_rate": 3.679245283018868e-06,
+      "loss": 0.6202,
+      "step": 39
+    },
+    {
+      "epoch": 0.018912529550827423,
+      "grad_norm": 2.933034658432007,
+      "learning_rate": 3.7735849056603777e-06,
+      "loss": 0.6118,
+      "step": 40
+    },
+    {
+      "epoch": 0.018912529550827423,
+      "eval_accuracy": 0.7361419068736141,
+      "eval_f1": 0.2608695652173913,
+      "eval_loss": 0.5987924337387085,
+      "eval_precision": 0.6086956521739131,
+      "eval_recall": 0.16600790513833993,
+      "eval_runtime": 49.5092,
+      "eval_samples_per_second": 5.575,
+      "eval_steps_per_second": 0.182,
+      "step": 40
+    },
+    {
+      "epoch": 0.01938534278959811,
+      "grad_norm": 2.4579005241394043,
+      "learning_rate": 3.8679245283018875e-06,
+      "loss": 0.6329,
+      "step": 41
+    },
+    {
+      "epoch": 0.019858156028368795,
+      "grad_norm": 2.0814871788024902,
+      "learning_rate": 3.962264150943396e-06,
+      "loss": 0.5792,
+      "step": 42
+    },
+    {
+      "epoch": 0.02033096926713948,
+      "grad_norm": 4.046266078948975,
+      "learning_rate": 4.056603773584906e-06,
+      "loss": 0.6294,
+      "step": 43
+    },
+    {
+      "epoch": 0.020803782505910164,
+      "grad_norm": 1.990343451499939,
+      "learning_rate": 4.150943396226416e-06,
+      "loss": 0.5279,
+      "step": 44
+    },
+    {
+      "epoch": 0.02127659574468085,
+      "grad_norm": 2.2721312046051025,
+      "learning_rate": 4.245283018867925e-06,
+      "loss": 0.5947,
+      "step": 45
+    },
+    {
+      "epoch": 0.021749408983451537,
+      "grad_norm": 2.3753161430358887,
+      "learning_rate": 4.339622641509435e-06,
+      "loss": 0.6097,
+      "step": 46
+    },
+    {
+      "epoch": 0.022222222222222223,
+      "grad_norm": 2.2465322017669678,
+      "learning_rate": 4.4339622641509435e-06,
+      "loss": 0.5921,
+      "step": 47
+    },
+    {
+      "epoch": 0.02269503546099291,
+      "grad_norm": 1.9690579175949097,
+      "learning_rate": 4.528301886792453e-06,
+      "loss": 0.5191,
+      "step": 48
+    },
+    {
+      "epoch": 0.023167848699763592,
+      "grad_norm": 2.9993767738342285,
+      "learning_rate": 4.622641509433963e-06,
+      "loss": 0.5827,
+      "step": 49
+    },
+    {
+      "epoch": 0.02364066193853428,
+      "grad_norm": 2.3443307876586914,
+      "learning_rate": 4.716981132075472e-06,
+      "loss": 0.5746,
+      "step": 50
+    },
+    {
+      "epoch": 0.024113475177304965,
+      "grad_norm": 2.446950912475586,
+      "learning_rate": 4.811320754716982e-06,
+      "loss": 0.582,
+      "step": 51
+    },
+    {
+      "epoch": 0.02458628841607565,
+      "grad_norm": 3.164130687713623,
+      "learning_rate": 4.905660377358491e-06,
+      "loss": 0.6406,
+      "step": 52
+    },
+    {
+      "epoch": 0.025059101654846337,
+      "grad_norm": 2.339772939682007,
+      "learning_rate": 5e-06,
+      "loss": 0.5627,
+      "step": 53
+    },
+    {
+      "epoch": 0.02553191489361702,
+      "grad_norm": 2.548523187637329,
+      "learning_rate": 5.09433962264151e-06,
+      "loss": 0.5909,
+      "step": 54
+    },
+    {
+      "epoch": 0.026004728132387706,
+      "grad_norm": 3.006196975708008,
+      "learning_rate": 5.188679245283019e-06,
+      "loss": 0.6046,
+      "step": 55
+    },
+    {
+      "epoch": 0.026477541371158392,
+      "grad_norm": 2.3136887550354004,
+      "learning_rate": 5.283018867924529e-06,
+      "loss": 0.5235,
+      "step": 56
+    },
+    {
+      "epoch": 0.02695035460992908,
+      "grad_norm": 2.072728157043457,
+      "learning_rate": 5.377358490566038e-06,
+      "loss": 0.6377,
+      "step": 57
+    },
+    {
+      "epoch": 0.027423167848699765,
+      "grad_norm": 3.415151357650757,
+      "learning_rate": 5.4716981132075475e-06,
+      "loss": 0.6367,
+      "step": 58
+    },
+    {
+      "epoch": 0.027895981087470448,
+      "grad_norm": 2.400956869125366,
+      "learning_rate": 5.566037735849057e-06,
+      "loss": 0.5671,
+      "step": 59
+    },
+    {
+      "epoch": 0.028368794326241134,
+      "grad_norm": 2.0561230182647705,
+      "learning_rate": 5.660377358490566e-06,
+      "loss": 0.5575,
+      "step": 60
+    },
+    {
+      "epoch": 0.028368794326241134,
+      "eval_accuracy": 0.7450110864745011,
+      "eval_f1": 0.3072289156626506,
+      "eval_loss": 0.5848703384399414,
+      "eval_precision": 0.6455696202531646,
+      "eval_recall": 0.2015810276679842,
+      "eval_runtime": 49.0008,
+      "eval_samples_per_second": 5.633,
+      "eval_steps_per_second": 0.184,
+      "step": 60
+    },
+    {
+      "epoch": 0.02884160756501182,
+      "grad_norm": 2.189640522003174,
+      "learning_rate": 5.754716981132076e-06,
+      "loss": 0.619,
+      "step": 61
+    },
+    {
+      "epoch": 0.029314420803782507,
+      "grad_norm": 2.607837677001953,
+      "learning_rate": 5.849056603773585e-06,
+      "loss": 0.6095,
+      "step": 62
+    },
+    {
+      "epoch": 0.029787234042553193,
+      "grad_norm": 2.26078200340271,
+      "learning_rate": 5.943396226415095e-06,
+      "loss": 0.5471,
+      "step": 63
+    },
+    {
+      "epoch": 0.030260047281323876,
+      "grad_norm": 2.622464656829834,
+      "learning_rate": 6.037735849056604e-06,
+      "loss": 0.6509,
+      "step": 64
+    },
+    {
+      "epoch": 0.030732860520094562,
+      "grad_norm": 2.8349571228027344,
+      "learning_rate": 6.132075471698113e-06,
+      "loss": 0.6926,
+      "step": 65
+    },
+    {
+      "epoch": 0.031205673758865248,
+      "grad_norm": 2.139317750930786,
+      "learning_rate": 6.226415094339623e-06,
+      "loss": 0.5786,
+      "step": 66
+    },
+    {
+      "epoch": 0.03167848699763593,
+      "grad_norm": 3.0620882511138916,
+      "learning_rate": 6.320754716981132e-06,
+      "loss": 0.5841,
+      "step": 67
+    },
+    {
+      "epoch": 0.03215130023640662,
+      "grad_norm": 2.194460391998291,
+      "learning_rate": 6.415094339622642e-06,
+      "loss": 0.5746,
+      "step": 68
+    },
+    {
+      "epoch": 0.032624113475177303,
+      "grad_norm": 2.3444063663482666,
+      "learning_rate": 6.5094339622641515e-06,
+      "loss": 0.51,
+      "step": 69
+    },
+    {
+      "epoch": 0.03309692671394799,
+      "grad_norm": 3.622739791870117,
+      "learning_rate": 6.60377358490566e-06,
+      "loss": 0.6342,
+      "step": 70
+    },
+    {
+      "epoch": 0.033569739952718676,
+      "grad_norm": 2.9004671573638916,
+      "learning_rate": 6.69811320754717e-06,
+      "loss": 0.641,
+      "step": 71
+    },
+    {
+      "epoch": 0.03404255319148936,
+      "grad_norm": 2.351501941680908,
+      "learning_rate": 6.792452830188679e-06,
+      "loss": 0.5936,
+      "step": 72
+    },
+    {
+      "epoch": 0.03451536643026005,
+      "grad_norm": 2.6966824531555176,
+      "learning_rate": 6.886792452830189e-06,
+      "loss": 0.5755,
+      "step": 73
+    },
+    {
+      "epoch": 0.034988179669030735,
+      "grad_norm": 2.026407241821289,
+      "learning_rate": 6.981132075471699e-06,
+      "loss": 0.4305,
+      "step": 74
+    },
+    {
+      "epoch": 0.03546099290780142,
+      "grad_norm": 2.9599199295043945,
+      "learning_rate": 7.0754716981132075e-06,
+      "loss": 0.494,
+      "step": 75
+    },
+    {
+      "epoch": 0.03593380614657211,
+      "grad_norm": 2.460238218307495,
+      "learning_rate": 7.169811320754717e-06,
+      "loss": 0.5991,
+      "step": 76
+    },
+    {
+      "epoch": 0.03640661938534279,
+      "grad_norm": 3.174283266067505,
+      "learning_rate": 7.264150943396226e-06,
+      "loss": 0.6035,
+      "step": 77
+    },
+    {
+      "epoch": 0.03687943262411347,
+      "grad_norm": 2.4575035572052,
+      "learning_rate": 7.358490566037736e-06,
+      "loss": 0.549,
+      "step": 78
+    },
+    {
+      "epoch": 0.03735224586288416,
+      "grad_norm": 2.558811664581299,
+      "learning_rate": 7.452830188679246e-06,
+      "loss": 0.4979,
+      "step": 79
+    },
+    {
+      "epoch": 0.037825059101654845,
+      "grad_norm": 2.396045684814453,
+      "learning_rate": 7.5471698113207555e-06,
+      "loss": 0.6385,
+      "step": 80
+    },
+    {
+      "epoch": 0.037825059101654845,
+      "eval_accuracy": 0.746119733924612,
+      "eval_f1": 0.3989501312335958,
+      "eval_loss": 0.5647635459899902,
+      "eval_precision": 0.59375,
+      "eval_recall": 0.30039525691699603,
+      "eval_runtime": 49.2336,
+      "eval_samples_per_second": 5.606,
+      "eval_steps_per_second": 0.183,
+      "step": 80
+    },
+    {
+      "epoch": 0.03829787234042553,
+      "grad_norm": 3.1989941596984863,
+      "learning_rate": 7.641509433962266e-06,
+      "loss": 0.6269,
+      "step": 81
+    },
+    {
+      "epoch": 0.03877068557919622,
+      "grad_norm": 2.829859972000122,
+      "learning_rate": 7.735849056603775e-06,
+      "loss": 0.5911,
+      "step": 82
+    },
+    {
+      "epoch": 0.039243498817966904,
+      "grad_norm": 2.1976866722106934,
+      "learning_rate": 7.830188679245284e-06,
+      "loss": 0.5403,
+      "step": 83
+    },
+    {
+      "epoch": 0.03971631205673759,
+      "grad_norm": 2.3576695919036865,
+      "learning_rate": 7.924528301886793e-06,
+      "loss": 0.5199,
+      "step": 84
+    },
+    {
+      "epoch": 0.04018912529550828,
+      "grad_norm": 2.662384033203125,
+      "learning_rate": 8.018867924528303e-06,
+      "loss": 0.5898,
+      "step": 85
+    },
+    {
+      "epoch": 0.04066193853427896,
+      "grad_norm": 4.871118545532227,
+      "learning_rate": 8.113207547169812e-06,
+      "loss": 0.5394,
+      "step": 86
+    },
+    {
+      "epoch": 0.04113475177304964,
+      "grad_norm": 2.710362195968628,
+      "learning_rate": 8.207547169811321e-06,
+      "loss": 0.509,
+      "step": 87
+    },
+    {
+      "epoch": 0.04160756501182033,
+      "grad_norm": 2.387660264968872,
+      "learning_rate": 8.301886792452832e-06,
+      "loss": 0.567,
+      "step": 88
+    },
+    {
+      "epoch": 0.042080378250591015,
+      "grad_norm": 2.4443883895874023,
+      "learning_rate": 8.39622641509434e-06,
+      "loss": 0.5319,
+      "step": 89
+    },
+    {
+      "epoch": 0.0425531914893617,
+      "grad_norm": 2.9741415977478027,
+      "learning_rate": 8.49056603773585e-06,
+      "loss": 0.6318,
+      "step": 90
+    },
+    {
+      "epoch": 0.04302600472813239,
+      "grad_norm": 2.7385494709014893,
+      "learning_rate": 8.58490566037736e-06,
+      "loss": 0.6109,
+      "step": 91
+    },
+    {
+      "epoch": 0.043498817966903074,
+      "grad_norm": 3.7744197845458984,
+      "learning_rate": 8.67924528301887e-06,
+      "loss": 0.6275,
+      "step": 92
+    },
+    {
+      "epoch": 0.04397163120567376,
+      "grad_norm": 3.1745519638061523,
+      "learning_rate": 8.773584905660378e-06,
+      "loss": 0.5503,
+      "step": 93
+    },
+    {
+      "epoch": 0.044444444444444446,
+      "grad_norm": 3.254016399383545,
+      "learning_rate": 8.867924528301887e-06,
+      "loss": 0.6103,
+      "step": 94
+    },
+    {
+      "epoch": 0.04491725768321513,
+      "grad_norm": 2.4502315521240234,
+      "learning_rate": 8.962264150943398e-06,
+      "loss": 0.4538,
+      "step": 95
+    },
+    {
+      "epoch": 0.04539007092198582,
+      "grad_norm": 2.24422025680542,
+      "learning_rate": 9.056603773584907e-06,
+      "loss": 0.5061,
+      "step": 96
+    },
+    {
+      "epoch": 0.0458628841607565,
+      "grad_norm": 3.284022092819214,
+      "learning_rate": 9.150943396226416e-06,
+      "loss": 0.579,
+      "step": 97
+    },
+    {
+      "epoch": 0.046335697399527184,
+      "grad_norm": 2.722243309020996,
+      "learning_rate": 9.245283018867926e-06,
+      "loss": 0.5395,
+      "step": 98
+    },
+    {
+      "epoch": 0.04680851063829787,
+      "grad_norm": 2.641986131668091,
+      "learning_rate": 9.339622641509435e-06,
+      "loss": 0.5201,
+      "step": 99
+    },
+    {
+      "epoch": 0.04728132387706856,
+      "grad_norm": 2.5733094215393066,
+      "learning_rate": 9.433962264150944e-06,
+      "loss": 0.4791,
+      "step": 100
+    },
+    {
+      "epoch": 0.04728132387706856,
+      "eval_accuracy": 0.7660753880266076,
+      "eval_f1": 0.4403183023872679,
+      "eval_loss": 0.539648711681366,
+      "eval_precision": 0.6693548387096774,
+      "eval_recall": 0.32806324110671936,
+      "eval_runtime": 50.2993,
+      "eval_samples_per_second": 5.487,
+      "eval_steps_per_second": 0.179,
+      "step": 100
+    },
+    {
+      "epoch": 0.04775413711583924,
+      "grad_norm": 2.3666300773620605,
+      "learning_rate": 9.528301886792455e-06,
+      "loss": 0.4835,
+      "step": 101
+    },
+    {
+      "epoch": 0.04822695035460993,
+      "grad_norm": 3.929025888442993,
+      "learning_rate": 9.622641509433963e-06,
+      "loss": 0.48,
+      "step": 102
+    },
+    {
+      "epoch": 0.048699763593380616,
+      "grad_norm": 2.604964017868042,
+      "learning_rate": 9.716981132075472e-06,
+      "loss": 0.4988,
+      "step": 103
+    },
+    {
+      "epoch": 0.0491725768321513,
+      "grad_norm": 2.985452890396118,
+      "learning_rate": 9.811320754716981e-06,
+      "loss": 0.4611,
+      "step": 104
+    },
+    {
+      "epoch": 0.04964539007092199,
+      "grad_norm": 3.0728108882904053,
+      "learning_rate": 9.905660377358492e-06,
+      "loss": 0.4563,
+      "step": 105
+    },
+    {
+      "epoch": 0.050118203309692674,
+      "grad_norm": 2.5450596809387207,
+      "learning_rate": 1e-05,
+      "loss": 0.399,
+      "step": 106
+    },
+    {
+      "epoch": 0.050591016548463354,
+      "grad_norm": 4.241573810577393,
+      "learning_rate": 1.0094339622641511e-05,
+      "loss": 0.659,
+      "step": 107
+    },
+    {
+      "epoch": 0.05106382978723404,
+      "grad_norm": 2.582282781600952,
+      "learning_rate": 1.018867924528302e-05,
+      "loss": 0.4278,
+      "step": 108
+    },
+    {
+      "epoch": 0.051536643026004726,
+      "grad_norm": 3.337094306945801,
+      "learning_rate": 1.0283018867924531e-05,
+      "loss": 0.4334,
+      "step": 109
+    },
+    {
+      "epoch": 0.05200945626477541,
+      "grad_norm": 2.199113368988037,
+      "learning_rate": 1.0377358490566038e-05,
+      "loss": 0.3644,
+      "step": 110
+    },
+    {
+      "epoch": 0.0524822695035461,
+      "grad_norm": 3.2351200580596924,
+      "learning_rate": 1.0471698113207549e-05,
+      "loss": 0.506,
+      "step": 111
+    },
+    {
+      "epoch": 0.052955082742316785,
+      "grad_norm": 3.9023163318634033,
+      "learning_rate": 1.0566037735849058e-05,
+      "loss": 0.5263,
+      "step": 112
+    },
+    {
+      "epoch": 0.05342789598108747,
+      "grad_norm": 2.8746888637542725,
+      "learning_rate": 1.0660377358490568e-05,
+      "loss": 0.4335,
+      "step": 113
+    },
+    {
+      "epoch": 0.05390070921985816,
+      "grad_norm": 3.934784173965454,
+      "learning_rate": 1.0754716981132076e-05,
+      "loss": 0.4804,
+      "step": 114
+    },
+    {
+      "epoch": 0.054373522458628844,
+      "grad_norm": 3.4959094524383545,
+      "learning_rate": 1.0849056603773586e-05,
+      "loss": 0.4114,
+      "step": 115
+    },
+    {
+      "epoch": 0.05484633569739953,
+      "grad_norm": 3.6590819358825684,
+      "learning_rate": 1.0943396226415095e-05,
+      "loss": 0.4618,
+      "step": 116
+    },
+    {
+      "epoch": 0.05531914893617021,
+      "grad_norm": 3.4006142616271973,
+      "learning_rate": 1.1037735849056606e-05,
+      "loss": 0.4839,
+      "step": 117
+    },
+    {
+      "epoch": 0.055791962174940896,
+      "grad_norm": 3.9192683696746826,
+      "learning_rate": 1.1132075471698115e-05,
+      "loss": 0.484,
+      "step": 118
+    },
+    {
+      "epoch": 0.05626477541371158,
+      "grad_norm": 3.4559454917907715,
+      "learning_rate": 1.1226415094339625e-05,
+      "loss": 0.5247,
+      "step": 119
+    },
+    {
+      "epoch": 0.05673758865248227,
+      "grad_norm": 4.34246301651001,
+      "learning_rate": 1.1320754716981132e-05,
+      "loss": 0.3593,
+      "step": 120
+    },
+    {
+      "epoch": 0.05673758865248227,
+      "eval_accuracy": 0.779379157427938,
+      "eval_f1": 0.4776902887139108,
+      "eval_loss": 0.5030393600463867,
+      "eval_precision": 0.7109375,
+      "eval_recall": 0.35968379446640314,
+      "eval_runtime": 47.339,
+      "eval_samples_per_second": 5.83,
+      "eval_steps_per_second": 0.19,
+      "step": 120
+    },
+    {
+      "epoch": 0.057210401891252954,
+      "grad_norm": 4.863562107086182,
+      "learning_rate": 1.1415094339622643e-05,
+      "loss": 0.4712,
+      "step": 121
+    },
+    {
+      "epoch": 0.05768321513002364,
+      "grad_norm": 3.856417179107666,
+      "learning_rate": 1.1509433962264152e-05,
+      "loss": 0.4069,
+      "step": 122
+    },
+    {
+      "epoch": 0.05815602836879433,
+      "grad_norm": 3.3835558891296387,
+      "learning_rate": 1.1603773584905663e-05,
+      "loss": 0.4641,
+      "step": 123
+    },
+    {
+      "epoch": 0.05862884160756501,
+      "grad_norm": 4.175307750701904,
+      "learning_rate": 1.169811320754717e-05,
+      "loss": 0.4452,
+      "step": 124
+    },
+    {
+      "epoch": 0.0591016548463357,
+      "grad_norm": 5.4297356605529785,
+      "learning_rate": 1.179245283018868e-05,
+      "loss": 0.3464,
+      "step": 125
+    },
+    {
+      "epoch": 0.059574468085106386,
+      "grad_norm": 3.767544746398926,
+      "learning_rate": 1.188679245283019e-05,
+      "loss": 0.4649,
+      "step": 126
+    },
+    {
+      "epoch": 0.06004728132387707,
+      "grad_norm": 3.859020233154297,
+      "learning_rate": 1.19811320754717e-05,
+      "loss": 0.4034,
+      "step": 127
+    },
+    {
+      "epoch": 0.06052009456264775,
+      "grad_norm": 5.159704685211182,
+      "learning_rate": 1.2075471698113209e-05,
+      "loss": 0.5041,
+      "step": 128
+    },
+    {
+      "epoch": 0.06099290780141844,
+      "grad_norm": 4.291565418243408,
+      "learning_rate": 1.216981132075472e-05,
+      "loss": 0.4173,
+      "step": 129
+    },
+    {
+      "epoch": 0.061465721040189124,
+      "grad_norm": 4.175761699676514,
+      "learning_rate": 1.2264150943396227e-05,
+      "loss": 0.391,
+      "step": 130
+    },
+    {
+      "epoch": 0.06193853427895981,
+      "grad_norm": 5.98757266998291,
+      "learning_rate": 1.2358490566037737e-05,
+      "loss": 0.4565,
+      "step": 131
+    },
+    {
+      "epoch": 0.062411347517730496,
+      "grad_norm": 6.7860307693481445,
+      "learning_rate": 1.2452830188679246e-05,
+      "loss": 0.4116,
+      "step": 132
+    },
+    {
+      "epoch": 0.06288416075650118,
+      "grad_norm": 7.493508338928223,
+      "learning_rate": 1.2547169811320757e-05,
+      "loss": 0.4762,
+      "step": 133
+    },
+    {
+      "epoch": 0.06335697399527186,
+      "grad_norm": 4.3719964027404785,
+      "learning_rate": 1.2641509433962264e-05,
+      "loss": 0.4301,
+      "step": 134
+    },
+    {
+      "epoch": 0.06382978723404255,
+      "grad_norm": 5.992913246154785,
+      "learning_rate": 1.2735849056603775e-05,
+      "loss": 0.4892,
+      "step": 135
+    },
+    {
+      "epoch": 0.06430260047281323,
+      "grad_norm": 6.05405330657959,
+      "learning_rate": 1.2830188679245283e-05,
+      "loss": 0.4004,
+      "step": 136
+    },
+    {
+      "epoch": 0.06477541371158392,
+      "grad_norm": 6.542272090911865,
+      "learning_rate": 1.2924528301886794e-05,
+      "loss": 0.5386,
+      "step": 137
+    },
+    {
+      "epoch": 0.06524822695035461,
+      "grad_norm": 5.304028511047363,
+      "learning_rate": 1.3018867924528303e-05,
+      "loss": 0.3309,
+      "step": 138
+    },
+    {
+      "epoch": 0.0657210401891253,
+      "grad_norm": 4.330917835235596,
+      "learning_rate": 1.3113207547169814e-05,
+      "loss": 0.3509,
+      "step": 139
+    },
+    {
+      "epoch": 0.06619385342789598,
+      "grad_norm": 6.812550067901611,
+      "learning_rate": 1.320754716981132e-05,
+      "loss": 0.4435,
+      "step": 140
+    },
+    {
+      "epoch": 0.06619385342789598,
+      "eval_accuracy": 0.779379157427938,
+      "eval_f1": 0.5446224256292906,
+      "eval_loss": 0.4715713858604431,
+      "eval_precision": 0.6467391304347826,
+      "eval_recall": 0.47035573122529645,
+      "eval_runtime": 48.4446,
+      "eval_samples_per_second": 5.697,
+      "eval_steps_per_second": 0.186,
+      "step": 140
+    },
+    {
+      "epoch": 0.06666666666666667,
+      "grad_norm": 7.422946453094482,
+      "learning_rate": 1.3301886792452831e-05,
+      "loss": 0.406,
+      "step": 141
+    },
+    {
+      "epoch": 0.06713947990543735,
+      "grad_norm": 7.327658653259277,
+      "learning_rate": 1.339622641509434e-05,
+      "loss": 0.4366,
+      "step": 142
+    },
+    {
+      "epoch": 0.06761229314420804,
+      "grad_norm": 6.69068717956543,
+      "learning_rate": 1.3490566037735851e-05,
+      "loss": 0.4263,
+      "step": 143
+    },
+    {
+      "epoch": 0.06808510638297872,
+      "grad_norm": 4.780946254730225,
+      "learning_rate": 1.3584905660377358e-05,
+      "loss": 0.325,
+      "step": 144
+    },
+    {
+      "epoch": 0.06855791962174941,
+      "grad_norm": 6.016948699951172,
+      "learning_rate": 1.3679245283018869e-05,
+      "loss": 0.4426,
+      "step": 145
+    },
+    {
+      "epoch": 0.0690307328605201,
+      "grad_norm": 5.669694900512695,
+      "learning_rate": 1.3773584905660378e-05,
+      "loss": 0.3957,
+      "step": 146
+    },
+    {
+      "epoch": 0.06950354609929078,
+      "grad_norm": 6.2454609870910645,
+      "learning_rate": 1.3867924528301888e-05,
+      "loss": 0.3033,
+      "step": 147
+    },
+    {
+      "epoch": 0.06997635933806147,
+      "grad_norm": 5.8120808601379395,
+      "learning_rate": 1.3962264150943397e-05,
+      "loss": 0.4472,
+      "step": 148
+    },
+    {
+      "epoch": 0.07044917257683216,
+      "grad_norm": 6.466278553009033,
+      "learning_rate": 1.4056603773584908e-05,
+      "loss": 0.3469,
+      "step": 149
+    },
+    {
+      "epoch": 0.07092198581560284,
+      "grad_norm": 8.212775230407715,
+      "learning_rate": 1.4150943396226415e-05,
+      "loss": 0.4478,
+      "step": 150
+    },
+    {
+      "epoch": 0.07139479905437353,
+      "grad_norm": 7.582151889801025,
+      "learning_rate": 1.4245283018867926e-05,
+      "loss": 0.4312,
+      "step": 151
+    },
+    {
+      "epoch": 0.07186761229314421,
+      "grad_norm": 5.214906215667725,
+      "learning_rate": 1.4339622641509435e-05,
+      "loss": 0.3446,
+      "step": 152
+    },
+    {
+      "epoch": 0.07234042553191489,
+      "grad_norm": 4.743616580963135,
+      "learning_rate": 1.4433962264150945e-05,
+      "loss": 0.2665,
+      "step": 153
+    },
+    {
+      "epoch": 0.07281323877068557,
+      "grad_norm": 5.460316181182861,
+      "learning_rate": 1.4528301886792452e-05,
+      "loss": 0.4369,
+      "step": 154
+    },
+    {
+      "epoch": 0.07328605200945626,
+      "grad_norm": 7.11004638671875,
+      "learning_rate": 1.4622641509433963e-05,
+      "loss": 0.3813,
+      "step": 155
+    },
+    {
+      "epoch": 0.07375886524822695,
+      "grad_norm": 6.461905479431152,
+      "learning_rate": 1.4716981132075472e-05,
+      "loss": 0.3413,
+      "step": 156
+    },
+    {
+      "epoch": 0.07423167848699763,
+      "grad_norm": 6.668741226196289,
+      "learning_rate": 1.4811320754716983e-05,
+      "loss": 0.425,
+      "step": 157
+    },
+    {
+      "epoch": 0.07470449172576832,
+      "grad_norm": 7.922025203704834,
+      "learning_rate": 1.4905660377358491e-05,
+      "loss": 0.3927,
+      "step": 158
+    },
+    {
+      "epoch": 0.075177304964539,
+      "grad_norm": 5.079823017120361,
+      "learning_rate": 1.5000000000000002e-05,
+      "loss": 0.34,
+      "step": 159
+    },
+    {
+      "epoch": 0.07565011820330969,
+      "grad_norm": 5.172731876373291,
+      "learning_rate": 1.5094339622641511e-05,
+      "loss": 0.3899,
+      "step": 160
+    },
+    {
+      "epoch": 0.07565011820330969,
+      "eval_accuracy": 0.7937915742793792,
+      "eval_f1": 0.5181347150259067,
+      "eval_loss": 0.4403148889541626,
+      "eval_precision": 0.7518796992481203,
+      "eval_recall": 0.3952569169960474,
+      "eval_runtime": 48.1121,
+      "eval_samples_per_second": 5.737,
+      "eval_steps_per_second": 0.187,
+      "step": 160
+    },
+    {
+      "epoch": 0.07612293144208038,
+      "grad_norm": 6.415732383728027,
+      "learning_rate": 1.518867924528302e-05,
+      "loss": 0.3399,
+      "step": 161
+    },
+    {
+      "epoch": 0.07659574468085106,
+      "grad_norm": 6.380404472351074,
+      "learning_rate": 1.5283018867924532e-05,
+      "loss": 0.4384,
+      "step": 162
+    },
+    {
+      "epoch": 0.07706855791962175,
+      "grad_norm": 4.902369976043701,
+      "learning_rate": 1.5377358490566038e-05,
+      "loss": 0.3413,
+      "step": 163
+    },
+    {
+      "epoch": 0.07754137115839244,
+      "grad_norm": 5.686254024505615,
+      "learning_rate": 1.547169811320755e-05,
+      "loss": 0.4252,
+      "step": 164
+    },
+    {
+      "epoch": 0.07801418439716312,
+      "grad_norm": 6.227957725524902,
+      "learning_rate": 1.556603773584906e-05,
+      "loss": 0.3132,
+      "step": 165
+    },
+    {
+      "epoch": 0.07848699763593381,
+      "grad_norm": 8.092106819152832,
+      "learning_rate": 1.5660377358490568e-05,
+      "loss": 0.5402,
+      "step": 166
+    },
+    {
+      "epoch": 0.0789598108747045,
+      "grad_norm": 6.633399486541748,
+      "learning_rate": 1.5754716981132077e-05,
+      "loss": 0.3574,
+      "step": 167
+    },
+    {
+      "epoch": 0.07943262411347518,
+      "grad_norm": 7.712852954864502,
+      "learning_rate": 1.5849056603773586e-05,
+      "loss": 0.435,
+      "step": 168
+    },
+    {
+      "epoch": 0.07990543735224587,
+      "grad_norm": 4.21342134475708,
+      "learning_rate": 1.5943396226415095e-05,
+      "loss": 0.3245,
+      "step": 169
+    },
+    {
+      "epoch": 0.08037825059101655,
+      "grad_norm": 4.879771709442139,
+      "learning_rate": 1.6037735849056607e-05,
+      "loss": 0.2535,
+      "step": 170
+    },
+    {
+      "epoch": 0.08085106382978724,
+      "grad_norm": 7.206470966339111,
+      "learning_rate": 1.6132075471698116e-05,
+      "loss": 0.3628,
+      "step": 171
+    },
+    {
+      "epoch": 0.08132387706855793,
+      "grad_norm": 4.479485034942627,
+      "learning_rate": 1.6226415094339625e-05,
+      "loss": 0.3318,
+      "step": 172
+    },
+    {
+      "epoch": 0.0817966903073286,
+      "grad_norm": 6.472604751586914,
+      "learning_rate": 1.6320754716981134e-05,
+      "loss": 0.4404,
+      "step": 173
+    },
+    {
+      "epoch": 0.08226950354609928,
+      "grad_norm": 4.094892501831055,
+      "learning_rate": 1.6415094339622643e-05,
+      "loss": 0.2986,
+      "step": 174
+    },
+    {
+      "epoch": 0.08274231678486997,
+      "grad_norm": 5.433969974517822,
+      "learning_rate": 1.650943396226415e-05,
+      "loss": 0.3356,
+      "step": 175
+    },
+    {
+      "epoch": 0.08321513002364066,
+      "grad_norm": 5.57079553604126,
+      "learning_rate": 1.6603773584905664e-05,
+      "loss": 0.3288,
+      "step": 176
+    },
+    {
+      "epoch": 0.08368794326241134,
+      "grad_norm": 5.4054460525512695,
+      "learning_rate": 1.669811320754717e-05,
+      "loss": 0.3688,
+      "step": 177
+    },
+    {
+      "epoch": 0.08416075650118203,
+      "grad_norm": 6.414549350738525,
+      "learning_rate": 1.679245283018868e-05,
+      "loss": 0.3791,
+      "step": 178
+    },
+    {
+      "epoch": 0.08463356973995272,
+      "grad_norm": 6.032560348510742,
+      "learning_rate": 1.688679245283019e-05,
+      "loss": 0.3142,
+      "step": 179
+    },
+    {
+      "epoch": 0.0851063829787234,
+      "grad_norm": 6.080160617828369,
+      "learning_rate": 1.69811320754717e-05,
+      "loss": 0.3429,
+      "step": 180
+    },
+    {
+      "epoch": 0.0851063829787234,
+      "eval_accuracy": 0.8159645232815964,
+      "eval_f1": 0.5951219512195122,
+      "eval_loss": 0.4055093824863434,
+      "eval_precision": 0.7770700636942676,
+      "eval_recall": 0.48221343873517786,
+      "eval_runtime": 47.3649,
+      "eval_samples_per_second": 5.827,
+      "eval_steps_per_second": 0.19,
+      "step": 180
+    },
+    {
+      "epoch": 0.08557919621749409,
+      "grad_norm": 5.409966468811035,
+      "learning_rate": 1.707547169811321e-05,
+      "loss": 0.2881,
+      "step": 181
+    },
+    {
+      "epoch": 0.08605200945626477,
+      "grad_norm": 7.760888576507568,
+      "learning_rate": 1.716981132075472e-05,
+      "loss": 0.4003,
+      "step": 182
+    },
+    {
+      "epoch": 0.08652482269503546,
+      "grad_norm": 6.271183013916016,
+      "learning_rate": 1.7264150943396226e-05,
+      "loss": 0.3343,
+      "step": 183
+    },
+    {
+      "epoch": 0.08699763593380615,
+      "grad_norm": 7.139448165893555,
+      "learning_rate": 1.735849056603774e-05,
+      "loss": 0.3246,
+      "step": 184
+    },
+    {
+      "epoch": 0.08747044917257683,
+      "grad_norm": 5.225427627563477,
+      "learning_rate": 1.7452830188679247e-05,
+      "loss": 0.3042,
+      "step": 185
+    },
+    {
+      "epoch": 0.08794326241134752,
+      "grad_norm": 7.858066558837891,
+      "learning_rate": 1.7547169811320756e-05,
+      "loss": 0.3843,
+      "step": 186
+    },
+    {
+      "epoch": 0.0884160756501182,
+      "grad_norm": 7.103234767913818,
+      "learning_rate": 1.7641509433962265e-05,
+      "loss": 0.3878,
+      "step": 187
+    },
+    {
+      "epoch": 0.08888888888888889,
+      "grad_norm": 8.041577339172363,
+      "learning_rate": 1.7735849056603774e-05,
+      "loss": 0.4482,
+      "step": 188
+    },
+    {
+      "epoch": 0.08936170212765958,
+      "grad_norm": 6.207291126251221,
+      "learning_rate": 1.7830188679245283e-05,
+      "loss": 0.3709,
+      "step": 189
+    },
+    {
+      "epoch": 0.08983451536643026,
+      "grad_norm": 5.278400897979736,
+      "learning_rate": 1.7924528301886795e-05,
+      "loss": 0.2548,
+      "step": 190
+    },
+    {
+      "epoch": 0.09030732860520095,
+      "grad_norm": 6.8568854331970215,
+      "learning_rate": 1.8018867924528304e-05,
+      "loss": 0.3724,
+      "step": 191
+    },
+    {
+      "epoch": 0.09078014184397164,
+      "grad_norm": 6.631660461425781,
+      "learning_rate": 1.8113207547169813e-05,
+      "loss": 0.3654,
+      "step": 192
+    },
+    {
+      "epoch": 0.09125295508274232,
+      "grad_norm": 7.872669696807861,
+      "learning_rate": 1.8207547169811322e-05,
+      "loss": 0.3598,
+      "step": 193
+    },
+    {
+      "epoch": 0.091725768321513,
+      "grad_norm": 5.30977725982666,
+      "learning_rate": 1.830188679245283e-05,
+      "loss": 0.316,
+      "step": 194
+    },
+    {
+      "epoch": 0.09219858156028368,
+      "grad_norm": 6.427607536315918,
+      "learning_rate": 1.839622641509434e-05,
+      "loss": 0.3215,
+      "step": 195
+    },
+    {
+      "epoch": 0.09267139479905437,
+      "grad_norm": 6.099403381347656,
+      "learning_rate": 1.8490566037735852e-05,
+      "loss": 0.3482,
+      "step": 196
+    },
+    {
+      "epoch": 0.09314420803782505,
+      "grad_norm": 5.679231643676758,
+      "learning_rate": 1.8584905660377358e-05,
+      "loss": 0.3189,
+      "step": 197
+    },
+    {
+      "epoch": 0.09361702127659574,
+      "grad_norm": 7.309570789337158,
+      "learning_rate": 1.867924528301887e-05,
+      "loss": 0.4746,
+      "step": 198
+    },
+    {
+      "epoch": 0.09408983451536643,
+      "grad_norm": 8.899137496948242,
+      "learning_rate": 1.877358490566038e-05,
+      "loss": 0.5097,
+      "step": 199
+    },
+    {
+      "epoch": 0.09456264775413711,
+      "grad_norm": 5.904891014099121,
+      "learning_rate": 1.8867924528301888e-05,
+      "loss": 0.3529,
+      "step": 200
+    },
+    {
+      "epoch": 0.09456264775413711,
+      "eval_accuracy": 0.8181818181818182,
+      "eval_f1": 0.6255707762557078,
+      "eval_loss": 0.38472801446914673,
+      "eval_precision": 0.7405405405405405,
+      "eval_recall": 0.541501976284585,
+      "eval_runtime": 48.2579,
+      "eval_samples_per_second": 5.719,
+      "eval_steps_per_second": 0.186,
+      "step": 200
+    },
+    {
+      "epoch": 0.0950354609929078,
+      "grad_norm": 5.29753303527832,
+      "learning_rate": 1.8962264150943397e-05,
+      "loss": 0.2815,
+      "step": 201
+    },
+    {
+      "epoch": 0.09550827423167849,
+      "grad_norm": 8.292261123657227,
+      "learning_rate": 1.905660377358491e-05,
+      "loss": 0.3791,
+      "step": 202
+    },
+    {
+      "epoch": 0.09598108747044917,
+      "grad_norm": 5.616471290588379,
+      "learning_rate": 1.9150943396226415e-05,
+      "loss": 0.3434,
+      "step": 203
+    },
+    {
+      "epoch": 0.09645390070921986,
+      "grad_norm": 4.834171772003174,
+      "learning_rate": 1.9245283018867927e-05,
+      "loss": 0.3383,
+      "step": 204
+    },
+    {
+      "epoch": 0.09692671394799054,
+      "grad_norm": 6.36716890335083,
+      "learning_rate": 1.9339622641509436e-05,
+      "loss": 0.3548,
+      "step": 205
+    },
+    {
+      "epoch": 0.09739952718676123,
+      "grad_norm": 5.7878899574279785,
+      "learning_rate": 1.9433962264150945e-05,
+      "loss": 0.3652,
+      "step": 206
+    },
+    {
+      "epoch": 0.09787234042553192,
+      "grad_norm": 5.697458267211914,
+      "learning_rate": 1.9528301886792454e-05,
+      "loss": 0.2581,
+      "step": 207
+    },
+    {
+      "epoch": 0.0983451536643026,
+      "grad_norm": 4.944214344024658,
+      "learning_rate": 1.9622641509433963e-05,
+      "loss": 0.3725,
+      "step": 208
+    },
+    {
+      "epoch": 0.09881796690307329,
+      "grad_norm": 5.800679683685303,
+      "learning_rate": 1.971698113207547e-05,
+      "loss": 0.2957,
+      "step": 209
+    },
+    {
+      "epoch": 0.09929078014184398,
+      "grad_norm": 5.455956935882568,
+      "learning_rate": 1.9811320754716984e-05,
+      "loss": 0.3826,
+      "step": 210
+    },
+    {
+      "epoch": 0.09976359338061466,
+      "grad_norm": 4.2240519523620605,
+      "learning_rate": 1.9905660377358493e-05,
+      "loss": 0.276,
+      "step": 211
+    },
+    {
+      "epoch": 0.10023640661938535,
+      "grad_norm": 4.200746059417725,
+      "learning_rate": 2e-05,
+      "loss": 0.2807,
+      "step": 212
+    },
+    {
+      "epoch": 0.10070921985815603,
+      "grad_norm": 5.269329071044922,
+      "learning_rate": 1.999998637325671e-05,
+      "loss": 0.3351,
+      "step": 213
+    },
+    {
+      "epoch": 0.10118203309692671,
+      "grad_norm": 4.950570583343506,
+      "learning_rate": 1.999994549306397e-05,
+      "loss": 0.3145,
+      "step": 214
+    },
+    {
+      "epoch": 0.1016548463356974,
+      "grad_norm": 6.465134143829346,
+      "learning_rate": 1.9999877359533202e-05,
+      "loss": 0.351,
+      "step": 215
+    },
+    {
+      "epoch": 0.10212765957446808,
+      "grad_norm": 6.148433685302734,
+      "learning_rate": 1.9999781972850082e-05,
+      "loss": 0.3563,
+      "step": 216
+    },
+    {
+      "epoch": 0.10260047281323877,
+      "grad_norm": 4.79353666305542,
+      "learning_rate": 1.9999659333274582e-05,
+      "loss": 0.2827,
+      "step": 217
+    },
+    {
+      "epoch": 0.10307328605200945,
+      "grad_norm": 5.91294002532959,
+      "learning_rate": 1.9999509441140934e-05,
+      "loss": 0.3741,
+      "step": 218
+    },
+    {
+      "epoch": 0.10354609929078014,
+      "grad_norm": 6.508899688720703,
+      "learning_rate": 1.9999332296857642e-05,
+      "loss": 0.3454,
+      "step": 219
+    },
+    {
+      "epoch": 0.10401891252955082,
+      "grad_norm": 6.511887073516846,
+      "learning_rate": 1.9999127900907496e-05,
+      "loss": 0.36,
+      "step": 220
+    },
+    {
+      "epoch": 0.10401891252955082,
+      "eval_accuracy": 0.8181818181818182,
+      "eval_f1": 0.6076555023923444,
+      "eval_loss": 0.38239341974258423,
+      "eval_precision": 0.7696969696969697,
+      "eval_recall": 0.5019762845849802,
+      "eval_runtime": 49.0986,
+      "eval_samples_per_second": 5.621,
+      "eval_steps_per_second": 0.183,
+      "step": 220
+    },
+    {
+      "epoch": 0.10449172576832151,
+      "grad_norm": 5.294708728790283,
+      "learning_rate": 1.9998896253847536e-05,
+      "loss": 0.345,
+      "step": 221
+    },
+    {
+      "epoch": 0.1049645390070922,
+      "grad_norm": 6.348694324493408,
+      "learning_rate": 1.9998637356309088e-05,
+      "loss": 0.2954,
+      "step": 222
+    },
+    {
+      "epoch": 0.10543735224586288,
+      "grad_norm": 5.638726234436035,
+      "learning_rate": 1.9998351208997734e-05,
+      "loss": 0.3151,
+      "step": 223
+    },
+    {
+      "epoch": 0.10591016548463357,
+      "grad_norm": 6.741607666015625,
+      "learning_rate": 1.999803781269333e-05,
+      "loss": 0.376,
+      "step": 224
+    },
+    {
+      "epoch": 0.10638297872340426,
+      "grad_norm": 5.410939693450928,
+      "learning_rate": 1.999769716824998e-05,
+      "loss": 0.2694,
+      "step": 225
+    },
+    {
+      "epoch": 0.10685579196217494,
+      "grad_norm": 6.066391944885254,
+      "learning_rate": 1.9997329276596073e-05,
+      "loss": 0.3443,
+      "step": 226
+    },
+    {
+      "epoch": 0.10732860520094563,
+      "grad_norm": 6.088653087615967,
+      "learning_rate": 1.999693413873423e-05,
+      "loss": 0.3617,
+      "step": 227
+    },
+    {
+      "epoch": 0.10780141843971631,
+      "grad_norm": 6.114930152893066,
+      "learning_rate": 1.9996511755741346e-05,
+      "loss": 0.2752,
+      "step": 228
+    },
+    {
+      "epoch": 0.108274231678487,
+      "grad_norm": 5.094395160675049,
+      "learning_rate": 1.999606212876856e-05,
+      "loss": 0.2251,
+      "step": 229
+    },
+    {
+      "epoch": 0.10874704491725769,
+      "grad_norm": 6.448328495025635,
+      "learning_rate": 1.999558525904126e-05,
+      "loss": 0.3729,
+      "step": 230
+    },
+    {
+      "epoch": 0.10921985815602837,
+      "grad_norm": 5.548649787902832,
+      "learning_rate": 1.9995081147859087e-05,
+      "loss": 0.3591,
+      "step": 231
+    },
+    {
+      "epoch": 0.10969267139479906,
+      "grad_norm": 8.929473876953125,
+      "learning_rate": 1.9994549796595913e-05,
+      "loss": 0.3535,
+      "step": 232
+    },
+    {
+      "epoch": 0.11016548463356975,
+      "grad_norm": 4.452419757843018,
+      "learning_rate": 1.9993991206699865e-05,
+      "loss": 0.2647,
+      "step": 233
+    },
+    {
+      "epoch": 0.11063829787234042,
+      "grad_norm": 4.958791732788086,
+      "learning_rate": 1.999340537969329e-05,
+      "loss": 0.3027,
+      "step": 234
+    },
+    {
+      "epoch": 0.1111111111111111,
+      "grad_norm": 6.243503093719482,
+      "learning_rate": 1.9992792317172765e-05,
+      "loss": 0.3199,
+      "step": 235
+    },
+    {
+      "epoch": 0.11158392434988179,
+      "grad_norm": 5.826799392700195,
+      "learning_rate": 1.9992152020809113e-05,
+      "loss": 0.3239,
+      "step": 236
+    },
+    {
+      "epoch": 0.11205673758865248,
+      "grad_norm": 8.090536117553711,
+      "learning_rate": 1.9991484492347353e-05,
+      "loss": 0.4326,
+      "step": 237
+    },
+    {
+      "epoch": 0.11252955082742316,
+      "grad_norm": 7.066166877746582,
+      "learning_rate": 1.9990789733606733e-05,
+      "loss": 0.2672,
+      "step": 238
+    },
+    {
+      "epoch": 0.11300236406619385,
+      "grad_norm": 5.99971342086792,
+      "learning_rate": 1.999006774648072e-05,
+      "loss": 0.2984,
+      "step": 239
+    },
+    {
+      "epoch": 0.11347517730496454,
+      "grad_norm": 5.290529727935791,
+      "learning_rate": 1.998931853293698e-05,
+      "loss": 0.2875,
+      "step": 240
+    },
+    {
+      "epoch": 0.11347517730496454,
+      "eval_accuracy": 0.8226164079822617,
+      "eval_f1": 0.6428571428571429,
+      "eval_loss": 0.3577645719051361,
+      "eval_precision": 0.7384615384615385,
+      "eval_recall": 0.5691699604743083,
+      "eval_runtime": 48.6337,
+      "eval_samples_per_second": 5.675,
+      "eval_steps_per_second": 0.185,
+      "step": 240
+    },
+    {
+      "epoch": 0.11394799054373522,
+      "grad_norm": 6.742930889129639,
+      "learning_rate": 1.9988542095017373e-05,
+      "loss": 0.3273,
+      "step": 241
+    },
+    {
+      "epoch": 0.11442080378250591,
+      "grad_norm": 6.42619514465332,
+      "learning_rate": 1.9987738434837973e-05,
+      "loss": 0.3949,
+      "step": 242
+    },
+    {
+      "epoch": 0.1148936170212766,
+      "grad_norm": 5.628868103027344,
+      "learning_rate": 1.9986907554589024e-05,
+      "loss": 0.2947,
+      "step": 243
+    },
+    {
+      "epoch": 0.11536643026004728,
+      "grad_norm": 6.224806308746338,
+      "learning_rate": 1.9986049456534972e-05,
+      "loss": 0.4323,
+      "step": 244
+    },
+    {
+      "epoch": 0.11583924349881797,
+      "grad_norm": 8.325421333312988,
+      "learning_rate": 1.9985164143014433e-05,
+      "loss": 0.2801,
+      "step": 245
+    },
+    {
+      "epoch": 0.11631205673758865,
+      "grad_norm": 6.2002129554748535,
+      "learning_rate": 1.9984251616440195e-05,
+      "loss": 0.2003,
+      "step": 246
+    },
+    {
+      "epoch": 0.11678486997635934,
+      "grad_norm": 6.008997917175293,
+      "learning_rate": 1.9983311879299203e-05,
+      "loss": 0.3376,
+      "step": 247
+    },
+    {
+      "epoch": 0.11725768321513003,
+      "grad_norm": 5.332534313201904,
+      "learning_rate": 1.9982344934152577e-05,
+      "loss": 0.3856,
+      "step": 248
+    },
+    {
+      "epoch": 0.11773049645390071,
+      "grad_norm": 3.9309067726135254,
+      "learning_rate": 1.9981350783635582e-05,
+      "loss": 0.2775,
+      "step": 249
+    },
+    {
+      "epoch": 0.1182033096926714,
+      "grad_norm": 6.058276653289795,
+      "learning_rate": 1.9980329430457616e-05,
+      "loss": 0.2785,
+      "step": 250
+    },
+    {
+      "epoch": 0.11867612293144209,
+      "grad_norm": 4.346603870391846,
+      "learning_rate": 1.997928087740222e-05,
+      "loss": 0.2102,
+      "step": 251
+    },
+    {
+      "epoch": 0.11914893617021277,
+      "grad_norm": 5.8436408042907715,
+      "learning_rate": 1.9978205127327085e-05,
+      "loss": 0.321,
+      "step": 252
+    },
+    {
+      "epoch": 0.11962174940898346,
+      "grad_norm": 8.181411743164062,
+      "learning_rate": 1.9977102183163984e-05,
+      "loss": 0.3408,
+      "step": 253
+    },
+    {
+      "epoch": 0.12009456264775414,
+      "grad_norm": 6.579717636108398,
+      "learning_rate": 1.997597204791884e-05,
+      "loss": 0.3031,
+      "step": 254
+    },
+    {
+      "epoch": 0.12056737588652482,
+      "grad_norm": 6.751533031463623,
+      "learning_rate": 1.9974814724671658e-05,
+      "loss": 0.3331,
+      "step": 255
+    },
+    {
+      "epoch": 0.1210401891252955,
+      "grad_norm": 5.326685905456543,
+      "learning_rate": 1.9973630216576547e-05,
+      "loss": 0.2865,
+      "step": 256
+    },
+    {
+      "epoch": 0.12151300236406619,
+      "grad_norm": 6.129054546356201,
+      "learning_rate": 1.9972418526861704e-05,
+      "loss": 0.243,
+      "step": 257
+    },
+    {
+      "epoch": 0.12198581560283688,
+      "grad_norm": 4.441340446472168,
+      "learning_rate": 1.997117965882941e-05,
+      "loss": 0.1915,
+      "step": 258
+    },
+    {
+      "epoch": 0.12245862884160756,
+      "grad_norm": 6.2238569259643555,
+      "learning_rate": 1.9969913615856015e-05,
+      "loss": 0.3069,
+      "step": 259
+    },
+    {
+      "epoch": 0.12293144208037825,
+      "grad_norm": 6.094357967376709,
+      "learning_rate": 1.9968620401391917e-05,
+      "loss": 0.3237,
+      "step": 260
+    },
+    {
+      "epoch": 0.12293144208037825,
+      "eval_accuracy": 0.8425720620842572,
+      "eval_f1": 0.710204081632653,
+      "eval_loss": 0.34573158621788025,
+      "eval_precision": 0.7341772151898734,
+      "eval_recall": 0.6877470355731226,
+      "eval_runtime": 49.3073,
+      "eval_samples_per_second": 5.598,
+      "eval_steps_per_second": 0.183,
+      "step": 260
+    },
+    {
+      "epoch": 0.12340425531914893,
+      "grad_norm": 7.97859001159668,
+      "learning_rate": 1.9967300018961582e-05,
+      "loss": 0.235,
+      "step": 261
+    },
+    {
+      "epoch": 0.12387706855791962,
+      "grad_norm": 8.9214448928833,
+      "learning_rate": 1.9965952472163517e-05,
+      "loss": 0.3719,
+      "step": 262
+    },
+    {
+      "epoch": 0.1243498817966903,
+      "grad_norm": 9.066556930541992,
+      "learning_rate": 1.996457776467025e-05,
+      "loss": 0.3064,
+      "step": 263
+    },
+    {
+      "epoch": 0.12482269503546099,
+      "grad_norm": 5.2799177169799805,
+      "learning_rate": 1.996317590022834e-05,
+      "loss": 0.3362,
+      "step": 264
+    },
+    {
+      "epoch": 0.12529550827423167,
+      "grad_norm": 7.641961574554443,
+      "learning_rate": 1.996174688265836e-05,
+      "loss": 0.326,
+      "step": 265
+    },
+    {
+      "epoch": 0.12576832151300235,
+      "grad_norm": 7.163477420806885,
+      "learning_rate": 1.9960290715854874e-05,
+      "loss": 0.2446,
+      "step": 266
+    },
+    {
+      "epoch": 0.12624113475177304,
+      "grad_norm": 5.137381553649902,
+      "learning_rate": 1.9958807403786452e-05,
+      "loss": 0.2447,
+      "step": 267
+    },
+    {
+      "epoch": 0.12671394799054372,
+      "grad_norm": 8.010651588439941,
+      "learning_rate": 1.995729695049563e-05,
+      "loss": 0.3647,
+      "step": 268
+    },
+    {
+      "epoch": 0.1271867612293144,
+      "grad_norm": 5.50241231918335,
+      "learning_rate": 1.995575936009893e-05,
+      "loss": 0.3076,
+      "step": 269
+    },
+    {
+      "epoch": 0.1276595744680851,
+      "grad_norm": 5.5639567375183105,
+      "learning_rate": 1.995419463678681e-05,
+      "loss": 0.3547,
+      "step": 270
+    },
+    {
+      "epoch": 0.12813238770685578,
+      "grad_norm": 5.540798187255859,
+      "learning_rate": 1.9952602784823688e-05,
+      "loss": 0.385,
+      "step": 271
+    },
+    {
+      "epoch": 0.12860520094562647,
+      "grad_norm": 8.423896789550781,
+      "learning_rate": 1.9950983808547923e-05,
+      "loss": 0.2973,
+      "step": 272
+    },
+    {
+      "epoch": 0.12907801418439716,
+      "grad_norm": 7.910554885864258,
+      "learning_rate": 1.994933771237179e-05,
+      "loss": 0.232,
+      "step": 273
+    },
+    {
+      "epoch": 0.12955082742316784,
+      "grad_norm": 5.810388565063477,
+      "learning_rate": 1.9947664500781464e-05,
+      "loss": 0.3371,
+      "step": 274
+    },
+    {
+      "epoch": 0.13002364066193853,
+      "grad_norm": 4.976069450378418,
+      "learning_rate": 1.9945964178337037e-05,
+      "loss": 0.2469,
+      "step": 275
+    },
+    {
+      "epoch": 0.13049645390070921,
+      "grad_norm": 5.738739013671875,
+      "learning_rate": 1.9944236749672483e-05,
+      "loss": 0.3208,
+      "step": 276
+    },
+    {
+      "epoch": 0.1309692671394799,
+      "grad_norm": 6.816530704498291,
+      "learning_rate": 1.9942482219495644e-05,
+      "loss": 0.327,
+      "step": 277
+    },
+    {
+      "epoch": 0.1314420803782506,
+      "grad_norm": 4.750877857208252,
+      "learning_rate": 1.9940700592588228e-05,
+      "loss": 0.2176,
+      "step": 278
+    },
+    {
+      "epoch": 0.13191489361702127,
+      "grad_norm": 4.42209529876709,
+      "learning_rate": 1.9938891873805787e-05,
+      "loss": 0.3138,
+      "step": 279
+    },
+    {
+      "epoch": 0.13238770685579196,
+      "grad_norm": 3.6869099140167236,
+      "learning_rate": 1.993705606807771e-05,
+      "loss": 0.2309,
+      "step": 280
+    },
+    {
+      "epoch": 0.13238770685579196,
+      "eval_accuracy": 0.8203991130820399,
+      "eval_f1": 0.5759162303664922,
+      "eval_loss": 0.36263027787208557,
+      "eval_precision": 0.8527131782945736,
+      "eval_recall": 0.43478260869565216,
+      "eval_runtime": 49.2617,
+      "eval_samples_per_second": 5.603,
+      "eval_steps_per_second": 0.183,
+      "step": 280
+    },
+    {
+      "epoch": 0.13286052009456265,
+      "grad_norm": 8.313398361206055,
+      "learning_rate": 1.9935193180407216e-05,
+      "loss": 0.3767,
+      "step": 281
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 3.1473255157470703,
+      "learning_rate": 1.9933303215871313e-05,
+      "loss": 0.21,
+      "step": 282
+    },
+    {
+      "epoch": 0.13380614657210402,
+      "grad_norm": 4.254230499267578,
+      "learning_rate": 1.9931386179620816e-05,
+      "loss": 0.2737,
+      "step": 283
+    },
+    {
+      "epoch": 0.1342789598108747,
+      "grad_norm": 7.221229553222656,
+      "learning_rate": 1.9929442076880323e-05,
+      "loss": 0.2579,
+      "step": 284
+    },
+    {
+      "epoch": 0.1347517730496454,
+      "grad_norm": 5.343625068664551,
+      "learning_rate": 1.9927470912948184e-05,
+      "loss": 0.3152,
+      "step": 285
+    },
+    {
+      "epoch": 0.13522458628841608,
+      "grad_norm": 6.4857916831970215,
+      "learning_rate": 1.992547269319651e-05,
+      "loss": 0.3524,
+      "step": 286
+    },
+    {
+      "epoch": 0.13569739952718676,
+      "grad_norm": 6.073742389678955,
+      "learning_rate": 1.9923447423071153e-05,
+      "loss": 0.3117,
+      "step": 287
+    },
+    {
+      "epoch": 0.13617021276595745,
+      "grad_norm": 6.168652057647705,
+      "learning_rate": 1.992139510809167e-05,
+      "loss": 0.343,
+      "step": 288
+    },
+    {
+      "epoch": 0.13664302600472814,
+      "grad_norm": 4.182333469390869,
+      "learning_rate": 1.9919315753851343e-05,
+      "loss": 0.2652,
+      "step": 289
+    },
+    {
+      "epoch": 0.13711583924349882,
+      "grad_norm": 5.490057945251465,
+      "learning_rate": 1.9917209366017134e-05,
+      "loss": 0.3377,
+      "step": 290
+    },
+    {
+      "epoch": 0.1375886524822695,
+      "grad_norm": 4.765297889709473,
+      "learning_rate": 1.9915075950329683e-05,
+      "loss": 0.2798,
+      "step": 291
+    },
+    {
+      "epoch": 0.1380614657210402,
+      "grad_norm": 6.1175923347473145,
+      "learning_rate": 1.9912915512603294e-05,
+      "loss": 0.3759,
+      "step": 292
+    },
+    {
+      "epoch": 0.13853427895981088,
+      "grad_norm": 7.154444217681885,
+      "learning_rate": 1.991072805872591e-05,
+      "loss": 0.3252,
+      "step": 293
+    },
+    {
+      "epoch": 0.13900709219858157,
+      "grad_norm": 6.086852550506592,
+      "learning_rate": 1.990851359465911e-05,
+      "loss": 0.3158,
+      "step": 294
+    },
+    {
+      "epoch": 0.13947990543735225,
+      "grad_norm": 6.656464576721191,
+      "learning_rate": 1.990627212643808e-05,
+      "loss": 0.3349,
+      "step": 295
+    },
+    {
+      "epoch": 0.13995271867612294,
+      "grad_norm": 5.9059062004089355,
+      "learning_rate": 1.9904003660171597e-05,
+      "loss": 0.2919,
+      "step": 296
+    },
+    {
+      "epoch": 0.14042553191489363,
+      "grad_norm": 7.576990604400635,
+      "learning_rate": 1.990170820204203e-05,
+      "loss": 0.2865,
+      "step": 297
+    },
+    {
+      "epoch": 0.1408983451536643,
+      "grad_norm": 5.305886268615723,
+      "learning_rate": 1.9899385758305298e-05,
+      "loss": 0.3499,
+      "step": 298
+    },
+    {
+      "epoch": 0.141371158392435,
+      "grad_norm": 4.519371032714844,
+      "learning_rate": 1.9897036335290868e-05,
+      "loss": 0.2928,
+      "step": 299
+    },
+    {
+      "epoch": 0.14184397163120568,
+      "grad_norm": 4.735250949859619,
+      "learning_rate": 1.989465993940174e-05,
+      "loss": 0.2843,
+      "step": 300
+    },
+    {
+      "epoch": 0.14184397163120568,
+      "eval_accuracy": 0.8325942350332595,
+      "eval_f1": 0.6215538847117794,
+      "eval_loss": 0.35112443566322327,
+      "eval_precision": 0.8493150684931506,
+      "eval_recall": 0.4901185770750988,
+      "eval_runtime": 49.5436,
+      "eval_samples_per_second": 5.571,
+      "eval_steps_per_second": 0.182,
+      "step": 300
+    },
+    {
+      "epoch": 0.14231678486997637,
+      "grad_norm": 6.546813488006592,
+      "learning_rate": 1.9892256577114422e-05,
+      "loss": 0.2892,
+      "step": 301
+    },
+    {
+      "epoch": 0.14278959810874706,
+      "grad_norm": 10.950983047485352,
+      "learning_rate": 1.9889826254978915e-05,
+      "loss": 0.4064,
+      "step": 302
+    },
+    {
+      "epoch": 0.14326241134751774,
+      "grad_norm": 3.705230474472046,
+      "learning_rate": 1.988736897961869e-05,
+      "loss": 0.2045,
+      "step": 303
+    },
+    {
+      "epoch": 0.14373522458628843,
+      "grad_norm": 8.16879940032959,
+      "learning_rate": 1.9884884757730683e-05,
+      "loss": 0.4112,
+      "step": 304
+    },
+    {
+      "epoch": 0.14420803782505912,
+      "grad_norm": 3.9046270847320557,
+      "learning_rate": 1.988237359608526e-05,
+      "loss": 0.234,
+      "step": 305
+    },
+    {
+      "epoch": 0.14468085106382977,
+      "grad_norm": 3.2188122272491455,
+      "learning_rate": 1.987983550152622e-05,
+      "loss": 0.16,
+      "step": 306
+    },
+    {
+      "epoch": 0.14515366430260046,
+      "grad_norm": 4.718387603759766,
+      "learning_rate": 1.987727048097075e-05,
+      "loss": 0.2409,
+      "step": 307
+    },
+    {
+      "epoch": 0.14562647754137115,
+      "grad_norm": 4.721340656280518,
+      "learning_rate": 1.9874678541409427e-05,
+      "loss": 0.3075,
+      "step": 308
+    },
+    {
+      "epoch": 0.14609929078014183,
+      "grad_norm": 8.97851848602295,
+      "learning_rate": 1.9872059689906188e-05,
+      "loss": 0.3628,
+      "step": 309
+    },
+    {
+      "epoch": 0.14657210401891252,
+      "grad_norm": 5.3872809410095215,
+      "learning_rate": 1.9869413933598317e-05,
+      "loss": 0.2832,
+      "step": 310
+    },
+    {
+      "epoch": 0.1470449172576832,
+      "grad_norm": 6.58519172668457,
+      "learning_rate": 1.986674127969642e-05,
+      "loss": 0.2423,
+      "step": 311
+    },
+    {
+      "epoch": 0.1475177304964539,
+      "grad_norm": 6.278075218200684,
+      "learning_rate": 1.9864041735484417e-05,
+      "loss": 0.3341,
+      "step": 312
+    },
+    {
+      "epoch": 0.14799054373522458,
+      "grad_norm": 6.8102288246154785,
+      "learning_rate": 1.986131530831951e-05,
+      "loss": 0.2887,
+      "step": 313
+    },
+    {
+      "epoch": 0.14846335697399526,
+      "grad_norm": 6.963206768035889,
+      "learning_rate": 1.985856200563215e-05,
+      "loss": 0.2883,
+      "step": 314
+    },
+    {
+      "epoch": 0.14893617021276595,
+      "grad_norm": 5.686670303344727,
+      "learning_rate": 1.9855781834926057e-05,
+      "loss": 0.3673,
+      "step": 315
+    },
+    {
+      "epoch": 0.14940898345153664,
+      "grad_norm": 5.6203227043151855,
+      "learning_rate": 1.985297480377816e-05,
+      "loss": 0.2601,
+      "step": 316
+    },
+    {
+      "epoch": 0.14988179669030732,
+      "grad_norm": 5.668765068054199,
+      "learning_rate": 1.98501409198386e-05,
+      "loss": 0.2868,
+      "step": 317
+    },
+    {
+      "epoch": 0.150354609929078,
+      "grad_norm": 4.719535827636719,
+      "learning_rate": 1.9847280190830706e-05,
+      "loss": 0.2295,
+      "step": 318
+    },
+    {
+      "epoch": 0.1508274231678487,
+      "grad_norm": 6.404664039611816,
+      "learning_rate": 1.9844392624550952e-05,
+      "loss": 0.3816,
+      "step": 319
+    },
+    {
+      "epoch": 0.15130023640661938,
+      "grad_norm": 4.635862827301025,
+      "learning_rate": 1.9841478228868966e-05,
+      "loss": 0.2694,
+      "step": 320
+    },
+    {
+      "epoch": 0.15130023640661938,
+      "eval_accuracy": 0.8337028824833703,
+      "eval_f1": 0.6287128712871287,
+      "eval_loss": 0.34866657853126526,
+      "eval_precision": 0.8410596026490066,
+      "eval_recall": 0.5019762845849802,
+      "eval_runtime": 49.2171,
+      "eval_samples_per_second": 5.608,
+      "eval_steps_per_second": 0.183,
+      "step": 320
+    },
+    {
+      "epoch": 0.15177304964539007,
+      "grad_norm": 6.7525787353515625,
+      "learning_rate": 1.983853701172749e-05,
+      "loss": 0.3208,
+      "step": 321
+    },
+    {
+      "epoch": 0.15224586288416075,
+      "grad_norm": 4.8178019523620605,
+      "learning_rate": 1.9835568981142376e-05,
+      "loss": 0.2365,
+      "step": 322
+    },
+    {
+      "epoch": 0.15271867612293144,
+      "grad_norm": 6.369754791259766,
+      "learning_rate": 1.9832574145202524e-05,
+      "loss": 0.3079,
+      "step": 323
+    },
+    {
+      "epoch": 0.15319148936170213,
+      "grad_norm": 6.613752365112305,
+      "learning_rate": 1.982955251206993e-05,
+      "loss": 0.3133,
+      "step": 324
+    },
+    {
+      "epoch": 0.1536643026004728,
+      "grad_norm": 7.213798999786377,
+      "learning_rate": 1.9826504089979573e-05,
+      "loss": 0.3551,
+      "step": 325
+    },
+    {
+      "epoch": 0.1541371158392435,
+      "grad_norm": 5.0920515060424805,
+      "learning_rate": 1.9823428887239484e-05,
+      "loss": 0.2295,
+      "step": 326
+    },
+    {
+      "epoch": 0.15460992907801419,
+      "grad_norm": 6.177611827850342,
+      "learning_rate": 1.9820326912230654e-05,
+      "loss": 0.3048,
+      "step": 327
+    },
+    {
+      "epoch": 0.15508274231678487,
+      "grad_norm": 6.256964683532715,
+      "learning_rate": 1.981719817340705e-05,
+      "loss": 0.3382,
+      "step": 328
+    },
+    {
+      "epoch": 0.15555555555555556,
+      "grad_norm": 7.9319915771484375,
+      "learning_rate": 1.9814042679295574e-05,
+      "loss": 0.4387,
+      "step": 329
+    },
+    {
+      "epoch": 0.15602836879432624,
+      "grad_norm": 5.281505107879639,
+      "learning_rate": 1.981086043849605e-05,
+      "loss": 0.2951,
+      "step": 330
+    },
+    {
+      "epoch": 0.15650118203309693,
+      "grad_norm": 6.895681858062744,
+      "learning_rate": 1.9807651459681195e-05,
+      "loss": 0.399,
+      "step": 331
+    },
+    {
+      "epoch": 0.15697399527186762,
+      "grad_norm": 6.184955596923828,
+      "learning_rate": 1.9804415751596587e-05,
+      "loss": 0.2605,
+      "step": 332
+    },
+    {
+      "epoch": 0.1574468085106383,
+      "grad_norm": 6.7411699295043945,
+      "learning_rate": 1.9801153323060667e-05,
+      "loss": 0.3157,
+      "step": 333
+    },
+    {
+      "epoch": 0.157919621749409,
+      "grad_norm": 3.7483937740325928,
+      "learning_rate": 1.9797864182964687e-05,
+      "loss": 0.2806,
+      "step": 334
+    },
+    {
+      "epoch": 0.15839243498817968,
+      "grad_norm": 7.106858253479004,
+      "learning_rate": 1.97945483402727e-05,
+      "loss": 0.3,
+      "step": 335
+    },
+    {
+      "epoch": 0.15886524822695036,
+      "grad_norm": 6.808032512664795,
+      "learning_rate": 1.9791205804021537e-05,
+      "loss": 0.3269,
+      "step": 336
+    },
+    {
+      "epoch": 0.15933806146572105,
+      "grad_norm": 5.708781719207764,
+      "learning_rate": 1.978783658332077e-05,
+      "loss": 0.248,
+      "step": 337
+    },
+    {
+      "epoch": 0.15981087470449173,
+      "grad_norm": 4.7683587074279785,
+      "learning_rate": 1.9784440687352708e-05,
+      "loss": 0.2693,
+      "step": 338
+    },
+    {
+      "epoch": 0.16028368794326242,
+      "grad_norm": 4.79544734954834,
+      "learning_rate": 1.9781018125372337e-05,
+      "loss": 0.2603,
+      "step": 339
+    },
+    {
+      "epoch": 0.1607565011820331,
+      "grad_norm": 8.155563354492188,
+      "learning_rate": 1.9777568906707344e-05,
+      "loss": 0.3854,
+      "step": 340
+    },
+    {
+      "epoch": 0.1607565011820331,
+      "eval_accuracy": 0.8192904656319291,
+      "eval_f1": 0.5788113695090439,
+      "eval_loss": 0.35731109976768494,
+      "eval_precision": 0.835820895522388,
+      "eval_recall": 0.4426877470355731,
+      "eval_runtime": 49.5821,
+      "eval_samples_per_second": 5.567,
+      "eval_steps_per_second": 0.182,
+      "step": 340
+    },
+    {
+      "epoch": 0.1612293144208038,
+      "grad_norm": 4.6710286140441895,
+      "learning_rate": 1.977409304075805e-05,
+      "loss": 0.2523,
+      "step": 341
+    },
+    {
+      "epoch": 0.16170212765957448,
+      "grad_norm": 5.3869781494140625,
+      "learning_rate": 1.97705905369974e-05,
+      "loss": 0.3004,
+      "step": 342
+    },
+    {
+      "epoch": 0.16217494089834517,
+      "grad_norm": 4.742314338684082,
+      "learning_rate": 1.976706140497094e-05,
+      "loss": 0.2293,
+      "step": 343
+    },
+    {
+      "epoch": 0.16264775413711585,
+      "grad_norm": 7.420506477355957,
+      "learning_rate": 1.9763505654296782e-05,
+      "loss": 0.2997,
+      "step": 344
+    },
+    {
+      "epoch": 0.16312056737588654,
+      "grad_norm": 6.123251438140869,
+      "learning_rate": 1.9759923294665588e-05,
+      "loss": 0.2884,
+      "step": 345
+    },
+    {
+      "epoch": 0.1635933806146572,
+      "grad_norm": 8.684674263000488,
+      "learning_rate": 1.9756314335840535e-05,
+      "loss": 0.3789,
+      "step": 346
+    },
+    {
+      "epoch": 0.16406619385342788,
+      "grad_norm": 6.067385673522949,
+      "learning_rate": 1.97526787876573e-05,
+      "loss": 0.3224,
+      "step": 347
+    },
+    {
+      "epoch": 0.16453900709219857,
+      "grad_norm": 5.475379467010498,
+      "learning_rate": 1.9749016660024014e-05,
+      "loss": 0.2177,
+      "step": 348
+    },
+    {
+      "epoch": 0.16501182033096926,
+      "grad_norm": 5.6096391677856445,
+      "learning_rate": 1.9745327962921253e-05,
+      "loss": 0.2141,
+      "step": 349
+    },
+    {
+      "epoch": 0.16548463356973994,
+      "grad_norm": 6.197654724121094,
+      "learning_rate": 1.9741612706402002e-05,
+      "loss": 0.3054,
+      "step": 350
+    },
+    {
+      "epoch": 0.16595744680851063,
+      "grad_norm": 5.453296184539795,
+      "learning_rate": 1.973787090059163e-05,
+      "loss": 0.2404,
+      "step": 351
+    },
+    {
+      "epoch": 0.16643026004728131,
+      "grad_norm": 4.969869613647461,
+      "learning_rate": 1.9734102555687868e-05,
+      "loss": 0.2441,
+      "step": 352
+    },
+    {
+      "epoch": 0.166903073286052,
+      "grad_norm": 5.547883987426758,
+      "learning_rate": 1.9730307681960763e-05,
+      "loss": 0.255,
+      "step": 353
+    },
+    {
+      "epoch": 0.1673758865248227,
+      "grad_norm": 8.373857498168945,
+      "learning_rate": 1.972648628975267e-05,
+      "loss": 0.323,
+      "step": 354
+    },
+    {
+      "epoch": 0.16784869976359337,
+      "grad_norm": 9.678048133850098,
+      "learning_rate": 1.9722638389478218e-05,
+      "loss": 0.3685,
+      "step": 355
+    },
+    {
+      "epoch": 0.16832151300236406,
+      "grad_norm": 5.831118106842041,
+      "learning_rate": 1.9718763991624277e-05,
+      "loss": 0.2394,
+      "step": 356
+    },
+    {
+      "epoch": 0.16879432624113475,
+      "grad_norm": 5.348067760467529,
+      "learning_rate": 1.9714863106749928e-05,
+      "loss": 0.2312,
+      "step": 357
+    },
+    {
+      "epoch": 0.16926713947990543,
+      "grad_norm": 5.5273518562316895,
+      "learning_rate": 1.9710935745486447e-05,
+      "loss": 0.2442,
+      "step": 358
+    },
+    {
+      "epoch": 0.16973995271867612,
+      "grad_norm": 4.848127365112305,
+      "learning_rate": 1.9706981918537257e-05,
+      "loss": 0.2208,
+      "step": 359
+    },
+    {
+      "epoch": 0.1702127659574468,
+      "grad_norm": 5.912291526794434,
+      "learning_rate": 1.970300163667792e-05,
+      "loss": 0.3062,
+      "step": 360
+    },
+    {
+      "epoch": 0.1702127659574468,
+      "eval_accuracy": 0.8470066518847007,
+      "eval_f1": 0.7,
+      "eval_loss": 0.326249897480011,
+      "eval_precision": 0.7777777777777778,
+      "eval_recall": 0.6363636363636364,
+      "eval_runtime": 49.3056,
+      "eval_samples_per_second": 5.598,
+      "eval_steps_per_second": 0.183,
+      "step": 360
+    },
+    {
+      "epoch": 0.1706855791962175,
+      "grad_norm": 7.104170322418213,
+      "learning_rate": 1.9698994910756092e-05,
+      "loss": 0.2781,
+      "step": 361
+    },
+    {
+      "epoch": 0.17115839243498818,
+      "grad_norm": 6.990221977233887,
+      "learning_rate": 1.969496175169149e-05,
+      "loss": 0.2876,
+      "step": 362
+    },
+    {
+      "epoch": 0.17163120567375886,
+      "grad_norm": 6.5197014808654785,
+      "learning_rate": 1.9690902170475894e-05,
+      "loss": 0.279,
+      "step": 363
+    },
+    {
+      "epoch": 0.17210401891252955,
+      "grad_norm": 4.127284526824951,
+      "learning_rate": 1.9686816178173065e-05,
+      "loss": 0.2238,
+      "step": 364
+    },
+    {
+      "epoch": 0.17257683215130024,
+      "grad_norm": 7.10957145690918,
+      "learning_rate": 1.968270378591876e-05,
+      "loss": 0.3302,
+      "step": 365
+    },
+    {
+      "epoch": 0.17304964539007092,
+      "grad_norm": 5.990342617034912,
+      "learning_rate": 1.967856500492068e-05,
+      "loss": 0.2445,
+      "step": 366
+    },
+    {
+      "epoch": 0.1735224586288416,
+      "grad_norm": 8.701051712036133,
+      "learning_rate": 1.9674399846458455e-05,
+      "loss": 0.3321,
+      "step": 367
+    },
+    {
+      "epoch": 0.1739952718676123,
+      "grad_norm": 6.448107719421387,
+      "learning_rate": 1.9670208321883588e-05,
+      "loss": 0.3304,
+      "step": 368
+    },
+    {
+      "epoch": 0.17446808510638298,
+      "grad_norm": 6.382139682769775,
+      "learning_rate": 1.966599044261944e-05,
+      "loss": 0.2925,
+      "step": 369
+    },
+    {
+      "epoch": 0.17494089834515367,
+      "grad_norm": 5.474351406097412,
+      "learning_rate": 1.9661746220161208e-05,
+      "loss": 0.3041,
+      "step": 370
+    },
+    {
+      "epoch": 0.17541371158392435,
+      "grad_norm": 6.983358383178711,
+      "learning_rate": 1.965747566607588e-05,
+      "loss": 0.3657,
+      "step": 371
+    },
+    {
+      "epoch": 0.17588652482269504,
+      "grad_norm": 6.531889915466309,
+      "learning_rate": 1.9653178792002203e-05,
+      "loss": 0.2679,
+      "step": 372
+    },
+    {
+      "epoch": 0.17635933806146573,
+      "grad_norm": 4.009150505065918,
+      "learning_rate": 1.964885560965065e-05,
+      "loss": 0.2429,
+      "step": 373
+    },
+    {
+      "epoch": 0.1768321513002364,
+      "grad_norm": 4.717473983764648,
+      "learning_rate": 1.964450613080341e-05,
+      "loss": 0.2378,
+      "step": 374
+    },
+    {
+      "epoch": 0.1773049645390071,
+      "grad_norm": 5.107661724090576,
+      "learning_rate": 1.9640130367314327e-05,
+      "loss": 0.3191,
+      "step": 375
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 6.810859203338623,
+      "learning_rate": 1.963572833110888e-05,
+      "loss": 0.3077,
+      "step": 376
+    },
+    {
+      "epoch": 0.17825059101654847,
+      "grad_norm": 4.793877124786377,
+      "learning_rate": 1.9631300034184155e-05,
+      "loss": 0.2426,
+      "step": 377
+    },
+    {
+      "epoch": 0.17872340425531916,
+      "grad_norm": 5.632551193237305,
+      "learning_rate": 1.96268454886088e-05,
+      "loss": 0.2767,
+      "step": 378
+    },
+    {
+      "epoch": 0.17919621749408984,
+      "grad_norm": 4.865971565246582,
+      "learning_rate": 1.962236470652301e-05,
+      "loss": 0.2907,
+      "step": 379
+    },
+    {
+      "epoch": 0.17966903073286053,
+      "grad_norm": 6.953917503356934,
+      "learning_rate": 1.9617857700138477e-05,
+      "loss": 0.2861,
+      "step": 380
+    },
+    {
+      "epoch": 0.17966903073286053,
+      "eval_accuracy": 0.8458980044345898,
+      "eval_f1": 0.6774941995359629,
+      "eval_loss": 0.3308302164077759,
+      "eval_precision": 0.8202247191011236,
+      "eval_recall": 0.5770750988142292,
+      "eval_runtime": 49.0662,
+      "eval_samples_per_second": 5.625,
+      "eval_steps_per_second": 0.183,
+      "step": 380
+    },
+    {
+      "epoch": 0.18014184397163122,
+      "grad_norm": 8.24282455444336,
+      "learning_rate": 1.9613324481738364e-05,
+      "loss": 0.3452,
+      "step": 381
+    },
+    {
+      "epoch": 0.1806146572104019,
+      "grad_norm": 6.349436283111572,
+      "learning_rate": 1.9608765063677272e-05,
+      "loss": 0.3436,
+      "step": 382
+    },
+    {
+      "epoch": 0.1810874704491726,
+      "grad_norm": 4.89585018157959,
+      "learning_rate": 1.9604179458381204e-05,
+      "loss": 0.2596,
+      "step": 383
+    },
+    {
+      "epoch": 0.18156028368794327,
+      "grad_norm": 5.193378925323486,
+      "learning_rate": 1.9599567678347536e-05,
+      "loss": 0.1857,
+      "step": 384
+    },
+    {
+      "epoch": 0.18203309692671396,
+      "grad_norm": 4.266732692718506,
+      "learning_rate": 1.9594929736144978e-05,
+      "loss": 0.1993,
+      "step": 385
+    },
+    {
+      "epoch": 0.18250591016548465,
+      "grad_norm": 4.616336822509766,
+      "learning_rate": 1.959026564441353e-05,
+      "loss": 0.2655,
+      "step": 386
+    },
+    {
+      "epoch": 0.1829787234042553,
+      "grad_norm": 5.701202392578125,
+      "learning_rate": 1.958557541586448e-05,
+      "loss": 0.2377,
+      "step": 387
+    },
+    {
+      "epoch": 0.183451536643026,
+      "grad_norm": 4.910188674926758,
+      "learning_rate": 1.9580859063280326e-05,
+      "loss": 0.2346,
+      "step": 388
+    },
+    {
+      "epoch": 0.18392434988179668,
+      "grad_norm": 5.084827899932861,
+      "learning_rate": 1.957611659951478e-05,
+      "loss": 0.2473,
+      "step": 389
+    },
+    {
+      "epoch": 0.18439716312056736,
+      "grad_norm": 5.31158971786499,
+      "learning_rate": 1.9571348037492705e-05,
+      "loss": 0.2524,
+      "step": 390
+    },
+    {
+      "epoch": 0.18486997635933805,
+      "grad_norm": 5.35557746887207,
+      "learning_rate": 1.9566553390210103e-05,
+      "loss": 0.22,
+      "step": 391
+    },
+    {
+      "epoch": 0.18534278959810874,
+      "grad_norm": 7.644250392913818,
+      "learning_rate": 1.9561732670734048e-05,
+      "loss": 0.3009,
+      "step": 392
+    },
+    {
+      "epoch": 0.18581560283687942,
+      "grad_norm": 9.144373893737793,
+      "learning_rate": 1.9556885892202685e-05,
+      "loss": 0.4346,
+      "step": 393
+    },
+    {
+      "epoch": 0.1862884160756501,
+      "grad_norm": 6.692631721496582,
+      "learning_rate": 1.9552013067825185e-05,
+      "loss": 0.3075,
+      "step": 394
+    },
+    {
+      "epoch": 0.1867612293144208,
+      "grad_norm": 7.828726768493652,
+      "learning_rate": 1.9547114210881683e-05,
+      "loss": 0.3187,
+      "step": 395
+    },
+    {
+      "epoch": 0.18723404255319148,
+      "grad_norm": 7.344780445098877,
+      "learning_rate": 1.954218933472327e-05,
+      "loss": 0.3382,
+      "step": 396
+    },
+    {
+      "epoch": 0.18770685579196217,
+      "grad_norm": 5.836700439453125,
+      "learning_rate": 1.9537238452771962e-05,
+      "loss": 0.2509,
+      "step": 397
+    },
+    {
+      "epoch": 0.18817966903073285,
+      "grad_norm": 6.650071144104004,
+      "learning_rate": 1.953226157852063e-05,
+      "loss": 0.214,
+      "step": 398
+    },
+    {
+      "epoch": 0.18865248226950354,
+      "grad_norm": 6.298871040344238,
+      "learning_rate": 1.952725872553299e-05,
+      "loss": 0.2362,
+      "step": 399
+    },
+    {
+      "epoch": 0.18912529550827423,
+      "grad_norm": 5.426384449005127,
+      "learning_rate": 1.952222990744357e-05,
+      "loss": 0.2808,
+      "step": 400
+    },
+    {
+      "epoch": 0.18912529550827423,
+      "eval_accuracy": 0.8337028824833703,
+      "eval_f1": 0.609375,
+      "eval_loss": 0.35840529203414917,
+      "eval_precision": 0.8931297709923665,
+      "eval_recall": 0.4624505928853755,
+      "eval_runtime": 49.5299,
+      "eval_samples_per_second": 5.572,
+      "eval_steps_per_second": 0.182,
+      "step": 400
+    },
+    {
+      "epoch": 0.1895981087470449,
+      "grad_norm": 4.219785213470459,
+      "learning_rate": 1.9517175137957647e-05,
+      "loss": 0.2007,
+      "step": 401
+    },
+    {
+      "epoch": 0.1900709219858156,
+      "grad_norm": 4.726499080657959,
+      "learning_rate": 1.9512094430851226e-05,
+      "loss": 0.2333,
+      "step": 402
+    },
+    {
+      "epoch": 0.19054373522458629,
+      "grad_norm": 7.44296407699585,
+      "learning_rate": 1.9506987799971013e-05,
+      "loss": 0.2563,
+      "step": 403
+    },
+    {
+      "epoch": 0.19101654846335697,
+      "grad_norm": 7.742011547088623,
+      "learning_rate": 1.9501855259234353e-05,
+      "loss": 0.313,
+      "step": 404
+    },
+    {
+      "epoch": 0.19148936170212766,
+      "grad_norm": 6.5203657150268555,
+      "learning_rate": 1.9496696822629208e-05,
+      "loss": 0.3372,
+      "step": 405
+    },
+    {
+      "epoch": 0.19196217494089834,
+      "grad_norm": 6.323021411895752,
+      "learning_rate": 1.9491512504214123e-05,
+      "loss": 0.2561,
+      "step": 406
+    },
+    {
+      "epoch": 0.19243498817966903,
+      "grad_norm": 5.560845851898193,
+      "learning_rate": 1.9486302318118164e-05,
+      "loss": 0.2822,
+      "step": 407
+    },
+    {
+      "epoch": 0.19290780141843972,
+      "grad_norm": 5.433687686920166,
+      "learning_rate": 1.9481066278540912e-05,
+      "loss": 0.1501,
+      "step": 408
+    },
+    {
+      "epoch": 0.1933806146572104,
+      "grad_norm": 5.543313026428223,
+      "learning_rate": 1.9475804399752397e-05,
+      "loss": 0.2399,
+      "step": 409
+    },
+    {
+      "epoch": 0.1938534278959811,
+      "grad_norm": 3.8356525897979736,
+      "learning_rate": 1.9470516696093075e-05,
+      "loss": 0.1458,
+      "step": 410
+    },
+    {
+      "epoch": 0.19432624113475178,
+      "grad_norm": 5.613055229187012,
+      "learning_rate": 1.946520318197378e-05,
+      "loss": 0.2665,
+      "step": 411
+    },
+    {
+      "epoch": 0.19479905437352246,
+      "grad_norm": 7.673485279083252,
+      "learning_rate": 1.9459863871875694e-05,
+      "loss": 0.3718,
+      "step": 412
+    },
+    {
+      "epoch": 0.19527186761229315,
+      "grad_norm": 5.367708683013916,
+      "learning_rate": 1.945449878035029e-05,
+      "loss": 0.1897,
+      "step": 413
+    },
+    {
+      "epoch": 0.19574468085106383,
+      "grad_norm": 7.302910804748535,
+      "learning_rate": 1.9449107922019326e-05,
+      "loss": 0.2457,
+      "step": 414
+    },
+    {
+      "epoch": 0.19621749408983452,
+      "grad_norm": 6.045085906982422,
+      "learning_rate": 1.944369131157476e-05,
+      "loss": 0.2974,
+      "step": 415
+    },
+    {
+      "epoch": 0.1966903073286052,
+      "grad_norm": 7.004913806915283,
+      "learning_rate": 1.9438248963778754e-05,
+      "loss": 0.2723,
+      "step": 416
+    },
+    {
+      "epoch": 0.1971631205673759,
+      "grad_norm": 6.820647716522217,
+      "learning_rate": 1.9432780893463594e-05,
+      "loss": 0.3367,
+      "step": 417
+    },
+    {
+      "epoch": 0.19763593380614658,
+      "grad_norm": 6.083995819091797,
+      "learning_rate": 1.942728711553168e-05,
+      "loss": 0.265,
+      "step": 418
+    },
+    {
+      "epoch": 0.19810874704491727,
+      "grad_norm": 4.812845706939697,
+      "learning_rate": 1.942176764495547e-05,
+      "loss": 0.2567,
+      "step": 419
+    },
+    {
+      "epoch": 0.19858156028368795,
+      "grad_norm": 5.881125450134277,
+      "learning_rate": 1.9416222496777453e-05,
+      "loss": 0.2716,
+      "step": 420
+    },
+    {
+      "epoch": 0.19858156028368795,
+      "eval_accuracy": 0.852549889135255,
+      "eval_f1": 0.6825775656324582,
+      "eval_loss": 0.33116355538368225,
+      "eval_precision": 0.8614457831325302,
+      "eval_recall": 0.5652173913043478,
+      "eval_runtime": 47.258,
+      "eval_samples_per_second": 5.84,
+      "eval_steps_per_second": 0.19,
+      "step": 420
+    },
+    {
+      "epoch": 0.19905437352245864,
+      "grad_norm": 3.609654426574707,
+      "learning_rate": 1.941065168611009e-05,
+      "loss": 0.2053,
+      "step": 421
+    },
+    {
+      "epoch": 0.19952718676122932,
+      "grad_norm": 5.121413230895996,
+      "learning_rate": 1.9405055228135777e-05,
+      "loss": 0.2572,
+      "step": 422
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 6.144900321960449,
+      "learning_rate": 1.9399433138106814e-05,
+      "loss": 0.2689,
+      "step": 423
+    },
+    {
+      "epoch": 0.2004728132387707,
+      "grad_norm": 5.300688743591309,
+      "learning_rate": 1.939378543134536e-05,
+      "loss": 0.3146,
+      "step": 424
+    },
+    {
+      "epoch": 0.20094562647754138,
+      "grad_norm": 8.221715927124023,
+      "learning_rate": 1.9388112123243386e-05,
+      "loss": 0.2843,
+      "step": 425
+    },
+    {
+      "epoch": 0.20141843971631207,
+      "grad_norm": 3.9929358959198,
+      "learning_rate": 1.938241322926263e-05,
+      "loss": 0.2603,
+      "step": 426
+    },
+    {
+      "epoch": 0.20189125295508276,
+      "grad_norm": 4.981365203857422,
+      "learning_rate": 1.937668876493457e-05,
+      "loss": 0.3303,
+      "step": 427
+    },
+    {
+      "epoch": 0.20236406619385341,
+      "grad_norm": 5.623127460479736,
+      "learning_rate": 1.9370938745860362e-05,
+      "loss": 0.2564,
+      "step": 428
+    },
+    {
+      "epoch": 0.2028368794326241,
+      "grad_norm": 5.664045810699463,
+      "learning_rate": 1.9365163187710817e-05,
+      "loss": 0.2749,
+      "step": 429
+    },
+    {
+      "epoch": 0.2033096926713948,
+      "grad_norm": 5.24921178817749,
+      "learning_rate": 1.935936210622634e-05,
+      "loss": 0.2835,
+      "step": 430
+    },
+    {
+      "epoch": 0.20378250591016547,
+      "grad_norm": 8.841286659240723,
+      "learning_rate": 1.9353535517216908e-05,
+      "loss": 0.3114,
+      "step": 431
+    },
+    {
+      "epoch": 0.20425531914893616,
+      "grad_norm": 4.470973491668701,
+      "learning_rate": 1.9347683436562e-05,
+      "loss": 0.2417,
+      "step": 432
+    },
+    {
+      "epoch": 0.20472813238770685,
+      "grad_norm": 7.2563276290893555,
+      "learning_rate": 1.934180588021058e-05,
+      "loss": 0.2727,
+      "step": 433
+    },
+    {
+      "epoch": 0.20520094562647753,
+      "grad_norm": 6.107417106628418,
+      "learning_rate": 1.933590286418104e-05,
+      "loss": 0.2461,
+      "step": 434
+    },
+    {
+      "epoch": 0.20567375886524822,
+      "grad_norm": 10.675925254821777,
+      "learning_rate": 1.932997440456115e-05,
+      "loss": 0.3362,
+      "step": 435
+    },
+    {
+      "epoch": 0.2061465721040189,
+      "grad_norm": 6.831496715545654,
+      "learning_rate": 1.932402051750803e-05,
+      "loss": 0.326,
+      "step": 436
+    },
+    {
+      "epoch": 0.2066193853427896,
+      "grad_norm": 6.8831095695495605,
+      "learning_rate": 1.9318041219248108e-05,
+      "loss": 0.3411,
+      "step": 437
+    },
+    {
+      "epoch": 0.20709219858156028,
+      "grad_norm": 4.773165225982666,
+      "learning_rate": 1.9312036526077055e-05,
+      "loss": 0.2587,
+      "step": 438
+    },
+    {
+      "epoch": 0.20756501182033096,
+      "grad_norm": 7.510995864868164,
+      "learning_rate": 1.930600645435974e-05,
+      "loss": 0.2906,
+      "step": 439
+    },
+    {
+      "epoch": 0.20803782505910165,
+      "grad_norm": 6.88019323348999,
+      "learning_rate": 1.9299951020530226e-05,
+      "loss": 0.3696,
+      "step": 440
+    },
+    {
+      "epoch": 0.20803782505910165,
+      "eval_accuracy": 0.8547671840354767,
+      "eval_f1": 0.7120879120879121,
+      "eval_loss": 0.31960517168045044,
+      "eval_precision": 0.801980198019802,
+      "eval_recall": 0.6403162055335968,
+      "eval_runtime": 47.3456,
+      "eval_samples_per_second": 5.829,
+      "eval_steps_per_second": 0.19,
+      "step": 440
+    },
+    {
+      "epoch": 0.20851063829787234,
+      "grad_norm": 4.385389804840088,
+      "learning_rate": 1.929387024109167e-05,
+      "loss": 0.2451,
+      "step": 441
+    },
+    {
+      "epoch": 0.20898345153664302,
+      "grad_norm": 5.8849077224731445,
+      "learning_rate": 1.9287764132616323e-05,
+      "loss": 0.2734,
+      "step": 442
+    },
+    {
+      "epoch": 0.2094562647754137,
+      "grad_norm": 8.523555755615234,
+      "learning_rate": 1.928163271174546e-05,
+      "loss": 0.3602,
+      "step": 443
+    },
+    {
+      "epoch": 0.2099290780141844,
+      "grad_norm": 8.392167091369629,
+      "learning_rate": 1.927547599518934e-05,
+      "loss": 0.286,
+      "step": 444
+    },
+    {
+      "epoch": 0.21040189125295508,
+      "grad_norm": 6.212944030761719,
+      "learning_rate": 1.9269293999727156e-05,
+      "loss": 0.2083,
+      "step": 445
+    },
+    {
+      "epoch": 0.21087470449172577,
+      "grad_norm": 6.23652982711792,
+      "learning_rate": 1.926308674220701e-05,
+      "loss": 0.259,
+      "step": 446
+    },
+    {
+      "epoch": 0.21134751773049645,
+      "grad_norm": 6.646223545074463,
+      "learning_rate": 1.9256854239545833e-05,
+      "loss": 0.325,
+      "step": 447
+    },
+    {
+      "epoch": 0.21182033096926714,
+      "grad_norm": 10.316300392150879,
+      "learning_rate": 1.925059650872938e-05,
+      "loss": 0.4394,
+      "step": 448
+    },
+    {
+      "epoch": 0.21229314420803783,
+      "grad_norm": 4.443530559539795,
+      "learning_rate": 1.9244313566812138e-05,
+      "loss": 0.2843,
+      "step": 449
+    },
+    {
+      "epoch": 0.2127659574468085,
+      "grad_norm": 5.295090198516846,
+      "learning_rate": 1.923800543091732e-05,
+      "loss": 0.2672,
+      "step": 450
+    },
+    {
+      "epoch": 0.2132387706855792,
+      "grad_norm": 4.4014482498168945,
+      "learning_rate": 1.9231672118236798e-05,
+      "loss": 0.2578,
+      "step": 451
+    },
+    {
+      "epoch": 0.21371158392434988,
+      "grad_norm": 5.532175540924072,
+      "learning_rate": 1.922531364603105e-05,
+      "loss": 0.2718,
+      "step": 452
+    },
+    {
+      "epoch": 0.21418439716312057,
+      "grad_norm": 5.192704200744629,
+      "learning_rate": 1.9218930031629134e-05,
+      "loss": 0.2279,
+      "step": 453
+    },
+    {
+      "epoch": 0.21465721040189126,
+      "grad_norm": 4.8312458992004395,
+      "learning_rate": 1.921252129242863e-05,
+      "loss": 0.3158,
+      "step": 454
+    },
+    {
+      "epoch": 0.21513002364066194,
+      "grad_norm": 4.629521369934082,
+      "learning_rate": 1.9206087445895572e-05,
+      "loss": 0.187,
+      "step": 455
+    },
+    {
+      "epoch": 0.21560283687943263,
+      "grad_norm": 5.664729118347168,
+      "learning_rate": 1.9199628509564455e-05,
+      "loss": 0.2869,
+      "step": 456
+    },
+    {
+      "epoch": 0.21607565011820332,
+      "grad_norm": 6.123085021972656,
+      "learning_rate": 1.9193144501038116e-05,
+      "loss": 0.2455,
+      "step": 457
+    },
+    {
+      "epoch": 0.216548463356974,
+      "grad_norm": 5.794577121734619,
+      "learning_rate": 1.9186635437987746e-05,
+      "loss": 0.2984,
+      "step": 458
+    },
+    {
+      "epoch": 0.2170212765957447,
+      "grad_norm": 5.055876731872559,
+      "learning_rate": 1.9180101338152807e-05,
+      "loss": 0.1974,
+      "step": 459
+    },
+    {
+      "epoch": 0.21749408983451538,
+      "grad_norm": 5.301477432250977,
+      "learning_rate": 1.9173542219341005e-05,
+      "loss": 0.1911,
+      "step": 460
+    },
+    {
+      "epoch": 0.21749408983451538,
+      "eval_accuracy": 0.8425720620842572,
+      "eval_f1": 0.6467661691542289,
+      "eval_loss": 0.34363728761672974,
+      "eval_precision": 0.87248322147651,
+      "eval_recall": 0.5138339920948617,
+      "eval_runtime": 47.279,
+      "eval_samples_per_second": 5.838,
+      "eval_steps_per_second": 0.19,
+      "step": 460
+    },
+    {
+      "epoch": 0.21796690307328606,
+      "grad_norm": 3.5169882774353027,
+      "learning_rate": 1.9166958099428227e-05,
+      "loss": 0.2012,
+      "step": 461
+    },
+    {
+      "epoch": 0.21843971631205675,
+      "grad_norm": 7.913601875305176,
+      "learning_rate": 1.9160348996358484e-05,
+      "loss": 0.3436,
+      "step": 462
+    },
+    {
+      "epoch": 0.21891252955082743,
+      "grad_norm": 6.095242500305176,
+      "learning_rate": 1.9153714928143898e-05,
+      "loss": 0.2419,
+      "step": 463
+    },
+    {
+      "epoch": 0.21938534278959812,
+      "grad_norm": 6.3486104011535645,
+      "learning_rate": 1.914705591286461e-05,
+      "loss": 0.2504,
+      "step": 464
+    },
+    {
+      "epoch": 0.2198581560283688,
+      "grad_norm": 4.352773189544678,
+      "learning_rate": 1.9140371968668767e-05,
+      "loss": 0.211,
+      "step": 465
+    },
+    {
+      "epoch": 0.2203309692671395,
+      "grad_norm": 7.6094889640808105,
+      "learning_rate": 1.9133663113772437e-05,
+      "loss": 0.2995,
+      "step": 466
+    },
+    {
+      "epoch": 0.22080378250591018,
+      "grad_norm": 5.553167819976807,
+      "learning_rate": 1.9126929366459596e-05,
+      "loss": 0.1836,
+      "step": 467
+    },
+    {
+      "epoch": 0.22127659574468084,
+      "grad_norm": 6.153863906860352,
+      "learning_rate": 1.912017074508205e-05,
+      "loss": 0.2946,
+      "step": 468
+    },
+    {
+      "epoch": 0.22174940898345152,
+      "grad_norm": 5.882277011871338,
+      "learning_rate": 1.9113387268059402e-05,
+      "loss": 0.1988,
+      "step": 469
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 8.288911819458008,
+      "learning_rate": 1.910657895387899e-05,
+      "loss": 0.2778,
+      "step": 470
+    },
+    {
+      "epoch": 0.2226950354609929,
+      "grad_norm": 8.619799613952637,
+      "learning_rate": 1.9099745821095842e-05,
+      "loss": 0.2995,
+      "step": 471
+    },
+    {
+      "epoch": 0.22316784869976358,
+      "grad_norm": 4.235837936401367,
+      "learning_rate": 1.909288788833263e-05,
+      "loss": 0.1523,
+      "step": 472
+    },
+    {
+      "epoch": 0.22364066193853427,
+      "grad_norm": 4.533273220062256,
+      "learning_rate": 1.908600517427961e-05,
+      "loss": 0.188,
+      "step": 473
+    },
+    {
+      "epoch": 0.22411347517730495,
+      "grad_norm": 6.656558513641357,
+      "learning_rate": 1.9079097697694578e-05,
+      "loss": 0.2706,
+      "step": 474
+    },
+    {
+      "epoch": 0.22458628841607564,
+      "grad_norm": 7.253176212310791,
+      "learning_rate": 1.9072165477402813e-05,
+      "loss": 0.2533,
+      "step": 475
+    },
+    {
+      "epoch": 0.22505910165484633,
+      "grad_norm": 7.984339237213135,
+      "learning_rate": 1.9065208532297043e-05,
+      "loss": 0.2768,
+      "step": 476
+    },
+    {
+      "epoch": 0.225531914893617,
+      "grad_norm": 5.5995259284973145,
+      "learning_rate": 1.9058226881337356e-05,
+      "loss": 0.2397,
+      "step": 477
+    },
+    {
+      "epoch": 0.2260047281323877,
+      "grad_norm": 8.392828941345215,
+      "learning_rate": 1.9051220543551193e-05,
+      "loss": 0.235,
+      "step": 478
+    },
+    {
+      "epoch": 0.2264775413711584,
+      "grad_norm": 5.6677422523498535,
+      "learning_rate": 1.9044189538033264e-05,
+      "loss": 0.2468,
+      "step": 479
+    },
+    {
+      "epoch": 0.22695035460992907,
+      "grad_norm": 5.317750930786133,
+      "learning_rate": 1.903713388394551e-05,
+      "loss": 0.2548,
+      "step": 480
+    },
+    {
+      "epoch": 0.22695035460992907,
+      "eval_accuracy": 0.852549889135255,
+      "eval_f1": 0.6795180722891566,
+      "eval_loss": 0.33114132285118103,
+      "eval_precision": 0.8703703703703703,
+      "eval_recall": 0.5573122529644269,
+      "eval_runtime": 48.0304,
+      "eval_samples_per_second": 5.746,
+      "eval_steps_per_second": 0.187,
+      "step": 480
+    },
+    {
+      "epoch": 0.22742316784869976,
+      "grad_norm": 6.943238258361816,
+      "learning_rate": 1.9030053600517053e-05,
+      "loss": 0.2657,
+      "step": 481
+    },
+    {
+      "epoch": 0.22789598108747045,
+      "grad_norm": 5.950002193450928,
+      "learning_rate": 1.902294870704413e-05,
+      "loss": 0.2934,
+      "step": 482
+    },
+    {
+      "epoch": 0.22836879432624113,
+      "grad_norm": 4.295022964477539,
+      "learning_rate": 1.901581922289005e-05,
+      "loss": 0.2139,
+      "step": 483
+    },
+    {
+      "epoch": 0.22884160756501182,
+      "grad_norm": 5.326821804046631,
+      "learning_rate": 1.9008665167485154e-05,
+      "loss": 0.2407,
+      "step": 484
+    },
+    {
+      "epoch": 0.2293144208037825,
+      "grad_norm": 6.2555036544799805,
+      "learning_rate": 1.9001486560326724e-05,
+      "loss": 0.2723,
+      "step": 485
+    },
+    {
+      "epoch": 0.2297872340425532,
+      "grad_norm": 6.824589729309082,
+      "learning_rate": 1.8994283420978975e-05,
+      "loss": 0.3014,
+      "step": 486
+    },
+    {
+      "epoch": 0.23026004728132388,
+      "grad_norm": 5.3086628913879395,
+      "learning_rate": 1.8987055769072973e-05,
+      "loss": 0.245,
+      "step": 487
+    },
+    {
+      "epoch": 0.23073286052009456,
+      "grad_norm": 5.174909591674805,
+      "learning_rate": 1.8979803624306585e-05,
+      "loss": 0.3507,
+      "step": 488
+    },
+    {
+      "epoch": 0.23120567375886525,
+      "grad_norm": 4.394033908843994,
+      "learning_rate": 1.897252700644444e-05,
+      "loss": 0.2182,
+      "step": 489
+    },
+    {
+      "epoch": 0.23167848699763594,
+      "grad_norm": 5.750499248504639,
+      "learning_rate": 1.8965225935317854e-05,
+      "loss": 0.2635,
+      "step": 490
+    },
+    {
+      "epoch": 0.23215130023640662,
+      "grad_norm": 5.786622047424316,
+      "learning_rate": 1.8957900430824793e-05,
+      "loss": 0.2483,
+      "step": 491
+    },
+    {
+      "epoch": 0.2326241134751773,
+      "grad_norm": 8.868030548095703,
+      "learning_rate": 1.895055051292981e-05,
+      "loss": 0.3858,
+      "step": 492
+    },
+    {
+      "epoch": 0.233096926713948,
+      "grad_norm": 5.2425408363342285,
+      "learning_rate": 1.8943176201664e-05,
+      "loss": 0.2107,
+      "step": 493
+    },
+    {
+      "epoch": 0.23356973995271868,
+      "grad_norm": 4.864797115325928,
+      "learning_rate": 1.8935777517124923e-05,
+      "loss": 0.3114,
+      "step": 494
+    },
+    {
+      "epoch": 0.23404255319148937,
+      "grad_norm": 6.035416603088379,
+      "learning_rate": 1.8928354479476577e-05,
+      "loss": 0.2315,
+      "step": 495
+    },
+    {
+      "epoch": 0.23451536643026005,
+      "grad_norm": 5.7510857582092285,
+      "learning_rate": 1.8920907108949335e-05,
+      "loss": 0.2423,
+      "step": 496
+    },
+    {
+      "epoch": 0.23498817966903074,
+      "grad_norm": 4.192838668823242,
+      "learning_rate": 1.8913435425839865e-05,
+      "loss": 0.2874,
+      "step": 497
+    },
+    {
+      "epoch": 0.23546099290780143,
+      "grad_norm": 5.289735317230225,
+      "learning_rate": 1.8905939450511117e-05,
+      "loss": 0.2984,
+      "step": 498
+    },
+    {
+      "epoch": 0.2359338061465721,
+      "grad_norm": 7.9628400802612305,
+      "learning_rate": 1.889841920339224e-05,
+      "loss": 0.3416,
+      "step": 499
+    },
+    {
+      "epoch": 0.2364066193853428,
+      "grad_norm": 4.809371471405029,
+      "learning_rate": 1.889087470497852e-05,
+      "loss": 0.2501,
+      "step": 500
+    },
+    {
+      "epoch": 0.2364066193853428,
+      "eval_accuracy": 0.8481152993348116,
+      "eval_f1": 0.6666666666666666,
+      "eval_loss": 0.3237150013446808,
+      "eval_precision": 0.8670886075949367,
+      "eval_recall": 0.541501976284585,
+      "eval_runtime": 47.4742,
+      "eval_samples_per_second": 5.814,
+      "eval_steps_per_second": 0.19,
+      "step": 500
+    },
+    {
+      "epoch": 0.23687943262411348,
+      "grad_norm": 4.674736499786377,
+      "learning_rate": 1.8883305975831357e-05,
+      "loss": 0.1875,
+      "step": 501
+    },
+    {
+      "epoch": 0.23735224586288417,
+      "grad_norm": 3.8804233074188232,
+      "learning_rate": 1.8875713036578168e-05,
+      "loss": 0.184,
+      "step": 502
+    },
+    {
+      "epoch": 0.23782505910165486,
+      "grad_norm": 6.960356712341309,
+      "learning_rate": 1.886809590791236e-05,
+      "loss": 0.2407,
+      "step": 503
+    },
+    {
+      "epoch": 0.23829787234042554,
+      "grad_norm": 6.6463823318481445,
+      "learning_rate": 1.886045461059327e-05,
+      "loss": 0.2633,
+      "step": 504
+    },
+    {
+      "epoch": 0.23877068557919623,
+      "grad_norm": 4.191521167755127,
+      "learning_rate": 1.8852789165446094e-05,
+      "loss": 0.218,
+      "step": 505
+    },
+    {
+      "epoch": 0.23924349881796692,
+      "grad_norm": 5.596932411193848,
+      "learning_rate": 1.8845099593361844e-05,
+      "loss": 0.2609,
+      "step": 506
+    },
+    {
+      "epoch": 0.2397163120567376,
+      "grad_norm": 4.571609973907471,
+      "learning_rate": 1.883738591529728e-05,
+      "loss": 0.1815,
+      "step": 507
+    },
+    {
+      "epoch": 0.2401891252955083,
+      "grad_norm": 5.630218505859375,
+      "learning_rate": 1.8829648152274872e-05,
+      "loss": 0.2727,
+      "step": 508
+    },
+    {
+      "epoch": 0.24066193853427895,
+      "grad_norm": 8.154533386230469,
+      "learning_rate": 1.8821886325382718e-05,
+      "loss": 0.3277,
+      "step": 509
+    },
+    {
+      "epoch": 0.24113475177304963,
+      "grad_norm": 5.654308795928955,
+      "learning_rate": 1.8814100455774504e-05,
+      "loss": 0.2938,
+      "step": 510
+    },
+    {
+      "epoch": 0.24160756501182032,
+      "grad_norm": 6.1714677810668945,
+      "learning_rate": 1.8806290564669435e-05,
+      "loss": 0.2314,
+      "step": 511
+    },
+    {
+      "epoch": 0.242080378250591,
+      "grad_norm": 5.192854404449463,
+      "learning_rate": 1.879845667335219e-05,
+      "loss": 0.2489,
+      "step": 512
+    },
+    {
+      "epoch": 0.2425531914893617,
+      "grad_norm": 7.861010551452637,
+      "learning_rate": 1.8790598803172857e-05,
+      "loss": 0.2815,
+      "step": 513
+    },
+    {
+      "epoch": 0.24302600472813238,
+      "grad_norm": 6.233393669128418,
+      "learning_rate": 1.878271697554687e-05,
+      "loss": 0.2584,
+      "step": 514
+    },
+    {
+      "epoch": 0.24349881796690306,
+      "grad_norm": 6.324631690979004,
+      "learning_rate": 1.8774811211954954e-05,
+      "loss": 0.3333,
+      "step": 515
+    },
+    {
+      "epoch": 0.24397163120567375,
+      "grad_norm": 7.769336700439453,
+      "learning_rate": 1.8766881533943074e-05,
+      "loss": 0.2968,
+      "step": 516
+    },
+    {
+      "epoch": 0.24444444444444444,
+      "grad_norm": 6.459347724914551,
+      "learning_rate": 1.875892796312237e-05,
+      "loss": 0.2193,
+      "step": 517
+    },
+    {
+      "epoch": 0.24491725768321512,
+      "grad_norm": 4.615235805511475,
+      "learning_rate": 1.875095052116909e-05,
+      "loss": 0.2396,
+      "step": 518
+    },
+    {
+      "epoch": 0.2453900709219858,
+      "grad_norm": 4.699162483215332,
+      "learning_rate": 1.874294922982455e-05,
+      "loss": 0.2726,
+      "step": 519
+    },
+    {
+      "epoch": 0.2458628841607565,
+      "grad_norm": 6.707888126373291,
+      "learning_rate": 1.8734924110895056e-05,
+      "loss": 0.2936,
+      "step": 520
+    },
+    {
+      "epoch": 0.2458628841607565,
+      "eval_accuracy": 0.835920177383592,
+      "eval_f1": 0.6205128205128205,
+      "eval_loss": 0.3496428430080414,
+      "eval_precision": 0.8832116788321168,
+      "eval_recall": 0.4782608695652174,
+      "eval_runtime": 49.5579,
+      "eval_samples_per_second": 5.569,
+      "eval_steps_per_second": 0.182,
+      "step": 520
+    },
+    {
+      "epoch": 0.24633569739952718,
+      "grad_norm": 6.290736198425293,
+      "learning_rate": 1.8726875186251856e-05,
+      "loss": 0.2605,
+      "step": 521
+    },
+    {
+      "epoch": 0.24680851063829787,
+      "grad_norm": 5.97813606262207,
+      "learning_rate": 1.8718802477831072e-05,
+      "loss": 0.257,
+      "step": 522
+    },
+    {
+      "epoch": 0.24728132387706855,
+      "grad_norm": 5.485353469848633,
+      "learning_rate": 1.8710706007633654e-05,
+      "loss": 0.2114,
+      "step": 523
+    },
+    {
+      "epoch": 0.24775413711583924,
+      "grad_norm": 4.747553825378418,
+      "learning_rate": 1.8702585797725308e-05,
+      "loss": 0.2579,
+      "step": 524
+    },
+    {
+      "epoch": 0.24822695035460993,
+      "grad_norm": 5.752557754516602,
+      "learning_rate": 1.869444187023643e-05,
+      "loss": 0.2706,
+      "step": 525
+    },
+    {
+      "epoch": 0.2486997635933806,
+      "grad_norm": 5.672857761383057,
+      "learning_rate": 1.8686274247362067e-05,
+      "loss": 0.23,
+      "step": 526
+    },
+    {
+      "epoch": 0.2491725768321513,
+      "grad_norm": 5.739321708679199,
+      "learning_rate": 1.8678082951361837e-05,
+      "loss": 0.2274,
+      "step": 527
+    },
+    {
+      "epoch": 0.24964539007092199,
+      "grad_norm": 8.147102355957031,
+      "learning_rate": 1.8669868004559878e-05,
+      "loss": 0.2682,
+      "step": 528
+    },
+    {
+      "epoch": 0.25011820330969264,
+      "grad_norm": 4.3418169021606445,
+      "learning_rate": 1.8661629429344782e-05,
+      "loss": 0.2552,
+      "step": 529
+    },
+    {
+      "epoch": 0.25059101654846333,
+      "grad_norm": 5.724131107330322,
+      "learning_rate": 1.8653367248169547e-05,
+      "loss": 0.2912,
+      "step": 530
+    },
+    {
+      "epoch": 0.251063829787234,
+      "grad_norm": 5.735341548919678,
+      "learning_rate": 1.864508148355149e-05,
+      "loss": 0.2843,
+      "step": 531
+    },
+    {
+      "epoch": 0.2515366430260047,
+      "grad_norm": 4.442595481872559,
+      "learning_rate": 1.863677215807221e-05,
+      "loss": 0.2334,
+      "step": 532
+    },
+    {
+      "epoch": 0.2520094562647754,
+      "grad_norm": 6.313594341278076,
+      "learning_rate": 1.862843929437751e-05,
+      "loss": 0.3263,
+      "step": 533
+    },
+    {
+      "epoch": 0.2524822695035461,
+      "grad_norm": 9.929341316223145,
+      "learning_rate": 1.8620082915177363e-05,
+      "loss": 0.2992,
+      "step": 534
+    },
+    {
+      "epoch": 0.25295508274231676,
+      "grad_norm": 4.474004745483398,
+      "learning_rate": 1.8611703043245807e-05,
+      "loss": 0.2582,
+      "step": 535
+    },
+    {
+      "epoch": 0.25342789598108745,
+      "grad_norm": 6.404626369476318,
+      "learning_rate": 1.8603299701420915e-05,
+      "loss": 0.2724,
+      "step": 536
+    },
+    {
+      "epoch": 0.25390070921985813,
+      "grad_norm": 5.970371723175049,
+      "learning_rate": 1.8594872912604723e-05,
+      "loss": 0.3189,
+      "step": 537
+    },
+    {
+      "epoch": 0.2543735224586288,
+      "grad_norm": 6.132597923278809,
+      "learning_rate": 1.858642269976317e-05,
+      "loss": 0.2448,
+      "step": 538
+    },
+    {
+      "epoch": 0.2548463356973995,
+      "grad_norm": 4.322042942047119,
+      "learning_rate": 1.8577949085926032e-05,
+      "loss": 0.1853,
+      "step": 539
+    },
+    {
+      "epoch": 0.2553191489361702,
+      "grad_norm": 4.78247594833374,
+      "learning_rate": 1.8569452094186863e-05,
+      "loss": 0.2012,
+      "step": 540
+    },
+    {
+      "epoch": 0.2553191489361702,
+      "eval_accuracy": 0.8403547671840355,
+      "eval_f1": 0.6470588235294118,
+      "eval_loss": 0.3362201750278473,
+      "eval_precision": 0.8516129032258064,
+      "eval_recall": 0.5217391304347826,
+      "eval_runtime": 48.1646,
+      "eval_samples_per_second": 5.73,
+      "eval_steps_per_second": 0.187,
+      "step": 540
+    },
+    {
+      "epoch": 0.2557919621749409,
+      "grad_norm": 5.21437406539917,
+      "learning_rate": 1.8560931747702924e-05,
+      "loss": 0.2784,
+      "step": 541
+    },
+    {
+      "epoch": 0.25626477541371157,
+      "grad_norm": 6.169255256652832,
+      "learning_rate": 1.855238806969513e-05,
+      "loss": 0.3128,
+      "step": 542
+    },
+    {
+      "epoch": 0.25673758865248225,
+      "grad_norm": 5.7340874671936035,
+      "learning_rate": 1.854382108344799e-05,
+      "loss": 0.2696,
+      "step": 543
+    },
+    {
+      "epoch": 0.25721040189125294,
+      "grad_norm": 7.384251594543457,
+      "learning_rate": 1.853523081230952e-05,
+      "loss": 0.2896,
+      "step": 544
+    },
+    {
+      "epoch": 0.2576832151300236,
+      "grad_norm": 7.609536170959473,
+      "learning_rate": 1.8526617279691207e-05,
+      "loss": 0.276,
+      "step": 545
+    },
+    {
+      "epoch": 0.2581560283687943,
+      "grad_norm": 5.231773853302002,
+      "learning_rate": 1.8517980509067926e-05,
+      "loss": 0.2765,
+      "step": 546
+    },
+    {
+      "epoch": 0.258628841607565,
+      "grad_norm": 4.057672023773193,
+      "learning_rate": 1.8509320523977895e-05,
+      "loss": 0.1932,
+      "step": 547
+    },
+    {
+      "epoch": 0.2591016548463357,
+      "grad_norm": 6.7243452072143555,
+      "learning_rate": 1.8500637348022594e-05,
+      "loss": 0.1775,
+      "step": 548
+    },
+    {
+      "epoch": 0.25957446808510637,
+      "grad_norm": 5.530993461608887,
+      "learning_rate": 1.84919310048667e-05,
+      "loss": 0.2299,
+      "step": 549
+    },
+    {
+      "epoch": 0.26004728132387706,
+      "grad_norm": 4.74931001663208,
+      "learning_rate": 1.8483201518238032e-05,
+      "loss": 0.1902,
+      "step": 550
+    },
+    {
+      "epoch": 0.26052009456264774,
+      "grad_norm": 6.366207122802734,
+      "learning_rate": 1.847444891192749e-05,
+      "loss": 0.3109,
+      "step": 551
+    },
+    {
+      "epoch": 0.26099290780141843,
+      "grad_norm": 8.735250473022461,
+      "learning_rate": 1.8465673209788975e-05,
+      "loss": 0.32,
+      "step": 552
+    },
+    {
+      "epoch": 0.2614657210401891,
+      "grad_norm": 5.171599864959717,
+      "learning_rate": 1.8456874435739337e-05,
+      "loss": 0.2245,
+      "step": 553
+    },
+    {
+      "epoch": 0.2619385342789598,
+      "grad_norm": 6.582614898681641,
+      "learning_rate": 1.8448052613758297e-05,
+      "loss": 0.2419,
+      "step": 554
+    },
+    {
+      "epoch": 0.2624113475177305,
+      "grad_norm": 5.810616970062256,
+      "learning_rate": 1.84392077678884e-05,
+      "loss": 0.2402,
+      "step": 555
+    },
+    {
+      "epoch": 0.2628841607565012,
+      "grad_norm": 6.7433271408081055,
+      "learning_rate": 1.843033992223494e-05,
+      "loss": 0.2887,
+      "step": 556
+    },
+    {
+      "epoch": 0.26335697399527186,
+      "grad_norm": 3.9062905311584473,
+      "learning_rate": 1.8421449100965884e-05,
+      "loss": 0.1842,
+      "step": 557
+    },
+    {
+      "epoch": 0.26382978723404255,
+      "grad_norm": 5.2100749015808105,
+      "learning_rate": 1.8412535328311813e-05,
+      "loss": 0.2191,
+      "step": 558
+    },
+    {
+      "epoch": 0.26430260047281323,
+      "grad_norm": 4.192863941192627,
+      "learning_rate": 1.8403598628565876e-05,
+      "loss": 0.1958,
+      "step": 559
+    },
+    {
+      "epoch": 0.2647754137115839,
+      "grad_norm": 8.32767105102539,
+      "learning_rate": 1.839463902608369e-05,
+      "loss": 0.3295,
+      "step": 560
+    },
+    {
+      "epoch": 0.2647754137115839,
+      "eval_accuracy": 0.8492239467849224,
+      "eval_f1": 0.6777251184834123,
+      "eval_loss": 0.3414818048477173,
+      "eval_precision": 0.8461538461538461,
+      "eval_recall": 0.5652173913043478,
+      "eval_runtime": 47.9942,
+      "eval_samples_per_second": 5.751,
+      "eval_steps_per_second": 0.188,
+      "step": 560
+    },
+    {
+      "epoch": 0.2652482269503546,
+      "grad_norm": 7.091763019561768,
+      "learning_rate": 1.8385656545283296e-05,
+      "loss": 0.3177,
+      "step": 561
+    },
+    {
+      "epoch": 0.2657210401891253,
+      "grad_norm": 4.874910354614258,
+      "learning_rate": 1.8376651210645085e-05,
+      "loss": 0.255,
+      "step": 562
+    },
+    {
+      "epoch": 0.266193853427896,
+      "grad_norm": 5.0358710289001465,
+      "learning_rate": 1.836762304671174e-05,
+      "loss": 0.2031,
+      "step": 563
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 6.5243330001831055,
+      "learning_rate": 1.8358572078088144e-05,
+      "loss": 0.2583,
+      "step": 564
+    },
+    {
+      "epoch": 0.26713947990543735,
+      "grad_norm": 5.2152862548828125,
+      "learning_rate": 1.8349498329441355e-05,
+      "loss": 0.191,
+      "step": 565
+    },
+    {
+      "epoch": 0.26761229314420804,
+      "grad_norm": 4.507115840911865,
+      "learning_rate": 1.8340401825500496e-05,
+      "loss": 0.1649,
+      "step": 566
+    },
+    {
+      "epoch": 0.2680851063829787,
+      "grad_norm": 4.836716651916504,
+      "learning_rate": 1.833128259105671e-05,
+      "loss": 0.2041,
+      "step": 567
+    },
+    {
+      "epoch": 0.2685579196217494,
+      "grad_norm": 5.212822437286377,
+      "learning_rate": 1.832214065096309e-05,
+      "loss": 0.2043,
+      "step": 568
+    },
+    {
+      "epoch": 0.2690307328605201,
+      "grad_norm": 9.472026824951172,
+      "learning_rate": 1.8312976030134613e-05,
+      "loss": 0.377,
+      "step": 569
+    },
+    {
+      "epoch": 0.2695035460992908,
+      "grad_norm": 7.958829879760742,
+      "learning_rate": 1.8303788753548065e-05,
+      "loss": 0.2281,
+      "step": 570
+    },
+    {
+      "epoch": 0.26997635933806147,
+      "grad_norm": 6.508796215057373,
+      "learning_rate": 1.829457884624198e-05,
+      "loss": 0.1819,
+      "step": 571
+    },
+    {
+      "epoch": 0.27044917257683215,
+      "grad_norm": 7.919535160064697,
+      "learning_rate": 1.8285346333316564e-05,
+      "loss": 0.2852,
+      "step": 572
+    },
+    {
+      "epoch": 0.27092198581560284,
+      "grad_norm": 5.031895637512207,
+      "learning_rate": 1.8276091239933634e-05,
+      "loss": 0.1962,
+      "step": 573
+    },
+    {
+      "epoch": 0.2713947990543735,
+      "grad_norm": 5.484118461608887,
+      "learning_rate": 1.8266813591316548e-05,
+      "loss": 0.2812,
+      "step": 574
+    },
+    {
+      "epoch": 0.2718676122931442,
+      "grad_norm": 6.0553460121154785,
+      "learning_rate": 1.825751341275013e-05,
+      "loss": 0.2536,
+      "step": 575
+    },
+    {
+      "epoch": 0.2723404255319149,
+      "grad_norm": 6.331978797912598,
+      "learning_rate": 1.8248190729580613e-05,
+      "loss": 0.2043,
+      "step": 576
+    },
+    {
+      "epoch": 0.2728132387706856,
+      "grad_norm": 5.793010711669922,
+      "learning_rate": 1.8238845567215554e-05,
+      "loss": 0.2921,
+      "step": 577
+    },
+    {
+      "epoch": 0.27328605200945627,
+      "grad_norm": 8.16348648071289,
+      "learning_rate": 1.8229477951123785e-05,
+      "loss": 0.3131,
+      "step": 578
+    },
+    {
+      "epoch": 0.27375886524822696,
+      "grad_norm": 4.672780513763428,
+      "learning_rate": 1.822008790683532e-05,
+      "loss": 0.2961,
+      "step": 579
+    },
+    {
+      "epoch": 0.27423167848699764,
+      "grad_norm": 5.705632209777832,
+      "learning_rate": 1.8210675459941306e-05,
+      "loss": 0.2859,
+      "step": 580
+    },
+    {
+      "epoch": 0.27423167848699764,
+      "eval_accuracy": 0.843680709534368,
+      "eval_f1": 0.6501240694789082,
+      "eval_loss": 0.336950421333313,
+      "eval_precision": 0.8733333333333333,
+      "eval_recall": 0.5177865612648221,
+      "eval_runtime": 48.0605,
+      "eval_samples_per_second": 5.743,
+      "eval_steps_per_second": 0.187,
+      "step": 580
+    },
+    {
+      "epoch": 0.27470449172576833,
+      "grad_norm": 6.904634952545166,
+      "learning_rate": 1.8201240636093948e-05,
+      "loss": 0.2677,
+      "step": 581
+    },
+    {
+      "epoch": 0.275177304964539,
+      "grad_norm": 6.0656328201293945,
+      "learning_rate": 1.819178346100642e-05,
+      "loss": 0.3325,
+      "step": 582
+    },
+    {
+      "epoch": 0.2756501182033097,
+      "grad_norm": 4.885197162628174,
+      "learning_rate": 1.8182303960452826e-05,
+      "loss": 0.2458,
+      "step": 583
+    },
+    {
+      "epoch": 0.2761229314420804,
+      "grad_norm": 5.368473052978516,
+      "learning_rate": 1.8172802160268116e-05,
+      "loss": 0.2929,
+      "step": 584
+    },
+    {
+      "epoch": 0.2765957446808511,
+      "grad_norm": 4.78055477142334,
+      "learning_rate": 1.8163278086347998e-05,
+      "loss": 0.2534,
+      "step": 585
+    },
+    {
+      "epoch": 0.27706855791962176,
+      "grad_norm": 8.146903038024902,
+      "learning_rate": 1.8153731764648907e-05,
+      "loss": 0.2733,
+      "step": 586
+    },
+    {
+      "epoch": 0.27754137115839245,
+      "grad_norm": 3.793304681777954,
+      "learning_rate": 1.8144163221187882e-05,
+      "loss": 0.2232,
+      "step": 587
+    },
+    {
+      "epoch": 0.27801418439716313,
+      "grad_norm": 4.330966949462891,
+      "learning_rate": 1.8134572482042555e-05,
+      "loss": 0.2709,
+      "step": 588
+    },
+    {
+      "epoch": 0.2784869976359338,
+      "grad_norm": 3.585026264190674,
+      "learning_rate": 1.8124959573351023e-05,
+      "loss": 0.1779,
+      "step": 589
+    },
+    {
+      "epoch": 0.2789598108747045,
+      "grad_norm": 5.1665754318237305,
+      "learning_rate": 1.8115324521311823e-05,
+      "loss": 0.2599,
+      "step": 590
+    },
+    {
+      "epoch": 0.2794326241134752,
+      "grad_norm": 4.275265216827393,
+      "learning_rate": 1.8105667352183823e-05,
+      "loss": 0.1805,
+      "step": 591
+    },
+    {
+      "epoch": 0.2799054373522459,
+      "grad_norm": 5.761347770690918,
+      "learning_rate": 1.809598809228618e-05,
+      "loss": 0.2043,
+      "step": 592
+    },
+    {
+      "epoch": 0.28037825059101656,
+      "grad_norm": 4.9206647872924805,
+      "learning_rate": 1.8086286767998253e-05,
+      "loss": 0.2351,
+      "step": 593
+    },
+    {
+      "epoch": 0.28085106382978725,
+      "grad_norm": 5.019208908081055,
+      "learning_rate": 1.807656340575953e-05,
+      "loss": 0.234,
+      "step": 594
+    },
+    {
+      "epoch": 0.28132387706855794,
+      "grad_norm": 6.9271626472473145,
+      "learning_rate": 1.8066818032069566e-05,
+      "loss": 0.3302,
+      "step": 595
+    },
+    {
+      "epoch": 0.2817966903073286,
+      "grad_norm": 6.962562084197998,
+      "learning_rate": 1.80570506734879e-05,
+      "loss": 0.265,
+      "step": 596
+    },
+    {
+      "epoch": 0.2822695035460993,
+      "grad_norm": 6.923299789428711,
+      "learning_rate": 1.804726135663399e-05,
+      "loss": 0.27,
+      "step": 597
+    },
+    {
+      "epoch": 0.28274231678487,
+      "grad_norm": 7.9528422355651855,
+      "learning_rate": 1.803745010818714e-05,
+      "loss": 0.2644,
+      "step": 598
+    },
+    {
+      "epoch": 0.2832151300236407,
+      "grad_norm": 6.256555557250977,
+      "learning_rate": 1.802761695488642e-05,
+      "loss": 0.296,
+      "step": 599
+    },
+    {
+      "epoch": 0.28368794326241137,
+      "grad_norm": 8.758502006530762,
+      "learning_rate": 1.8017761923530602e-05,
+      "loss": 0.2655,
+      "step": 600
+    },
+    {
+      "epoch": 0.28368794326241137,
+      "eval_accuracy": 0.8492239467849224,
+      "eval_f1": 0.6822429906542056,
+      "eval_loss": 0.32478421926498413,
+      "eval_precision": 0.8342857142857143,
+      "eval_recall": 0.5770750988142292,
+      "eval_runtime": 48.6007,
+      "eval_samples_per_second": 5.679,
+      "eval_steps_per_second": 0.185,
+      "step": 600
+    },
+    {
+      "epoch": 0.28416075650118205,
+      "grad_norm": 6.9206929206848145,
+      "learning_rate": 1.8007885040978078e-05,
+      "loss": 0.2534,
+      "step": 601
+    },
+    {
+      "epoch": 0.28463356973995274,
+      "grad_norm": 4.878746509552002,
+      "learning_rate": 1.7997986334146808e-05,
+      "loss": 0.2592,
+      "step": 602
+    },
+    {
+      "epoch": 0.2851063829787234,
+      "grad_norm": 6.951952934265137,
+      "learning_rate": 1.798806583001421e-05,
+      "loss": 0.3073,
+      "step": 603
+    },
+    {
+      "epoch": 0.2855791962174941,
+      "grad_norm": 6.235677242279053,
+      "learning_rate": 1.7978123555617116e-05,
+      "loss": 0.2217,
+      "step": 604
+    },
+    {
+      "epoch": 0.2860520094562648,
+      "grad_norm": 5.96851110458374,
+      "learning_rate": 1.7968159538051703e-05,
+      "loss": 0.3361,
+      "step": 605
+    },
+    {
+      "epoch": 0.2865248226950355,
+      "grad_norm": 7.12925386428833,
+      "learning_rate": 1.7958173804473373e-05,
+      "loss": 0.2454,
+      "step": 606
+    },
+    {
+      "epoch": 0.28699763593380617,
+      "grad_norm": 5.562047481536865,
+      "learning_rate": 1.7948166382096744e-05,
+      "loss": 0.2518,
+      "step": 607
+    },
+    {
+      "epoch": 0.28747044917257686,
+      "grad_norm": 5.448677062988281,
+      "learning_rate": 1.793813729819553e-05,
+      "loss": 0.241,
+      "step": 608
+    },
+    {
+      "epoch": 0.28794326241134754,
+      "grad_norm": 6.051489353179932,
+      "learning_rate": 1.7928086580102485e-05,
+      "loss": 0.2748,
+      "step": 609
+    },
+    {
+      "epoch": 0.28841607565011823,
+      "grad_norm": 5.5659661293029785,
+      "learning_rate": 1.791801425520931e-05,
+      "loss": 0.2435,
+      "step": 610
+    },
+    {
+      "epoch": 0.28888888888888886,
+      "grad_norm": 6.0021071434021,
+      "learning_rate": 1.790792035096661e-05,
+      "loss": 0.2785,
+      "step": 611
+    },
+    {
+      "epoch": 0.28936170212765955,
+      "grad_norm": 6.288322925567627,
+      "learning_rate": 1.789780489488379e-05,
+      "loss": 0.2959,
+      "step": 612
+    },
+    {
+      "epoch": 0.28983451536643023,
+      "grad_norm": 5.30917501449585,
+      "learning_rate": 1.7887667914528996e-05,
+      "loss": 0.1903,
+      "step": 613
+    },
+    {
+      "epoch": 0.2903073286052009,
+      "grad_norm": 5.338979721069336,
+      "learning_rate": 1.7877509437529032e-05,
+      "loss": 0.2522,
+      "step": 614
+    },
+    {
+      "epoch": 0.2907801418439716,
+      "grad_norm": 9.307381629943848,
+      "learning_rate": 1.7867329491569293e-05,
+      "loss": 0.3809,
+      "step": 615
+    },
+    {
+      "epoch": 0.2912529550827423,
+      "grad_norm": 8.209502220153809,
+      "learning_rate": 1.785712810439368e-05,
+      "loss": 0.3395,
+      "step": 616
+    },
+    {
+      "epoch": 0.291725768321513,
+      "grad_norm": 4.827156066894531,
+      "learning_rate": 1.7846905303804525e-05,
+      "loss": 0.2298,
+      "step": 617
+    },
+    {
+      "epoch": 0.29219858156028367,
+      "grad_norm": 3.754410743713379,
+      "learning_rate": 1.783666111766253e-05,
+      "loss": 0.149,
+      "step": 618
+    },
+    {
+      "epoch": 0.29267139479905435,
+      "grad_norm": 5.818631649017334,
+      "learning_rate": 1.782639557388667e-05,
+      "loss": 0.3478,
+      "step": 619
+    },
+    {
+      "epoch": 0.29314420803782504,
+      "grad_norm": 4.670815467834473,
+      "learning_rate": 1.781610870045414e-05,
+      "loss": 0.2646,
+      "step": 620
+    },
+    {
+      "epoch": 0.29314420803782504,
+      "eval_accuracy": 0.8481152993348116,
+      "eval_f1": 0.6682808716707022,
+      "eval_loss": 0.3289998471736908,
+      "eval_precision": 0.8625,
+      "eval_recall": 0.5454545454545454,
+      "eval_runtime": 47.6704,
+      "eval_samples_per_second": 5.79,
+      "eval_steps_per_second": 0.189,
+      "step": 620
+    },
+    {
+      "epoch": 0.2936170212765957,
+      "grad_norm": 4.68503475189209,
+      "learning_rate": 1.780580052540024e-05,
+      "loss": 0.2216,
+      "step": 621
+    },
+    {
+      "epoch": 0.2940898345153664,
+      "grad_norm": 8.284623146057129,
+      "learning_rate": 1.7795471076818356e-05,
+      "loss": 0.3633,
+      "step": 622
+    },
+    {
+      "epoch": 0.2945626477541371,
+      "grad_norm": 4.425292015075684,
+      "learning_rate": 1.7785120382859832e-05,
+      "loss": 0.2103,
+      "step": 623
+    },
+    {
+      "epoch": 0.2950354609929078,
+      "grad_norm": 5.457613468170166,
+      "learning_rate": 1.7774748471733915e-05,
+      "loss": 0.2615,
+      "step": 624
+    },
+    {
+      "epoch": 0.29550827423167847,
+      "grad_norm": 7.534078598022461,
+      "learning_rate": 1.776435537170768e-05,
+      "loss": 0.2994,
+      "step": 625
+    },
+    {
+      "epoch": 0.29598108747044916,
+      "grad_norm": 5.263175010681152,
+      "learning_rate": 1.7753941111105954e-05,
+      "loss": 0.221,
+      "step": 626
+    },
+    {
+      "epoch": 0.29645390070921984,
+      "grad_norm": 6.045238971710205,
+      "learning_rate": 1.7743505718311218e-05,
+      "loss": 0.2957,
+      "step": 627
+    },
+    {
+      "epoch": 0.29692671394799053,
+      "grad_norm": 6.148013591766357,
+      "learning_rate": 1.7733049221763565e-05,
+      "loss": 0.2611,
+      "step": 628
+    },
+    {
+      "epoch": 0.2973995271867612,
+      "grad_norm": 5.272988796234131,
+      "learning_rate": 1.772257164996059e-05,
+      "loss": 0.2432,
+      "step": 629
+    },
+    {
+      "epoch": 0.2978723404255319,
+      "grad_norm": 4.126927375793457,
+      "learning_rate": 1.7712073031457332e-05,
+      "loss": 0.2488,
+      "step": 630
+    },
+    {
+      "epoch": 0.2983451536643026,
+      "grad_norm": 5.052836894989014,
+      "learning_rate": 1.770155339486618e-05,
+      "loss": 0.3185,
+      "step": 631
+    },
+    {
+      "epoch": 0.2988179669030733,
+      "grad_norm": 6.2589240074157715,
+      "learning_rate": 1.7691012768856817e-05,
+      "loss": 0.2974,
+      "step": 632
+    },
+    {
+      "epoch": 0.29929078014184396,
+      "grad_norm": 9.090741157531738,
+      "learning_rate": 1.7680451182156123e-05,
+      "loss": 0.3296,
+      "step": 633
+    },
+    {
+      "epoch": 0.29976359338061465,
+      "grad_norm": 4.453991889953613,
+      "learning_rate": 1.7669868663548105e-05,
+      "loss": 0.2429,
+      "step": 634
+    },
+    {
+      "epoch": 0.30023640661938533,
+      "grad_norm": 4.265166282653809,
+      "learning_rate": 1.7659265241873815e-05,
+      "loss": 0.2038,
+      "step": 635
+    },
+    {
+      "epoch": 0.300709219858156,
+      "grad_norm": 6.1728901863098145,
+      "learning_rate": 1.7648640946031273e-05,
+      "loss": 0.2277,
+      "step": 636
+    },
+    {
+      "epoch": 0.3011820330969267,
+      "grad_norm": 3.938297986984253,
+      "learning_rate": 1.7637995804975392e-05,
+      "loss": 0.2425,
+      "step": 637
+    },
+    {
+      "epoch": 0.3016548463356974,
+      "grad_norm": 5.709317684173584,
+      "learning_rate": 1.7627329847717888e-05,
+      "loss": 0.2656,
+      "step": 638
+    },
+    {
+      "epoch": 0.3021276595744681,
+      "grad_norm": 5.384950160980225,
+      "learning_rate": 1.761664310332722e-05,
+      "loss": 0.3077,
+      "step": 639
+    },
+    {
+      "epoch": 0.30260047281323876,
+      "grad_norm": 5.811992645263672,
+      "learning_rate": 1.7605935600928486e-05,
+      "loss": 0.2706,
+      "step": 640
+    },
+    {
+      "epoch": 0.30260047281323876,
+      "eval_accuracy": 0.8481152993348116,
+      "eval_f1": 0.6836027713625866,
+      "eval_loss": 0.3192698657512665,
+      "eval_precision": 0.8222222222222222,
+      "eval_recall": 0.5849802371541502,
+      "eval_runtime": 48.4619,
+      "eval_samples_per_second": 5.695,
+      "eval_steps_per_second": 0.186,
+      "step": 640
+    },
+    {
+      "epoch": 0.30307328605200945,
+      "grad_norm": 7.0490241050720215,
+      "learning_rate": 1.759520736970337e-05,
+      "loss": 0.3842,
+      "step": 641
+    },
+    {
+      "epoch": 0.30354609929078014,
+      "grad_norm": 7.162063121795654,
+      "learning_rate": 1.7584458438890036e-05,
+      "loss": 0.3018,
+      "step": 642
+    },
+    {
+      "epoch": 0.3040189125295508,
+      "grad_norm": 7.344574928283691,
+      "learning_rate": 1.757368883778307e-05,
+      "loss": 0.281,
+      "step": 643
+    },
+    {
+      "epoch": 0.3044917257683215,
+      "grad_norm": 5.31951379776001,
+      "learning_rate": 1.7562898595733395e-05,
+      "loss": 0.2809,
+      "step": 644
+    },
+    {
+      "epoch": 0.3049645390070922,
+      "grad_norm": 6.758824348449707,
+      "learning_rate": 1.7552087742148176e-05,
+      "loss": 0.2333,
+      "step": 645
+    },
+    {
+      "epoch": 0.3054373522458629,
+      "grad_norm": 5.954471588134766,
+      "learning_rate": 1.754125630649076e-05,
+      "loss": 0.31,
+      "step": 646
+    },
+    {
+      "epoch": 0.30591016548463357,
+      "grad_norm": 5.111174583435059,
+      "learning_rate": 1.753040431828059e-05,
+      "loss": 0.2112,
+      "step": 647
+    },
+    {
+      "epoch": 0.30638297872340425,
+      "grad_norm": 3.9539942741394043,
+      "learning_rate": 1.751953180709311e-05,
+      "loss": 0.1611,
+      "step": 648
+    },
+    {
+      "epoch": 0.30685579196217494,
+      "grad_norm": 4.952718257904053,
+      "learning_rate": 1.750863880255971e-05,
+      "loss": 0.2869,
+      "step": 649
+    },
+    {
+      "epoch": 0.3073286052009456,
+      "grad_norm": 5.200640678405762,
+      "learning_rate": 1.7497725334367627e-05,
+      "loss": 0.2983,
+      "step": 650
+    },
+    {
+      "epoch": 0.3078014184397163,
+      "grad_norm": 9.411211013793945,
+      "learning_rate": 1.7486791432259858e-05,
+      "loss": 0.2823,
+      "step": 651
+    },
+    {
+      "epoch": 0.308274231678487,
+      "grad_norm": 5.560668468475342,
+      "learning_rate": 1.7475837126035105e-05,
+      "loss": 0.2646,
+      "step": 652
+    },
+    {
+      "epoch": 0.3087470449172577,
+      "grad_norm": 4.861005783081055,
+      "learning_rate": 1.746486244554767e-05,
+      "loss": 0.3096,
+      "step": 653
+    },
+    {
+      "epoch": 0.30921985815602837,
+      "grad_norm": 2.945842742919922,
+      "learning_rate": 1.7453867420707386e-05,
+      "loss": 0.1699,
+      "step": 654
+    },
+    {
+      "epoch": 0.30969267139479906,
+      "grad_norm": 5.4764933586120605,
+      "learning_rate": 1.7442852081479525e-05,
+      "loss": 0.2001,
+      "step": 655
+    },
+    {
+      "epoch": 0.31016548463356974,
+      "grad_norm": 5.168087005615234,
+      "learning_rate": 1.743181645788473e-05,
+      "loss": 0.207,
+      "step": 656
+    },
+    {
+      "epoch": 0.31063829787234043,
+      "grad_norm": 6.098107814788818,
+      "learning_rate": 1.742076057999892e-05,
+      "loss": 0.2383,
+      "step": 657
+    },
+    {
+      "epoch": 0.3111111111111111,
+      "grad_norm": 6.835310935974121,
+      "learning_rate": 1.7409684477953224e-05,
+      "loss": 0.2723,
+      "step": 658
+    },
+    {
+      "epoch": 0.3115839243498818,
+      "grad_norm": 8.645564079284668,
+      "learning_rate": 1.739858818193387e-05,
+      "loss": 0.3614,
+      "step": 659
+    },
+    {
+      "epoch": 0.3120567375886525,
+      "grad_norm": 5.342036724090576,
+      "learning_rate": 1.738747172218215e-05,
+      "loss": 0.2074,
+      "step": 660
+    },
+    {
+      "epoch": 0.3120567375886525,
+      "eval_accuracy": 0.8470066518847007,
+      "eval_f1": 0.655,
+      "eval_loss": 0.3505603075027466,
+      "eval_precision": 0.891156462585034,
+      "eval_recall": 0.5177865612648221,
+      "eval_runtime": 48.6815,
+      "eval_samples_per_second": 5.67,
+      "eval_steps_per_second": 0.185,
+      "step": 660
+    },
+    {
+      "epoch": 0.3125295508274232,
+      "grad_norm": 6.081332683563232,
+      "learning_rate": 1.7376335128994276e-05,
+      "loss": 0.3087,
+      "step": 661
+    },
+    {
+      "epoch": 0.31300236406619386,
+      "grad_norm": 5.119427680969238,
+      "learning_rate": 1.7365178432721358e-05,
+      "loss": 0.2799,
+      "step": 662
+    },
+    {
+      "epoch": 0.31347517730496455,
+      "grad_norm": 7.370607376098633,
+      "learning_rate": 1.7354001663769278e-05,
+      "loss": 0.2989,
+      "step": 663
+    },
+    {
+      "epoch": 0.31394799054373523,
+      "grad_norm": 4.7025885581970215,
+      "learning_rate": 1.734280485259863e-05,
+      "loss": 0.2622,
+      "step": 664
+    },
+    {
+      "epoch": 0.3144208037825059,
+      "grad_norm": 7.617417812347412,
+      "learning_rate": 1.7331588029724628e-05,
+      "loss": 0.3428,
+      "step": 665
+    },
+    {
+      "epoch": 0.3148936170212766,
+      "grad_norm": 4.964621543884277,
+      "learning_rate": 1.7320351225717025e-05,
+      "loss": 0.2216,
+      "step": 666
+    },
+    {
+      "epoch": 0.3153664302600473,
+      "grad_norm": 6.546290397644043,
+      "learning_rate": 1.730909447120003e-05,
+      "loss": 0.2092,
+      "step": 667
+    },
+    {
+      "epoch": 0.315839243498818,
+      "grad_norm": 6.265383243560791,
+      "learning_rate": 1.7297817796852227e-05,
+      "loss": 0.2734,
+      "step": 668
+    },
+    {
+      "epoch": 0.31631205673758866,
+      "grad_norm": 5.259603500366211,
+      "learning_rate": 1.728652123340648e-05,
+      "loss": 0.2409,
+      "step": 669
+    },
+    {
+      "epoch": 0.31678486997635935,
+      "grad_norm": 6.892948627471924,
+      "learning_rate": 1.7275204811649865e-05,
+      "loss": 0.311,
+      "step": 670
+    },
+    {
+      "epoch": 0.31725768321513004,
+      "grad_norm": 4.608394145965576,
+      "learning_rate": 1.7263868562423577e-05,
+      "loss": 0.2553,
+      "step": 671
+    },
+    {
+      "epoch": 0.3177304964539007,
+      "grad_norm": 6.246408462524414,
+      "learning_rate": 1.725251251662285e-05,
+      "loss": 0.2753,
+      "step": 672
+    },
+    {
+      "epoch": 0.3182033096926714,
+      "grad_norm": 5.615715026855469,
+      "learning_rate": 1.7241136705196865e-05,
+      "loss": 0.1744,
+      "step": 673
+    },
+    {
+      "epoch": 0.3186761229314421,
+      "grad_norm": 4.1983642578125,
+      "learning_rate": 1.7229741159148676e-05,
+      "loss": 0.2054,
+      "step": 674
+    },
+    {
+      "epoch": 0.3191489361702128,
+      "grad_norm": 7.475837230682373,
+      "learning_rate": 1.7218325909535118e-05,
+      "loss": 0.2695,
+      "step": 675
+    },
+    {
+      "epoch": 0.31962174940898347,
+      "grad_norm": 6.710148334503174,
+      "learning_rate": 1.7206890987466726e-05,
+      "loss": 0.2597,
+      "step": 676
+    },
+    {
+      "epoch": 0.32009456264775416,
+      "grad_norm": 5.378614902496338,
+      "learning_rate": 1.7195436424107648e-05,
+      "loss": 0.2669,
+      "step": 677
+    },
+    {
+      "epoch": 0.32056737588652484,
+      "grad_norm": 6.92887020111084,
+      "learning_rate": 1.7183962250675568e-05,
+      "loss": 0.3035,
+      "step": 678
+    },
+    {
+      "epoch": 0.3210401891252955,
+      "grad_norm": 5.467234134674072,
+      "learning_rate": 1.7172468498441604e-05,
+      "loss": 0.2622,
+      "step": 679
+    },
+    {
+      "epoch": 0.3215130023640662,
+      "grad_norm": 5.692148685455322,
+      "learning_rate": 1.7160955198730244e-05,
+      "loss": 0.2825,
+      "step": 680
+    },
+    {
+      "epoch": 0.3215130023640662,
+      "eval_accuracy": 0.8281596452328159,
+      "eval_f1": 0.5931758530183727,
+      "eval_loss": 0.35231587290763855,
+      "eval_precision": 0.8828125,
+      "eval_recall": 0.44664031620553357,
+      "eval_runtime": 47.4158,
+      "eval_samples_per_second": 5.821,
+      "eval_steps_per_second": 0.19,
+      "step": 680
+    },
+    {
+      "epoch": 0.3219858156028369,
+      "grad_norm": 5.253277778625488,
+      "learning_rate": 1.7149422382919237e-05,
+      "loss": 0.2007,
+      "step": 681
+    },
+    {
+      "epoch": 0.3224586288416076,
+      "grad_norm": 5.658674716949463,
+      "learning_rate": 1.7137870082439533e-05,
+      "loss": 0.2242,
+      "step": 682
+    },
+    {
+      "epoch": 0.3229314420803783,
+      "grad_norm": 6.746735095977783,
+      "learning_rate": 1.7126298328775175e-05,
+      "loss": 0.3869,
+      "step": 683
+    },
+    {
+      "epoch": 0.32340425531914896,
+      "grad_norm": 4.4457173347473145,
+      "learning_rate": 1.711470715346323e-05,
+      "loss": 0.207,
+      "step": 684
+    },
+    {
+      "epoch": 0.32387706855791965,
+      "grad_norm": 4.7827935218811035,
+      "learning_rate": 1.7103096588093686e-05,
+      "loss": 0.1964,
+      "step": 685
+    },
+    {
+      "epoch": 0.32434988179669033,
+      "grad_norm": 5.443333148956299,
+      "learning_rate": 1.7091466664309385e-05,
+      "loss": 0.2212,
+      "step": 686
+    },
+    {
+      "epoch": 0.324822695035461,
+      "grad_norm": 7.0208539962768555,
+      "learning_rate": 1.7079817413805927e-05,
+      "loss": 0.38,
+      "step": 687
+    },
+    {
+      "epoch": 0.3252955082742317,
+      "grad_norm": 4.507380485534668,
+      "learning_rate": 1.706814886833158e-05,
+      "loss": 0.2782,
+      "step": 688
+    },
+    {
+      "epoch": 0.3257683215130024,
+      "grad_norm": 5.8691301345825195,
+      "learning_rate": 1.7056461059687195e-05,
+      "loss": 0.2178,
+      "step": 689
+    },
+    {
+      "epoch": 0.3262411347517731,
+      "grad_norm": 7.219882011413574,
+      "learning_rate": 1.7044754019726127e-05,
+      "loss": 0.2707,
+      "step": 690
+    },
+    {
+      "epoch": 0.32671394799054376,
+      "grad_norm": 5.678999900817871,
+      "learning_rate": 1.703302778035415e-05,
+      "loss": 0.258,
+      "step": 691
+    },
+    {
+      "epoch": 0.3271867612293144,
+      "grad_norm": 6.334179878234863,
+      "learning_rate": 1.702128237352934e-05,
+      "loss": 0.2489,
+      "step": 692
+    },
+    {
+      "epoch": 0.3276595744680851,
+      "grad_norm": 7.485446453094482,
+      "learning_rate": 1.7009517831262034e-05,
+      "loss": 0.3043,
+      "step": 693
+    },
+    {
+      "epoch": 0.32813238770685577,
+      "grad_norm": 5.8358354568481445,
+      "learning_rate": 1.6997734185614712e-05,
+      "loss": 0.2401,
+      "step": 694
+    },
+    {
+      "epoch": 0.32860520094562645,
+      "grad_norm": 5.5207319259643555,
+      "learning_rate": 1.6985931468701915e-05,
+      "loss": 0.2512,
+      "step": 695
+    },
+    {
+      "epoch": 0.32907801418439714,
+      "grad_norm": 5.306708335876465,
+      "learning_rate": 1.6974109712690163e-05,
+      "loss": 0.2479,
+      "step": 696
+    },
+    {
+      "epoch": 0.3295508274231678,
+      "grad_norm": 5.970691204071045,
+      "learning_rate": 1.6962268949797862e-05,
+      "loss": 0.2745,
+      "step": 697
+    },
+    {
+      "epoch": 0.3300236406619385,
+      "grad_norm": 4.881795883178711,
+      "learning_rate": 1.695040921229522e-05,
+      "loss": 0.1999,
+      "step": 698
+    },
+    {
+      "epoch": 0.3304964539007092,
+      "grad_norm": 3.6859960556030273,
+      "learning_rate": 1.6938530532504155e-05,
+      "loss": 0.1434,
+      "step": 699
+    },
+    {
+      "epoch": 0.3309692671394799,
+      "grad_norm": 6.081749439239502,
+      "learning_rate": 1.692663294279821e-05,
+      "loss": 0.2718,
+      "step": 700
+    },
+    {
+      "epoch": 0.3309692671394799,
+      "eval_accuracy": 0.8270509977827051,
+      "eval_f1": 0.5828877005347594,
+      "eval_loss": 0.3708072304725647,
+      "eval_precision": 0.9008264462809917,
+      "eval_recall": 0.4308300395256917,
+      "eval_runtime": 49.3785,
+      "eval_samples_per_second": 5.589,
+      "eval_steps_per_second": 0.182,
+      "step": 700
+    },
+    {
+      "epoch": 0.33144208037825057,
+      "grad_norm": 4.934723377227783,
+      "learning_rate": 1.6914716475602474e-05,
+      "loss": 0.1914,
+      "step": 701
+    },
+    {
+      "epoch": 0.33191489361702126,
+      "grad_norm": 6.057024955749512,
+      "learning_rate": 1.690278116339346e-05,
+      "loss": 0.2151,
+      "step": 702
+    },
+    {
+      "epoch": 0.33238770685579194,
+      "grad_norm": 5.484874248504639,
+      "learning_rate": 1.689082703869907e-05,
+      "loss": 0.2675,
+      "step": 703
+    },
+    {
+      "epoch": 0.33286052009456263,
+      "grad_norm": 7.429450511932373,
+      "learning_rate": 1.687885413409845e-05,
+      "loss": 0.3249,
+      "step": 704
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 7.455477714538574,
+      "learning_rate": 1.6866862482221948e-05,
+      "loss": 0.3455,
+      "step": 705
+    },
+    {
+      "epoch": 0.333806146572104,
+      "grad_norm": 6.469564437866211,
+      "learning_rate": 1.685485211575099e-05,
+      "loss": 0.2674,
+      "step": 706
+    },
+    {
+      "epoch": 0.3342789598108747,
+      "grad_norm": 4.929532527923584,
+      "learning_rate": 1.684282306741802e-05,
+      "loss": 0.2082,
+      "step": 707
+    },
+    {
+      "epoch": 0.3347517730496454,
+      "grad_norm": 5.234260082244873,
+      "learning_rate": 1.6830775370006377e-05,
+      "loss": 0.1776,
+      "step": 708
+    },
+    {
+      "epoch": 0.33522458628841606,
+      "grad_norm": 7.192393779754639,
+      "learning_rate": 1.681870905635025e-05,
+      "loss": 0.2546,
+      "step": 709
+    },
+    {
+      "epoch": 0.33569739952718675,
+      "grad_norm": 5.962497711181641,
+      "learning_rate": 1.680662415933454e-05,
+      "loss": 0.2344,
+      "step": 710
+    },
+    {
+      "epoch": 0.33617021276595743,
+      "grad_norm": 6.120792865753174,
+      "learning_rate": 1.679452071189481e-05,
+      "loss": 0.2552,
+      "step": 711
+    },
+    {
+      "epoch": 0.3366430260047281,
+      "grad_norm": 8.747769355773926,
+      "learning_rate": 1.6782398747017176e-05,
+      "loss": 0.3015,
+      "step": 712
+    },
+    {
+      "epoch": 0.3371158392434988,
+      "grad_norm": 4.518637657165527,
+      "learning_rate": 1.6770258297738213e-05,
+      "loss": 0.2825,
+      "step": 713
+    },
+    {
+      "epoch": 0.3375886524822695,
+      "grad_norm": 6.613455295562744,
+      "learning_rate": 1.6758099397144884e-05,
+      "loss": 0.3259,
+      "step": 714
+    },
+    {
+      "epoch": 0.3380614657210402,
+      "grad_norm": 6.923367023468018,
+      "learning_rate": 1.674592207837443e-05,
+      "loss": 0.2302,
+      "step": 715
+    },
+    {
+      "epoch": 0.33853427895981086,
+      "grad_norm": 7.601401329040527,
+      "learning_rate": 1.6733726374614287e-05,
+      "loss": 0.2771,
+      "step": 716
+    },
+    {
+      "epoch": 0.33900709219858155,
+      "grad_norm": 4.864050388336182,
+      "learning_rate": 1.6721512319102006e-05,
+      "loss": 0.2364,
+      "step": 717
+    },
+    {
+      "epoch": 0.33947990543735224,
+      "grad_norm": 4.241363048553467,
+      "learning_rate": 1.670927994512514e-05,
+      "loss": 0.2275,
+      "step": 718
+    },
+    {
+      "epoch": 0.3399527186761229,
+      "grad_norm": 9.842682838439941,
+      "learning_rate": 1.6697029286021182e-05,
+      "loss": 0.3548,
+      "step": 719
+    },
+    {
+      "epoch": 0.3404255319148936,
+      "grad_norm": 7.171640396118164,
+      "learning_rate": 1.6684760375177442e-05,
+      "loss": 0.2172,
+      "step": 720
+    },
+    {
+      "epoch": 0.3404255319148936,
+      "eval_accuracy": 0.8237250554323725,
+      "eval_f1": 0.5667574931880109,
+      "eval_loss": 0.3734827935695648,
+      "eval_precision": 0.9122807017543859,
+      "eval_recall": 0.41106719367588934,
+      "eval_runtime": 47.8437,
+      "eval_samples_per_second": 5.769,
+      "eval_steps_per_second": 0.188,
+      "step": 720
+    },
+    {
+      "epoch": 0.3408983451536643,
+      "grad_norm": 5.395704746246338,
+      "learning_rate": 1.667247324603098e-05,
+      "loss": 0.1952,
+      "step": 721
+    },
+    {
+      "epoch": 0.341371158392435,
+      "grad_norm": 6.559274196624756,
+      "learning_rate": 1.666016793206851e-05,
+      "loss": 0.2231,
+      "step": 722
+    },
+    {
+      "epoch": 0.34184397163120567,
+      "grad_norm": 5.171023368835449,
+      "learning_rate": 1.6647844466826302e-05,
+      "loss": 0.251,
+      "step": 723
+    },
+    {
+      "epoch": 0.34231678486997635,
+      "grad_norm": 6.1227898597717285,
+      "learning_rate": 1.6635502883890098e-05,
+      "loss": 0.2674,
+      "step": 724
+    },
+    {
+      "epoch": 0.34278959810874704,
+      "grad_norm": 4.9917802810668945,
+      "learning_rate": 1.6623143216895008e-05,
+      "loss": 0.2228,
+      "step": 725
+    },
+    {
+      "epoch": 0.3432624113475177,
+      "grad_norm": 4.765135765075684,
+      "learning_rate": 1.661076549952544e-05,
+      "loss": 0.1833,
+      "step": 726
+    },
+    {
+      "epoch": 0.3437352245862884,
+      "grad_norm": 5.079737186431885,
+      "learning_rate": 1.6598369765514986e-05,
+      "loss": 0.2315,
+      "step": 727
+    },
+    {
+      "epoch": 0.3442080378250591,
+      "grad_norm": 8.060894012451172,
+      "learning_rate": 1.6585956048646345e-05,
+      "loss": 0.3144,
+      "step": 728
+    },
+    {
+      "epoch": 0.3446808510638298,
+      "grad_norm": 5.104706287384033,
+      "learning_rate": 1.657352438275122e-05,
+      "loss": 0.2241,
+      "step": 729
+    },
+    {
+      "epoch": 0.34515366430260047,
+      "grad_norm": 7.560702323913574,
+      "learning_rate": 1.656107480171024e-05,
+      "loss": 0.2651,
+      "step": 730
+    },
+    {
+      "epoch": 0.34562647754137116,
+      "grad_norm": 4.7865190505981445,
+      "learning_rate": 1.6548607339452853e-05,
+      "loss": 0.1864,
+      "step": 731
+    },
+    {
+      "epoch": 0.34609929078014184,
+      "grad_norm": 7.309717178344727,
+      "learning_rate": 1.6536122029957237e-05,
+      "loss": 0.2793,
+      "step": 732
+    },
+    {
+      "epoch": 0.34657210401891253,
+      "grad_norm": 5.886257171630859,
+      "learning_rate": 1.6523618907250215e-05,
+      "loss": 0.283,
+      "step": 733
+    },
+    {
+      "epoch": 0.3470449172576832,
+      "grad_norm": 7.503266334533691,
+      "learning_rate": 1.6511098005407157e-05,
+      "loss": 0.2675,
+      "step": 734
+    },
+    {
+      "epoch": 0.3475177304964539,
+      "grad_norm": 6.831967830657959,
+      "learning_rate": 1.6498559358551885e-05,
+      "loss": 0.2302,
+      "step": 735
+    },
+    {
+      "epoch": 0.3479905437352246,
+      "grad_norm": 5.59326696395874,
+      "learning_rate": 1.6486003000856587e-05,
+      "loss": 0.2629,
+      "step": 736
+    },
+    {
+      "epoch": 0.3484633569739953,
+      "grad_norm": 6.322920799255371,
+      "learning_rate": 1.647342896654171e-05,
+      "loss": 0.3043,
+      "step": 737
+    },
+    {
+      "epoch": 0.34893617021276596,
+      "grad_norm": 7.298335552215576,
+      "learning_rate": 1.6460837289875886e-05,
+      "loss": 0.2891,
+      "step": 738
+    },
+    {
+      "epoch": 0.34940898345153665,
+      "grad_norm": 5.698408126831055,
+      "learning_rate": 1.6448228005175818e-05,
+      "loss": 0.2265,
+      "step": 739
+    },
+    {
+      "epoch": 0.34988179669030733,
+      "grad_norm": 4.371240139007568,
+      "learning_rate": 1.643560114680621e-05,
+      "loss": 0.1876,
+      "step": 740
+    },
+    {
+      "epoch": 0.34988179669030733,
+      "eval_accuracy": 0.8392461197339246,
+      "eval_f1": 0.6214099216710183,
+      "eval_loss": 0.3518848717212677,
+      "eval_precision": 0.9153846153846154,
+      "eval_recall": 0.47035573122529645,
+      "eval_runtime": 47.5745,
+      "eval_samples_per_second": 5.801,
+      "eval_steps_per_second": 0.189,
+      "step": 740
+    },
+    {
+      "epoch": 0.350354609929078,
+      "grad_norm": 5.097601413726807,
+      "learning_rate": 1.642295674917965e-05,
+      "loss": 0.2459,
+      "step": 741
+    },
+    {
+      "epoch": 0.3508274231678487,
+      "grad_norm": 6.104417324066162,
+      "learning_rate": 1.641029484675653e-05,
+      "loss": 0.1901,
+      "step": 742
+    },
+    {
+      "epoch": 0.3513002364066194,
+      "grad_norm": 6.226688385009766,
+      "learning_rate": 1.639761547404495e-05,
+      "loss": 0.2534,
+      "step": 743
+    },
+    {
+      "epoch": 0.3517730496453901,
+      "grad_norm": 6.888615608215332,
+      "learning_rate": 1.6384918665600623e-05,
+      "loss": 0.2798,
+      "step": 744
+    },
+    {
+      "epoch": 0.35224586288416077,
+      "grad_norm": 3.7885279655456543,
+      "learning_rate": 1.6372204456026774e-05,
+      "loss": 0.177,
+      "step": 745
+    },
+    {
+      "epoch": 0.35271867612293145,
+      "grad_norm": 7.243451118469238,
+      "learning_rate": 1.6359472879974064e-05,
+      "loss": 0.2581,
+      "step": 746
+    },
+    {
+      "epoch": 0.35319148936170214,
+      "grad_norm": 5.321907043457031,
+      "learning_rate": 1.634672397214047e-05,
+      "loss": 0.2978,
+      "step": 747
+    },
+    {
+      "epoch": 0.3536643026004728,
+      "grad_norm": 4.163849830627441,
+      "learning_rate": 1.633395776727121e-05,
+      "loss": 0.1865,
+      "step": 748
+    },
+    {
+      "epoch": 0.3541371158392435,
+      "grad_norm": 5.830822467803955,
+      "learning_rate": 1.632117430015865e-05,
+      "loss": 0.2759,
+      "step": 749
+    },
+    {
+      "epoch": 0.3546099290780142,
+      "grad_norm": 6.779140949249268,
+      "learning_rate": 1.6308373605642192e-05,
+      "loss": 0.2363,
+      "step": 750
+    },
+    {
+      "epoch": 0.3550827423167849,
+      "grad_norm": 5.8843770027160645,
+      "learning_rate": 1.629555571860819e-05,
+      "loss": 0.2933,
+      "step": 751
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 4.655647277832031,
+      "learning_rate": 1.628272067398986e-05,
+      "loss": 0.1564,
+      "step": 752
+    },
+    {
+      "epoch": 0.35602836879432626,
+      "grad_norm": 5.365359306335449,
+      "learning_rate": 1.626986850676717e-05,
+      "loss": 0.1929,
+      "step": 753
+    },
+    {
+      "epoch": 0.35650118203309694,
+      "grad_norm": 4.647514820098877,
+      "learning_rate": 1.625699925196675e-05,
+      "loss": 0.1701,
+      "step": 754
+    },
+    {
+      "epoch": 0.35697399527186763,
+      "grad_norm": 5.949582099914551,
+      "learning_rate": 1.624411294466182e-05,
+      "loss": 0.1973,
+      "step": 755
+    },
+    {
+      "epoch": 0.3574468085106383,
+      "grad_norm": 6.162478446960449,
+      "learning_rate": 1.623120961997205e-05,
+      "loss": 0.1898,
+      "step": 756
+    },
+    {
+      "epoch": 0.357919621749409,
+      "grad_norm": 5.563331604003906,
+      "learning_rate": 1.6218289313063503e-05,
+      "loss": 0.228,
+      "step": 757
+    },
+    {
+      "epoch": 0.3583924349881797,
+      "grad_norm": 4.954248428344727,
+      "learning_rate": 1.6205352059148522e-05,
+      "loss": 0.2102,
+      "step": 758
+    },
+    {
+      "epoch": 0.3588652482269504,
+      "grad_norm": 5.003850936889648,
+      "learning_rate": 1.619239789348563e-05,
+      "loss": 0.2235,
+      "step": 759
+    },
+    {
+      "epoch": 0.35933806146572106,
+      "grad_norm": 7.022822856903076,
+      "learning_rate": 1.6179426851379443e-05,
+      "loss": 0.2788,
+      "step": 760
+    },
+    {
+      "epoch": 0.35933806146572106,
+      "eval_accuracy": 0.8348115299334812,
+      "eval_f1": 0.6246851385390428,
+      "eval_loss": 0.3573962152004242,
+      "eval_precision": 0.8611111111111112,
+      "eval_recall": 0.4901185770750988,
+      "eval_runtime": 47.6981,
+      "eval_samples_per_second": 5.786,
+      "eval_steps_per_second": 0.189,
+      "step": 760
+    },
+    {
+      "epoch": 0.35981087470449175,
+      "grad_norm": 5.163010597229004,
+      "learning_rate": 1.6166438968180582e-05,
+      "loss": 0.194,
+      "step": 761
+    },
+    {
+      "epoch": 0.36028368794326243,
+      "grad_norm": 7.249414920806885,
+      "learning_rate": 1.615343427928555e-05,
+      "loss": 0.2594,
+      "step": 762
+    },
+    {
+      "epoch": 0.3607565011820331,
+      "grad_norm": 3.0018277168273926,
+      "learning_rate": 1.614041282013666e-05,
+      "loss": 0.1381,
+      "step": 763
+    },
+    {
+      "epoch": 0.3612293144208038,
+      "grad_norm": 5.234002590179443,
+      "learning_rate": 1.6127374626221934e-05,
+      "loss": 0.2252,
+      "step": 764
+    },
+    {
+      "epoch": 0.3617021276595745,
+      "grad_norm": 5.7721848487854,
+      "learning_rate": 1.6114319733074986e-05,
+      "loss": 0.3073,
+      "step": 765
+    },
+    {
+      "epoch": 0.3621749408983452,
+      "grad_norm": 7.471461296081543,
+      "learning_rate": 1.6101248176274958e-05,
+      "loss": 0.2948,
+      "step": 766
+    },
+    {
+      "epoch": 0.36264775413711586,
+      "grad_norm": 6.112615585327148,
+      "learning_rate": 1.6088159991446397e-05,
+      "loss": 0.2433,
+      "step": 767
+    },
+    {
+      "epoch": 0.36312056737588655,
+      "grad_norm": 5.112131118774414,
+      "learning_rate": 1.6075055214259174e-05,
+      "loss": 0.1972,
+      "step": 768
+    },
+    {
+      "epoch": 0.36359338061465724,
+      "grad_norm": 6.367164611816406,
+      "learning_rate": 1.606193388042837e-05,
+      "loss": 0.2283,
+      "step": 769
+    },
+    {
+      "epoch": 0.3640661938534279,
+      "grad_norm": 6.986507892608643,
+      "learning_rate": 1.60487960257142e-05,
+      "loss": 0.233,
+      "step": 770
+    },
+    {
+      "epoch": 0.3645390070921986,
+      "grad_norm": 4.459200382232666,
+      "learning_rate": 1.6035641685921895e-05,
+      "loss": 0.1947,
+      "step": 771
+    },
+    {
+      "epoch": 0.3650118203309693,
+      "grad_norm": 4.415493965148926,
+      "learning_rate": 1.602247089690162e-05,
+      "loss": 0.1612,
+      "step": 772
+    },
+    {
+      "epoch": 0.3654846335697399,
+      "grad_norm": 6.583262920379639,
+      "learning_rate": 1.6009283694548365e-05,
+      "loss": 0.234,
+      "step": 773
+    },
+    {
+      "epoch": 0.3659574468085106,
+      "grad_norm": 7.73126745223999,
+      "learning_rate": 1.5996080114801858e-05,
+      "loss": 0.2687,
+      "step": 774
+    },
+    {
+      "epoch": 0.3664302600472813,
+      "grad_norm": 6.350796222686768,
+      "learning_rate": 1.598286019364645e-05,
+      "loss": 0.2279,
+      "step": 775
+    },
+    {
+      "epoch": 0.366903073286052,
+      "grad_norm": 4.372172832489014,
+      "learning_rate": 1.596962396711104e-05,
+      "loss": 0.1742,
+      "step": 776
+    },
+    {
+      "epoch": 0.36737588652482267,
+      "grad_norm": 7.295071125030518,
+      "learning_rate": 1.5956371471268968e-05,
+      "loss": 0.2632,
+      "step": 777
+    },
+    {
+      "epoch": 0.36784869976359336,
+      "grad_norm": 7.446830749511719,
+      "learning_rate": 1.5943102742237894e-05,
+      "loss": 0.2026,
+      "step": 778
+    },
+    {
+      "epoch": 0.36832151300236404,
+      "grad_norm": 6.453531742095947,
+      "learning_rate": 1.5929817816179733e-05,
+      "loss": 0.3007,
+      "step": 779
+    },
+    {
+      "epoch": 0.36879432624113473,
+      "grad_norm": 5.014437198638916,
+      "learning_rate": 1.591651672930054e-05,
+      "loss": 0.305,
+      "step": 780
+    },
+    {
+      "epoch": 0.36879432624113473,
+      "eval_accuracy": 0.8580931263858093,
+      "eval_f1": 0.7037037037037037,
+      "eval_loss": 0.31539109349250793,
+      "eval_precision": 0.8491620111731844,
+      "eval_recall": 0.6007905138339921,
+      "eval_runtime": 47.4123,
+      "eval_samples_per_second": 5.821,
+      "eval_steps_per_second": 0.19,
+      "step": 780
+    },
+    {
+      "epoch": 0.3692671394799054,
+      "grad_norm": 4.877912521362305,
+      "learning_rate": 1.5903199517850422e-05,
+      "loss": 0.2521,
+      "step": 781
+    },
+    {
+      "epoch": 0.3697399527186761,
+      "grad_norm": 5.180963516235352,
+      "learning_rate": 1.5889866218123414e-05,
+      "loss": 0.2296,
+      "step": 782
+    },
+    {
+      "epoch": 0.3702127659574468,
+      "grad_norm": 6.379220008850098,
+      "learning_rate": 1.5876516866457412e-05,
+      "loss": 0.252,
+      "step": 783
+    },
+    {
+      "epoch": 0.3706855791962175,
+      "grad_norm": 5.103360652923584,
+      "learning_rate": 1.5863151499234053e-05,
+      "loss": 0.2349,
+      "step": 784
+    },
+    {
+      "epoch": 0.37115839243498816,
+      "grad_norm": 5.573483943939209,
+      "learning_rate": 1.5849770152878622e-05,
+      "loss": 0.2627,
+      "step": 785
+    },
+    {
+      "epoch": 0.37163120567375885,
+      "grad_norm": 4.954294681549072,
+      "learning_rate": 1.583637286385995e-05,
+      "loss": 0.2475,
+      "step": 786
+    },
+    {
+      "epoch": 0.37210401891252953,
+      "grad_norm": 8.134040832519531,
+      "learning_rate": 1.5822959668690325e-05,
+      "loss": 0.4456,
+      "step": 787
+    },
+    {
+      "epoch": 0.3725768321513002,
+      "grad_norm": 5.643916130065918,
+      "learning_rate": 1.5809530603925378e-05,
+      "loss": 0.3051,
+      "step": 788
+    },
+    {
+      "epoch": 0.3730496453900709,
+      "grad_norm": 5.253788471221924,
+      "learning_rate": 1.5796085706163997e-05,
+      "loss": 0.254,
+      "step": 789
+    },
+    {
+      "epoch": 0.3735224586288416,
+      "grad_norm": 7.210267543792725,
+      "learning_rate": 1.5782625012048212e-05,
+      "loss": 0.2753,
+      "step": 790
+    },
+    {
+      "epoch": 0.3739952718676123,
+      "grad_norm": 4.508166790008545,
+      "learning_rate": 1.5769148558263108e-05,
+      "loss": 0.3106,
+      "step": 791
+    },
+    {
+      "epoch": 0.37446808510638296,
+      "grad_norm": 4.896869659423828,
+      "learning_rate": 1.575565638153672e-05,
+      "loss": 0.2384,
+      "step": 792
+    },
+    {
+      "epoch": 0.37494089834515365,
+      "grad_norm": 4.60262393951416,
+      "learning_rate": 1.574214851863993e-05,
+      "loss": 0.2742,
+      "step": 793
+    },
+    {
+      "epoch": 0.37541371158392434,
+      "grad_norm": 5.893795490264893,
+      "learning_rate": 1.572862500638639e-05,
+      "loss": 0.2526,
+      "step": 794
+    },
+    {
+      "epoch": 0.375886524822695,
+      "grad_norm": 4.181454181671143,
+      "learning_rate": 1.5715085881632366e-05,
+      "loss": 0.2012,
+      "step": 795
+    },
+    {
+      "epoch": 0.3763593380614657,
+      "grad_norm": 5.785640716552734,
+      "learning_rate": 1.5701531181276703e-05,
+      "loss": 0.2868,
+      "step": 796
+    },
+    {
+      "epoch": 0.3768321513002364,
+      "grad_norm": 4.539717197418213,
+      "learning_rate": 1.5687960942260687e-05,
+      "loss": 0.1804,
+      "step": 797
+    },
+    {
+      "epoch": 0.3773049645390071,
+      "grad_norm": 4.89398717880249,
+      "learning_rate": 1.5674375201567948e-05,
+      "loss": 0.1735,
+      "step": 798
+    },
+    {
+      "epoch": 0.37777777777777777,
+      "grad_norm": 6.449411392211914,
+      "learning_rate": 1.566077399622436e-05,
+      "loss": 0.342,
+      "step": 799
+    },
+    {
+      "epoch": 0.37825059101654845,
+      "grad_norm": 4.286831378936768,
+      "learning_rate": 1.5647157363297964e-05,
+      "loss": 0.2726,
+      "step": 800
+    },
+    {
+      "epoch": 0.37825059101654845,
+      "eval_accuracy": 0.8458980044345898,
+      "eval_f1": 0.6567901234567901,
+      "eval_loss": 0.3148706555366516,
+      "eval_precision": 0.875,
+      "eval_recall": 0.525691699604743,
+      "eval_runtime": 47.1619,
+      "eval_samples_per_second": 5.852,
+      "eval_steps_per_second": 0.191,
+      "step": 800
+    },
+    {
+      "epoch": 0.37872340425531914,
+      "grad_norm": 5.2504563331604,
+      "learning_rate": 1.5633525339898818e-05,
+      "loss": 0.2679,
+      "step": 801
+    },
+    {
+      "epoch": 0.3791962174940898,
+      "grad_norm": 5.007554531097412,
+      "learning_rate": 1.5619877963178952e-05,
+      "loss": 0.2399,
+      "step": 802
+    },
+    {
+      "epoch": 0.3796690307328605,
+      "grad_norm": 3.274820566177368,
+      "learning_rate": 1.5606215270332216e-05,
+      "loss": 0.1511,
+      "step": 803
+    },
+    {
+      "epoch": 0.3801418439716312,
+      "grad_norm": 4.302379131317139,
+      "learning_rate": 1.559253729859421e-05,
+      "loss": 0.2247,
+      "step": 804
+    },
+    {
+      "epoch": 0.3806146572104019,
+      "grad_norm": 4.160916805267334,
+      "learning_rate": 1.5578844085242185e-05,
+      "loss": 0.2082,
+      "step": 805
+    },
+    {
+      "epoch": 0.38108747044917257,
+      "grad_norm": 5.578160285949707,
+      "learning_rate": 1.5565135667594916e-05,
+      "loss": 0.3049,
+      "step": 806
+    },
+    {
+      "epoch": 0.38156028368794326,
+      "grad_norm": 7.35500431060791,
+      "learning_rate": 1.555141208301262e-05,
+      "loss": 0.2808,
+      "step": 807
+    },
+    {
+      "epoch": 0.38203309692671394,
+      "grad_norm": 5.54599666595459,
+      "learning_rate": 1.5537673368896853e-05,
+      "loss": 0.2069,
+      "step": 808
+    },
+    {
+      "epoch": 0.38250591016548463,
+      "grad_norm": 4.696985721588135,
+      "learning_rate": 1.55239195626904e-05,
+      "loss": 0.2765,
+      "step": 809
+    },
+    {
+      "epoch": 0.3829787234042553,
+      "grad_norm": 6.143385410308838,
+      "learning_rate": 1.5510150701877178e-05,
+      "loss": 0.1958,
+      "step": 810
+    },
+    {
+      "epoch": 0.383451536643026,
+      "grad_norm": 6.515667915344238,
+      "learning_rate": 1.549636682398213e-05,
+      "loss": 0.2543,
+      "step": 811
+    },
+    {
+      "epoch": 0.3839243498817967,
+      "grad_norm": 4.393880367279053,
+      "learning_rate": 1.5482567966571136e-05,
+      "loss": 0.2278,
+      "step": 812
+    },
+    {
+      "epoch": 0.3843971631205674,
+      "grad_norm": 8.271415710449219,
+      "learning_rate": 1.546875416725089e-05,
+      "loss": 0.25,
+      "step": 813
+    },
+    {
+      "epoch": 0.38486997635933806,
+      "grad_norm": 5.564967155456543,
+      "learning_rate": 1.5454925463668812e-05,
+      "loss": 0.2286,
+      "step": 814
+    },
+    {
+      "epoch": 0.38534278959810875,
+      "grad_norm": 4.746275424957275,
+      "learning_rate": 1.5441081893512933e-05,
+      "loss": 0.2164,
+      "step": 815
+    },
+    {
+      "epoch": 0.38581560283687943,
+      "grad_norm": 7.916270732879639,
+      "learning_rate": 1.5427223494511824e-05,
+      "loss": 0.3749,
+      "step": 816
+    },
+    {
+      "epoch": 0.3862884160756501,
+      "grad_norm": 4.836629867553711,
+      "learning_rate": 1.541335030443444e-05,
+      "loss": 0.1946,
+      "step": 817
+    },
+    {
+      "epoch": 0.3867612293144208,
+      "grad_norm": 5.497342586517334,
+      "learning_rate": 1.539946236109007e-05,
+      "loss": 0.2712,
+      "step": 818
+    },
+    {
+      "epoch": 0.3872340425531915,
+      "grad_norm": 4.717584133148193,
+      "learning_rate": 1.5385559702328195e-05,
+      "loss": 0.239,
+      "step": 819
+    },
+    {
+      "epoch": 0.3877068557919622,
+      "grad_norm": 6.673068046569824,
+      "learning_rate": 1.5371642366038412e-05,
+      "loss": 0.2819,
+      "step": 820
+    },
+    {
+      "epoch": 0.3877068557919622,
+      "eval_accuracy": 0.8580931263858093,
+      "eval_f1": 0.7276595744680852,
+      "eval_loss": 0.30154746770858765,
+      "eval_precision": 0.7880184331797235,
+      "eval_recall": 0.6758893280632411,
+      "eval_runtime": 48.4441,
+      "eval_samples_per_second": 5.697,
+      "eval_steps_per_second": 0.186,
+      "step": 820
+    },
+    {
+      "epoch": 0.38817966903073287,
+      "grad_norm": 8.295758247375488,
+      "learning_rate": 1.5357710390150312e-05,
+      "loss": 0.2953,
+      "step": 821
+    },
+    {
+      "epoch": 0.38865248226950355,
+      "grad_norm": 6.9379730224609375,
+      "learning_rate": 1.5343763812633393e-05,
+      "loss": 0.2614,
+      "step": 822
+    },
+    {
+      "epoch": 0.38912529550827424,
+      "grad_norm": 5.640291690826416,
+      "learning_rate": 1.5329802671496935e-05,
+      "loss": 0.2978,
+      "step": 823
+    },
+    {
+      "epoch": 0.3895981087470449,
+      "grad_norm": 5.361009120941162,
+      "learning_rate": 1.5315827004789918e-05,
+      "loss": 0.3108,
+      "step": 824
+    },
+    {
+      "epoch": 0.3900709219858156,
+      "grad_norm": 5.312415599822998,
+      "learning_rate": 1.53018368506009e-05,
+      "loss": 0.1958,
+      "step": 825
+    },
+    {
+      "epoch": 0.3905437352245863,
+      "grad_norm": 4.690582752227783,
+      "learning_rate": 1.5287832247057936e-05,
+      "loss": 0.2102,
+      "step": 826
+    },
+    {
+      "epoch": 0.391016548463357,
+      "grad_norm": 5.4220099449157715,
+      "learning_rate": 1.527381323232845e-05,
+      "loss": 0.1965,
+      "step": 827
+    },
+    {
+      "epoch": 0.39148936170212767,
+      "grad_norm": 6.630805969238281,
+      "learning_rate": 1.5259779844619152e-05,
+      "loss": 0.2573,
+      "step": 828
+    },
+    {
+      "epoch": 0.39196217494089836,
+      "grad_norm": 4.912630081176758,
+      "learning_rate": 1.524573212217591e-05,
+      "loss": 0.2715,
+      "step": 829
+    },
+    {
+      "epoch": 0.39243498817966904,
+      "grad_norm": 5.768490314483643,
+      "learning_rate": 1.5231670103283665e-05,
+      "loss": 0.2107,
+      "step": 830
+    },
+    {
+      "epoch": 0.39290780141843973,
+      "grad_norm": 11.683192253112793,
+      "learning_rate": 1.521759382626632e-05,
+      "loss": 0.3559,
+      "step": 831
+    },
+    {
+      "epoch": 0.3933806146572104,
+      "grad_norm": 6.212742805480957,
+      "learning_rate": 1.5203503329486649e-05,
+      "loss": 0.299,
+      "step": 832
+    },
+    {
+      "epoch": 0.3938534278959811,
+      "grad_norm": 11.167441368103027,
+      "learning_rate": 1.5189398651346153e-05,
+      "loss": 0.4404,
+      "step": 833
+    },
+    {
+      "epoch": 0.3943262411347518,
+      "grad_norm": 4.801130294799805,
+      "learning_rate": 1.5175279830285006e-05,
+      "loss": 0.2968,
+      "step": 834
+    },
+    {
+      "epoch": 0.3947990543735225,
+      "grad_norm": 4.244668006896973,
+      "learning_rate": 1.5161146904781918e-05,
+      "loss": 0.2195,
+      "step": 835
+    },
+    {
+      "epoch": 0.39527186761229316,
+      "grad_norm": 4.198855876922607,
+      "learning_rate": 1.514699991335404e-05,
+      "loss": 0.2572,
+      "step": 836
+    },
+    {
+      "epoch": 0.39574468085106385,
+      "grad_norm": 3.7614452838897705,
+      "learning_rate": 1.5132838894556848e-05,
+      "loss": 0.2454,
+      "step": 837
+    },
+    {
+      "epoch": 0.39621749408983453,
+      "grad_norm": 3.7950305938720703,
+      "learning_rate": 1.5118663886984065e-05,
+      "loss": 0.2254,
+      "step": 838
+    },
+    {
+      "epoch": 0.3966903073286052,
+      "grad_norm": 7.833040714263916,
+      "learning_rate": 1.510447492926752e-05,
+      "loss": 0.3283,
+      "step": 839
+    },
+    {
+      "epoch": 0.3971631205673759,
+      "grad_norm": 4.6039204597473145,
+      "learning_rate": 1.5090272060077081e-05,
+      "loss": 0.2596,
+      "step": 840
+    },
+    {
+      "epoch": 0.3971631205673759,
+      "eval_accuracy": 0.8547671840354767,
+      "eval_f1": 0.7298969072164948,
+      "eval_loss": 0.3099477291107178,
+      "eval_precision": 0.7629310344827587,
+      "eval_recall": 0.6996047430830039,
+      "eval_runtime": 48.4738,
+      "eval_samples_per_second": 5.694,
+      "eval_steps_per_second": 0.186,
+      "step": 840
+    },
+    {
+      "epoch": 0.3976359338061466,
+      "grad_norm": 6.1639485359191895,
+      "learning_rate": 1.5076055318120508e-05,
+      "loss": 0.2616,
+      "step": 841
+    },
+    {
+      "epoch": 0.3981087470449173,
+      "grad_norm": 5.882129192352295,
+      "learning_rate": 1.5061824742143388e-05,
+      "loss": 0.2296,
+      "step": 842
+    },
+    {
+      "epoch": 0.39858156028368796,
+      "grad_norm": 6.4986772537231445,
+      "learning_rate": 1.5047580370928994e-05,
+      "loss": 0.3221,
+      "step": 843
+    },
+    {
+      "epoch": 0.39905437352245865,
+      "grad_norm": 4.624194622039795,
+      "learning_rate": 1.5033322243298209e-05,
+      "loss": 0.2522,
+      "step": 844
+    },
+    {
+      "epoch": 0.39952718676122934,
+      "grad_norm": 4.8714280128479,
+      "learning_rate": 1.50190503981094e-05,
+      "loss": 0.2503,
+      "step": 845
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 6.177154541015625,
+      "learning_rate": 1.5004764874258327e-05,
+      "loss": 0.283,
+      "step": 846
+    },
+    {
+      "epoch": 0.4004728132387707,
+      "grad_norm": 6.643271446228027,
+      "learning_rate": 1.4990465710678015e-05,
+      "loss": 0.3263,
+      "step": 847
+    },
+    {
+      "epoch": 0.4009456264775414,
+      "grad_norm": 5.074257850646973,
+      "learning_rate": 1.4976152946338673e-05,
+      "loss": 0.2613,
+      "step": 848
+    },
+    {
+      "epoch": 0.4014184397163121,
+      "grad_norm": 4.194014072418213,
+      "learning_rate": 1.4961826620247574e-05,
+      "loss": 0.221,
+      "step": 849
+    },
+    {
+      "epoch": 0.40189125295508277,
+      "grad_norm": 4.170263767242432,
+      "learning_rate": 1.4947486771448955e-05,
+      "loss": 0.2559,
+      "step": 850
+    },
+    {
+      "epoch": 0.40236406619385345,
+      "grad_norm": 5.984470844268799,
+      "learning_rate": 1.4933133439023903e-05,
+      "loss": 0.3017,
+      "step": 851
+    },
+    {
+      "epoch": 0.40283687943262414,
+      "grad_norm": 3.760006904602051,
+      "learning_rate": 1.4918766662090248e-05,
+      "loss": 0.197,
+      "step": 852
+    },
+    {
+      "epoch": 0.4033096926713948,
+      "grad_norm": 4.673705101013184,
+      "learning_rate": 1.4904386479802471e-05,
+      "loss": 0.2784,
+      "step": 853
+    },
+    {
+      "epoch": 0.4037825059101655,
+      "grad_norm": 8.06790828704834,
+      "learning_rate": 1.4889992931351578e-05,
+      "loss": 0.2297,
+      "step": 854
+    },
+    {
+      "epoch": 0.40425531914893614,
+      "grad_norm": 3.7845892906188965,
+      "learning_rate": 1.4875586055965014e-05,
+      "loss": 0.2101,
+      "step": 855
+    },
+    {
+      "epoch": 0.40472813238770683,
+      "grad_norm": 3.8769285678863525,
+      "learning_rate": 1.4861165892906532e-05,
+      "loss": 0.1641,
+      "step": 856
+    },
+    {
+      "epoch": 0.4052009456264775,
+      "grad_norm": 3.3952763080596924,
+      "learning_rate": 1.4846732481476105e-05,
+      "loss": 0.1768,
+      "step": 857
+    },
+    {
+      "epoch": 0.4056737588652482,
+      "grad_norm": 6.208580493927002,
+      "learning_rate": 1.4832285861009812e-05,
+      "loss": 0.1995,
+      "step": 858
+    },
+    {
+      "epoch": 0.4061465721040189,
+      "grad_norm": 6.829061508178711,
+      "learning_rate": 1.4817826070879732e-05,
+      "loss": 0.3429,
+      "step": 859
+    },
+    {
+      "epoch": 0.4066193853427896,
+      "grad_norm": 4.501508712768555,
+      "learning_rate": 1.4803353150493834e-05,
+      "loss": 0.185,
+      "step": 860
+    },
+    {
+      "epoch": 0.4066193853427896,
+      "eval_accuracy": 0.8614190687361419,
+      "eval_f1": 0.7203579418344519,
+      "eval_loss": 0.3079231381416321,
+      "eval_precision": 0.8298969072164949,
+      "eval_recall": 0.6363636363636364,
+      "eval_runtime": 48.0809,
+      "eval_samples_per_second": 5.74,
+      "eval_steps_per_second": 0.187,
+      "step": 860
+    },
+    {
+      "epoch": 0.40709219858156026,
+      "grad_norm": 7.904217720031738,
+      "learning_rate": 1.478886713929587e-05,
+      "loss": 0.2896,
+      "step": 861
+    },
+    {
+      "epoch": 0.40756501182033095,
+      "grad_norm": 5.54583740234375,
+      "learning_rate": 1.4774368076765272e-05,
+      "loss": 0.2334,
+      "step": 862
+    },
+    {
+      "epoch": 0.40803782505910163,
+      "grad_norm": 6.930192470550537,
+      "learning_rate": 1.4759856002417046e-05,
+      "loss": 0.233,
+      "step": 863
+    },
+    {
+      "epoch": 0.4085106382978723,
+      "grad_norm": 7.0124335289001465,
+      "learning_rate": 1.4745330955801644e-05,
+      "loss": 0.2996,
+      "step": 864
+    },
+    {
+      "epoch": 0.408983451536643,
+      "grad_norm": 7.793242454528809,
+      "learning_rate": 1.4730792976504892e-05,
+      "loss": 0.1966,
+      "step": 865
+    },
+    {
+      "epoch": 0.4094562647754137,
+      "grad_norm": 6.164129734039307,
+      "learning_rate": 1.4716242104147849e-05,
+      "loss": 0.2556,
+      "step": 866
+    },
+    {
+      "epoch": 0.4099290780141844,
+      "grad_norm": 5.059127330780029,
+      "learning_rate": 1.470167837838671e-05,
+      "loss": 0.1843,
+      "step": 867
+    },
+    {
+      "epoch": 0.41040189125295506,
+      "grad_norm": 7.891740798950195,
+      "learning_rate": 1.4687101838912713e-05,
+      "loss": 0.2942,
+      "step": 868
+    },
+    {
+      "epoch": 0.41087470449172575,
+      "grad_norm": 8.02418327331543,
+      "learning_rate": 1.467251252545201e-05,
+      "loss": 0.2544,
+      "step": 869
+    },
+    {
+      "epoch": 0.41134751773049644,
+      "grad_norm": 7.103123188018799,
+      "learning_rate": 1.4657910477765564e-05,
+      "loss": 0.2167,
+      "step": 870
+    },
+    {
+      "epoch": 0.4118203309692671,
+      "grad_norm": 6.880304336547852,
+      "learning_rate": 1.4643295735649044e-05,
+      "loss": 0.3523,
+      "step": 871
+    },
+    {
+      "epoch": 0.4122931442080378,
+      "grad_norm": 5.1397576332092285,
+      "learning_rate": 1.4628668338932721e-05,
+      "loss": 0.2939,
+      "step": 872
+    },
+    {
+      "epoch": 0.4127659574468085,
+      "grad_norm": 4.4353346824646,
+      "learning_rate": 1.461402832748135e-05,
+      "loss": 0.2673,
+      "step": 873
+    },
+    {
+      "epoch": 0.4132387706855792,
+      "grad_norm": 4.128648281097412,
+      "learning_rate": 1.4599375741194069e-05,
+      "loss": 0.1686,
+      "step": 874
+    },
+    {
+      "epoch": 0.41371158392434987,
+      "grad_norm": 5.588024616241455,
+      "learning_rate": 1.4584710620004284e-05,
+      "loss": 0.2412,
+      "step": 875
+    },
+    {
+      "epoch": 0.41418439716312055,
+      "grad_norm": 5.182522296905518,
+      "learning_rate": 1.4570033003879556e-05,
+      "loss": 0.2453,
+      "step": 876
+    },
+    {
+      "epoch": 0.41465721040189124,
+      "grad_norm": 4.976614475250244,
+      "learning_rate": 1.4555342932821517e-05,
+      "loss": 0.2493,
+      "step": 877
+    },
+    {
+      "epoch": 0.4151300236406619,
+      "grad_norm": 6.306532859802246,
+      "learning_rate": 1.4540640446865723e-05,
+      "loss": 0.2481,
+      "step": 878
+    },
+    {
+      "epoch": 0.4156028368794326,
+      "grad_norm": 4.86607027053833,
+      "learning_rate": 1.4525925586081584e-05,
+      "loss": 0.1933,
+      "step": 879
+    },
+    {
+      "epoch": 0.4160756501182033,
+      "grad_norm": 4.547597885131836,
+      "learning_rate": 1.4511198390572219e-05,
+      "loss": 0.189,
+      "step": 880
+    },
+    {
+      "epoch": 0.4160756501182033,
+      "eval_accuracy": 0.8503325942350333,
+      "eval_f1": 0.6666666666666666,
+      "eval_loss": 0.32481706142425537,
+      "eval_precision": 0.8881578947368421,
+      "eval_recall": 0.5335968379446641,
+      "eval_runtime": 48.5726,
+      "eval_samples_per_second": 5.682,
+      "eval_steps_per_second": 0.185,
+      "step": 880
+    },
+    {
+      "epoch": 0.416548463356974,
+      "grad_norm": 6.4413886070251465,
+      "learning_rate": 1.4496458900474371e-05,
+      "loss": 0.2284,
+      "step": 881
+    },
+    {
+      "epoch": 0.41702127659574467,
+      "grad_norm": 3.7408576011657715,
+      "learning_rate": 1.4481707155958291e-05,
+      "loss": 0.1963,
+      "step": 882
+    },
+    {
+      "epoch": 0.41749408983451536,
+      "grad_norm": 5.2726664543151855,
+      "learning_rate": 1.446694319722763e-05,
+      "loss": 0.2463,
+      "step": 883
+    },
+    {
+      "epoch": 0.41796690307328604,
+      "grad_norm": 4.192355155944824,
+      "learning_rate": 1.4452167064519316e-05,
+      "loss": 0.2065,
+      "step": 884
+    },
+    {
+      "epoch": 0.41843971631205673,
+      "grad_norm": 7.111584663391113,
+      "learning_rate": 1.4437378798103467e-05,
+      "loss": 0.3013,
+      "step": 885
+    },
+    {
+      "epoch": 0.4189125295508274,
+      "grad_norm": 7.128089427947998,
+      "learning_rate": 1.4422578438283263e-05,
+      "loss": 0.2477,
+      "step": 886
+    },
+    {
+      "epoch": 0.4193853427895981,
+      "grad_norm": 6.053483486175537,
+      "learning_rate": 1.4407766025394847e-05,
+      "loss": 0.2003,
+      "step": 887
+    },
+    {
+      "epoch": 0.4198581560283688,
+      "grad_norm": 6.564062118530273,
+      "learning_rate": 1.4392941599807206e-05,
+      "loss": 0.2808,
+      "step": 888
+    },
+    {
+      "epoch": 0.4203309692671395,
+      "grad_norm": 4.815242290496826,
+      "learning_rate": 1.4378105201922073e-05,
+      "loss": 0.1874,
+      "step": 889
+    },
+    {
+      "epoch": 0.42080378250591016,
+      "grad_norm": 6.174993991851807,
+      "learning_rate": 1.4363256872173801e-05,
+      "loss": 0.1918,
+      "step": 890
+    },
+    {
+      "epoch": 0.42127659574468085,
+      "grad_norm": 7.473939418792725,
+      "learning_rate": 1.4348396651029261e-05,
+      "loss": 0.2361,
+      "step": 891
+    },
+    {
+      "epoch": 0.42174940898345153,
+      "grad_norm": 8.417937278747559,
+      "learning_rate": 1.4333524578987748e-05,
+      "loss": 0.4323,
+      "step": 892
+    },
+    {
+      "epoch": 0.4222222222222222,
+      "grad_norm": 5.9651007652282715,
+      "learning_rate": 1.4318640696580834e-05,
+      "loss": 0.3207,
+      "step": 893
+    },
+    {
+      "epoch": 0.4226950354609929,
+      "grad_norm": 4.948203086853027,
+      "learning_rate": 1.4303745044372293e-05,
+      "loss": 0.2782,
+      "step": 894
+    },
+    {
+      "epoch": 0.4231678486997636,
+      "grad_norm": 9.178805351257324,
+      "learning_rate": 1.4288837662957969e-05,
+      "loss": 0.334,
+      "step": 895
+    },
+    {
+      "epoch": 0.4236406619385343,
+      "grad_norm": 7.537435054779053,
+      "learning_rate": 1.4273918592965674e-05,
+      "loss": 0.3307,
+      "step": 896
+    },
+    {
+      "epoch": 0.42411347517730497,
+      "grad_norm": 5.170799732208252,
+      "learning_rate": 1.4258987875055077e-05,
+      "loss": 0.2322,
+      "step": 897
+    },
+    {
+      "epoch": 0.42458628841607565,
+      "grad_norm": 7.296963214874268,
+      "learning_rate": 1.4244045549917587e-05,
+      "loss": 0.292,
+      "step": 898
+    },
+    {
+      "epoch": 0.42505910165484634,
+      "grad_norm": 5.456043720245361,
+      "learning_rate": 1.422909165827625e-05,
+      "loss": 0.2374,
+      "step": 899
+    },
+    {
+      "epoch": 0.425531914893617,
+      "grad_norm": 4.878541946411133,
+      "learning_rate": 1.421412624088564e-05,
+      "loss": 0.299,
+      "step": 900
+    },
+    {
+      "epoch": 0.425531914893617,
+      "eval_accuracy": 0.852549889135255,
+      "eval_f1": 0.6825775656324582,
+      "eval_loss": 0.31735506653785706,
+      "eval_precision": 0.8614457831325302,
+      "eval_recall": 0.5652173913043478,
+      "eval_runtime": 48.299,
+      "eval_samples_per_second": 5.714,
+      "eval_steps_per_second": 0.186,
+      "step": 900
+    },
+    {
+      "epoch": 0.4260047281323877,
+      "grad_norm": 5.303489685058594,
+      "learning_rate": 1.419914933853173e-05,
+      "loss": 0.2548,
+      "step": 901
+    },
+    {
+      "epoch": 0.4264775413711584,
+      "grad_norm": 5.416555404663086,
+      "learning_rate": 1.4184160992031806e-05,
+      "loss": 0.249,
+      "step": 902
+    },
+    {
+      "epoch": 0.4269503546099291,
+      "grad_norm": 5.52853536605835,
+      "learning_rate": 1.4169161242234335e-05,
+      "loss": 0.2135,
+      "step": 903
+    },
+    {
+      "epoch": 0.42742316784869977,
+      "grad_norm": 5.232771396636963,
+      "learning_rate": 1.4154150130018867e-05,
+      "loss": 0.2314,
+      "step": 904
+    },
+    {
+      "epoch": 0.42789598108747046,
+      "grad_norm": 5.249035835266113,
+      "learning_rate": 1.4139127696295913e-05,
+      "loss": 0.188,
+      "step": 905
+    },
+    {
+      "epoch": 0.42836879432624114,
+      "grad_norm": 8.240036010742188,
+      "learning_rate": 1.4124093982006846e-05,
+      "loss": 0.2678,
+      "step": 906
+    },
+    {
+      "epoch": 0.42884160756501183,
+      "grad_norm": 5.175498008728027,
+      "learning_rate": 1.410904902812378e-05,
+      "loss": 0.2565,
+      "step": 907
+    },
+    {
+      "epoch": 0.4293144208037825,
+      "grad_norm": 3.9959726333618164,
+      "learning_rate": 1.4093992875649456e-05,
+      "loss": 0.2413,
+      "step": 908
+    },
+    {
+      "epoch": 0.4297872340425532,
+      "grad_norm": 3.8025238513946533,
+      "learning_rate": 1.407892556561714e-05,
+      "loss": 0.1705,
+      "step": 909
+    },
+    {
+      "epoch": 0.4302600472813239,
+      "grad_norm": 5.208123683929443,
+      "learning_rate": 1.4063847139090507e-05,
+      "loss": 0.2492,
+      "step": 910
+    },
+    {
+      "epoch": 0.4307328605200946,
+      "grad_norm": 4.154348850250244,
+      "learning_rate": 1.4048757637163529e-05,
+      "loss": 0.2182,
+      "step": 911
+    },
+    {
+      "epoch": 0.43120567375886526,
+      "grad_norm": 5.2830939292907715,
+      "learning_rate": 1.4033657100960356e-05,
+      "loss": 0.2097,
+      "step": 912
+    },
+    {
+      "epoch": 0.43167848699763595,
+      "grad_norm": 3.8644347190856934,
+      "learning_rate": 1.4018545571635209e-05,
+      "loss": 0.214,
+      "step": 913
+    },
+    {
+      "epoch": 0.43215130023640663,
+      "grad_norm": 4.06352424621582,
+      "learning_rate": 1.4003423090372286e-05,
+      "loss": 0.2284,
+      "step": 914
+    },
+    {
+      "epoch": 0.4326241134751773,
+      "grad_norm": 6.2407355308532715,
+      "learning_rate": 1.3988289698385608e-05,
+      "loss": 0.2216,
+      "step": 915
+    },
+    {
+      "epoch": 0.433096926713948,
+      "grad_norm": 6.083385467529297,
+      "learning_rate": 1.3973145436918957e-05,
+      "loss": 0.268,
+      "step": 916
+    },
+    {
+      "epoch": 0.4335697399527187,
+      "grad_norm": 7.127196311950684,
+      "learning_rate": 1.3957990347245717e-05,
+      "loss": 0.3019,
+      "step": 917
+    },
+    {
+      "epoch": 0.4340425531914894,
+      "grad_norm": 4.245884418487549,
+      "learning_rate": 1.3942824470668796e-05,
+      "loss": 0.2615,
+      "step": 918
+    },
+    {
+      "epoch": 0.43451536643026006,
+      "grad_norm": 6.33418083190918,
+      "learning_rate": 1.3927647848520493e-05,
+      "loss": 0.2592,
+      "step": 919
+    },
+    {
+      "epoch": 0.43498817966903075,
+      "grad_norm": 6.671105861663818,
+      "learning_rate": 1.3912460522162396e-05,
+      "loss": 0.199,
+      "step": 920
+    },
+    {
+      "epoch": 0.43498817966903075,
+      "eval_accuracy": 0.8392461197339246,
+      "eval_f1": 0.6253229974160207,
+      "eval_loss": 0.33865952491760254,
+      "eval_precision": 0.9029850746268657,
+      "eval_recall": 0.4782608695652174,
+      "eval_runtime": 48.0155,
+      "eval_samples_per_second": 5.748,
+      "eval_steps_per_second": 0.187,
+      "step": 920
+    },
+    {
+      "epoch": 0.43546099290780144,
+      "grad_norm": 4.163972854614258,
+      "learning_rate": 1.3897262532985263e-05,
+      "loss": 0.184,
+      "step": 921
+    },
+    {
+      "epoch": 0.4359338061465721,
+      "grad_norm": 8.20583438873291,
+      "learning_rate": 1.3882053922408915e-05,
+      "loss": 0.288,
+      "step": 922
+    },
+    {
+      "epoch": 0.4364066193853428,
+      "grad_norm": 5.573141098022461,
+      "learning_rate": 1.3866834731882117e-05,
+      "loss": 0.1807,
+      "step": 923
+    },
+    {
+      "epoch": 0.4368794326241135,
+      "grad_norm": 9.644611358642578,
+      "learning_rate": 1.3851605002882472e-05,
+      "loss": 0.4276,
+      "step": 924
+    },
+    {
+      "epoch": 0.4373522458628842,
+      "grad_norm": 7.489835739135742,
+      "learning_rate": 1.38363647769163e-05,
+      "loss": 0.3496,
+      "step": 925
+    },
+    {
+      "epoch": 0.43782505910165487,
+      "grad_norm": 4.446575164794922,
+      "learning_rate": 1.3821114095518529e-05,
+      "loss": 0.1963,
+      "step": 926
+    },
+    {
+      "epoch": 0.43829787234042555,
+      "grad_norm": 4.232187271118164,
+      "learning_rate": 1.3805853000252584e-05,
+      "loss": 0.2081,
+      "step": 927
+    },
+    {
+      "epoch": 0.43877068557919624,
+      "grad_norm": 5.939121246337891,
+      "learning_rate": 1.379058153271027e-05,
+      "loss": 0.2361,
+      "step": 928
+    },
+    {
+      "epoch": 0.4392434988179669,
+      "grad_norm": 8.863687515258789,
+      "learning_rate": 1.3775299734511663e-05,
+      "loss": 0.341,
+      "step": 929
+    },
+    {
+      "epoch": 0.4397163120567376,
+      "grad_norm": 6.206582546234131,
+      "learning_rate": 1.3760007647304987e-05,
+      "loss": 0.23,
+      "step": 930
+    },
+    {
+      "epoch": 0.4401891252955083,
+      "grad_norm": 7.478794574737549,
+      "learning_rate": 1.3744705312766517e-05,
+      "loss": 0.241,
+      "step": 931
+    },
+    {
+      "epoch": 0.440661938534279,
+      "grad_norm": 9.208320617675781,
+      "learning_rate": 1.3729392772600445e-05,
+      "loss": 0.2495,
+      "step": 932
+    },
+    {
+      "epoch": 0.44113475177304967,
+      "grad_norm": 5.460510730743408,
+      "learning_rate": 1.3714070068538785e-05,
+      "loss": 0.1938,
+      "step": 933
+    },
+    {
+      "epoch": 0.44160756501182036,
+      "grad_norm": 6.056775093078613,
+      "learning_rate": 1.3698737242341245e-05,
+      "loss": 0.3128,
+      "step": 934
+    },
+    {
+      "epoch": 0.44208037825059104,
+      "grad_norm": 6.535298824310303,
+      "learning_rate": 1.3683394335795126e-05,
+      "loss": 0.2466,
+      "step": 935
+    },
+    {
+      "epoch": 0.4425531914893617,
+      "grad_norm": 6.023354530334473,
+      "learning_rate": 1.3668041390715195e-05,
+      "loss": 0.2496,
+      "step": 936
+    },
+    {
+      "epoch": 0.44302600472813236,
+      "grad_norm": 5.573044300079346,
+      "learning_rate": 1.365267844894358e-05,
+      "loss": 0.2324,
+      "step": 937
+    },
+    {
+      "epoch": 0.44349881796690305,
+      "grad_norm": 7.519514560699463,
+      "learning_rate": 1.3637305552349656e-05,
+      "loss": 0.3045,
+      "step": 938
+    },
+    {
+      "epoch": 0.44397163120567373,
+      "grad_norm": 3.165480852127075,
+      "learning_rate": 1.3621922742829923e-05,
+      "loss": 0.1767,
+      "step": 939
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 5.499924659729004,
+      "learning_rate": 1.3606530062307902e-05,
+      "loss": 0.2886,
+      "step": 940
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "eval_accuracy": 0.8381374722838137,
+      "eval_f1": 0.6294416243654822,
+      "eval_loss": 0.33129268884658813,
+      "eval_precision": 0.8794326241134752,
+      "eval_recall": 0.4901185770750988,
+      "eval_runtime": 48.7368,
+      "eval_samples_per_second": 5.663,
+      "eval_steps_per_second": 0.185,
+      "step": 940
+    },
+    {
+      "epoch": 0.4449172576832151,
+      "grad_norm": 4.935895919799805,
+      "learning_rate": 1.3591127552734018e-05,
+      "loss": 0.1841,
+      "step": 941
+    },
+    {
+      "epoch": 0.4453900709219858,
+      "grad_norm": 5.519673824310303,
+      "learning_rate": 1.3575715256085474e-05,
+      "loss": 0.2634,
+      "step": 942
+    },
+    {
+      "epoch": 0.4458628841607565,
+      "grad_norm": 5.731892108917236,
+      "learning_rate": 1.3560293214366152e-05,
+      "loss": 0.2458,
+      "step": 943
+    },
+    {
+      "epoch": 0.44633569739952716,
+      "grad_norm": 6.215859413146973,
+      "learning_rate": 1.3544861469606495e-05,
+      "loss": 0.3181,
+      "step": 944
+    },
+    {
+      "epoch": 0.44680851063829785,
+      "grad_norm": 5.27556848526001,
+      "learning_rate": 1.352942006386339e-05,
+      "loss": 0.2382,
+      "step": 945
+    },
+    {
+      "epoch": 0.44728132387706854,
+      "grad_norm": 5.7444963455200195,
+      "learning_rate": 1.351396903922005e-05,
+      "loss": 0.218,
+      "step": 946
+    },
+    {
+      "epoch": 0.4477541371158392,
+      "grad_norm": 6.035450458526611,
+      "learning_rate": 1.3498508437785897e-05,
+      "loss": 0.239,
+      "step": 947
+    },
+    {
+      "epoch": 0.4482269503546099,
+      "grad_norm": 3.9959163665771484,
+      "learning_rate": 1.3483038301696473e-05,
+      "loss": 0.1595,
+      "step": 948
+    },
+    {
+      "epoch": 0.4486997635933806,
+      "grad_norm": 3.841958999633789,
+      "learning_rate": 1.3467558673113286e-05,
+      "loss": 0.1573,
+      "step": 949
+    },
+    {
+      "epoch": 0.4491725768321513,
+      "grad_norm": 4.796578884124756,
+      "learning_rate": 1.345206959422372e-05,
+      "loss": 0.2151,
+      "step": 950
+    },
+    {
+      "epoch": 0.44964539007092197,
+      "grad_norm": 6.64060640335083,
+      "learning_rate": 1.3436571107240919e-05,
+      "loss": 0.2468,
+      "step": 951
+    },
+    {
+      "epoch": 0.45011820330969265,
+      "grad_norm": 4.541578769683838,
+      "learning_rate": 1.3421063254403657e-05,
+      "loss": 0.2113,
+      "step": 952
+    },
+    {
+      "epoch": 0.45059101654846334,
+      "grad_norm": 6.632504940032959,
+      "learning_rate": 1.3405546077976249e-05,
+      "loss": 0.2745,
+      "step": 953
+    },
+    {
+      "epoch": 0.451063829787234,
+      "grad_norm": 4.1523284912109375,
+      "learning_rate": 1.3390019620248403e-05,
+      "loss": 0.1837,
+      "step": 954
+    },
+    {
+      "epoch": 0.4515366430260047,
+      "grad_norm": 5.9142351150512695,
+      "learning_rate": 1.3374483923535136e-05,
+      "loss": 0.2732,
+      "step": 955
+    },
+    {
+      "epoch": 0.4520094562647754,
+      "grad_norm": 4.184595584869385,
+      "learning_rate": 1.335893903017663e-05,
+      "loss": 0.2433,
+      "step": 956
+    },
+    {
+      "epoch": 0.4524822695035461,
+      "grad_norm": 5.808665752410889,
+      "learning_rate": 1.334338498253815e-05,
+      "loss": 0.2497,
+      "step": 957
+    },
+    {
+      "epoch": 0.4529550827423168,
+      "grad_norm": 4.390594959259033,
+      "learning_rate": 1.332782182300989e-05,
+      "loss": 0.2517,
+      "step": 958
+    },
+    {
+      "epoch": 0.45342789598108746,
+      "grad_norm": 4.8440165519714355,
+      "learning_rate": 1.3312249594006893e-05,
+      "loss": 0.2194,
+      "step": 959
+    },
+    {
+      "epoch": 0.45390070921985815,
+      "grad_norm": 7.286986827850342,
+      "learning_rate": 1.3296668337968904e-05,
+      "loss": 0.2641,
+      "step": 960
+    },
+    {
+      "epoch": 0.45390070921985815,
+      "eval_accuracy": 0.8636363636363636,
+      "eval_f1": 0.7159353348729792,
+      "eval_loss": 0.3095405697822571,
+      "eval_precision": 0.8611111111111112,
+      "eval_recall": 0.6126482213438735,
+      "eval_runtime": 48.7277,
+      "eval_samples_per_second": 5.664,
+      "eval_steps_per_second": 0.185,
+      "step": 960
+    },
+    {
+      "epoch": 0.45437352245862883,
+      "grad_norm": 5.716742992401123,
+      "learning_rate": 1.3281078097360287e-05,
+      "loss": 0.1991,
+      "step": 961
+    },
+    {
+      "epoch": 0.4548463356973995,
+      "grad_norm": 6.432254791259766,
+      "learning_rate": 1.3265478914669878e-05,
+      "loss": 0.2438,
+      "step": 962
+    },
+    {
+      "epoch": 0.4553191489361702,
+      "grad_norm": 6.0999250411987305,
+      "learning_rate": 1.3249870832410886e-05,
+      "loss": 0.2145,
+      "step": 963
+    },
+    {
+      "epoch": 0.4557919621749409,
+      "grad_norm": 6.828171730041504,
+      "learning_rate": 1.323425389312079e-05,
+      "loss": 0.2465,
+      "step": 964
+    },
+    {
+      "epoch": 0.4562647754137116,
+      "grad_norm": 5.544858455657959,
+      "learning_rate": 1.3218628139361178e-05,
+      "loss": 0.1922,
+      "step": 965
+    },
+    {
+      "epoch": 0.45673758865248226,
+      "grad_norm": 5.448679447174072,
+      "learning_rate": 1.3202993613717688e-05,
+      "loss": 0.2383,
+      "step": 966
+    },
+    {
+      "epoch": 0.45721040189125295,
+      "grad_norm": 4.943000793457031,
+      "learning_rate": 1.3187350358799846e-05,
+      "loss": 0.2142,
+      "step": 967
+    },
+    {
+      "epoch": 0.45768321513002364,
+      "grad_norm": 4.455641746520996,
+      "learning_rate": 1.3171698417240984e-05,
+      "loss": 0.2669,
+      "step": 968
+    },
+    {
+      "epoch": 0.4581560283687943,
+      "grad_norm": 6.673210144042969,
+      "learning_rate": 1.3156037831698094e-05,
+      "loss": 0.2913,
+      "step": 969
+    },
+    {
+      "epoch": 0.458628841607565,
+      "grad_norm": 4.279630661010742,
+      "learning_rate": 1.3140368644851735e-05,
+      "loss": 0.1963,
+      "step": 970
+    },
+    {
+      "epoch": 0.4591016548463357,
+      "grad_norm": 6.872097969055176,
+      "learning_rate": 1.3124690899405903e-05,
+      "loss": 0.3115,
+      "step": 971
+    },
+    {
+      "epoch": 0.4595744680851064,
+      "grad_norm": 8.577292442321777,
+      "learning_rate": 1.3109004638087919e-05,
+      "loss": 0.2241,
+      "step": 972
+    },
+    {
+      "epoch": 0.46004728132387707,
+      "grad_norm": 6.13325834274292,
+      "learning_rate": 1.3093309903648316e-05,
+      "loss": 0.2008,
+      "step": 973
+    },
+    {
+      "epoch": 0.46052009456264775,
+      "grad_norm": 7.323633193969727,
+      "learning_rate": 1.3077606738860719e-05,
+      "loss": 0.2284,
+      "step": 974
+    },
+    {
+      "epoch": 0.46099290780141844,
+      "grad_norm": 5.5575642585754395,
+      "learning_rate": 1.3061895186521724e-05,
+      "loss": 0.2657,
+      "step": 975
+    },
+    {
+      "epoch": 0.4614657210401891,
+      "grad_norm": 6.12337064743042,
+      "learning_rate": 1.304617528945079e-05,
+      "loss": 0.2165,
+      "step": 976
+    },
+    {
+      "epoch": 0.4619385342789598,
+      "grad_norm": 6.96366024017334,
+      "learning_rate": 1.3030447090490117e-05,
+      "loss": 0.321,
+      "step": 977
+    },
+    {
+      "epoch": 0.4624113475177305,
+      "grad_norm": 6.608788967132568,
+      "learning_rate": 1.3014710632504533e-05,
+      "loss": 0.2851,
+      "step": 978
+    },
+    {
+      "epoch": 0.4628841607565012,
+      "grad_norm": 7.216396808624268,
+      "learning_rate": 1.299896595838137e-05,
+      "loss": 0.3212,
+      "step": 979
+    },
+    {
+      "epoch": 0.46335697399527187,
+      "grad_norm": 5.307373523712158,
+      "learning_rate": 1.2983213111030355e-05,
+      "loss": 0.2316,
+      "step": 980
+    },
+    {
+      "epoch": 0.46335697399527187,
+      "eval_accuracy": 0.8603104212860311,
+      "eval_f1": 0.71875,
+      "eval_loss": 0.3029595613479614,
+      "eval_precision": 0.8256410256410256,
+      "eval_recall": 0.6363636363636364,
+      "eval_runtime": 48.9893,
+      "eval_samples_per_second": 5.634,
+      "eval_steps_per_second": 0.184,
+      "step": 980
+    },
+    {
+      "epoch": 0.46382978723404256,
+      "grad_norm": 7.238687992095947,
+      "learning_rate": 1.2967452133383494e-05,
+      "loss": 0.3382,
+      "step": 981
+    },
+    {
+      "epoch": 0.46430260047281324,
+      "grad_norm": 5.481350421905518,
+      "learning_rate": 1.2951683068394941e-05,
+      "loss": 0.2074,
+      "step": 982
+    },
+    {
+      "epoch": 0.46477541371158393,
+      "grad_norm": 6.286655426025391,
+      "learning_rate": 1.2935905959040898e-05,
+      "loss": 0.2536,
+      "step": 983
+    },
+    {
+      "epoch": 0.4652482269503546,
+      "grad_norm": 7.73200798034668,
+      "learning_rate": 1.2920120848319483e-05,
+      "loss": 0.2815,
+      "step": 984
+    },
+    {
+      "epoch": 0.4657210401891253,
+      "grad_norm": 5.538710117340088,
+      "learning_rate": 1.2904327779250638e-05,
+      "loss": 0.2503,
+      "step": 985
+    },
+    {
+      "epoch": 0.466193853427896,
+      "grad_norm": 8.157992362976074,
+      "learning_rate": 1.2888526794875975e-05,
+      "loss": 0.2675,
+      "step": 986
+    },
+    {
+      "epoch": 0.4666666666666667,
+      "grad_norm": 4.824194431304932,
+      "learning_rate": 1.2872717938258688e-05,
+      "loss": 0.2185,
+      "step": 987
+    },
+    {
+      "epoch": 0.46713947990543736,
+      "grad_norm": 3.831620931625366,
+      "learning_rate": 1.285690125248342e-05,
+      "loss": 0.2046,
+      "step": 988
+    },
+    {
+      "epoch": 0.46761229314420805,
+      "grad_norm": 5.231266498565674,
+      "learning_rate": 1.2841076780656155e-05,
+      "loss": 0.2472,
+      "step": 989
+    },
+    {
+      "epoch": 0.46808510638297873,
+      "grad_norm": 6.9529194831848145,
+      "learning_rate": 1.28252445659041e-05,
+      "loss": 0.2855,
+      "step": 990
+    },
+    {
+      "epoch": 0.4685579196217494,
+      "grad_norm": 6.860682964324951,
+      "learning_rate": 1.2809404651375554e-05,
+      "loss": 0.2526,
+      "step": 991
+    },
+    {
+      "epoch": 0.4690307328605201,
+      "grad_norm": 6.531607627868652,
+      "learning_rate": 1.2793557080239819e-05,
+      "loss": 0.266,
+      "step": 992
+    },
+    {
+      "epoch": 0.4695035460992908,
+      "grad_norm": 4.6222758293151855,
+      "learning_rate": 1.2777701895687034e-05,
+      "loss": 0.2346,
+      "step": 993
+    },
+    {
+      "epoch": 0.4699763593380615,
+      "grad_norm": 5.676296710968018,
+      "learning_rate": 1.2761839140928119e-05,
+      "loss": 0.3332,
+      "step": 994
+    },
+    {
+      "epoch": 0.47044917257683216,
+      "grad_norm": 8.922492027282715,
+      "learning_rate": 1.2745968859194604e-05,
+      "loss": 0.2986,
+      "step": 995
+    },
+    {
+      "epoch": 0.47092198581560285,
+      "grad_norm": 3.270632266998291,
+      "learning_rate": 1.2730091093738545e-05,
+      "loss": 0.122,
+      "step": 996
+    },
+    {
+      "epoch": 0.47139479905437354,
+      "grad_norm": 4.889394283294678,
+      "learning_rate": 1.2714205887832388e-05,
+      "loss": 0.2348,
+      "step": 997
+    },
+    {
+      "epoch": 0.4718676122931442,
+      "grad_norm": 6.802956581115723,
+      "learning_rate": 1.2698313284768852e-05,
+      "loss": 0.2074,
+      "step": 998
+    },
+    {
+      "epoch": 0.4723404255319149,
+      "grad_norm": 5.15386962890625,
+      "learning_rate": 1.2682413327860827e-05,
+      "loss": 0.2129,
+      "step": 999
+    },
+    {
+      "epoch": 0.4728132387706856,
+      "grad_norm": 4.577718257904053,
+      "learning_rate": 1.2666506060441237e-05,
+      "loss": 0.2116,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4728132387706856,
+      "eval_accuracy": 0.8580931263858093,
+      "eval_f1": 0.7009345794392523,
+      "eval_loss": 0.3230363726615906,
+      "eval_precision": 0.8571428571428571,
+      "eval_recall": 0.5928853754940712,
+      "eval_runtime": 48.9793,
+      "eval_samples_per_second": 5.635,
+      "eval_steps_per_second": 0.184,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4732860520094563,
+      "grad_norm": 6.338871479034424,
+      "learning_rate": 1.2650591525862934e-05,
+      "loss": 0.2665,
+      "step": 1001
+    },
+    {
+      "epoch": 0.47375886524822697,
+      "grad_norm": 6.019141674041748,
+      "learning_rate": 1.2634669767498573e-05,
+      "loss": 0.2079,
+      "step": 1002
+    },
+    {
+      "epoch": 0.47423167848699765,
+      "grad_norm": 4.347167015075684,
+      "learning_rate": 1.2618740828740494e-05,
+      "loss": 0.1908,
+      "step": 1003
+    },
+    {
+      "epoch": 0.47470449172576834,
+      "grad_norm": 6.210932731628418,
+      "learning_rate": 1.2602804753000611e-05,
+      "loss": 0.1847,
+      "step": 1004
+    },
+    {
+      "epoch": 0.475177304964539,
+      "grad_norm": 5.755384922027588,
+      "learning_rate": 1.2586861583710289e-05,
+      "loss": 0.2592,
+      "step": 1005
+    },
+    {
+      "epoch": 0.4756501182033097,
+      "grad_norm": 7.43326473236084,
+      "learning_rate": 1.2570911364320218e-05,
+      "loss": 0.2216,
+      "step": 1006
+    },
+    {
+      "epoch": 0.4761229314420804,
+      "grad_norm": 6.543978214263916,
+      "learning_rate": 1.2554954138300307e-05,
+      "loss": 0.2118,
+      "step": 1007
+    },
+    {
+      "epoch": 0.4765957446808511,
+      "grad_norm": 4.375254154205322,
+      "learning_rate": 1.2538989949139567e-05,
+      "loss": 0.1908,
+      "step": 1008
+    },
+    {
+      "epoch": 0.47706855791962177,
+      "grad_norm": 6.078047275543213,
+      "learning_rate": 1.2523018840345972e-05,
+      "loss": 0.2619,
+      "step": 1009
+    },
+    {
+      "epoch": 0.47754137115839246,
+      "grad_norm": 4.737030506134033,
+      "learning_rate": 1.2507040855446371e-05,
+      "loss": 0.1731,
+      "step": 1010
+    },
+    {
+      "epoch": 0.47801418439716314,
+      "grad_norm": 5.818294525146484,
+      "learning_rate": 1.2491056037986334e-05,
+      "loss": 0.2438,
+      "step": 1011
+    },
+    {
+      "epoch": 0.47848699763593383,
+      "grad_norm": 6.881172180175781,
+      "learning_rate": 1.2475064431530066e-05,
+      "loss": 0.2313,
+      "step": 1012
+    },
+    {
+      "epoch": 0.4789598108747045,
+      "grad_norm": 5.162444591522217,
+      "learning_rate": 1.245906607966027e-05,
+      "loss": 0.2579,
+      "step": 1013
+    },
+    {
+      "epoch": 0.4794326241134752,
+      "grad_norm": 8.821483612060547,
+      "learning_rate": 1.2443061025978034e-05,
+      "loss": 0.3318,
+      "step": 1014
+    },
+    {
+      "epoch": 0.4799054373522459,
+      "grad_norm": 7.8657684326171875,
+      "learning_rate": 1.2427049314102708e-05,
+      "loss": 0.2404,
+      "step": 1015
+    },
+    {
+      "epoch": 0.4803782505910166,
+      "grad_norm": 5.313066482543945,
+      "learning_rate": 1.2411030987671791e-05,
+      "loss": 0.1851,
+      "step": 1016
+    },
+    {
+      "epoch": 0.4808510638297872,
+      "grad_norm": 6.415999412536621,
+      "learning_rate": 1.2395006090340804e-05,
+      "loss": 0.2219,
+      "step": 1017
+    },
+    {
+      "epoch": 0.4813238770685579,
+      "grad_norm": 6.840671539306641,
+      "learning_rate": 1.2378974665783184e-05,
+      "loss": 0.3221,
+      "step": 1018
+    },
+    {
+      "epoch": 0.4817966903073286,
+      "grad_norm": 7.59630823135376,
+      "learning_rate": 1.236293675769015e-05,
+      "loss": 0.3566,
+      "step": 1019
+    },
+    {
+      "epoch": 0.48226950354609927,
+      "grad_norm": 5.026065349578857,
+      "learning_rate": 1.2346892409770594e-05,
+      "loss": 0.2134,
+      "step": 1020
+    },
+    {
+      "epoch": 0.48226950354609927,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7327586206896551,
+      "eval_loss": 0.3039931058883667,
+      "eval_precision": 0.8056872037914692,
+      "eval_recall": 0.6719367588932806,
+      "eval_runtime": 49.0789,
+      "eval_samples_per_second": 5.624,
+      "eval_steps_per_second": 0.183,
+      "step": 1020
+    },
+    {
+      "epoch": 0.48274231678486995,
+      "grad_norm": 5.331277370452881,
+      "learning_rate": 1.2330841665750954e-05,
+      "loss": 0.2262,
+      "step": 1021
+    },
+    {
+      "epoch": 0.48321513002364064,
+      "grad_norm": 8.84730052947998,
+      "learning_rate": 1.2314784569375114e-05,
+      "loss": 0.3046,
+      "step": 1022
+    },
+    {
+      "epoch": 0.4836879432624113,
+      "grad_norm": 6.711941719055176,
+      "learning_rate": 1.2298721164404249e-05,
+      "loss": 0.3397,
+      "step": 1023
+    },
+    {
+      "epoch": 0.484160756501182,
+      "grad_norm": 5.642043113708496,
+      "learning_rate": 1.2282651494616742e-05,
+      "loss": 0.2586,
+      "step": 1024
+    },
+    {
+      "epoch": 0.4846335697399527,
+      "grad_norm": 5.215063571929932,
+      "learning_rate": 1.226657560380805e-05,
+      "loss": 0.2402,
+      "step": 1025
+    },
+    {
+      "epoch": 0.4851063829787234,
+      "grad_norm": 7.698766231536865,
+      "learning_rate": 1.2250493535790574e-05,
+      "loss": 0.304,
+      "step": 1026
+    },
+    {
+      "epoch": 0.48557919621749407,
+      "grad_norm": 5.472048282623291,
+      "learning_rate": 1.223440533439356e-05,
+      "loss": 0.229,
+      "step": 1027
+    },
+    {
+      "epoch": 0.48605200945626476,
+      "grad_norm": 4.721035480499268,
+      "learning_rate": 1.2218311043462964e-05,
+      "loss": 0.1878,
+      "step": 1028
+    },
+    {
+      "epoch": 0.48652482269503544,
+      "grad_norm": 5.494324207305908,
+      "learning_rate": 1.2202210706861346e-05,
+      "loss": 0.2146,
+      "step": 1029
+    },
+    {
+      "epoch": 0.48699763593380613,
+      "grad_norm": 5.183449745178223,
+      "learning_rate": 1.218610436846773e-05,
+      "loss": 0.2175,
+      "step": 1030
+    },
+    {
+      "epoch": 0.4874704491725768,
+      "grad_norm": 8.610817909240723,
+      "learning_rate": 1.216999207217751e-05,
+      "loss": 0.3331,
+      "step": 1031
+    },
+    {
+      "epoch": 0.4879432624113475,
+      "grad_norm": 6.789135456085205,
+      "learning_rate": 1.21538738619023e-05,
+      "loss": 0.3608,
+      "step": 1032
+    },
+    {
+      "epoch": 0.4884160756501182,
+      "grad_norm": 3.8762876987457275,
+      "learning_rate": 1.2137749781569857e-05,
+      "loss": 0.2002,
+      "step": 1033
+    },
+    {
+      "epoch": 0.4888888888888889,
+      "grad_norm": 5.960103511810303,
+      "learning_rate": 1.2121619875123914e-05,
+      "loss": 0.2497,
+      "step": 1034
+    },
+    {
+      "epoch": 0.48936170212765956,
+      "grad_norm": 7.955074787139893,
+      "learning_rate": 1.2105484186524088e-05,
+      "loss": 0.3593,
+      "step": 1035
+    },
+    {
+      "epoch": 0.48983451536643025,
+      "grad_norm": 4.501315116882324,
+      "learning_rate": 1.2089342759745761e-05,
+      "loss": 0.2412,
+      "step": 1036
+    },
+    {
+      "epoch": 0.49030732860520093,
+      "grad_norm": 4.577963352203369,
+      "learning_rate": 1.2073195638779944e-05,
+      "loss": 0.2328,
+      "step": 1037
+    },
+    {
+      "epoch": 0.4907801418439716,
+      "grad_norm": 18.703994750976562,
+      "learning_rate": 1.2057042867633178e-05,
+      "loss": 0.2931,
+      "step": 1038
+    },
+    {
+      "epoch": 0.4912529550827423,
+      "grad_norm": 4.856638431549072,
+      "learning_rate": 1.2040884490327391e-05,
+      "loss": 0.2607,
+      "step": 1039
+    },
+    {
+      "epoch": 0.491725768321513,
+      "grad_norm": 5.463403701782227,
+      "learning_rate": 1.2024720550899798e-05,
+      "loss": 0.2139,
+      "step": 1040
+    },
+    {
+      "epoch": 0.491725768321513,
+      "eval_accuracy": 0.844789356984479,
+      "eval_f1": 0.6446700507614214,
+      "eval_loss": 0.3279857337474823,
+      "eval_precision": 0.900709219858156,
+      "eval_recall": 0.5019762845849802,
+      "eval_runtime": 48.0379,
+      "eval_samples_per_second": 5.745,
+      "eval_steps_per_second": 0.187,
+      "step": 1040
+    },
+    {
+      "epoch": 0.4921985815602837,
+      "grad_norm": 6.326286315917969,
+      "learning_rate": 1.2008551093402763e-05,
+      "loss": 0.264,
+      "step": 1041
+    },
+    {
+      "epoch": 0.49267139479905436,
+      "grad_norm": 5.787569046020508,
+      "learning_rate": 1.1992376161903705e-05,
+      "loss": 0.228,
+      "step": 1042
+    },
+    {
+      "epoch": 0.49314420803782505,
+      "grad_norm": 6.124124526977539,
+      "learning_rate": 1.1976195800484945e-05,
+      "loss": 0.1668,
+      "step": 1043
+    },
+    {
+      "epoch": 0.49361702127659574,
+      "grad_norm": 5.056814670562744,
+      "learning_rate": 1.1960010053243613e-05,
+      "loss": 0.1894,
+      "step": 1044
+    },
+    {
+      "epoch": 0.4940898345153664,
+      "grad_norm": 7.828837871551514,
+      "learning_rate": 1.194381896429151e-05,
+      "loss": 0.3602,
+      "step": 1045
+    },
+    {
+      "epoch": 0.4945626477541371,
+      "grad_norm": 4.001469135284424,
+      "learning_rate": 1.1927622577755003e-05,
+      "loss": 0.1379,
+      "step": 1046
+    },
+    {
+      "epoch": 0.4950354609929078,
+      "grad_norm": 7.635477542877197,
+      "learning_rate": 1.191142093777489e-05,
+      "loss": 0.244,
+      "step": 1047
+    },
+    {
+      "epoch": 0.4955082742316785,
+      "grad_norm": 7.2881364822387695,
+      "learning_rate": 1.1895214088506284e-05,
+      "loss": 0.3006,
+      "step": 1048
+    },
+    {
+      "epoch": 0.49598108747044917,
+      "grad_norm": 5.0428619384765625,
+      "learning_rate": 1.1879002074118512e-05,
+      "loss": 0.2994,
+      "step": 1049
+    },
+    {
+      "epoch": 0.49645390070921985,
+      "grad_norm": 5.892991542816162,
+      "learning_rate": 1.1862784938794951e-05,
+      "loss": 0.229,
+      "step": 1050
+    },
+    {
+      "epoch": 0.49692671394799054,
+      "grad_norm": 6.7257304191589355,
+      "learning_rate": 1.184656272673296e-05,
+      "loss": 0.3032,
+      "step": 1051
+    },
+    {
+      "epoch": 0.4973995271867612,
+      "grad_norm": 6.495220184326172,
+      "learning_rate": 1.1830335482143718e-05,
+      "loss": 0.2918,
+      "step": 1052
+    },
+    {
+      "epoch": 0.4978723404255319,
+      "grad_norm": 4.424355983734131,
+      "learning_rate": 1.1814103249252124e-05,
+      "loss": 0.2097,
+      "step": 1053
+    },
+    {
+      "epoch": 0.4983451536643026,
+      "grad_norm": 4.175996780395508,
+      "learning_rate": 1.1797866072296676e-05,
+      "loss": 0.1882,
+      "step": 1054
+    },
+    {
+      "epoch": 0.4988179669030733,
+      "grad_norm": 5.1931328773498535,
+      "learning_rate": 1.1781623995529341e-05,
+      "loss": 0.2526,
+      "step": 1055
+    },
+    {
+      "epoch": 0.49929078014184397,
+      "grad_norm": 5.245265960693359,
+      "learning_rate": 1.1765377063215436e-05,
+      "loss": 0.204,
+      "step": 1056
+    },
+    {
+      "epoch": 0.49976359338061466,
+      "grad_norm": 4.931206226348877,
+      "learning_rate": 1.1749125319633523e-05,
+      "loss": 0.2239,
+      "step": 1057
+    },
+    {
+      "epoch": 0.5002364066193853,
+      "grad_norm": 4.764687538146973,
+      "learning_rate": 1.1732868809075266e-05,
+      "loss": 0.2257,
+      "step": 1058
+    },
+    {
+      "epoch": 0.500709219858156,
+      "grad_norm": 6.108907222747803,
+      "learning_rate": 1.1716607575845327e-05,
+      "loss": 0.271,
+      "step": 1059
+    },
+    {
+      "epoch": 0.5011820330969267,
+      "grad_norm": 5.150505065917969,
+      "learning_rate": 1.1700341664261233e-05,
+      "loss": 0.1949,
+      "step": 1060
+    },
+    {
+      "epoch": 0.5011820330969267,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7089201877934272,
+      "eval_loss": 0.31160375475883484,
+      "eval_precision": 0.8728323699421965,
+      "eval_recall": 0.5968379446640316,
+      "eval_runtime": 48.5889,
+      "eval_samples_per_second": 5.68,
+      "eval_steps_per_second": 0.185,
+      "step": 1060
+    },
+    {
+      "epoch": 0.5016548463356973,
+      "grad_norm": 5.739040851593018,
+      "learning_rate": 1.1684071118653262e-05,
+      "loss": 0.238,
+      "step": 1061
+    },
+    {
+      "epoch": 0.502127659574468,
+      "grad_norm": 4.2889862060546875,
+      "learning_rate": 1.1667795983364332e-05,
+      "loss": 0.1881,
+      "step": 1062
+    },
+    {
+      "epoch": 0.5026004728132387,
+      "grad_norm": 6.57220458984375,
+      "learning_rate": 1.1651516302749854e-05,
+      "loss": 0.3294,
+      "step": 1063
+    },
+    {
+      "epoch": 0.5030732860520094,
+      "grad_norm": 6.348330497741699,
+      "learning_rate": 1.1635232121177637e-05,
+      "loss": 0.2125,
+      "step": 1064
+    },
+    {
+      "epoch": 0.5035460992907801,
+      "grad_norm": 4.948648452758789,
+      "learning_rate": 1.1618943483027749e-05,
+      "loss": 0.2409,
+      "step": 1065
+    },
+    {
+      "epoch": 0.5040189125295508,
+      "grad_norm": 5.066147804260254,
+      "learning_rate": 1.1602650432692417e-05,
+      "loss": 0.317,
+      "step": 1066
+    },
+    {
+      "epoch": 0.5044917257683215,
+      "grad_norm": 5.450560092926025,
+      "learning_rate": 1.1586353014575875e-05,
+      "loss": 0.2262,
+      "step": 1067
+    },
+    {
+      "epoch": 0.5049645390070922,
+      "grad_norm": 4.708855152130127,
+      "learning_rate": 1.1570051273094277e-05,
+      "loss": 0.2362,
+      "step": 1068
+    },
+    {
+      "epoch": 0.5054373522458628,
+      "grad_norm": 10.365764617919922,
+      "learning_rate": 1.1553745252675541e-05,
+      "loss": 0.3124,
+      "step": 1069
+    },
+    {
+      "epoch": 0.5059101654846335,
+      "grad_norm": 3.386537790298462,
+      "learning_rate": 1.153743499775927e-05,
+      "loss": 0.121,
+      "step": 1070
+    },
+    {
+      "epoch": 0.5063829787234042,
+      "grad_norm": 6.828433990478516,
+      "learning_rate": 1.152112055279659e-05,
+      "loss": 0.2726,
+      "step": 1071
+    },
+    {
+      "epoch": 0.5068557919621749,
+      "grad_norm": 5.744606971740723,
+      "learning_rate": 1.1504801962250055e-05,
+      "loss": 0.2195,
+      "step": 1072
+    },
+    {
+      "epoch": 0.5073286052009456,
+      "grad_norm": 4.951056480407715,
+      "learning_rate": 1.1488479270593507e-05,
+      "loss": 0.2528,
+      "step": 1073
+    },
+    {
+      "epoch": 0.5078014184397163,
+      "grad_norm": 5.981780529022217,
+      "learning_rate": 1.1472152522311974e-05,
+      "loss": 0.2478,
+      "step": 1074
+    },
+    {
+      "epoch": 0.508274231678487,
+      "grad_norm": 3.798823118209839,
+      "learning_rate": 1.1455821761901544e-05,
+      "loss": 0.2075,
+      "step": 1075
+    },
+    {
+      "epoch": 0.5087470449172576,
+      "grad_norm": 6.254341125488281,
+      "learning_rate": 1.1439487033869226e-05,
+      "loss": 0.301,
+      "step": 1076
+    },
+    {
+      "epoch": 0.5092198581560283,
+      "grad_norm": 6.011257648468018,
+      "learning_rate": 1.1423148382732854e-05,
+      "loss": 0.257,
+      "step": 1077
+    },
+    {
+      "epoch": 0.509692671394799,
+      "grad_norm": 5.492880344390869,
+      "learning_rate": 1.1406805853020944e-05,
+      "loss": 0.2582,
+      "step": 1078
+    },
+    {
+      "epoch": 0.5101654846335697,
+      "grad_norm": 4.94982385635376,
+      "learning_rate": 1.139045948927259e-05,
+      "loss": 0.2408,
+      "step": 1079
+    },
+    {
+      "epoch": 0.5106382978723404,
+      "grad_norm": 4.847739219665527,
+      "learning_rate": 1.1374109336037331e-05,
+      "loss": 0.2255,
+      "step": 1080
+    },
+    {
+      "epoch": 0.5106382978723404,
+      "eval_accuracy": 0.8592017738359202,
+      "eval_f1": 0.7011764705882353,
+      "eval_loss": 0.31947237253189087,
+      "eval_precision": 0.8662790697674418,
+      "eval_recall": 0.5889328063241107,
+      "eval_runtime": 48.5383,
+      "eval_samples_per_second": 5.686,
+      "eval_steps_per_second": 0.185,
+      "step": 1080
+    },
+    {
+      "epoch": 0.5111111111111111,
+      "grad_norm": 5.068375587463379,
+      "learning_rate": 1.135775543787504e-05,
+      "loss": 0.2263,
+      "step": 1081
+    },
+    {
+      "epoch": 0.5115839243498818,
+      "grad_norm": 4.222960472106934,
+      "learning_rate": 1.1341397839355786e-05,
+      "loss": 0.2395,
+      "step": 1082
+    },
+    {
+      "epoch": 0.5120567375886524,
+      "grad_norm": 4.976379871368408,
+      "learning_rate": 1.1325036585059732e-05,
+      "loss": 0.2215,
+      "step": 1083
+    },
+    {
+      "epoch": 0.5125295508274231,
+      "grad_norm": 5.738669395446777,
+      "learning_rate": 1.1308671719576997e-05,
+      "loss": 0.22,
+      "step": 1084
+    },
+    {
+      "epoch": 0.5130023640661938,
+      "grad_norm": 7.325643539428711,
+      "learning_rate": 1.1292303287507546e-05,
+      "loss": 0.2674,
+      "step": 1085
+    },
+    {
+      "epoch": 0.5134751773049645,
+      "grad_norm": 7.6121320724487305,
+      "learning_rate": 1.1275931333461065e-05,
+      "loss": 0.3137,
+      "step": 1086
+    },
+    {
+      "epoch": 0.5139479905437352,
+      "grad_norm": 4.593527793884277,
+      "learning_rate": 1.1259555902056838e-05,
+      "loss": 0.2287,
+      "step": 1087
+    },
+    {
+      "epoch": 0.5144208037825059,
+      "grad_norm": 5.258584499359131,
+      "learning_rate": 1.1243177037923623e-05,
+      "loss": 0.2492,
+      "step": 1088
+    },
+    {
+      "epoch": 0.5148936170212766,
+      "grad_norm": 6.010392189025879,
+      "learning_rate": 1.1226794785699531e-05,
+      "loss": 0.2251,
+      "step": 1089
+    },
+    {
+      "epoch": 0.5153664302600472,
+      "grad_norm": 7.567986488342285,
+      "learning_rate": 1.121040919003192e-05,
+      "loss": 0.3043,
+      "step": 1090
+    },
+    {
+      "epoch": 0.5158392434988179,
+      "grad_norm": 5.762569427490234,
+      "learning_rate": 1.1194020295577246e-05,
+      "loss": 0.2001,
+      "step": 1091
+    },
+    {
+      "epoch": 0.5163120567375886,
+      "grad_norm": 8.211880683898926,
+      "learning_rate": 1.1177628147000961e-05,
+      "loss": 0.2575,
+      "step": 1092
+    },
+    {
+      "epoch": 0.5167848699763593,
+      "grad_norm": 4.067295074462891,
+      "learning_rate": 1.1161232788977385e-05,
+      "loss": 0.1754,
+      "step": 1093
+    },
+    {
+      "epoch": 0.51725768321513,
+      "grad_norm": 5.805228233337402,
+      "learning_rate": 1.1144834266189585e-05,
+      "loss": 0.2111,
+      "step": 1094
+    },
+    {
+      "epoch": 0.5177304964539007,
+      "grad_norm": 4.621476650238037,
+      "learning_rate": 1.1128432623329256e-05,
+      "loss": 0.1363,
+      "step": 1095
+    },
+    {
+      "epoch": 0.5182033096926714,
+      "grad_norm": 7.750375747680664,
+      "learning_rate": 1.111202790509659e-05,
+      "loss": 0.2732,
+      "step": 1096
+    },
+    {
+      "epoch": 0.518676122931442,
+      "grad_norm": 4.444814682006836,
+      "learning_rate": 1.1095620156200166e-05,
+      "loss": 0.2107,
+      "step": 1097
+    },
+    {
+      "epoch": 0.5191489361702127,
+      "grad_norm": 12.939567565917969,
+      "learning_rate": 1.1079209421356816e-05,
+      "loss": 0.2695,
+      "step": 1098
+    },
+    {
+      "epoch": 0.5196217494089834,
+      "grad_norm": 7.756330966949463,
+      "learning_rate": 1.1062795745291519e-05,
+      "loss": 0.2247,
+      "step": 1099
+    },
+    {
+      "epoch": 0.5200945626477541,
+      "grad_norm": 6.402958393096924,
+      "learning_rate": 1.1046379172737264e-05,
+      "loss": 0.2452,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5200945626477541,
+      "eval_accuracy": 0.8425720620842572,
+      "eval_f1": 0.6395939086294417,
+      "eval_loss": 0.34635570645332336,
+      "eval_precision": 0.8936170212765957,
+      "eval_recall": 0.4980237154150198,
+      "eval_runtime": 47.8847,
+      "eval_samples_per_second": 5.764,
+      "eval_steps_per_second": 0.188,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5205673758865248,
+      "grad_norm": 7.110340118408203,
+      "learning_rate": 1.1029959748434935e-05,
+      "loss": 0.2357,
+      "step": 1101
+    },
+    {
+      "epoch": 0.5210401891252955,
+      "grad_norm": 6.949429512023926,
+      "learning_rate": 1.1013537517133184e-05,
+      "loss": 0.3259,
+      "step": 1102
+    },
+    {
+      "epoch": 0.5215130023640662,
+      "grad_norm": 5.027368068695068,
+      "learning_rate": 1.0997112523588322e-05,
+      "loss": 0.1423,
+      "step": 1103
+    },
+    {
+      "epoch": 0.5219858156028369,
+      "grad_norm": 6.545793056488037,
+      "learning_rate": 1.0980684812564183e-05,
+      "loss": 0.1863,
+      "step": 1104
+    },
+    {
+      "epoch": 0.5224586288416075,
+      "grad_norm": 5.906529903411865,
+      "learning_rate": 1.0964254428832007e-05,
+      "loss": 0.2981,
+      "step": 1105
+    },
+    {
+      "epoch": 0.5229314420803782,
+      "grad_norm": 4.966193675994873,
+      "learning_rate": 1.0947821417170313e-05,
+      "loss": 0.2378,
+      "step": 1106
+    },
+    {
+      "epoch": 0.5234042553191489,
+      "grad_norm": 5.323748588562012,
+      "learning_rate": 1.0931385822364796e-05,
+      "loss": 0.2183,
+      "step": 1107
+    },
+    {
+      "epoch": 0.5238770685579196,
+      "grad_norm": 7.892477035522461,
+      "learning_rate": 1.0914947689208171e-05,
+      "loss": 0.3732,
+      "step": 1108
+    },
+    {
+      "epoch": 0.5243498817966903,
+      "grad_norm": 4.786356449127197,
+      "learning_rate": 1.0898507062500095e-05,
+      "loss": 0.2391,
+      "step": 1109
+    },
+    {
+      "epoch": 0.524822695035461,
+      "grad_norm": 6.325803279876709,
+      "learning_rate": 1.0882063987047e-05,
+      "loss": 0.2397,
+      "step": 1110
+    },
+    {
+      "epoch": 0.5252955082742317,
+      "grad_norm": 6.990598201751709,
+      "learning_rate": 1.0865618507662001e-05,
+      "loss": 0.2782,
+      "step": 1111
+    },
+    {
+      "epoch": 0.5257683215130023,
+      "grad_norm": 8.039189338684082,
+      "learning_rate": 1.0849170669164764e-05,
+      "loss": 0.262,
+      "step": 1112
+    },
+    {
+      "epoch": 0.526241134751773,
+      "grad_norm": 6.111503601074219,
+      "learning_rate": 1.0832720516381382e-05,
+      "loss": 0.2201,
+      "step": 1113
+    },
+    {
+      "epoch": 0.5267139479905437,
+      "grad_norm": 6.7883124351501465,
+      "learning_rate": 1.0816268094144257e-05,
+      "loss": 0.2615,
+      "step": 1114
+    },
+    {
+      "epoch": 0.5271867612293144,
+      "grad_norm": 6.257448673248291,
+      "learning_rate": 1.0799813447291979e-05,
+      "loss": 0.232,
+      "step": 1115
+    },
+    {
+      "epoch": 0.5276595744680851,
+      "grad_norm": 8.060059547424316,
+      "learning_rate": 1.0783356620669195e-05,
+      "loss": 0.2726,
+      "step": 1116
+    },
+    {
+      "epoch": 0.5281323877068558,
+      "grad_norm": 4.382721424102783,
+      "learning_rate": 1.0766897659126491e-05,
+      "loss": 0.2114,
+      "step": 1117
+    },
+    {
+      "epoch": 0.5286052009456265,
+      "grad_norm": 5.4973859786987305,
+      "learning_rate": 1.0750436607520287e-05,
+      "loss": 0.2706,
+      "step": 1118
+    },
+    {
+      "epoch": 0.5290780141843971,
+      "grad_norm": 8.051422119140625,
+      "learning_rate": 1.0733973510712682e-05,
+      "loss": 0.2354,
+      "step": 1119
+    },
+    {
+      "epoch": 0.5295508274231678,
+      "grad_norm": 3.799506664276123,
+      "learning_rate": 1.0717508413571349e-05,
+      "loss": 0.2038,
+      "step": 1120
+    },
+    {
+      "epoch": 0.5295508274231678,
+      "eval_accuracy": 0.8569844789356984,
+      "eval_f1": 0.6921241050119332,
+      "eval_loss": 0.31673863530158997,
+      "eval_precision": 0.8734939759036144,
+      "eval_recall": 0.5731225296442688,
+      "eval_runtime": 48.009,
+      "eval_samples_per_second": 5.749,
+      "eval_steps_per_second": 0.187,
+      "step": 1120
+    },
+    {
+      "epoch": 0.5300236406619385,
+      "grad_norm": 7.479004383087158,
+      "learning_rate": 1.0701041360969428e-05,
+      "loss": 0.2895,
+      "step": 1121
+    },
+    {
+      "epoch": 0.5304964539007092,
+      "grad_norm": 4.519740104675293,
+      "learning_rate": 1.068457239778537e-05,
+      "loss": 0.2641,
+      "step": 1122
+    },
+    {
+      "epoch": 0.5309692671394799,
+      "grad_norm": 5.979281425476074,
+      "learning_rate": 1.0668101568902852e-05,
+      "loss": 0.2297,
+      "step": 1123
+    },
+    {
+      "epoch": 0.5314420803782506,
+      "grad_norm": 4.343296051025391,
+      "learning_rate": 1.0651628919210615e-05,
+      "loss": 0.1811,
+      "step": 1124
+    },
+    {
+      "epoch": 0.5319148936170213,
+      "grad_norm": 5.795645713806152,
+      "learning_rate": 1.063515449360238e-05,
+      "loss": 0.2214,
+      "step": 1125
+    },
+    {
+      "epoch": 0.532387706855792,
+      "grad_norm": 4.395986080169678,
+      "learning_rate": 1.0618678336976695e-05,
+      "loss": 0.2373,
+      "step": 1126
+    },
+    {
+      "epoch": 0.5328605200945626,
+      "grad_norm": 4.035050392150879,
+      "learning_rate": 1.0602200494236837e-05,
+      "loss": 0.185,
+      "step": 1127
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 5.2432780265808105,
+      "learning_rate": 1.0585721010290668e-05,
+      "loss": 0.201,
+      "step": 1128
+    },
+    {
+      "epoch": 0.533806146572104,
+      "grad_norm": 6.4242777824401855,
+      "learning_rate": 1.0569239930050532e-05,
+      "loss": 0.2681,
+      "step": 1129
+    },
+    {
+      "epoch": 0.5342789598108747,
+      "grad_norm": 6.040828227996826,
+      "learning_rate": 1.0552757298433113e-05,
+      "loss": 0.1799,
+      "step": 1130
+    },
+    {
+      "epoch": 0.5347517730496454,
+      "grad_norm": 8.409934043884277,
+      "learning_rate": 1.0536273160359335e-05,
+      "loss": 0.3153,
+      "step": 1131
+    },
+    {
+      "epoch": 0.5352245862884161,
+      "grad_norm": 5.49470853805542,
+      "learning_rate": 1.0519787560754215e-05,
+      "loss": 0.2344,
+      "step": 1132
+    },
+    {
+      "epoch": 0.5356973995271868,
+      "grad_norm": 4.8799967765808105,
+      "learning_rate": 1.050330054454677e-05,
+      "loss": 0.1693,
+      "step": 1133
+    },
+    {
+      "epoch": 0.5361702127659574,
+      "grad_norm": 7.70962381362915,
+      "learning_rate": 1.0486812156669859e-05,
+      "loss": 0.1999,
+      "step": 1134
+    },
+    {
+      "epoch": 0.5366430260047281,
+      "grad_norm": 6.684405326843262,
+      "learning_rate": 1.0470322442060089e-05,
+      "loss": 0.1878,
+      "step": 1135
+    },
+    {
+      "epoch": 0.5371158392434988,
+      "grad_norm": 7.799801826477051,
+      "learning_rate": 1.045383144565768e-05,
+      "loss": 0.2943,
+      "step": 1136
+    },
+    {
+      "epoch": 0.5375886524822695,
+      "grad_norm": 9.22608470916748,
+      "learning_rate": 1.043733921240635e-05,
+      "loss": 0.3668,
+      "step": 1137
+    },
+    {
+      "epoch": 0.5380614657210402,
+      "grad_norm": 5.817656517028809,
+      "learning_rate": 1.0420845787253189e-05,
+      "loss": 0.2449,
+      "step": 1138
+    },
+    {
+      "epoch": 0.5385342789598109,
+      "grad_norm": 9.814664840698242,
+      "learning_rate": 1.0404351215148523e-05,
+      "loss": 0.3372,
+      "step": 1139
+    },
+    {
+      "epoch": 0.5390070921985816,
+      "grad_norm": 4.843449592590332,
+      "learning_rate": 1.0387855541045815e-05,
+      "loss": 0.2496,
+      "step": 1140
+    },
+    {
+      "epoch": 0.5390070921985816,
+      "eval_accuracy": 0.8592017738359202,
+      "eval_f1": 0.6968973747016707,
+      "eval_loss": 0.31810781359672546,
+      "eval_precision": 0.8795180722891566,
+      "eval_recall": 0.5770750988142292,
+      "eval_runtime": 49.0428,
+      "eval_samples_per_second": 5.628,
+      "eval_steps_per_second": 0.184,
+      "step": 1140
+    },
+    {
+      "epoch": 0.5394799054373522,
+      "grad_norm": 6.5451765060424805,
+      "learning_rate": 1.0371358809901529e-05,
+      "loss": 0.266,
+      "step": 1141
+    },
+    {
+      "epoch": 0.5399527186761229,
+      "grad_norm": 4.096044540405273,
+      "learning_rate": 1.0354861066675008e-05,
+      "loss": 0.1938,
+      "step": 1142
+    },
+    {
+      "epoch": 0.5404255319148936,
+      "grad_norm": 5.981978416442871,
+      "learning_rate": 1.0338362356328355e-05,
+      "loss": 0.2721,
+      "step": 1143
+    },
+    {
+      "epoch": 0.5408983451536643,
+      "grad_norm": 4.459275245666504,
+      "learning_rate": 1.0321862723826311e-05,
+      "loss": 0.2085,
+      "step": 1144
+    },
+    {
+      "epoch": 0.541371158392435,
+      "grad_norm": 4.302639484405518,
+      "learning_rate": 1.0305362214136122e-05,
+      "loss": 0.2267,
+      "step": 1145
+    },
+    {
+      "epoch": 0.5418439716312057,
+      "grad_norm": 8.027523040771484,
+      "learning_rate": 1.028886087222743e-05,
+      "loss": 0.3361,
+      "step": 1146
+    },
+    {
+      "epoch": 0.5423167848699764,
+      "grad_norm": 6.380166530609131,
+      "learning_rate": 1.0272358743072152e-05,
+      "loss": 0.2274,
+      "step": 1147
+    },
+    {
+      "epoch": 0.542789598108747,
+      "grad_norm": 7.479015827178955,
+      "learning_rate": 1.0255855871644338e-05,
+      "loss": 0.3562,
+      "step": 1148
+    },
+    {
+      "epoch": 0.5432624113475177,
+      "grad_norm": 4.3820295333862305,
+      "learning_rate": 1.0239352302920067e-05,
+      "loss": 0.1709,
+      "step": 1149
+    },
+    {
+      "epoch": 0.5437352245862884,
+      "grad_norm": 6.630291938781738,
+      "learning_rate": 1.0222848081877316e-05,
+      "loss": 0.2615,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5442080378250591,
+      "grad_norm": 5.88150691986084,
+      "learning_rate": 1.0206343253495848e-05,
+      "loss": 0.2611,
+      "step": 1151
+    },
+    {
+      "epoch": 0.5446808510638298,
+      "grad_norm": 6.246159553527832,
+      "learning_rate": 1.0189837862757068e-05,
+      "loss": 0.2713,
+      "step": 1152
+    },
+    {
+      "epoch": 0.5451536643026005,
+      "grad_norm": 6.391038417816162,
+      "learning_rate": 1.0173331954643926e-05,
+      "loss": 0.1998,
+      "step": 1153
+    },
+    {
+      "epoch": 0.5456264775413712,
+      "grad_norm": 5.693717002868652,
+      "learning_rate": 1.0156825574140769e-05,
+      "loss": 0.2219,
+      "step": 1154
+    },
+    {
+      "epoch": 0.5460992907801419,
+      "grad_norm": 4.549108982086182,
+      "learning_rate": 1.0140318766233247e-05,
+      "loss": 0.239,
+      "step": 1155
+    },
+    {
+      "epoch": 0.5465721040189125,
+      "grad_norm": 7.300600528717041,
+      "learning_rate": 1.0123811575908166e-05,
+      "loss": 0.3028,
+      "step": 1156
+    },
+    {
+      "epoch": 0.5470449172576832,
+      "grad_norm": 4.2444071769714355,
+      "learning_rate": 1.0107304048153372e-05,
+      "loss": 0.1432,
+      "step": 1157
+    },
+    {
+      "epoch": 0.5475177304964539,
+      "grad_norm": 5.09889030456543,
+      "learning_rate": 1.0090796227957633e-05,
+      "loss": 0.2697,
+      "step": 1158
+    },
+    {
+      "epoch": 0.5479905437352246,
+      "grad_norm": 5.837294101715088,
+      "learning_rate": 1.0074288160310514e-05,
+      "loss": 0.2371,
+      "step": 1159
+    },
+    {
+      "epoch": 0.5484633569739953,
+      "grad_norm": 6.027414798736572,
+      "learning_rate": 1.0057779890202259e-05,
+      "loss": 0.2864,
+      "step": 1160
+    },
+    {
+      "epoch": 0.5484633569739953,
+      "eval_accuracy": 0.8514412416851441,
+      "eval_f1": 0.6731707317073171,
+      "eval_loss": 0.32011911273002625,
+      "eval_precision": 0.8789808917197452,
+      "eval_recall": 0.5454545454545454,
+      "eval_runtime": 48.0201,
+      "eval_samples_per_second": 5.748,
+      "eval_steps_per_second": 0.187,
+      "step": 1160
+    },
+    {
+      "epoch": 0.548936170212766,
+      "grad_norm": 3.9856438636779785,
+      "learning_rate": 1.0041271462623658e-05,
+      "loss": 0.2113,
+      "step": 1161
+    },
+    {
+      "epoch": 0.5494089834515367,
+      "grad_norm": 4.562050819396973,
+      "learning_rate": 1.0024762922565933e-05,
+      "loss": 0.2173,
+      "step": 1162
+    },
+    {
+      "epoch": 0.5498817966903073,
+      "grad_norm": 4.3589558601379395,
+      "learning_rate": 1.0008254315020607e-05,
+      "loss": 0.185,
+      "step": 1163
+    },
+    {
+      "epoch": 0.550354609929078,
+      "grad_norm": 5.3740620613098145,
+      "learning_rate": 9.991745684979394e-06,
+      "loss": 0.2472,
+      "step": 1164
+    },
+    {
+      "epoch": 0.5508274231678487,
+      "grad_norm": 5.081512451171875,
+      "learning_rate": 9.97523707743407e-06,
+      "loss": 0.219,
+      "step": 1165
+    },
+    {
+      "epoch": 0.5513002364066194,
+      "grad_norm": 4.1024346351623535,
+      "learning_rate": 9.958728537376345e-06,
+      "loss": 0.1668,
+      "step": 1166
+    },
+    {
+      "epoch": 0.5517730496453901,
+      "grad_norm": 3.816474199295044,
+      "learning_rate": 9.942220109797746e-06,
+      "loss": 0.2022,
+      "step": 1167
+    },
+    {
+      "epoch": 0.5522458628841608,
+      "grad_norm": 5.035168647766113,
+      "learning_rate": 9.925711839689487e-06,
+      "loss": 0.2188,
+      "step": 1168
+    },
+    {
+      "epoch": 0.5527186761229315,
+      "grad_norm": 5.621501922607422,
+      "learning_rate": 9.909203772042369e-06,
+      "loss": 0.2612,
+      "step": 1169
+    },
+    {
+      "epoch": 0.5531914893617021,
+      "grad_norm": 3.7916884422302246,
+      "learning_rate": 9.892695951846631e-06,
+      "loss": 0.1537,
+      "step": 1170
+    },
+    {
+      "epoch": 0.5536643026004728,
+      "grad_norm": 6.460813045501709,
+      "learning_rate": 9.876188424091837e-06,
+      "loss": 0.2258,
+      "step": 1171
+    },
+    {
+      "epoch": 0.5541371158392435,
+      "grad_norm": 5.5038604736328125,
+      "learning_rate": 9.859681233766756e-06,
+      "loss": 0.1853,
+      "step": 1172
+    },
+    {
+      "epoch": 0.5546099290780142,
+      "grad_norm": 9.788790702819824,
+      "learning_rate": 9.843174425859231e-06,
+      "loss": 0.3384,
+      "step": 1173
+    },
+    {
+      "epoch": 0.5550827423167849,
+      "grad_norm": 8.492478370666504,
+      "learning_rate": 9.826668045356078e-06,
+      "loss": 0.2906,
+      "step": 1174
+    },
+    {
+      "epoch": 0.5555555555555556,
+      "grad_norm": 5.153669357299805,
+      "learning_rate": 9.810162137242935e-06,
+      "loss": 0.215,
+      "step": 1175
+    },
+    {
+      "epoch": 0.5560283687943263,
+      "grad_norm": 6.732087135314941,
+      "learning_rate": 9.793656746504155e-06,
+      "loss": 0.2921,
+      "step": 1176
+    },
+    {
+      "epoch": 0.556501182033097,
+      "grad_norm": 4.027410507202148,
+      "learning_rate": 9.777151918122684e-06,
+      "loss": 0.1983,
+      "step": 1177
+    },
+    {
+      "epoch": 0.5569739952718676,
+      "grad_norm": 5.1011061668396,
+      "learning_rate": 9.760647697079936e-06,
+      "loss": 0.2095,
+      "step": 1178
+    },
+    {
+      "epoch": 0.5574468085106383,
+      "grad_norm": 5.0621538162231445,
+      "learning_rate": 9.744144128355665e-06,
+      "loss": 0.1637,
+      "step": 1179
+    },
+    {
+      "epoch": 0.557919621749409,
+      "grad_norm": 6.033471584320068,
+      "learning_rate": 9.72764125692785e-06,
+      "loss": 0.2342,
+      "step": 1180
+    },
+    {
+      "epoch": 0.557919621749409,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7252252252252253,
+      "eval_loss": 0.3139636218547821,
+      "eval_precision": 0.8429319371727748,
+      "eval_recall": 0.6363636363636364,
+      "eval_runtime": 48.235,
+      "eval_samples_per_second": 5.722,
+      "eval_steps_per_second": 0.187,
+      "step": 1180
+    },
+    {
+      "epoch": 0.5583924349881797,
+      "grad_norm": 6.157944679260254,
+      "learning_rate": 9.711139127772568e-06,
+      "loss": 0.2866,
+      "step": 1181
+    },
+    {
+      "epoch": 0.5588652482269504,
+      "grad_norm": 5.42804479598999,
+      "learning_rate": 9.69463778586388e-06,
+      "loss": 0.1551,
+      "step": 1182
+    },
+    {
+      "epoch": 0.5593380614657211,
+      "grad_norm": 5.639116287231445,
+      "learning_rate": 9.678137276173692e-06,
+      "loss": 0.1956,
+      "step": 1183
+    },
+    {
+      "epoch": 0.5598108747044918,
+      "grad_norm": 6.913265705108643,
+      "learning_rate": 9.661637643671647e-06,
+      "loss": 0.2281,
+      "step": 1184
+    },
+    {
+      "epoch": 0.5602836879432624,
+      "grad_norm": 5.190281867980957,
+      "learning_rate": 9.645138933324994e-06,
+      "loss": 0.1959,
+      "step": 1185
+    },
+    {
+      "epoch": 0.5607565011820331,
+      "grad_norm": 4.637751579284668,
+      "learning_rate": 9.628641190098473e-06,
+      "loss": 0.2019,
+      "step": 1186
+    },
+    {
+      "epoch": 0.5612293144208038,
+      "grad_norm": 4.302716255187988,
+      "learning_rate": 9.612144458954189e-06,
+      "loss": 0.1699,
+      "step": 1187
+    },
+    {
+      "epoch": 0.5617021276595745,
+      "grad_norm": 6.4986395835876465,
+      "learning_rate": 9.59564878485148e-06,
+      "loss": 0.2111,
+      "step": 1188
+    },
+    {
+      "epoch": 0.5621749408983452,
+      "grad_norm": 6.432104587554932,
+      "learning_rate": 9.579154212746815e-06,
+      "loss": 0.199,
+      "step": 1189
+    },
+    {
+      "epoch": 0.5626477541371159,
+      "grad_norm": 5.450148105621338,
+      "learning_rate": 9.56266078759365e-06,
+      "loss": 0.2335,
+      "step": 1190
+    },
+    {
+      "epoch": 0.5631205673758866,
+      "grad_norm": 5.353931427001953,
+      "learning_rate": 9.546168554342323e-06,
+      "loss": 0.1919,
+      "step": 1191
+    },
+    {
+      "epoch": 0.5635933806146572,
+      "grad_norm": 5.608835220336914,
+      "learning_rate": 9.529677557939916e-06,
+      "loss": 0.2217,
+      "step": 1192
+    },
+    {
+      "epoch": 0.5640661938534279,
+      "grad_norm": 7.61819314956665,
+      "learning_rate": 9.513187843330146e-06,
+      "loss": 0.2864,
+      "step": 1193
+    },
+    {
+      "epoch": 0.5645390070921986,
+      "grad_norm": 7.839981555938721,
+      "learning_rate": 9.496699455453232e-06,
+      "loss": 0.2923,
+      "step": 1194
+    },
+    {
+      "epoch": 0.5650118203309693,
+      "grad_norm": 4.617547035217285,
+      "learning_rate": 9.480212439245785e-06,
+      "loss": 0.1815,
+      "step": 1195
+    },
+    {
+      "epoch": 0.56548463356974,
+      "grad_norm": 5.598609924316406,
+      "learning_rate": 9.463726839640667e-06,
+      "loss": 0.238,
+      "step": 1196
+    },
+    {
+      "epoch": 0.5659574468085107,
+      "grad_norm": 5.537100791931152,
+      "learning_rate": 9.44724270156689e-06,
+      "loss": 0.1757,
+      "step": 1197
+    },
+    {
+      "epoch": 0.5664302600472814,
+      "grad_norm": 4.509025573730469,
+      "learning_rate": 9.430760069949473e-06,
+      "loss": 0.2335,
+      "step": 1198
+    },
+    {
+      "epoch": 0.566903073286052,
+      "grad_norm": 6.317657470703125,
+      "learning_rate": 9.414278989709334e-06,
+      "loss": 0.1729,
+      "step": 1199
+    },
+    {
+      "epoch": 0.5673758865248227,
+      "grad_norm": 4.740533351898193,
+      "learning_rate": 9.397799505763167e-06,
+      "loss": 0.1366,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5673758865248227,
+      "eval_accuracy": 0.8680709534368071,
+      "eval_f1": 0.7361419068736141,
+      "eval_loss": 0.30103373527526855,
+      "eval_precision": 0.8383838383838383,
+      "eval_recall": 0.6561264822134387,
+      "eval_runtime": 47.2489,
+      "eval_samples_per_second": 5.841,
+      "eval_steps_per_second": 0.19,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5678486997635934,
+      "grad_norm": 6.263066291809082,
+      "learning_rate": 9.381321663023308e-06,
+      "loss": 0.2202,
+      "step": 1201
+    },
+    {
+      "epoch": 0.5683215130023641,
+      "grad_norm": 6.9543070793151855,
+      "learning_rate": 9.364845506397625e-06,
+      "loss": 0.1869,
+      "step": 1202
+    },
+    {
+      "epoch": 0.5687943262411348,
+      "grad_norm": 4.8995513916015625,
+      "learning_rate": 9.348371080789387e-06,
+      "loss": 0.2227,
+      "step": 1203
+    },
+    {
+      "epoch": 0.5692671394799055,
+      "grad_norm": 3.913970470428467,
+      "learning_rate": 9.331898431097153e-06,
+      "loss": 0.1941,
+      "step": 1204
+    },
+    {
+      "epoch": 0.5697399527186762,
+      "grad_norm": 4.263607025146484,
+      "learning_rate": 9.315427602214631e-06,
+      "loss": 0.2026,
+      "step": 1205
+    },
+    {
+      "epoch": 0.5702127659574469,
+      "grad_norm": 4.99878454208374,
+      "learning_rate": 9.298958639030577e-06,
+      "loss": 0.1717,
+      "step": 1206
+    },
+    {
+      "epoch": 0.5706855791962175,
+      "grad_norm": 4.628468036651611,
+      "learning_rate": 9.282491586428655e-06,
+      "loss": 0.1845,
+      "step": 1207
+    },
+    {
+      "epoch": 0.5711583924349882,
+      "grad_norm": 6.5551533699035645,
+      "learning_rate": 9.266026489287323e-06,
+      "loss": 0.2557,
+      "step": 1208
+    },
+    {
+      "epoch": 0.5716312056737589,
+      "grad_norm": 5.44743013381958,
+      "learning_rate": 9.249563392479715e-06,
+      "loss": 0.2666,
+      "step": 1209
+    },
+    {
+      "epoch": 0.5721040189125296,
+      "grad_norm": 5.58568000793457,
+      "learning_rate": 9.23310234087351e-06,
+      "loss": 0.2257,
+      "step": 1210
+    },
+    {
+      "epoch": 0.5725768321513003,
+      "grad_norm": 7.3130574226379395,
+      "learning_rate": 9.21664337933081e-06,
+      "loss": 0.3227,
+      "step": 1211
+    },
+    {
+      "epoch": 0.573049645390071,
+      "grad_norm": 6.498375415802002,
+      "learning_rate": 9.200186552708023e-06,
+      "loss": 0.2122,
+      "step": 1212
+    },
+    {
+      "epoch": 0.5735224586288417,
+      "grad_norm": 5.47324275970459,
+      "learning_rate": 9.183731905855746e-06,
+      "loss": 0.243,
+      "step": 1213
+    },
+    {
+      "epoch": 0.5739952718676123,
+      "grad_norm": 5.8507866859436035,
+      "learning_rate": 9.167279483618623e-06,
+      "loss": 0.1633,
+      "step": 1214
+    },
+    {
+      "epoch": 0.574468085106383,
+      "grad_norm": 4.788534641265869,
+      "learning_rate": 9.150829330835241e-06,
+      "loss": 0.182,
+      "step": 1215
+    },
+    {
+      "epoch": 0.5749408983451537,
+      "grad_norm": 7.402541160583496,
+      "learning_rate": 9.134381492338e-06,
+      "loss": 0.3063,
+      "step": 1216
+    },
+    {
+      "epoch": 0.5754137115839244,
+      "grad_norm": 4.93443489074707,
+      "learning_rate": 9.117936012953002e-06,
+      "loss": 0.208,
+      "step": 1217
+    },
+    {
+      "epoch": 0.5758865248226951,
+      "grad_norm": 8.154093742370605,
+      "learning_rate": 9.101492937499909e-06,
+      "loss": 0.2389,
+      "step": 1218
+    },
+    {
+      "epoch": 0.5763593380614658,
+      "grad_norm": 7.1925368309021,
+      "learning_rate": 9.08505231079183e-06,
+      "loss": 0.3203,
+      "step": 1219
+    },
+    {
+      "epoch": 0.5768321513002365,
+      "grad_norm": 7.500906467437744,
+      "learning_rate": 9.068614177635211e-06,
+      "loss": 0.2301,
+      "step": 1220
+    },
+    {
+      "epoch": 0.5768321513002365,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7142857142857143,
+      "eval_loss": 0.30109164118766785,
+      "eval_precision": 0.856353591160221,
+      "eval_recall": 0.6126482213438735,
+      "eval_runtime": 47.0544,
+      "eval_samples_per_second": 5.866,
+      "eval_steps_per_second": 0.191,
+      "step": 1220
+    },
+    {
+      "epoch": 0.577304964539007,
+      "grad_norm": 5.455466270446777,
+      "learning_rate": 9.052178582829687e-06,
+      "loss": 0.2111,
+      "step": 1221
+    },
+    {
+      "epoch": 0.5777777777777777,
+      "grad_norm": 4.1028618812561035,
+      "learning_rate": 9.035745571167996e-06,
+      "loss": 0.206,
+      "step": 1222
+    },
+    {
+      "epoch": 0.5782505910165484,
+      "grad_norm": 4.987546443939209,
+      "learning_rate": 9.01931518743582e-06,
+      "loss": 0.2396,
+      "step": 1223
+    },
+    {
+      "epoch": 0.5787234042553191,
+      "grad_norm": 9.012516975402832,
+      "learning_rate": 9.002887476411681e-06,
+      "loss": 0.3507,
+      "step": 1224
+    },
+    {
+      "epoch": 0.5791962174940898,
+      "grad_norm": 6.798236846923828,
+      "learning_rate": 8.986462482866817e-06,
+      "loss": 0.2712,
+      "step": 1225
+    },
+    {
+      "epoch": 0.5796690307328605,
+      "grad_norm": 5.508780479431152,
+      "learning_rate": 8.970040251565068e-06,
+      "loss": 0.2785,
+      "step": 1226
+    },
+    {
+      "epoch": 0.5801418439716312,
+      "grad_norm": 3.585559606552124,
+      "learning_rate": 8.953620827262739e-06,
+      "loss": 0.1821,
+      "step": 1227
+    },
+    {
+      "epoch": 0.5806146572104018,
+      "grad_norm": 4.184317588806152,
+      "learning_rate": 8.937204254708486e-06,
+      "loss": 0.2308,
+      "step": 1228
+    },
+    {
+      "epoch": 0.5810874704491725,
+      "grad_norm": 4.5350518226623535,
+      "learning_rate": 8.920790578643186e-06,
+      "loss": 0.2078,
+      "step": 1229
+    },
+    {
+      "epoch": 0.5815602836879432,
+      "grad_norm": 5.0740742683410645,
+      "learning_rate": 8.904379843799838e-06,
+      "loss": 0.2313,
+      "step": 1230
+    },
+    {
+      "epoch": 0.5820330969267139,
+      "grad_norm": 6.0607147216796875,
+      "learning_rate": 8.887972094903412e-06,
+      "loss": 0.2334,
+      "step": 1231
+    },
+    {
+      "epoch": 0.5825059101654846,
+      "grad_norm": 5.125598430633545,
+      "learning_rate": 8.871567376670747e-06,
+      "loss": 0.2739,
+      "step": 1232
+    },
+    {
+      "epoch": 0.5829787234042553,
+      "grad_norm": 7.169873237609863,
+      "learning_rate": 8.85516573381042e-06,
+      "loss": 0.2739,
+      "step": 1233
+    },
+    {
+      "epoch": 0.583451536643026,
+      "grad_norm": 6.202165603637695,
+      "learning_rate": 8.838767211022616e-06,
+      "loss": 0.3156,
+      "step": 1234
+    },
+    {
+      "epoch": 0.5839243498817966,
+      "grad_norm": 5.986494541168213,
+      "learning_rate": 8.82237185299904e-06,
+      "loss": 0.1882,
+      "step": 1235
+    },
+    {
+      "epoch": 0.5843971631205673,
+      "grad_norm": 4.3089470863342285,
+      "learning_rate": 8.805979704422758e-06,
+      "loss": 0.1905,
+      "step": 1236
+    },
+    {
+      "epoch": 0.584869976359338,
+      "grad_norm": 4.750925540924072,
+      "learning_rate": 8.789590809968082e-06,
+      "loss": 0.2272,
+      "step": 1237
+    },
+    {
+      "epoch": 0.5853427895981087,
+      "grad_norm": 4.529053688049316,
+      "learning_rate": 8.773205214300469e-06,
+      "loss": 0.2583,
+      "step": 1238
+    },
+    {
+      "epoch": 0.5858156028368794,
+      "grad_norm": 5.315147876739502,
+      "learning_rate": 8.756822962076382e-06,
+      "loss": 0.2463,
+      "step": 1239
+    },
+    {
+      "epoch": 0.5862884160756501,
+      "grad_norm": 5.5175909996032715,
+      "learning_rate": 8.740444097943166e-06,
+      "loss": 0.2873,
+      "step": 1240
+    },
+    {
+      "epoch": 0.5862884160756501,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7142857142857143,
+      "eval_loss": 0.30490389466285706,
+      "eval_precision": 0.856353591160221,
+      "eval_recall": 0.6126482213438735,
+      "eval_runtime": 47.7126,
+      "eval_samples_per_second": 5.785,
+      "eval_steps_per_second": 0.189,
+      "step": 1240
+    },
+    {
+      "epoch": 0.5867612293144208,
+      "grad_norm": 5.6907572746276855,
+      "learning_rate": 8.724068666538938e-06,
+      "loss": 0.2456,
+      "step": 1241
+    },
+    {
+      "epoch": 0.5872340425531914,
+      "grad_norm": 5.550398826599121,
+      "learning_rate": 8.707696712492455e-06,
+      "loss": 0.2122,
+      "step": 1242
+    },
+    {
+      "epoch": 0.5877068557919621,
+      "grad_norm": 5.554051876068115,
+      "learning_rate": 8.691328280423004e-06,
+      "loss": 0.1672,
+      "step": 1243
+    },
+    {
+      "epoch": 0.5881796690307328,
+      "grad_norm": 5.504934787750244,
+      "learning_rate": 8.674963414940271e-06,
+      "loss": 0.1918,
+      "step": 1244
+    },
+    {
+      "epoch": 0.5886524822695035,
+      "grad_norm": 6.041418075561523,
+      "learning_rate": 8.658602160644216e-06,
+      "loss": 0.2718,
+      "step": 1245
+    },
+    {
+      "epoch": 0.5891252955082742,
+      "grad_norm": 6.632382392883301,
+      "learning_rate": 8.642244562124962e-06,
+      "loss": 0.316,
+      "step": 1246
+    },
+    {
+      "epoch": 0.5895981087470449,
+      "grad_norm": 4.766592502593994,
+      "learning_rate": 8.625890663962669e-06,
+      "loss": 0.2298,
+      "step": 1247
+    },
+    {
+      "epoch": 0.5900709219858156,
+      "grad_norm": 5.895883560180664,
+      "learning_rate": 8.609540510727412e-06,
+      "loss": 0.2365,
+      "step": 1248
+    },
+    {
+      "epoch": 0.5905437352245863,
+      "grad_norm": 5.390053749084473,
+      "learning_rate": 8.593194146979059e-06,
+      "loss": 0.1977,
+      "step": 1249
+    },
+    {
+      "epoch": 0.5910165484633569,
+      "grad_norm": 7.53000020980835,
+      "learning_rate": 8.576851617267151e-06,
+      "loss": 0.2733,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5914893617021276,
+      "grad_norm": 5.143542766571045,
+      "learning_rate": 8.560512966130775e-06,
+      "loss": 0.2405,
+      "step": 1251
+    },
+    {
+      "epoch": 0.5919621749408983,
+      "grad_norm": 5.577294826507568,
+      "learning_rate": 8.544178238098458e-06,
+      "loss": 0.2378,
+      "step": 1252
+    },
+    {
+      "epoch": 0.592434988179669,
+      "grad_norm": 4.410736083984375,
+      "learning_rate": 8.527847477688027e-06,
+      "loss": 0.2437,
+      "step": 1253
+    },
+    {
+      "epoch": 0.5929078014184397,
+      "grad_norm": 6.536932945251465,
+      "learning_rate": 8.511520729406498e-06,
+      "loss": 0.2503,
+      "step": 1254
+    },
+    {
+      "epoch": 0.5933806146572104,
+      "grad_norm": 5.56400728225708,
+      "learning_rate": 8.49519803774995e-06,
+      "loss": 0.2324,
+      "step": 1255
+    },
+    {
+      "epoch": 0.5938534278959811,
+      "grad_norm": 4.7766923904418945,
+      "learning_rate": 8.478879447203411e-06,
+      "loss": 0.1441,
+      "step": 1256
+    },
+    {
+      "epoch": 0.5943262411347517,
+      "grad_norm": 5.121423721313477,
+      "learning_rate": 8.462565002240733e-06,
+      "loss": 0.2649,
+      "step": 1257
+    },
+    {
+      "epoch": 0.5947990543735224,
+      "grad_norm": 4.3107404708862305,
+      "learning_rate": 8.446254747324462e-06,
+      "loss": 0.1711,
+      "step": 1258
+    },
+    {
+      "epoch": 0.5952718676122931,
+      "grad_norm": 5.047919273376465,
+      "learning_rate": 8.42994872690573e-06,
+      "loss": 0.2281,
+      "step": 1259
+    },
+    {
+      "epoch": 0.5957446808510638,
+      "grad_norm": 6.451530933380127,
+      "learning_rate": 8.413646985424127e-06,
+      "loss": 0.2467,
+      "step": 1260
+    },
+    {
+      "epoch": 0.5957446808510638,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7102803738317757,
+      "eval_loss": 0.3106723129749298,
+      "eval_precision": 0.8685714285714285,
+      "eval_recall": 0.6007905138339921,
+      "eval_runtime": 48.2804,
+      "eval_samples_per_second": 5.717,
+      "eval_steps_per_second": 0.186,
+      "step": 1260
+    },
+    {
+      "epoch": 0.5962174940898345,
+      "grad_norm": 5.580362796783447,
+      "learning_rate": 8.397349567307586e-06,
+      "loss": 0.2108,
+      "step": 1261
+    },
+    {
+      "epoch": 0.5966903073286052,
+      "grad_norm": 4.948899745941162,
+      "learning_rate": 8.381056516972253e-06,
+      "loss": 0.2347,
+      "step": 1262
+    },
+    {
+      "epoch": 0.5971631205673759,
+      "grad_norm": 3.7529051303863525,
+      "learning_rate": 8.364767878822368e-06,
+      "loss": 0.1665,
+      "step": 1263
+    },
+    {
+      "epoch": 0.5976359338061465,
+      "grad_norm": 7.008377552032471,
+      "learning_rate": 8.34848369725015e-06,
+      "loss": 0.2132,
+      "step": 1264
+    },
+    {
+      "epoch": 0.5981087470449172,
+      "grad_norm": 5.252836227416992,
+      "learning_rate": 8.332204016635672e-06,
+      "loss": 0.1941,
+      "step": 1265
+    },
+    {
+      "epoch": 0.5985815602836879,
+      "grad_norm": 6.382559776306152,
+      "learning_rate": 8.31592888134674e-06,
+      "loss": 0.2376,
+      "step": 1266
+    },
+    {
+      "epoch": 0.5990543735224586,
+      "grad_norm": 6.636437892913818,
+      "learning_rate": 8.299658335738772e-06,
+      "loss": 0.3327,
+      "step": 1267
+    },
+    {
+      "epoch": 0.5995271867612293,
+      "grad_norm": 5.242986679077148,
+      "learning_rate": 8.28339242415468e-06,
+      "loss": 0.2062,
+      "step": 1268
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 5.419365882873535,
+      "learning_rate": 8.267131190924737e-06,
+      "loss": 0.2488,
+      "step": 1269
+    },
+    {
+      "epoch": 0.6004728132387707,
+      "grad_norm": 6.189310550689697,
+      "learning_rate": 8.25087468036648e-06,
+      "loss": 0.2743,
+      "step": 1270
+    },
+    {
+      "epoch": 0.6009456264775414,
+      "grad_norm": 5.147764205932617,
+      "learning_rate": 8.234622936784566e-06,
+      "loss": 0.1907,
+      "step": 1271
+    },
+    {
+      "epoch": 0.601418439716312,
+      "grad_norm": 5.64257287979126,
+      "learning_rate": 8.218376004470665e-06,
+      "loss": 0.2655,
+      "step": 1272
+    },
+    {
+      "epoch": 0.6018912529550827,
+      "grad_norm": 3.5520944595336914,
+      "learning_rate": 8.202133927703324e-06,
+      "loss": 0.1818,
+      "step": 1273
+    },
+    {
+      "epoch": 0.6023640661938534,
+      "grad_norm": 5.096825122833252,
+      "learning_rate": 8.185896750747878e-06,
+      "loss": 0.1918,
+      "step": 1274
+    },
+    {
+      "epoch": 0.6028368794326241,
+      "grad_norm": 5.38516092300415,
+      "learning_rate": 8.169664517856287e-06,
+      "loss": 0.2708,
+      "step": 1275
+    },
+    {
+      "epoch": 0.6033096926713948,
+      "grad_norm": 5.871916770935059,
+      "learning_rate": 8.153437273267045e-06,
+      "loss": 0.1947,
+      "step": 1276
+    },
+    {
+      "epoch": 0.6037825059101655,
+      "grad_norm": 4.89730167388916,
+      "learning_rate": 8.137215061205049e-06,
+      "loss": 0.2103,
+      "step": 1277
+    },
+    {
+      "epoch": 0.6042553191489362,
+      "grad_norm": 5.5777587890625,
+      "learning_rate": 8.120997925881492e-06,
+      "loss": 0.2599,
+      "step": 1278
+    },
+    {
+      "epoch": 0.6047281323877068,
+      "grad_norm": 4.445948600769043,
+      "learning_rate": 8.10478591149372e-06,
+      "loss": 0.2191,
+      "step": 1279
+    },
+    {
+      "epoch": 0.6052009456264775,
+      "grad_norm": 7.9579267501831055,
+      "learning_rate": 8.088579062225116e-06,
+      "loss": 0.3175,
+      "step": 1280
+    },
+    {
+      "epoch": 0.6052009456264775,
+      "eval_accuracy": 0.8580931263858093,
+      "eval_f1": 0.69377990430622,
+      "eval_loss": 0.312032550573349,
+      "eval_precision": 0.8787878787878788,
+      "eval_recall": 0.5731225296442688,
+      "eval_runtime": 50.2837,
+      "eval_samples_per_second": 5.489,
+      "eval_steps_per_second": 0.179,
+      "step": 1280
+    },
+    {
+      "epoch": 0.6056737588652482,
+      "grad_norm": 3.942072629928589,
+      "learning_rate": 8.072377422245002e-06,
+      "loss": 0.1949,
+      "step": 1281
+    },
+    {
+      "epoch": 0.6061465721040189,
+      "grad_norm": 4.524231910705566,
+      "learning_rate": 8.05618103570849e-06,
+      "loss": 0.1867,
+      "step": 1282
+    },
+    {
+      "epoch": 0.6066193853427896,
+      "grad_norm": 5.806196689605713,
+      "learning_rate": 8.039989946756388e-06,
+      "loss": 0.2334,
+      "step": 1283
+    },
+    {
+      "epoch": 0.6070921985815603,
+      "grad_norm": 5.2232489585876465,
+      "learning_rate": 8.02380419951506e-06,
+      "loss": 0.2788,
+      "step": 1284
+    },
+    {
+      "epoch": 0.607565011820331,
+      "grad_norm": 6.341989517211914,
+      "learning_rate": 8.0076238380963e-06,
+      "loss": 0.2481,
+      "step": 1285
+    },
+    {
+      "epoch": 0.6080378250591016,
+      "grad_norm": 5.141717433929443,
+      "learning_rate": 7.991448906597237e-06,
+      "loss": 0.2083,
+      "step": 1286
+    },
+    {
+      "epoch": 0.6085106382978723,
+      "grad_norm": 5.809133052825928,
+      "learning_rate": 7.975279449100207e-06,
+      "loss": 0.2377,
+      "step": 1287
+    },
+    {
+      "epoch": 0.608983451536643,
+      "grad_norm": 4.600372314453125,
+      "learning_rate": 7.959115509672612e-06,
+      "loss": 0.2026,
+      "step": 1288
+    },
+    {
+      "epoch": 0.6094562647754137,
+      "grad_norm": 7.412517547607422,
+      "learning_rate": 7.942957132366827e-06,
+      "loss": 0.3106,
+      "step": 1289
+    },
+    {
+      "epoch": 0.6099290780141844,
+      "grad_norm": 10.773149490356445,
+      "learning_rate": 7.926804361220056e-06,
+      "loss": 0.2309,
+      "step": 1290
+    },
+    {
+      "epoch": 0.6104018912529551,
+      "grad_norm": 4.44931173324585,
+      "learning_rate": 7.910657240254242e-06,
+      "loss": 0.2072,
+      "step": 1291
+    },
+    {
+      "epoch": 0.6108747044917258,
+      "grad_norm": 6.045795917510986,
+      "learning_rate": 7.894515813475914e-06,
+      "loss": 0.2879,
+      "step": 1292
+    },
+    {
+      "epoch": 0.6113475177304964,
+      "grad_norm": 4.986977577209473,
+      "learning_rate": 7.87838012487609e-06,
+      "loss": 0.1959,
+      "step": 1293
+    },
+    {
+      "epoch": 0.6118203309692671,
+      "grad_norm": 6.099925518035889,
+      "learning_rate": 7.862250218430147e-06,
+      "loss": 0.2966,
+      "step": 1294
+    },
+    {
+      "epoch": 0.6122931442080378,
+      "grad_norm": 5.837856292724609,
+      "learning_rate": 7.846126138097698e-06,
+      "loss": 0.2563,
+      "step": 1295
+    },
+    {
+      "epoch": 0.6127659574468085,
+      "grad_norm": 6.82401704788208,
+      "learning_rate": 7.830007927822494e-06,
+      "loss": 0.1892,
+      "step": 1296
+    },
+    {
+      "epoch": 0.6132387706855792,
+      "grad_norm": 6.041834354400635,
+      "learning_rate": 7.813895631532271e-06,
+      "loss": 0.1974,
+      "step": 1297
+    },
+    {
+      "epoch": 0.6137115839243499,
+      "grad_norm": 5.327773094177246,
+      "learning_rate": 7.797789293138657e-06,
+      "loss": 0.2551,
+      "step": 1298
+    },
+    {
+      "epoch": 0.6141843971631206,
+      "grad_norm": 3.487072467803955,
+      "learning_rate": 7.781688956537034e-06,
+      "loss": 0.1987,
+      "step": 1299
+    },
+    {
+      "epoch": 0.6146572104018913,
+      "grad_norm": 4.819819450378418,
+      "learning_rate": 7.765594665606441e-06,
+      "loss": 0.1988,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6146572104018913,
+      "eval_accuracy": 0.8636363636363636,
+      "eval_f1": 0.7146171693735499,
+      "eval_loss": 0.3020324409008026,
+      "eval_precision": 0.8651685393258427,
+      "eval_recall": 0.6086956521739131,
+      "eval_runtime": 48.6185,
+      "eval_samples_per_second": 5.677,
+      "eval_steps_per_second": 0.185,
+      "step": 1300
+    },
+    {
+      "epoch": 0.6151300236406619,
+      "grad_norm": 5.6234612464904785,
+      "learning_rate": 7.749506464209428e-06,
+      "loss": 0.2889,
+      "step": 1301
+    },
+    {
+      "epoch": 0.6156028368794326,
+      "grad_norm": 5.582950592041016,
+      "learning_rate": 7.733424396191955e-06,
+      "loss": 0.2902,
+      "step": 1302
+    },
+    {
+      "epoch": 0.6160756501182033,
+      "grad_norm": 5.391469955444336,
+      "learning_rate": 7.71734850538326e-06,
+      "loss": 0.213,
+      "step": 1303
+    },
+    {
+      "epoch": 0.616548463356974,
+      "grad_norm": 4.6382060050964355,
+      "learning_rate": 7.701278835595753e-06,
+      "loss": 0.1684,
+      "step": 1304
+    },
+    {
+      "epoch": 0.6170212765957447,
+      "grad_norm": 8.098640441894531,
+      "learning_rate": 7.685215430624891e-06,
+      "loss": 0.4206,
+      "step": 1305
+    },
+    {
+      "epoch": 0.6174940898345154,
+      "grad_norm": 4.473232746124268,
+      "learning_rate": 7.669158334249048e-06,
+      "loss": 0.243,
+      "step": 1306
+    },
+    {
+      "epoch": 0.6179669030732861,
+      "grad_norm": 5.509943008422852,
+      "learning_rate": 7.65310759022941e-06,
+      "loss": 0.1861,
+      "step": 1307
+    },
+    {
+      "epoch": 0.6184397163120567,
+      "grad_norm": 6.489039421081543,
+      "learning_rate": 7.637063242309852e-06,
+      "loss": 0.2912,
+      "step": 1308
+    },
+    {
+      "epoch": 0.6189125295508274,
+      "grad_norm": 4.711176872253418,
+      "learning_rate": 7.621025334216819e-06,
+      "loss": 0.243,
+      "step": 1309
+    },
+    {
+      "epoch": 0.6193853427895981,
+      "grad_norm": 5.736166000366211,
+      "learning_rate": 7.604993909659198e-06,
+      "loss": 0.2759,
+      "step": 1310
+    },
+    {
+      "epoch": 0.6198581560283688,
+      "grad_norm": 7.324904441833496,
+      "learning_rate": 7.588969012328214e-06,
+      "loss": 0.2655,
+      "step": 1311
+    },
+    {
+      "epoch": 0.6203309692671395,
+      "grad_norm": 5.770148754119873,
+      "learning_rate": 7.572950685897295e-06,
+      "loss": 0.2062,
+      "step": 1312
+    },
+    {
+      "epoch": 0.6208037825059102,
+      "grad_norm": 5.873038291931152,
+      "learning_rate": 7.556938974021969e-06,
+      "loss": 0.2604,
+      "step": 1313
+    },
+    {
+      "epoch": 0.6212765957446809,
+      "grad_norm": 5.717566013336182,
+      "learning_rate": 7.540933920339733e-06,
+      "loss": 0.1932,
+      "step": 1314
+    },
+    {
+      "epoch": 0.6217494089834515,
+      "grad_norm": 5.060842514038086,
+      "learning_rate": 7.524935568469939e-06,
+      "loss": 0.2813,
+      "step": 1315
+    },
+    {
+      "epoch": 0.6222222222222222,
+      "grad_norm": 5.583745002746582,
+      "learning_rate": 7.50894396201367e-06,
+      "loss": 0.23,
+      "step": 1316
+    },
+    {
+      "epoch": 0.6226950354609929,
+      "grad_norm": 10.26961898803711,
+      "learning_rate": 7.4929591445536336e-06,
+      "loss": 0.2003,
+      "step": 1317
+    },
+    {
+      "epoch": 0.6231678486997636,
+      "grad_norm": 4.721004009246826,
+      "learning_rate": 7.4769811596540285e-06,
+      "loss": 0.1755,
+      "step": 1318
+    },
+    {
+      "epoch": 0.6236406619385343,
+      "grad_norm": 4.784334659576416,
+      "learning_rate": 7.461010050860438e-06,
+      "loss": 0.2046,
+      "step": 1319
+    },
+    {
+      "epoch": 0.624113475177305,
+      "grad_norm": 7.074143886566162,
+      "learning_rate": 7.445045861699696e-06,
+      "loss": 0.2081,
+      "step": 1320
+    },
+    {
+      "epoch": 0.624113475177305,
+      "eval_accuracy": 0.8558758314855875,
+      "eval_f1": 0.6859903381642513,
+      "eval_loss": 0.31748807430267334,
+      "eval_precision": 0.8819875776397516,
+      "eval_recall": 0.5612648221343873,
+      "eval_runtime": 47.9172,
+      "eval_samples_per_second": 5.76,
+      "eval_steps_per_second": 0.188,
+      "step": 1320
+    },
+    {
+      "epoch": 0.6245862884160757,
+      "grad_norm": 6.400256156921387,
+      "learning_rate": 7.429088635679786e-06,
+      "loss": 0.2797,
+      "step": 1321
+    },
+    {
+      "epoch": 0.6250591016548463,
+      "grad_norm": 7.948604583740234,
+      "learning_rate": 7.413138416289716e-06,
+      "loss": 0.2883,
+      "step": 1322
+    },
+    {
+      "epoch": 0.625531914893617,
+      "grad_norm": 5.226047039031982,
+      "learning_rate": 7.397195246999391e-06,
+      "loss": 0.2944,
+      "step": 1323
+    },
+    {
+      "epoch": 0.6260047281323877,
+      "grad_norm": 4.652298450469971,
+      "learning_rate": 7.381259171259509e-06,
+      "loss": 0.2375,
+      "step": 1324
+    },
+    {
+      "epoch": 0.6264775413711584,
+      "grad_norm": 6.352631568908691,
+      "learning_rate": 7.365330232501427e-06,
+      "loss": 0.2923,
+      "step": 1325
+    },
+    {
+      "epoch": 0.6269503546099291,
+      "grad_norm": 5.204030513763428,
+      "learning_rate": 7.349408474137067e-06,
+      "loss": 0.2485,
+      "step": 1326
+    },
+    {
+      "epoch": 0.6274231678486998,
+      "grad_norm": 6.4170026779174805,
+      "learning_rate": 7.333493939558764e-06,
+      "loss": 0.3025,
+      "step": 1327
+    },
+    {
+      "epoch": 0.6278959810874705,
+      "grad_norm": 5.421019077301025,
+      "learning_rate": 7.317586672139177e-06,
+      "loss": 0.2311,
+      "step": 1328
+    },
+    {
+      "epoch": 0.6283687943262412,
+      "grad_norm": 6.363109111785889,
+      "learning_rate": 7.301686715231149e-06,
+      "loss": 0.244,
+      "step": 1329
+    },
+    {
+      "epoch": 0.6288416075650118,
+      "grad_norm": 4.805910587310791,
+      "learning_rate": 7.285794112167615e-06,
+      "loss": 0.2314,
+      "step": 1330
+    },
+    {
+      "epoch": 0.6293144208037825,
+      "grad_norm": 4.570178508758545,
+      "learning_rate": 7.269908906261458e-06,
+      "loss": 0.2186,
+      "step": 1331
+    },
+    {
+      "epoch": 0.6297872340425532,
+      "grad_norm": 4.513207912445068,
+      "learning_rate": 7.254031140805399e-06,
+      "loss": 0.2176,
+      "step": 1332
+    },
+    {
+      "epoch": 0.6302600472813239,
+      "grad_norm": 5.025672435760498,
+      "learning_rate": 7.238160859071885e-06,
+      "loss": 0.275,
+      "step": 1333
+    },
+    {
+      "epoch": 0.6307328605200946,
+      "grad_norm": 5.059742450714111,
+      "learning_rate": 7.222298104312966e-06,
+      "loss": 0.2367,
+      "step": 1334
+    },
+    {
+      "epoch": 0.6312056737588653,
+      "grad_norm": 5.431969165802002,
+      "learning_rate": 7.206442919760186e-06,
+      "loss": 0.24,
+      "step": 1335
+    },
+    {
+      "epoch": 0.631678486997636,
+      "grad_norm": 11.056987762451172,
+      "learning_rate": 7.190595348624447e-06,
+      "loss": 0.3124,
+      "step": 1336
+    },
+    {
+      "epoch": 0.6321513002364066,
+      "grad_norm": 6.1571197509765625,
+      "learning_rate": 7.1747554340959055e-06,
+      "loss": 0.2398,
+      "step": 1337
+    },
+    {
+      "epoch": 0.6326241134751773,
+      "grad_norm": 5.703886032104492,
+      "learning_rate": 7.158923219343845e-06,
+      "loss": 0.2612,
+      "step": 1338
+    },
+    {
+      "epoch": 0.633096926713948,
+      "grad_norm": 5.536457061767578,
+      "learning_rate": 7.1430987475165834e-06,
+      "loss": 0.2558,
+      "step": 1339
+    },
+    {
+      "epoch": 0.6335697399527187,
+      "grad_norm": 4.489446640014648,
+      "learning_rate": 7.127282061741316e-06,
+      "loss": 0.1784,
+      "step": 1340
+    },
+    {
+      "epoch": 0.6335697399527187,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7324561403508771,
+      "eval_loss": 0.2959369122982025,
+      "eval_precision": 0.8226600985221675,
+      "eval_recall": 0.6600790513833992,
+      "eval_runtime": 49.1184,
+      "eval_samples_per_second": 5.619,
+      "eval_steps_per_second": 0.183,
+      "step": 1340
+    },
+    {
+      "epoch": 0.6340425531914894,
+      "grad_norm": 4.977541923522949,
+      "learning_rate": 7.11147320512403e-06,
+      "loss": 0.2467,
+      "step": 1341
+    },
+    {
+      "epoch": 0.6345153664302601,
+      "grad_norm": 4.624886989593506,
+      "learning_rate": 7.095672220749367e-06,
+      "loss": 0.258,
+      "step": 1342
+    },
+    {
+      "epoch": 0.6349881796690308,
+      "grad_norm": 4.496685028076172,
+      "learning_rate": 7.079879151680516e-06,
+      "loss": 0.206,
+      "step": 1343
+    },
+    {
+      "epoch": 0.6354609929078014,
+      "grad_norm": 5.0084919929504395,
+      "learning_rate": 7.064094040959107e-06,
+      "loss": 0.1829,
+      "step": 1344
+    },
+    {
+      "epoch": 0.6359338061465721,
+      "grad_norm": 5.6750168800354,
+      "learning_rate": 7.048316931605062e-06,
+      "loss": 0.2466,
+      "step": 1345
+    },
+    {
+      "epoch": 0.6364066193853428,
+      "grad_norm": 6.028310298919678,
+      "learning_rate": 7.032547866616512e-06,
+      "loss": 0.2048,
+      "step": 1346
+    },
+    {
+      "epoch": 0.6368794326241135,
+      "grad_norm": 4.741923809051514,
+      "learning_rate": 7.0167868889696445e-06,
+      "loss": 0.1621,
+      "step": 1347
+    },
+    {
+      "epoch": 0.6373522458628842,
+      "grad_norm": 5.013643264770508,
+      "learning_rate": 7.001034041618632e-06,
+      "loss": 0.3119,
+      "step": 1348
+    },
+    {
+      "epoch": 0.6378250591016549,
+      "grad_norm": 7.612677097320557,
+      "learning_rate": 6.985289367495469e-06,
+      "loss": 0.3243,
+      "step": 1349
+    },
+    {
+      "epoch": 0.6382978723404256,
+      "grad_norm": 6.442877769470215,
+      "learning_rate": 6.969552909509885e-06,
+      "loss": 0.2928,
+      "step": 1350
+    },
+    {
+      "epoch": 0.6387706855791963,
+      "grad_norm": 4.449213981628418,
+      "learning_rate": 6.953824710549212e-06,
+      "loss": 0.1977,
+      "step": 1351
+    },
+    {
+      "epoch": 0.6392434988179669,
+      "grad_norm": 5.301755905151367,
+      "learning_rate": 6.938104813478279e-06,
+      "loss": 0.2666,
+      "step": 1352
+    },
+    {
+      "epoch": 0.6397163120567376,
+      "grad_norm": 4.733539581298828,
+      "learning_rate": 6.922393261139284e-06,
+      "loss": 0.1967,
+      "step": 1353
+    },
+    {
+      "epoch": 0.6401891252955083,
+      "grad_norm": 5.527211666107178,
+      "learning_rate": 6.9066900963516855e-06,
+      "loss": 0.2261,
+      "step": 1354
+    },
+    {
+      "epoch": 0.640661938534279,
+      "grad_norm": 7.788763999938965,
+      "learning_rate": 6.8909953619120836e-06,
+      "loss": 0.2244,
+      "step": 1355
+    },
+    {
+      "epoch": 0.6411347517730497,
+      "grad_norm": 4.974414825439453,
+      "learning_rate": 6.875309100594098e-06,
+      "loss": 0.2021,
+      "step": 1356
+    },
+    {
+      "epoch": 0.6416075650118204,
+      "grad_norm": 4.9365739822387695,
+      "learning_rate": 6.859631355148266e-06,
+      "loss": 0.1671,
+      "step": 1357
+    },
+    {
+      "epoch": 0.642080378250591,
+      "grad_norm": 5.185995578765869,
+      "learning_rate": 6.843962168301907e-06,
+      "loss": 0.2056,
+      "step": 1358
+    },
+    {
+      "epoch": 0.6425531914893617,
+      "grad_norm": 4.460386276245117,
+      "learning_rate": 6.828301582759018e-06,
+      "loss": 0.1665,
+      "step": 1359
+    },
+    {
+      "epoch": 0.6430260047281324,
+      "grad_norm": 6.994537353515625,
+      "learning_rate": 6.8126496412001545e-06,
+      "loss": 0.2712,
+      "step": 1360
+    },
+    {
+      "epoch": 0.6430260047281324,
+      "eval_accuracy": 0.8592017738359202,
+      "eval_f1": 0.7066974595842956,
+      "eval_loss": 0.31329602003097534,
+      "eval_precision": 0.85,
+      "eval_recall": 0.6047430830039525,
+      "eval_runtime": 47.7506,
+      "eval_samples_per_second": 5.78,
+      "eval_steps_per_second": 0.188,
+      "step": 1360
+    },
+    {
+      "epoch": 0.6434988179669031,
+      "grad_norm": 4.288215160369873,
+      "learning_rate": 6.797006386282316e-06,
+      "loss": 0.1407,
+      "step": 1361
+    },
+    {
+      "epoch": 0.6439716312056738,
+      "grad_norm": 7.29041862487793,
+      "learning_rate": 6.7813718606388255e-06,
+      "loss": 0.2459,
+      "step": 1362
+    },
+    {
+      "epoch": 0.6444444444444445,
+      "grad_norm": 4.9920268058776855,
+      "learning_rate": 6.7657461068792164e-06,
+      "loss": 0.2132,
+      "step": 1363
+    },
+    {
+      "epoch": 0.6449172576832152,
+      "grad_norm": 5.254515171051025,
+      "learning_rate": 6.750129167589113e-06,
+      "loss": 0.2016,
+      "step": 1364
+    },
+    {
+      "epoch": 0.6453900709219859,
+      "grad_norm": 6.032263278961182,
+      "learning_rate": 6.734521085330126e-06,
+      "loss": 0.1932,
+      "step": 1365
+    },
+    {
+      "epoch": 0.6458628841607565,
+      "grad_norm": 4.635222434997559,
+      "learning_rate": 6.718921902639717e-06,
+      "loss": 0.202,
+      "step": 1366
+    },
+    {
+      "epoch": 0.6463356973995272,
+      "grad_norm": 5.365309715270996,
+      "learning_rate": 6.7033316620310985e-06,
+      "loss": 0.2137,
+      "step": 1367
+    },
+    {
+      "epoch": 0.6468085106382979,
+      "grad_norm": 4.981945991516113,
+      "learning_rate": 6.687750405993113e-06,
+      "loss": 0.2489,
+      "step": 1368
+    },
+    {
+      "epoch": 0.6472813238770686,
+      "grad_norm": 6.213076591491699,
+      "learning_rate": 6.672178176990112e-06,
+      "loss": 0.2583,
+      "step": 1369
+    },
+    {
+      "epoch": 0.6477541371158393,
+      "grad_norm": 8.723681449890137,
+      "learning_rate": 6.656615017461854e-06,
+      "loss": 0.2961,
+      "step": 1370
+    },
+    {
+      "epoch": 0.64822695035461,
+      "grad_norm": 3.6889824867248535,
+      "learning_rate": 6.641060969823372e-06,
+      "loss": 0.1616,
+      "step": 1371
+    },
+    {
+      "epoch": 0.6486997635933807,
+      "grad_norm": 5.324930667877197,
+      "learning_rate": 6.625516076464871e-06,
+      "loss": 0.2571,
+      "step": 1372
+    },
+    {
+      "epoch": 0.6491725768321513,
+      "grad_norm": 7.705888748168945,
+      "learning_rate": 6.6099803797516e-06,
+      "loss": 0.3487,
+      "step": 1373
+    },
+    {
+      "epoch": 0.649645390070922,
+      "grad_norm": 7.570559024810791,
+      "learning_rate": 6.5944539220237555e-06,
+      "loss": 0.2652,
+      "step": 1374
+    },
+    {
+      "epoch": 0.6501182033096927,
+      "grad_norm": 5.115143299102783,
+      "learning_rate": 6.578936745596346e-06,
+      "loss": 0.1846,
+      "step": 1375
+    },
+    {
+      "epoch": 0.6505910165484634,
+      "grad_norm": 5.2409162521362305,
+      "learning_rate": 6.563428892759087e-06,
+      "loss": 0.1869,
+      "step": 1376
+    },
+    {
+      "epoch": 0.6510638297872341,
+      "grad_norm": 7.305501937866211,
+      "learning_rate": 6.547930405776282e-06,
+      "loss": 0.2298,
+      "step": 1377
+    },
+    {
+      "epoch": 0.6515366430260048,
+      "grad_norm": 5.585699081420898,
+      "learning_rate": 6.532441326886716e-06,
+      "loss": 0.1531,
+      "step": 1378
+    },
+    {
+      "epoch": 0.6520094562647755,
+      "grad_norm": 5.718395709991455,
+      "learning_rate": 6.5169616983035285e-06,
+      "loss": 0.2375,
+      "step": 1379
+    },
+    {
+      "epoch": 0.6524822695035462,
+      "grad_norm": 7.011470317840576,
+      "learning_rate": 6.501491562214104e-06,
+      "loss": 0.2463,
+      "step": 1380
+    },
+    {
+      "epoch": 0.6524822695035462,
+      "eval_accuracy": 0.8547671840354767,
+      "eval_f1": 0.6960556844547564,
+      "eval_loss": 0.3179844319820404,
+      "eval_precision": 0.8426966292134831,
+      "eval_recall": 0.5928853754940712,
+      "eval_runtime": 48.0507,
+      "eval_samples_per_second": 5.744,
+      "eval_steps_per_second": 0.187,
+      "step": 1380
+    },
+    {
+      "epoch": 0.6529550827423168,
+      "grad_norm": 6.096155643463135,
+      "learning_rate": 6.486030960779956e-06,
+      "loss": 0.2143,
+      "step": 1381
+    },
+    {
+      "epoch": 0.6534278959810875,
+      "grad_norm": 6.085330486297607,
+      "learning_rate": 6.470579936136612e-06,
+      "loss": 0.2922,
+      "step": 1382
+    },
+    {
+      "epoch": 0.6539007092198581,
+      "grad_norm": 4.465743541717529,
+      "learning_rate": 6.455138530393508e-06,
+      "loss": 0.2069,
+      "step": 1383
+    },
+    {
+      "epoch": 0.6543735224586288,
+      "grad_norm": 8.364693641662598,
+      "learning_rate": 6.4397067856338524e-06,
+      "loss": 0.2768,
+      "step": 1384
+    },
+    {
+      "epoch": 0.6548463356973995,
+      "grad_norm": 4.662283897399902,
+      "learning_rate": 6.424284743914532e-06,
+      "loss": 0.2401,
+      "step": 1385
+    },
+    {
+      "epoch": 0.6553191489361702,
+      "grad_norm": 6.041397571563721,
+      "learning_rate": 6.408872447265984e-06,
+      "loss": 0.2707,
+      "step": 1386
+    },
+    {
+      "epoch": 0.6557919621749408,
+      "grad_norm": 5.31654167175293,
+      "learning_rate": 6.393469937692101e-06,
+      "loss": 0.2028,
+      "step": 1387
+    },
+    {
+      "epoch": 0.6562647754137115,
+      "grad_norm": 5.916751384735107,
+      "learning_rate": 6.378077257170081e-06,
+      "loss": 0.2362,
+      "step": 1388
+    },
+    {
+      "epoch": 0.6567375886524822,
+      "grad_norm": 5.025276184082031,
+      "learning_rate": 6.3626944476503485e-06,
+      "loss": 0.2574,
+      "step": 1389
+    },
+    {
+      "epoch": 0.6572104018912529,
+      "grad_norm": 9.537090301513672,
+      "learning_rate": 6.34732155105642e-06,
+      "loss": 0.3416,
+      "step": 1390
+    },
+    {
+      "epoch": 0.6576832151300236,
+      "grad_norm": 4.489987850189209,
+      "learning_rate": 6.331958609284806e-06,
+      "loss": 0.2113,
+      "step": 1391
+    },
+    {
+      "epoch": 0.6581560283687943,
+      "grad_norm": 6.175182819366455,
+      "learning_rate": 6.316605664204878e-06,
+      "loss": 0.2733,
+      "step": 1392
+    },
+    {
+      "epoch": 0.658628841607565,
+      "grad_norm": 8.544486999511719,
+      "learning_rate": 6.301262757658758e-06,
+      "loss": 0.254,
+      "step": 1393
+    },
+    {
+      "epoch": 0.6591016548463356,
+      "grad_norm": 7.164076328277588,
+      "learning_rate": 6.285929931461218e-06,
+      "loss": 0.2604,
+      "step": 1394
+    },
+    {
+      "epoch": 0.6595744680851063,
+      "grad_norm": 7.023167133331299,
+      "learning_rate": 6.2706072273995546e-06,
+      "loss": 0.2135,
+      "step": 1395
+    },
+    {
+      "epoch": 0.660047281323877,
+      "grad_norm": 3.9141695499420166,
+      "learning_rate": 6.255294687233484e-06,
+      "loss": 0.1969,
+      "step": 1396
+    },
+    {
+      "epoch": 0.6605200945626477,
+      "grad_norm": 9.266695976257324,
+      "learning_rate": 6.239992352695016e-06,
+      "loss": 0.2915,
+      "step": 1397
+    },
+    {
+      "epoch": 0.6609929078014184,
+      "grad_norm": 5.782726764678955,
+      "learning_rate": 6.224700265488343e-06,
+      "loss": 0.2525,
+      "step": 1398
+    },
+    {
+      "epoch": 0.6614657210401891,
+      "grad_norm": 5.57002067565918,
+      "learning_rate": 6.209418467289731e-06,
+      "loss": 0.2601,
+      "step": 1399
+    },
+    {
+      "epoch": 0.6619385342789598,
+      "grad_norm": 8.637619018554688,
+      "learning_rate": 6.194146999747419e-06,
+      "loss": 0.3991,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6619385342789598,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7061611374407583,
+      "eval_loss": 0.3167264759540558,
+      "eval_precision": 0.8816568047337278,
+      "eval_recall": 0.5889328063241107,
+      "eval_runtime": 49.4666,
+      "eval_samples_per_second": 5.58,
+      "eval_steps_per_second": 0.182,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6624113475177305,
+      "grad_norm": 8.265732765197754,
+      "learning_rate": 6.1788859044814755e-06,
+      "loss": 0.369,
+      "step": 1401
+    },
+    {
+      "epoch": 0.6628841607565011,
+      "grad_norm": 6.062003135681152,
+      "learning_rate": 6.163635223083706e-06,
+      "loss": 0.2058,
+      "step": 1402
+    },
+    {
+      "epoch": 0.6633569739952718,
+      "grad_norm": 3.8213107585906982,
+      "learning_rate": 6.148394997117532e-06,
+      "loss": 0.1434,
+      "step": 1403
+    },
+    {
+      "epoch": 0.6638297872340425,
+      "grad_norm": 4.005511283874512,
+      "learning_rate": 6.133165268117885e-06,
+      "loss": 0.2036,
+      "step": 1404
+    },
+    {
+      "epoch": 0.6643026004728132,
+      "grad_norm": 5.579016208648682,
+      "learning_rate": 6.117946077591087e-06,
+      "loss": 0.2527,
+      "step": 1405
+    },
+    {
+      "epoch": 0.6647754137115839,
+      "grad_norm": 8.702812194824219,
+      "learning_rate": 6.102737467014739e-06,
+      "loss": 0.2678,
+      "step": 1406
+    },
+    {
+      "epoch": 0.6652482269503546,
+      "grad_norm": 5.449542999267578,
+      "learning_rate": 6.087539477837609e-06,
+      "loss": 0.2133,
+      "step": 1407
+    },
+    {
+      "epoch": 0.6657210401891253,
+      "grad_norm": 5.406524181365967,
+      "learning_rate": 6.072352151479508e-06,
+      "loss": 0.2225,
+      "step": 1408
+    },
+    {
+      "epoch": 0.6661938534278959,
+      "grad_norm": 7.414266586303711,
+      "learning_rate": 6.057175529331205e-06,
+      "loss": 0.2549,
+      "step": 1409
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 6.921921730041504,
+      "learning_rate": 6.0420096527542835e-06,
+      "loss": 0.2658,
+      "step": 1410
+    },
+    {
+      "epoch": 0.6671394799054373,
+      "grad_norm": 6.305230617523193,
+      "learning_rate": 6.026854563081046e-06,
+      "loss": 0.2819,
+      "step": 1411
+    },
+    {
+      "epoch": 0.667612293144208,
+      "grad_norm": 5.2894511222839355,
+      "learning_rate": 6.0117103016143915e-06,
+      "loss": 0.1989,
+      "step": 1412
+    },
+    {
+      "epoch": 0.6680851063829787,
+      "grad_norm": 7.54205846786499,
+      "learning_rate": 5.996576909627718e-06,
+      "loss": 0.2917,
+      "step": 1413
+    },
+    {
+      "epoch": 0.6685579196217494,
+      "grad_norm": 3.777785539627075,
+      "learning_rate": 5.981454428364792e-06,
+      "loss": 0.1905,
+      "step": 1414
+    },
+    {
+      "epoch": 0.6690307328605201,
+      "grad_norm": 4.611357688903809,
+      "learning_rate": 5.96634289903965e-06,
+      "loss": 0.2231,
+      "step": 1415
+    },
+    {
+      "epoch": 0.6695035460992907,
+      "grad_norm": 9.586370468139648,
+      "learning_rate": 5.951242362836475e-06,
+      "loss": 0.2447,
+      "step": 1416
+    },
+    {
+      "epoch": 0.6699763593380614,
+      "grad_norm": 5.429812431335449,
+      "learning_rate": 5.936152860909492e-06,
+      "loss": 0.2668,
+      "step": 1417
+    },
+    {
+      "epoch": 0.6704491725768321,
+      "grad_norm": 4.418676376342773,
+      "learning_rate": 5.921074434382861e-06,
+      "loss": 0.216,
+      "step": 1418
+    },
+    {
+      "epoch": 0.6709219858156028,
+      "grad_norm": 4.1734089851379395,
+      "learning_rate": 5.906007124350547e-06,
+      "loss": 0.1834,
+      "step": 1419
+    },
+    {
+      "epoch": 0.6713947990543735,
+      "grad_norm": 5.029831886291504,
+      "learning_rate": 5.8909509718762235e-06,
+      "loss": 0.154,
+      "step": 1420
+    },
+    {
+      "epoch": 0.6713947990543735,
+      "eval_accuracy": 0.8636363636363636,
+      "eval_f1": 0.7146171693735499,
+      "eval_loss": 0.30272066593170166,
+      "eval_precision": 0.8651685393258427,
+      "eval_recall": 0.6086956521739131,
+      "eval_runtime": 47.8176,
+      "eval_samples_per_second": 5.772,
+      "eval_steps_per_second": 0.188,
+      "step": 1420
+    },
+    {
+      "epoch": 0.6718676122931442,
+      "grad_norm": 4.48655366897583,
+      "learning_rate": 5.875906017993156e-06,
+      "loss": 0.1879,
+      "step": 1421
+    },
+    {
+      "epoch": 0.6723404255319149,
+      "grad_norm": 5.680935382843018,
+      "learning_rate": 5.8608723037040894e-06,
+      "loss": 0.2809,
+      "step": 1422
+    },
+    {
+      "epoch": 0.6728132387706856,
+      "grad_norm": 6.1803083419799805,
+      "learning_rate": 5.845849869981137e-06,
+      "loss": 0.195,
+      "step": 1423
+    },
+    {
+      "epoch": 0.6732860520094562,
+      "grad_norm": 9.49524974822998,
+      "learning_rate": 5.830838757765671e-06,
+      "loss": 0.3723,
+      "step": 1424
+    },
+    {
+      "epoch": 0.6737588652482269,
+      "grad_norm": 4.156935214996338,
+      "learning_rate": 5.815839007968196e-06,
+      "loss": 0.2042,
+      "step": 1425
+    },
+    {
+      "epoch": 0.6742316784869976,
+      "grad_norm": 5.995987892150879,
+      "learning_rate": 5.8008506614682714e-06,
+      "loss": 0.2007,
+      "step": 1426
+    },
+    {
+      "epoch": 0.6747044917257683,
+      "grad_norm": 4.0844645500183105,
+      "learning_rate": 5.785873759114364e-06,
+      "loss": 0.1592,
+      "step": 1427
+    },
+    {
+      "epoch": 0.675177304964539,
+      "grad_norm": 7.70731782913208,
+      "learning_rate": 5.770908341723752e-06,
+      "loss": 0.2633,
+      "step": 1428
+    },
+    {
+      "epoch": 0.6756501182033097,
+      "grad_norm": 4.959256649017334,
+      "learning_rate": 5.755954450082417e-06,
+      "loss": 0.2326,
+      "step": 1429
+    },
+    {
+      "epoch": 0.6761229314420804,
+      "grad_norm": 7.28727912902832,
+      "learning_rate": 5.741012124944925e-06,
+      "loss": 0.3043,
+      "step": 1430
+    },
+    {
+      "epoch": 0.676595744680851,
+      "grad_norm": 6.117763519287109,
+      "learning_rate": 5.726081407034327e-06,
+      "loss": 0.1876,
+      "step": 1431
+    },
+    {
+      "epoch": 0.6770685579196217,
+      "grad_norm": 4.031482696533203,
+      "learning_rate": 5.711162337042033e-06,
+      "loss": 0.2204,
+      "step": 1432
+    },
+    {
+      "epoch": 0.6775413711583924,
+      "grad_norm": 4.445287227630615,
+      "learning_rate": 5.6962549556277134e-06,
+      "loss": 0.1462,
+      "step": 1433
+    },
+    {
+      "epoch": 0.6780141843971631,
+      "grad_norm": 5.196186542510986,
+      "learning_rate": 5.681359303419169e-06,
+      "loss": 0.2448,
+      "step": 1434
+    },
+    {
+      "epoch": 0.6784869976359338,
+      "grad_norm": 5.29311990737915,
+      "learning_rate": 5.666475421012256e-06,
+      "loss": 0.1858,
+      "step": 1435
+    },
+    {
+      "epoch": 0.6789598108747045,
+      "grad_norm": 4.104085445404053,
+      "learning_rate": 5.651603348970741e-06,
+      "loss": 0.1939,
+      "step": 1436
+    },
+    {
+      "epoch": 0.6794326241134752,
+      "grad_norm": 6.132163047790527,
+      "learning_rate": 5.636743127826205e-06,
+      "loss": 0.2087,
+      "step": 1437
+    },
+    {
+      "epoch": 0.6799054373522458,
+      "grad_norm": 5.092171669006348,
+      "learning_rate": 5.621894798077928e-06,
+      "loss": 0.1947,
+      "step": 1438
+    },
+    {
+      "epoch": 0.6803782505910165,
+      "grad_norm": 5.59557580947876,
+      "learning_rate": 5.607058400192793e-06,
+      "loss": 0.219,
+      "step": 1439
+    },
+    {
+      "epoch": 0.6808510638297872,
+      "grad_norm": 6.623174667358398,
+      "learning_rate": 5.592233974605154e-06,
+      "loss": 0.1944,
+      "step": 1440
+    },
+    {
+      "epoch": 0.6808510638297872,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7075471698113207,
+      "eval_loss": 0.3171689808368683,
+      "eval_precision": 0.8771929824561403,
+      "eval_recall": 0.5928853754940712,
+      "eval_runtime": 46.6382,
+      "eval_samples_per_second": 5.918,
+      "eval_steps_per_second": 0.193,
+      "step": 1440
+    },
+    {
+      "epoch": 0.6813238770685579,
+      "grad_norm": 5.490980625152588,
+      "learning_rate": 5.577421561716738e-06,
+      "loss": 0.2387,
+      "step": 1441
+    },
+    {
+      "epoch": 0.6817966903073286,
+      "grad_norm": 6.905361652374268,
+      "learning_rate": 5.5626212018965344e-06,
+      "loss": 0.214,
+      "step": 1442
+    },
+    {
+      "epoch": 0.6822695035460993,
+      "grad_norm": 4.289844036102295,
+      "learning_rate": 5.547832935480686e-06,
+      "loss": 0.1257,
+      "step": 1443
+    },
+    {
+      "epoch": 0.68274231678487,
+      "grad_norm": 5.823220729827881,
+      "learning_rate": 5.533056802772374e-06,
+      "loss": 0.256,
+      "step": 1444
+    },
+    {
+      "epoch": 0.6832151300236406,
+      "grad_norm": 5.129421234130859,
+      "learning_rate": 5.518292844041711e-06,
+      "loss": 0.2614,
+      "step": 1445
+    },
+    {
+      "epoch": 0.6836879432624113,
+      "grad_norm": 4.678886890411377,
+      "learning_rate": 5.503541099525633e-06,
+      "loss": 0.1629,
+      "step": 1446
+    },
+    {
+      "epoch": 0.684160756501182,
+      "grad_norm": 8.273910522460938,
+      "learning_rate": 5.488801609427783e-06,
+      "loss": 0.2119,
+      "step": 1447
+    },
+    {
+      "epoch": 0.6846335697399527,
+      "grad_norm": 4.722292900085449,
+      "learning_rate": 5.474074413918418e-06,
+      "loss": 0.1892,
+      "step": 1448
+    },
+    {
+      "epoch": 0.6851063829787234,
+      "grad_norm": 6.923647880554199,
+      "learning_rate": 5.459359553134278e-06,
+      "loss": 0.2873,
+      "step": 1449
+    },
+    {
+      "epoch": 0.6855791962174941,
+      "grad_norm": 4.769101619720459,
+      "learning_rate": 5.444657067178487e-06,
+      "loss": 0.163,
+      "step": 1450
+    },
+    {
+      "epoch": 0.6860520094562648,
+      "grad_norm": 5.0238518714904785,
+      "learning_rate": 5.429966996120446e-06,
+      "loss": 0.1423,
+      "step": 1451
+    },
+    {
+      "epoch": 0.6865248226950355,
+      "grad_norm": 6.839998245239258,
+      "learning_rate": 5.415289379995723e-06,
+      "loss": 0.2498,
+      "step": 1452
+    },
+    {
+      "epoch": 0.6869976359338061,
+      "grad_norm": 5.654500484466553,
+      "learning_rate": 5.400624258805935e-06,
+      "loss": 0.1813,
+      "step": 1453
+    },
+    {
+      "epoch": 0.6874704491725768,
+      "grad_norm": 6.542251110076904,
+      "learning_rate": 5.385971672518653e-06,
+      "loss": 0.1936,
+      "step": 1454
+    },
+    {
+      "epoch": 0.6879432624113475,
+      "grad_norm": 6.039957046508789,
+      "learning_rate": 5.371331661067284e-06,
+      "loss": 0.1988,
+      "step": 1455
+    },
+    {
+      "epoch": 0.6884160756501182,
+      "grad_norm": 6.784928321838379,
+      "learning_rate": 5.356704264350958e-06,
+      "loss": 0.244,
+      "step": 1456
+    },
+    {
+      "epoch": 0.6888888888888889,
+      "grad_norm": 6.722204685211182,
+      "learning_rate": 5.342089522234439e-06,
+      "loss": 0.2621,
+      "step": 1457
+    },
+    {
+      "epoch": 0.6893617021276596,
+      "grad_norm": 4.733044147491455,
+      "learning_rate": 5.327487474547992e-06,
+      "loss": 0.2154,
+      "step": 1458
+    },
+    {
+      "epoch": 0.6898345153664303,
+      "grad_norm": 6.589846134185791,
+      "learning_rate": 5.312898161087288e-06,
+      "loss": 0.2647,
+      "step": 1459
+    },
+    {
+      "epoch": 0.6903073286052009,
+      "grad_norm": 5.961693286895752,
+      "learning_rate": 5.298321621613292e-06,
+      "loss": 0.2434,
+      "step": 1460
+    },
+    {
+      "epoch": 0.6903073286052009,
+      "eval_accuracy": 0.8691796008869179,
+      "eval_f1": 0.7412280701754386,
+      "eval_loss": 0.303468257188797,
+      "eval_precision": 0.8325123152709359,
+      "eval_recall": 0.6679841897233202,
+      "eval_runtime": 47.3008,
+      "eval_samples_per_second": 5.835,
+      "eval_steps_per_second": 0.19,
+      "step": 1460
+    },
+    {
+      "epoch": 0.6907801418439716,
+      "grad_norm": 7.759005069732666,
+      "learning_rate": 5.283757895852156e-06,
+      "loss": 0.2391,
+      "step": 1461
+    },
+    {
+      "epoch": 0.6912529550827423,
+      "grad_norm": 7.777105331420898,
+      "learning_rate": 5.269207023495112e-06,
+      "loss": 0.2959,
+      "step": 1462
+    },
+    {
+      "epoch": 0.691725768321513,
+      "grad_norm": 5.419680595397949,
+      "learning_rate": 5.25466904419836e-06,
+      "loss": 0.2324,
+      "step": 1463
+    },
+    {
+      "epoch": 0.6921985815602837,
+      "grad_norm": 8.358044624328613,
+      "learning_rate": 5.240143997582956e-06,
+      "loss": 0.242,
+      "step": 1464
+    },
+    {
+      "epoch": 0.6926713947990544,
+      "grad_norm": 7.01899528503418,
+      "learning_rate": 5.2256319232347275e-06,
+      "loss": 0.2361,
+      "step": 1465
+    },
+    {
+      "epoch": 0.6931442080378251,
+      "grad_norm": 9.41346549987793,
+      "learning_rate": 5.211132860704131e-06,
+      "loss": 0.2523,
+      "step": 1466
+    },
+    {
+      "epoch": 0.6936170212765957,
+      "grad_norm": 5.518009185791016,
+      "learning_rate": 5.196646849506169e-06,
+      "loss": 0.271,
+      "step": 1467
+    },
+    {
+      "epoch": 0.6940898345153664,
+      "grad_norm": 8.015327453613281,
+      "learning_rate": 5.18217392912027e-06,
+      "loss": 0.1744,
+      "step": 1468
+    },
+    {
+      "epoch": 0.6945626477541371,
+      "grad_norm": 6.272970199584961,
+      "learning_rate": 5.16771413899019e-06,
+      "loss": 0.2644,
+      "step": 1469
+    },
+    {
+      "epoch": 0.6950354609929078,
+      "grad_norm": 5.460439682006836,
+      "learning_rate": 5.153267518523899e-06,
+      "loss": 0.1546,
+      "step": 1470
+    },
+    {
+      "epoch": 0.6955082742316785,
+      "grad_norm": 4.355556488037109,
+      "learning_rate": 5.1388341070934735e-06,
+      "loss": 0.1737,
+      "step": 1471
+    },
+    {
+      "epoch": 0.6959810874704492,
+      "grad_norm": 5.721870422363281,
+      "learning_rate": 5.124413944034992e-06,
+      "loss": 0.2474,
+      "step": 1472
+    },
+    {
+      "epoch": 0.6964539007092199,
+      "grad_norm": 6.737970352172852,
+      "learning_rate": 5.110007068648422e-06,
+      "loss": 0.2093,
+      "step": 1473
+    },
+    {
+      "epoch": 0.6969267139479906,
+      "grad_norm": 5.622700214385986,
+      "learning_rate": 5.095613520197533e-06,
+      "loss": 0.2962,
+      "step": 1474
+    },
+    {
+      "epoch": 0.6973995271867612,
+      "grad_norm": 7.287775993347168,
+      "learning_rate": 5.081233337909756e-06,
+      "loss": 0.237,
+      "step": 1475
+    },
+    {
+      "epoch": 0.6978723404255319,
+      "grad_norm": 5.515003204345703,
+      "learning_rate": 5.066866560976102e-06,
+      "loss": 0.2765,
+      "step": 1476
+    },
+    {
+      "epoch": 0.6983451536643026,
+      "grad_norm": 6.383912563323975,
+      "learning_rate": 5.052513228551048e-06,
+      "loss": 0.234,
+      "step": 1477
+    },
+    {
+      "epoch": 0.6988179669030733,
+      "grad_norm": 6.042670726776123,
+      "learning_rate": 5.038173379752425e-06,
+      "loss": 0.3074,
+      "step": 1478
+    },
+    {
+      "epoch": 0.699290780141844,
+      "grad_norm": 7.7684831619262695,
+      "learning_rate": 5.0238470536613315e-06,
+      "loss": 0.2179,
+      "step": 1479
+    },
+    {
+      "epoch": 0.6997635933806147,
+      "grad_norm": 6.858969211578369,
+      "learning_rate": 5.009534289321991e-06,
+      "loss": 0.2346,
+      "step": 1480
+    },
+    {
+      "epoch": 0.6997635933806147,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7089201877934272,
+      "eval_loss": 0.3162979483604431,
+      "eval_precision": 0.8728323699421965,
+      "eval_recall": 0.5968379446640316,
+      "eval_runtime": 47.1019,
+      "eval_samples_per_second": 5.86,
+      "eval_steps_per_second": 0.191,
+      "step": 1480
+    },
+    {
+      "epoch": 0.7002364066193854,
+      "grad_norm": 4.629432201385498,
+      "learning_rate": 4.99523512574168e-06,
+      "loss": 0.1548,
+      "step": 1481
+    },
+    {
+      "epoch": 0.700709219858156,
+      "grad_norm": 5.3954548835754395,
+      "learning_rate": 4.9809496018906e-06,
+      "loss": 0.2797,
+      "step": 1482
+    },
+    {
+      "epoch": 0.7011820330969267,
+      "grad_norm": 5.510435104370117,
+      "learning_rate": 4.9666777567017935e-06,
+      "loss": 0.2455,
+      "step": 1483
+    },
+    {
+      "epoch": 0.7016548463356974,
+      "grad_norm": 5.090349197387695,
+      "learning_rate": 4.9524196290710095e-06,
+      "loss": 0.2056,
+      "step": 1484
+    },
+    {
+      "epoch": 0.7021276595744681,
+      "grad_norm": 3.495950698852539,
+      "learning_rate": 4.938175257856618e-06,
+      "loss": 0.1664,
+      "step": 1485
+    },
+    {
+      "epoch": 0.7026004728132388,
+      "grad_norm": 5.755441188812256,
+      "learning_rate": 4.9239446818794914e-06,
+      "loss": 0.247,
+      "step": 1486
+    },
+    {
+      "epoch": 0.7030732860520095,
+      "grad_norm": 6.540046215057373,
+      "learning_rate": 4.90972793992292e-06,
+      "loss": 0.2858,
+      "step": 1487
+    },
+    {
+      "epoch": 0.7035460992907802,
+      "grad_norm": 6.471051216125488,
+      "learning_rate": 4.89552507073248e-06,
+      "loss": 0.1972,
+      "step": 1488
+    },
+    {
+      "epoch": 0.7040189125295508,
+      "grad_norm": 4.746458530426025,
+      "learning_rate": 4.881336113015939e-06,
+      "loss": 0.2121,
+      "step": 1489
+    },
+    {
+      "epoch": 0.7044917257683215,
+      "grad_norm": 4.542480945587158,
+      "learning_rate": 4.867161105443158e-06,
+      "loss": 0.2013,
+      "step": 1490
+    },
+    {
+      "epoch": 0.7049645390070922,
+      "grad_norm": 5.85392951965332,
+      "learning_rate": 4.853000086645965e-06,
+      "loss": 0.2253,
+      "step": 1491
+    },
+    {
+      "epoch": 0.7054373522458629,
+      "grad_norm": 6.674161911010742,
+      "learning_rate": 4.838853095218085e-06,
+      "loss": 0.2491,
+      "step": 1492
+    },
+    {
+      "epoch": 0.7059101654846336,
+      "grad_norm": 4.372111797332764,
+      "learning_rate": 4.824720169714997e-06,
+      "loss": 0.1928,
+      "step": 1493
+    },
+    {
+      "epoch": 0.7063829787234043,
+      "grad_norm": 6.305669784545898,
+      "learning_rate": 4.8106013486538505e-06,
+      "loss": 0.2462,
+      "step": 1494
+    },
+    {
+      "epoch": 0.706855791962175,
+      "grad_norm": 5.79583215713501,
+      "learning_rate": 4.796496670513354e-06,
+      "loss": 0.2655,
+      "step": 1495
+    },
+    {
+      "epoch": 0.7073286052009456,
+      "grad_norm": 6.358242511749268,
+      "learning_rate": 4.782406173733678e-06,
+      "loss": 0.1943,
+      "step": 1496
+    },
+    {
+      "epoch": 0.7078014184397163,
+      "grad_norm": 6.863159656524658,
+      "learning_rate": 4.768329896716337e-06,
+      "loss": 0.2634,
+      "step": 1497
+    },
+    {
+      "epoch": 0.708274231678487,
+      "grad_norm": 6.125742435455322,
+      "learning_rate": 4.7542678778240925e-06,
+      "loss": 0.2209,
+      "step": 1498
+    },
+    {
+      "epoch": 0.7087470449172577,
+      "grad_norm": 5.767421722412109,
+      "learning_rate": 4.74022015538085e-06,
+      "loss": 0.2381,
+      "step": 1499
+    },
+    {
+      "epoch": 0.7092198581560284,
+      "grad_norm": 5.935783863067627,
+      "learning_rate": 4.72618676767155e-06,
+      "loss": 0.2532,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7092198581560284,
+      "eval_accuracy": 0.8658536585365854,
+      "eval_f1": 0.7328918322295805,
+      "eval_loss": 0.2938072085380554,
+      "eval_precision": 0.83,
+      "eval_recall": 0.6561264822134387,
+      "eval_runtime": 47.9899,
+      "eval_samples_per_second": 5.751,
+      "eval_steps_per_second": 0.188,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7096926713947991,
+      "grad_norm": 6.156116008758545,
+      "learning_rate": 4.712167752942067e-06,
+      "loss": 0.2343,
+      "step": 1501
+    },
+    {
+      "epoch": 0.7101654846335698,
+      "grad_norm": 7.0279412269592285,
+      "learning_rate": 4.698163149399104e-06,
+      "loss": 0.2633,
+      "step": 1502
+    },
+    {
+      "epoch": 0.7106382978723405,
+      "grad_norm": 5.053689002990723,
+      "learning_rate": 4.68417299521009e-06,
+      "loss": 0.2181,
+      "step": 1503
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "grad_norm": 5.44726037979126,
+      "learning_rate": 4.670197328503067e-06,
+      "loss": 0.2635,
+      "step": 1504
+    },
+    {
+      "epoch": 0.7115839243498818,
+      "grad_norm": 6.307510852813721,
+      "learning_rate": 4.656236187366607e-06,
+      "loss": 0.2775,
+      "step": 1505
+    },
+    {
+      "epoch": 0.7120567375886525,
+      "grad_norm": 7.774320602416992,
+      "learning_rate": 4.642289609849686e-06,
+      "loss": 0.2855,
+      "step": 1506
+    },
+    {
+      "epoch": 0.7125295508274232,
+      "grad_norm": 7.836673736572266,
+      "learning_rate": 4.628357633961589e-06,
+      "loss": 0.3376,
+      "step": 1507
+    },
+    {
+      "epoch": 0.7130023640661939,
+      "grad_norm": 6.827236175537109,
+      "learning_rate": 4.614440297671806e-06,
+      "loss": 0.2518,
+      "step": 1508
+    },
+    {
+      "epoch": 0.7134751773049646,
+      "grad_norm": 4.354206085205078,
+      "learning_rate": 4.600537638909933e-06,
+      "loss": 0.2073,
+      "step": 1509
+    },
+    {
+      "epoch": 0.7139479905437353,
+      "grad_norm": 5.115018367767334,
+      "learning_rate": 4.586649695565563e-06,
+      "loss": 0.2146,
+      "step": 1510
+    },
+    {
+      "epoch": 0.7144208037825059,
+      "grad_norm": 6.804549694061279,
+      "learning_rate": 4.572776505488181e-06,
+      "loss": 0.2646,
+      "step": 1511
+    },
+    {
+      "epoch": 0.7148936170212766,
+      "grad_norm": 5.3641581535339355,
+      "learning_rate": 4.558918106487065e-06,
+      "loss": 0.2636,
+      "step": 1512
+    },
+    {
+      "epoch": 0.7153664302600473,
+      "grad_norm": 7.315088272094727,
+      "learning_rate": 4.545074536331191e-06,
+      "loss": 0.2897,
+      "step": 1513
+    },
+    {
+      "epoch": 0.715839243498818,
+      "grad_norm": 5.816035747528076,
+      "learning_rate": 4.531245832749112e-06,
+      "loss": 0.2956,
+      "step": 1514
+    },
+    {
+      "epoch": 0.7163120567375887,
+      "grad_norm": 4.486478328704834,
+      "learning_rate": 4.517432033428864e-06,
+      "loss": 0.2543,
+      "step": 1515
+    },
+    {
+      "epoch": 0.7167848699763594,
+      "grad_norm": 4.364136219024658,
+      "learning_rate": 4.5036331760178695e-06,
+      "loss": 0.1811,
+      "step": 1516
+    },
+    {
+      "epoch": 0.7172576832151301,
+      "grad_norm": 8.54796314239502,
+      "learning_rate": 4.4898492981228245e-06,
+      "loss": 0.237,
+      "step": 1517
+    },
+    {
+      "epoch": 0.7177304964539007,
+      "grad_norm": 6.10654354095459,
+      "learning_rate": 4.4760804373096036e-06,
+      "loss": 0.2353,
+      "step": 1518
+    },
+    {
+      "epoch": 0.7182033096926714,
+      "grad_norm": 4.113856792449951,
+      "learning_rate": 4.46232663110315e-06,
+      "loss": 0.1487,
+      "step": 1519
+    },
+    {
+      "epoch": 0.7186761229314421,
+      "grad_norm": 3.579453468322754,
+      "learning_rate": 4.448587916987384e-06,
+      "loss": 0.1815,
+      "step": 1520
+    },
+    {
+      "epoch": 0.7186761229314421,
+      "eval_accuracy": 0.8569844789356984,
+      "eval_f1": 0.6861313868613139,
+      "eval_loss": 0.3156262934207916,
+      "eval_precision": 0.8924050632911392,
+      "eval_recall": 0.5573122529644269,
+      "eval_runtime": 47.8319,
+      "eval_samples_per_second": 5.77,
+      "eval_steps_per_second": 0.188,
+      "step": 1520
+    },
+    {
+      "epoch": 0.7191489361702128,
+      "grad_norm": 3.734877824783325,
+      "learning_rate": 4.434864332405085e-06,
+      "loss": 0.1694,
+      "step": 1521
+    },
+    {
+      "epoch": 0.7196217494089835,
+      "grad_norm": 5.21559476852417,
+      "learning_rate": 4.421155914757817e-06,
+      "loss": 0.2566,
+      "step": 1522
+    },
+    {
+      "epoch": 0.7200945626477542,
+      "grad_norm": 5.495852470397949,
+      "learning_rate": 4.407462701405791e-06,
+      "loss": 0.2993,
+      "step": 1523
+    },
+    {
+      "epoch": 0.7205673758865249,
+      "grad_norm": 4.870457649230957,
+      "learning_rate": 4.393784729667788e-06,
+      "loss": 0.2035,
+      "step": 1524
+    },
+    {
+      "epoch": 0.7210401891252955,
+      "grad_norm": 10.591519355773926,
+      "learning_rate": 4.380122036821048e-06,
+      "loss": 0.4052,
+      "step": 1525
+    },
+    {
+      "epoch": 0.7215130023640662,
+      "grad_norm": 5.435426712036133,
+      "learning_rate": 4.366474660101183e-06,
+      "loss": 0.2258,
+      "step": 1526
+    },
+    {
+      "epoch": 0.7219858156028369,
+      "grad_norm": 4.526400089263916,
+      "learning_rate": 4.3528426367020405e-06,
+      "loss": 0.1775,
+      "step": 1527
+    },
+    {
+      "epoch": 0.7224586288416076,
+      "grad_norm": 3.7634830474853516,
+      "learning_rate": 4.339226003775642e-06,
+      "loss": 0.2018,
+      "step": 1528
+    },
+    {
+      "epoch": 0.7229314420803783,
+      "grad_norm": 5.57973051071167,
+      "learning_rate": 4.325624798432059e-06,
+      "loss": 0.2942,
+      "step": 1529
+    },
+    {
+      "epoch": 0.723404255319149,
+      "grad_norm": 4.420356750488281,
+      "learning_rate": 4.312039057739316e-06,
+      "loss": 0.217,
+      "step": 1530
+    },
+    {
+      "epoch": 0.7238770685579197,
+      "grad_norm": 6.249464511871338,
+      "learning_rate": 4.298468818723298e-06,
+      "loss": 0.2769,
+      "step": 1531
+    },
+    {
+      "epoch": 0.7243498817966904,
+      "grad_norm": 4.786191463470459,
+      "learning_rate": 4.284914118367637e-06,
+      "loss": 0.2363,
+      "step": 1532
+    },
+    {
+      "epoch": 0.724822695035461,
+      "grad_norm": 3.865898847579956,
+      "learning_rate": 4.271374993613615e-06,
+      "loss": 0.1605,
+      "step": 1533
+    },
+    {
+      "epoch": 0.7252955082742317,
+      "grad_norm": 4.794460296630859,
+      "learning_rate": 4.257851481360066e-06,
+      "loss": 0.1329,
+      "step": 1534
+    },
+    {
+      "epoch": 0.7257683215130024,
+      "grad_norm": 4.994689464569092,
+      "learning_rate": 4.244343618463281e-06,
+      "loss": 0.2508,
+      "step": 1535
+    },
+    {
+      "epoch": 0.7262411347517731,
+      "grad_norm": 5.947475910186768,
+      "learning_rate": 4.2308514417368974e-06,
+      "loss": 0.1934,
+      "step": 1536
+    },
+    {
+      "epoch": 0.7267139479905438,
+      "grad_norm": 5.171843528747559,
+      "learning_rate": 4.2173749879517945e-06,
+      "loss": 0.2216,
+      "step": 1537
+    },
+    {
+      "epoch": 0.7271867612293145,
+      "grad_norm": 3.876723527908325,
+      "learning_rate": 4.2039142938360086e-06,
+      "loss": 0.1718,
+      "step": 1538
+    },
+    {
+      "epoch": 0.7276595744680852,
+      "grad_norm": 4.720749855041504,
+      "learning_rate": 4.190469396074622e-06,
+      "loss": 0.1817,
+      "step": 1539
+    },
+    {
+      "epoch": 0.7281323877068558,
+      "grad_norm": 4.733708381652832,
+      "learning_rate": 4.177040331309678e-06,
+      "loss": 0.1989,
+      "step": 1540
+    },
+    {
+      "epoch": 0.7281323877068558,
+      "eval_accuracy": 0.8614190687361419,
+      "eval_f1": 0.7016706443914081,
+      "eval_loss": 0.3186676502227783,
+      "eval_precision": 0.8855421686746988,
+      "eval_recall": 0.5810276679841897,
+      "eval_runtime": 48.0319,
+      "eval_samples_per_second": 5.746,
+      "eval_steps_per_second": 0.187,
+      "step": 1540
+    },
+    {
+      "epoch": 0.7286052009456265,
+      "grad_norm": 4.963865756988525,
+      "learning_rate": 4.163627136140054e-06,
+      "loss": 0.1798,
+      "step": 1541
+    },
+    {
+      "epoch": 0.7290780141843972,
+      "grad_norm": 5.173526763916016,
+      "learning_rate": 4.150229847121384e-06,
+      "loss": 0.2075,
+      "step": 1542
+    },
+    {
+      "epoch": 0.7295508274231679,
+      "grad_norm": 4.9639387130737305,
+      "learning_rate": 4.136848500765948e-06,
+      "loss": 0.2293,
+      "step": 1543
+    },
+    {
+      "epoch": 0.7300236406619386,
+      "grad_norm": 5.996505260467529,
+      "learning_rate": 4.123483133542588e-06,
+      "loss": 0.2557,
+      "step": 1544
+    },
+    {
+      "epoch": 0.7304964539007093,
+      "grad_norm": 3.7886509895324707,
+      "learning_rate": 4.110133781876587e-06,
+      "loss": 0.1741,
+      "step": 1545
+    },
+    {
+      "epoch": 0.7309692671394799,
+      "grad_norm": 9.337357521057129,
+      "learning_rate": 4.0968004821495845e-06,
+      "loss": 0.2775,
+      "step": 1546
+    },
+    {
+      "epoch": 0.7314420803782505,
+      "grad_norm": 7.194756984710693,
+      "learning_rate": 4.083483270699461e-06,
+      "loss": 0.2572,
+      "step": 1547
+    },
+    {
+      "epoch": 0.7319148936170212,
+      "grad_norm": 5.332988739013672,
+      "learning_rate": 4.070182183820272e-06,
+      "loss": 0.1859,
+      "step": 1548
+    },
+    {
+      "epoch": 0.7323877068557919,
+      "grad_norm": 7.105772972106934,
+      "learning_rate": 4.056897257762111e-06,
+      "loss": 0.2279,
+      "step": 1549
+    },
+    {
+      "epoch": 0.7328605200945626,
+      "grad_norm": 3.7316532135009766,
+      "learning_rate": 4.043628528731036e-06,
+      "loss": 0.1744,
+      "step": 1550
+    },
+    {
+      "epoch": 0.7333333333333333,
+      "grad_norm": 5.632213592529297,
+      "learning_rate": 4.030376032888959e-06,
+      "loss": 0.1418,
+      "step": 1551
+    },
+    {
+      "epoch": 0.733806146572104,
+      "grad_norm": 11.740776062011719,
+      "learning_rate": 4.01713980635355e-06,
+      "loss": 0.4084,
+      "step": 1552
+    },
+    {
+      "epoch": 0.7342789598108747,
+      "grad_norm": 5.153548717498779,
+      "learning_rate": 4.003919885198145e-06,
+      "loss": 0.1908,
+      "step": 1553
+    },
+    {
+      "epoch": 0.7347517730496453,
+      "grad_norm": 7.000768184661865,
+      "learning_rate": 3.990716305451636e-06,
+      "loss": 0.158,
+      "step": 1554
+    },
+    {
+      "epoch": 0.735224586288416,
+      "grad_norm": 7.075311183929443,
+      "learning_rate": 3.977529103098382e-06,
+      "loss": 0.3135,
+      "step": 1555
+    },
+    {
+      "epoch": 0.7356973995271867,
+      "grad_norm": 4.545357704162598,
+      "learning_rate": 3.964358314078107e-06,
+      "loss": 0.1733,
+      "step": 1556
+    },
+    {
+      "epoch": 0.7361702127659574,
+      "grad_norm": 7.8234357833862305,
+      "learning_rate": 3.951203974285805e-06,
+      "loss": 0.2067,
+      "step": 1557
+    },
+    {
+      "epoch": 0.7366430260047281,
+      "grad_norm": 7.197999000549316,
+      "learning_rate": 3.938066119571634e-06,
+      "loss": 0.1974,
+      "step": 1558
+    },
+    {
+      "epoch": 0.7371158392434988,
+      "grad_norm": 4.149715900421143,
+      "learning_rate": 3.9249447857408316e-06,
+      "loss": 0.1895,
+      "step": 1559
+    },
+    {
+      "epoch": 0.7375886524822695,
+      "grad_norm": 6.287510395050049,
+      "learning_rate": 3.911840008553604e-06,
+      "loss": 0.1749,
+      "step": 1560
+    },
+    {
+      "epoch": 0.7375886524822695,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7136150234741784,
+      "eval_loss": 0.3169473111629486,
+      "eval_precision": 0.8786127167630058,
+      "eval_recall": 0.6007905138339921,
+      "eval_runtime": 49.2192,
+      "eval_samples_per_second": 5.608,
+      "eval_steps_per_second": 0.183,
+      "step": 1560
+    },
+    {
+      "epoch": 0.7380614657210401,
+      "grad_norm": 6.008514881134033,
+      "learning_rate": 3.898751823725044e-06,
+      "loss": 0.2882,
+      "step": 1561
+    },
+    {
+      "epoch": 0.7385342789598108,
+      "grad_norm": 4.948297023773193,
+      "learning_rate": 3.885680266925016e-06,
+      "loss": 0.2198,
+      "step": 1562
+    },
+    {
+      "epoch": 0.7390070921985815,
+      "grad_norm": 8.02566146850586,
+      "learning_rate": 3.87262537377807e-06,
+      "loss": 0.2223,
+      "step": 1563
+    },
+    {
+      "epoch": 0.7394799054373522,
+      "grad_norm": 5.575436592102051,
+      "learning_rate": 3.85958717986334e-06,
+      "loss": 0.2097,
+      "step": 1564
+    },
+    {
+      "epoch": 0.7399527186761229,
+      "grad_norm": 6.199014663696289,
+      "learning_rate": 3.846565720714451e-06,
+      "loss": 0.2203,
+      "step": 1565
+    },
+    {
+      "epoch": 0.7404255319148936,
+      "grad_norm": 4.431324481964111,
+      "learning_rate": 3.83356103181942e-06,
+      "loss": 0.196,
+      "step": 1566
+    },
+    {
+      "epoch": 0.7408983451536643,
+      "grad_norm": 8.445691108703613,
+      "learning_rate": 3.820573148620559e-06,
+      "loss": 0.278,
+      "step": 1567
+    },
+    {
+      "epoch": 0.741371158392435,
+      "grad_norm": 7.028994083404541,
+      "learning_rate": 3.807602106514375e-06,
+      "loss": 0.2681,
+      "step": 1568
+    },
+    {
+      "epoch": 0.7418439716312056,
+      "grad_norm": 4.2099995613098145,
+      "learning_rate": 3.79464794085148e-06,
+      "loss": 0.2066,
+      "step": 1569
+    },
+    {
+      "epoch": 0.7423167848699763,
+      "grad_norm": 4.710866928100586,
+      "learning_rate": 3.781710686936497e-06,
+      "loss": 0.183,
+      "step": 1570
+    },
+    {
+      "epoch": 0.742789598108747,
+      "grad_norm": 5.328328609466553,
+      "learning_rate": 3.7687903800279513e-06,
+      "loss": 0.1954,
+      "step": 1571
+    },
+    {
+      "epoch": 0.7432624113475177,
+      "grad_norm": 7.036726474761963,
+      "learning_rate": 3.755887055338183e-06,
+      "loss": 0.2555,
+      "step": 1572
+    },
+    {
+      "epoch": 0.7437352245862884,
+      "grad_norm": 6.250298500061035,
+      "learning_rate": 3.743000748033252e-06,
+      "loss": 0.2065,
+      "step": 1573
+    },
+    {
+      "epoch": 0.7442080378250591,
+      "grad_norm": 6.400665760040283,
+      "learning_rate": 3.730131493232837e-06,
+      "loss": 0.2693,
+      "step": 1574
+    },
+    {
+      "epoch": 0.7446808510638298,
+      "grad_norm": 5.254453659057617,
+      "learning_rate": 3.7172793260101446e-06,
+      "loss": 0.1433,
+      "step": 1575
+    },
+    {
+      "epoch": 0.7451536643026004,
+      "grad_norm": 7.966073989868164,
+      "learning_rate": 3.7044442813918125e-06,
+      "loss": 0.2912,
+      "step": 1576
+    },
+    {
+      "epoch": 0.7456264775413711,
+      "grad_norm": 7.94743537902832,
+      "learning_rate": 3.6916263943578123e-06,
+      "loss": 0.1966,
+      "step": 1577
+    },
+    {
+      "epoch": 0.7460992907801418,
+      "grad_norm": 6.912722110748291,
+      "learning_rate": 3.6788256998413506e-06,
+      "loss": 0.1794,
+      "step": 1578
+    },
+    {
+      "epoch": 0.7465721040189125,
+      "grad_norm": 6.084993839263916,
+      "learning_rate": 3.6660422327287914e-06,
+      "loss": 0.1606,
+      "step": 1579
+    },
+    {
+      "epoch": 0.7470449172576832,
+      "grad_norm": 5.905043125152588,
+      "learning_rate": 3.6532760278595345e-06,
+      "loss": 0.2141,
+      "step": 1580
+    },
+    {
+      "epoch": 0.7470449172576832,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7272727272727273,
+      "eval_loss": 0.3045748174190521,
+      "eval_precision": 0.8556149732620321,
+      "eval_recall": 0.6324110671936759,
+      "eval_runtime": 46.807,
+      "eval_samples_per_second": 5.897,
+      "eval_steps_per_second": 0.192,
+      "step": 1580
+    },
+    {
+      "epoch": 0.7475177304964539,
+      "grad_norm": 6.382122039794922,
+      "learning_rate": 3.6405271200259406e-06,
+      "loss": 0.1741,
+      "step": 1581
+    },
+    {
+      "epoch": 0.7479905437352246,
+      "grad_norm": 7.32953405380249,
+      "learning_rate": 3.627795543973228e-06,
+      "loss": 0.3185,
+      "step": 1582
+    },
+    {
+      "epoch": 0.7484633569739952,
+      "grad_norm": 5.959390163421631,
+      "learning_rate": 3.6150813343993817e-06,
+      "loss": 0.244,
+      "step": 1583
+    },
+    {
+      "epoch": 0.7489361702127659,
+      "grad_norm": 7.206792831420898,
+      "learning_rate": 3.6023845259550526e-06,
+      "loss": 0.2935,
+      "step": 1584
+    },
+    {
+      "epoch": 0.7494089834515366,
+      "grad_norm": 6.780135154724121,
+      "learning_rate": 3.5897051532434746e-06,
+      "loss": 0.2841,
+      "step": 1585
+    },
+    {
+      "epoch": 0.7498817966903073,
+      "grad_norm": 5.990156173706055,
+      "learning_rate": 3.5770432508203525e-06,
+      "loss": 0.2146,
+      "step": 1586
+    },
+    {
+      "epoch": 0.750354609929078,
+      "grad_norm": 6.680501937866211,
+      "learning_rate": 3.5643988531937923e-06,
+      "loss": 0.2593,
+      "step": 1587
+    },
+    {
+      "epoch": 0.7508274231678487,
+      "grad_norm": 6.1346282958984375,
+      "learning_rate": 3.5517719948241837e-06,
+      "loss": 0.2365,
+      "step": 1588
+    },
+    {
+      "epoch": 0.7513002364066194,
+      "grad_norm": 6.420486927032471,
+      "learning_rate": 3.5391627101241187e-06,
+      "loss": 0.2646,
+      "step": 1589
+    },
+    {
+      "epoch": 0.75177304964539,
+      "grad_norm": 5.62802791595459,
+      "learning_rate": 3.5265710334582924e-06,
+      "loss": 0.2584,
+      "step": 1590
+    },
+    {
+      "epoch": 0.7522458628841607,
+      "grad_norm": 5.959242820739746,
+      "learning_rate": 3.5139969991434132e-06,
+      "loss": 0.1629,
+      "step": 1591
+    },
+    {
+      "epoch": 0.7527186761229314,
+      "grad_norm": 7.4486517906188965,
+      "learning_rate": 3.5014406414481173e-06,
+      "loss": 0.3043,
+      "step": 1592
+    },
+    {
+      "epoch": 0.7531914893617021,
+      "grad_norm": 11.137663841247559,
+      "learning_rate": 3.488901994592846e-06,
+      "loss": 0.2216,
+      "step": 1593
+    },
+    {
+      "epoch": 0.7536643026004728,
+      "grad_norm": 7.19482946395874,
+      "learning_rate": 3.476381092749789e-06,
+      "loss": 0.1895,
+      "step": 1594
+    },
+    {
+      "epoch": 0.7541371158392435,
+      "grad_norm": 5.478968620300293,
+      "learning_rate": 3.463877970042765e-06,
+      "loss": 0.2568,
+      "step": 1595
+    },
+    {
+      "epoch": 0.7546099290780142,
+      "grad_norm": 5.759098052978516,
+      "learning_rate": 3.4513926605471504e-06,
+      "loss": 0.2078,
+      "step": 1596
+    },
+    {
+      "epoch": 0.7550827423167848,
+      "grad_norm": 5.912849426269531,
+      "learning_rate": 3.438925198289762e-06,
+      "loss": 0.2184,
+      "step": 1597
+    },
+    {
+      "epoch": 0.7555555555555555,
+      "grad_norm": 7.000945091247559,
+      "learning_rate": 3.4264756172487813e-06,
+      "loss": 0.2958,
+      "step": 1598
+    },
+    {
+      "epoch": 0.7560283687943262,
+      "grad_norm": 6.138962745666504,
+      "learning_rate": 3.414043951353656e-06,
+      "loss": 0.3196,
+      "step": 1599
+    },
+    {
+      "epoch": 0.7565011820330969,
+      "grad_norm": 6.125826835632324,
+      "learning_rate": 3.401630234485014e-06,
+      "loss": 0.2638,
+      "step": 1600
+    },
+    {
+      "epoch": 0.7565011820330969,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7309417040358744,
+      "eval_loss": 0.29756960272789,
+      "eval_precision": 0.844559585492228,
+      "eval_recall": 0.6442687747035574,
+      "eval_runtime": 47.3187,
+      "eval_samples_per_second": 5.833,
+      "eval_steps_per_second": 0.19,
+      "step": 1600
+    },
+    {
+      "epoch": 0.7569739952718676,
+      "grad_norm": 6.0788445472717285,
+      "learning_rate": 3.3892345004745607e-06,
+      "loss": 0.1994,
+      "step": 1601
+    },
+    {
+      "epoch": 0.7574468085106383,
+      "grad_norm": 6.959369659423828,
+      "learning_rate": 3.376856783104996e-06,
+      "loss": 0.3052,
+      "step": 1602
+    },
+    {
+      "epoch": 0.757919621749409,
+      "grad_norm": 4.413602352142334,
+      "learning_rate": 3.3644971161099083e-06,
+      "loss": 0.1861,
+      "step": 1603
+    },
+    {
+      "epoch": 0.7583924349881797,
+      "grad_norm": 4.487978935241699,
+      "learning_rate": 3.3521555331736987e-06,
+      "loss": 0.2593,
+      "step": 1604
+    },
+    {
+      "epoch": 0.7588652482269503,
+      "grad_norm": 5.322134017944336,
+      "learning_rate": 3.339832067931491e-06,
+      "loss": 0.2151,
+      "step": 1605
+    },
+    {
+      "epoch": 0.759338061465721,
+      "grad_norm": 5.43377685546875,
+      "learning_rate": 3.3275267539690225e-06,
+      "loss": 0.2738,
+      "step": 1606
+    },
+    {
+      "epoch": 0.7598108747044917,
+      "grad_norm": 5.074190616607666,
+      "learning_rate": 3.315239624822563e-06,
+      "loss": 0.1439,
+      "step": 1607
+    },
+    {
+      "epoch": 0.7602836879432624,
+      "grad_norm": 5.0913920402526855,
+      "learning_rate": 3.30297071397882e-06,
+      "loss": 0.2314,
+      "step": 1608
+    },
+    {
+      "epoch": 0.7607565011820331,
+      "grad_norm": 6.6666107177734375,
+      "learning_rate": 3.29072005487486e-06,
+      "loss": 0.3486,
+      "step": 1609
+    },
+    {
+      "epoch": 0.7612293144208038,
+      "grad_norm": 5.526213645935059,
+      "learning_rate": 3.278487680897997e-06,
+      "loss": 0.2517,
+      "step": 1610
+    },
+    {
+      "epoch": 0.7617021276595745,
+      "grad_norm": 5.9422736167907715,
+      "learning_rate": 3.2662736253857154e-06,
+      "loss": 0.219,
+      "step": 1611
+    },
+    {
+      "epoch": 0.7621749408983451,
+      "grad_norm": 5.5532426834106445,
+      "learning_rate": 3.254077921625578e-06,
+      "loss": 0.2077,
+      "step": 1612
+    },
+    {
+      "epoch": 0.7626477541371158,
+      "grad_norm": 4.330904960632324,
+      "learning_rate": 3.2419006028551205e-06,
+      "loss": 0.1412,
+      "step": 1613
+    },
+    {
+      "epoch": 0.7631205673758865,
+      "grad_norm": 8.491339683532715,
+      "learning_rate": 3.2297417022617904e-06,
+      "loss": 0.3303,
+      "step": 1614
+    },
+    {
+      "epoch": 0.7635933806146572,
+      "grad_norm": 6.322214603424072,
+      "learning_rate": 3.2176012529828295e-06,
+      "loss": 0.2718,
+      "step": 1615
+    },
+    {
+      "epoch": 0.7640661938534279,
+      "grad_norm": 6.424625873565674,
+      "learning_rate": 3.2054792881051933e-06,
+      "loss": 0.2817,
+      "step": 1616
+    },
+    {
+      "epoch": 0.7645390070921986,
+      "grad_norm": 5.700141429901123,
+      "learning_rate": 3.1933758406654615e-06,
+      "loss": 0.273,
+      "step": 1617
+    },
+    {
+      "epoch": 0.7650118203309693,
+      "grad_norm": 4.833881378173828,
+      "learning_rate": 3.181290943649753e-06,
+      "loss": 0.2003,
+      "step": 1618
+    },
+    {
+      "epoch": 0.76548463356974,
+      "grad_norm": 5.849541187286377,
+      "learning_rate": 3.1692246299936234e-06,
+      "loss": 0.2389,
+      "step": 1619
+    },
+    {
+      "epoch": 0.7659574468085106,
+      "grad_norm": 4.621654510498047,
+      "learning_rate": 3.1571769325819834e-06,
+      "loss": 0.2215,
+      "step": 1620
+    },
+    {
+      "epoch": 0.7659574468085106,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7276785714285714,
+      "eval_loss": 0.2926580309867859,
+      "eval_precision": 0.8358974358974359,
+      "eval_recall": 0.6442687747035574,
+      "eval_runtime": 48.4804,
+      "eval_samples_per_second": 5.693,
+      "eval_steps_per_second": 0.186,
+      "step": 1620
+    },
+    {
+      "epoch": 0.7664302600472813,
+      "grad_norm": 4.861423492431641,
+      "learning_rate": 3.1451478842490114e-06,
+      "loss": 0.2547,
+      "step": 1621
+    },
+    {
+      "epoch": 0.766903073286052,
+      "grad_norm": 5.893204212188721,
+      "learning_rate": 3.133137517778054e-06,
+      "loss": 0.1872,
+      "step": 1622
+    },
+    {
+      "epoch": 0.7673758865248227,
+      "grad_norm": 5.206727504730225,
+      "learning_rate": 3.1211458659015513e-06,
+      "loss": 0.198,
+      "step": 1623
+    },
+    {
+      "epoch": 0.7678486997635934,
+      "grad_norm": 5.422547340393066,
+      "learning_rate": 3.1091729613009346e-06,
+      "loss": 0.25,
+      "step": 1624
+    },
+    {
+      "epoch": 0.7683215130023641,
+      "grad_norm": 6.1905999183654785,
+      "learning_rate": 3.0972188366065424e-06,
+      "loss": 0.2626,
+      "step": 1625
+    },
+    {
+      "epoch": 0.7687943262411348,
+      "grad_norm": 4.419033050537109,
+      "learning_rate": 3.08528352439753e-06,
+      "loss": 0.2188,
+      "step": 1626
+    },
+    {
+      "epoch": 0.7692671394799054,
+      "grad_norm": 4.382445335388184,
+      "learning_rate": 3.0733670572017894e-06,
+      "loss": 0.1589,
+      "step": 1627
+    },
+    {
+      "epoch": 0.7697399527186761,
+      "grad_norm": 4.622806549072266,
+      "learning_rate": 3.0614694674958477e-06,
+      "loss": 0.2515,
+      "step": 1628
+    },
+    {
+      "epoch": 0.7702127659574468,
+      "grad_norm": 6.51718807220459,
+      "learning_rate": 3.0495907877047836e-06,
+      "loss": 0.2507,
+      "step": 1629
+    },
+    {
+      "epoch": 0.7706855791962175,
+      "grad_norm": 3.415192127227783,
+      "learning_rate": 3.0377310502021405e-06,
+      "loss": 0.1726,
+      "step": 1630
+    },
+    {
+      "epoch": 0.7711583924349882,
+      "grad_norm": 4.835279941558838,
+      "learning_rate": 3.0258902873098406e-06,
+      "loss": 0.1817,
+      "step": 1631
+    },
+    {
+      "epoch": 0.7716312056737589,
+      "grad_norm": 6.565097808837891,
+      "learning_rate": 3.014068531298089e-06,
+      "loss": 0.2459,
+      "step": 1632
+    },
+    {
+      "epoch": 0.7721040189125296,
+      "grad_norm": 6.267763614654541,
+      "learning_rate": 3.0022658143852923e-06,
+      "loss": 0.2536,
+      "step": 1633
+    },
+    {
+      "epoch": 0.7725768321513002,
+      "grad_norm": 4.700669765472412,
+      "learning_rate": 2.990482168737967e-06,
+      "loss": 0.161,
+      "step": 1634
+    },
+    {
+      "epoch": 0.7730496453900709,
+      "grad_norm": 6.653107166290283,
+      "learning_rate": 2.978717626470663e-06,
+      "loss": 0.2972,
+      "step": 1635
+    },
+    {
+      "epoch": 0.7735224586288416,
+      "grad_norm": 5.026117324829102,
+      "learning_rate": 2.966972219645855e-06,
+      "loss": 0.237,
+      "step": 1636
+    },
+    {
+      "epoch": 0.7739952718676123,
+      "grad_norm": 6.812114238739014,
+      "learning_rate": 2.9552459802738733e-06,
+      "loss": 0.1928,
+      "step": 1637
+    },
+    {
+      "epoch": 0.774468085106383,
+      "grad_norm": 5.485900402069092,
+      "learning_rate": 2.943538940312807e-06,
+      "loss": 0.1795,
+      "step": 1638
+    },
+    {
+      "epoch": 0.7749408983451537,
+      "grad_norm": 4.413486003875732,
+      "learning_rate": 2.931851131668423e-06,
+      "loss": 0.2056,
+      "step": 1639
+    },
+    {
+      "epoch": 0.7754137115839244,
+      "grad_norm": 6.196752071380615,
+      "learning_rate": 2.920182586194075e-06,
+      "loss": 0.2587,
+      "step": 1640
+    },
+    {
+      "epoch": 0.7754137115839244,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7067307692307693,
+      "eval_loss": 0.31787875294685364,
+      "eval_precision": 0.901840490797546,
+      "eval_recall": 0.5810276679841897,
+      "eval_runtime": 48.6346,
+      "eval_samples_per_second": 5.675,
+      "eval_steps_per_second": 0.185,
+      "step": 1640
+    },
+    {
+      "epoch": 0.775886524822695,
+      "grad_norm": 4.845483303070068,
+      "learning_rate": 2.9085333356906165e-06,
+      "loss": 0.1588,
+      "step": 1641
+    },
+    {
+      "epoch": 0.7763593380614657,
+      "grad_norm": 5.5834479331970215,
+      "learning_rate": 2.8969034119063176e-06,
+      "loss": 0.2241,
+      "step": 1642
+    },
+    {
+      "epoch": 0.7768321513002364,
+      "grad_norm": 5.414841175079346,
+      "learning_rate": 2.8852928465367726e-06,
+      "loss": 0.2914,
+      "step": 1643
+    },
+    {
+      "epoch": 0.7773049645390071,
+      "grad_norm": 6.023651599884033,
+      "learning_rate": 2.8737016712248258e-06,
+      "loss": 0.2307,
+      "step": 1644
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 4.881513595581055,
+      "learning_rate": 2.862129917560469e-06,
+      "loss": 0.1618,
+      "step": 1645
+    },
+    {
+      "epoch": 0.7782505910165485,
+      "grad_norm": 5.7092814445495605,
+      "learning_rate": 2.850577617080764e-06,
+      "loss": 0.2415,
+      "step": 1646
+    },
+    {
+      "epoch": 0.7787234042553192,
+      "grad_norm": 6.310904026031494,
+      "learning_rate": 2.839044801269756e-06,
+      "loss": 0.2487,
+      "step": 1647
+    },
+    {
+      "epoch": 0.7791962174940898,
+      "grad_norm": 8.262921333312988,
+      "learning_rate": 2.827531501558395e-06,
+      "loss": 0.2799,
+      "step": 1648
+    },
+    {
+      "epoch": 0.7796690307328605,
+      "grad_norm": 6.071582317352295,
+      "learning_rate": 2.8160377493244363e-06,
+      "loss": 0.2469,
+      "step": 1649
+    },
+    {
+      "epoch": 0.7801418439716312,
+      "grad_norm": 4.781665802001953,
+      "learning_rate": 2.8045635758923563e-06,
+      "loss": 0.169,
+      "step": 1650
+    },
+    {
+      "epoch": 0.7806146572104019,
+      "grad_norm": 4.784432411193848,
+      "learning_rate": 2.7931090125332806e-06,
+      "loss": 0.2056,
+      "step": 1651
+    },
+    {
+      "epoch": 0.7810874704491726,
+      "grad_norm": 5.871290683746338,
+      "learning_rate": 2.7816740904648866e-06,
+      "loss": 0.2034,
+      "step": 1652
+    },
+    {
+      "epoch": 0.7815602836879433,
+      "grad_norm": 7.995057106018066,
+      "learning_rate": 2.7702588408513276e-06,
+      "loss": 0.3481,
+      "step": 1653
+    },
+    {
+      "epoch": 0.782033096926714,
+      "grad_norm": 4.378397464752197,
+      "learning_rate": 2.758863294803138e-06,
+      "loss": 0.182,
+      "step": 1654
+    },
+    {
+      "epoch": 0.7825059101654847,
+      "grad_norm": 6.422306060791016,
+      "learning_rate": 2.7474874833771524e-06,
+      "loss": 0.2954,
+      "step": 1655
+    },
+    {
+      "epoch": 0.7829787234042553,
+      "grad_norm": 4.291572570800781,
+      "learning_rate": 2.7361314375764215e-06,
+      "loss": 0.1982,
+      "step": 1656
+    },
+    {
+      "epoch": 0.783451536643026,
+      "grad_norm": 4.588647365570068,
+      "learning_rate": 2.7247951883501343e-06,
+      "loss": 0.1613,
+      "step": 1657
+    },
+    {
+      "epoch": 0.7839243498817967,
+      "grad_norm": 5.927759647369385,
+      "learning_rate": 2.7134787665935213e-06,
+      "loss": 0.3002,
+      "step": 1658
+    },
+    {
+      "epoch": 0.7843971631205674,
+      "grad_norm": 6.183173656463623,
+      "learning_rate": 2.7021822031477773e-06,
+      "loss": 0.2178,
+      "step": 1659
+    },
+    {
+      "epoch": 0.7848699763593381,
+      "grad_norm": 6.231297492980957,
+      "learning_rate": 2.6909055287999698e-06,
+      "loss": 0.2216,
+      "step": 1660
+    },
+    {
+      "epoch": 0.7848699763593381,
+      "eval_accuracy": 0.8713968957871396,
+      "eval_f1": 0.7289719626168224,
+      "eval_loss": 0.3045533299446106,
+      "eval_precision": 0.8914285714285715,
+      "eval_recall": 0.616600790513834,
+      "eval_runtime": 49.0889,
+      "eval_samples_per_second": 5.622,
+      "eval_steps_per_second": 0.183,
+      "step": 1660
+    },
+    {
+      "epoch": 0.7853427895981088,
+      "grad_norm": 6.300743103027344,
+      "learning_rate": 2.6796487742829758e-06,
+      "loss": 0.2452,
+      "step": 1661
+    },
+    {
+      "epoch": 0.7858156028368795,
+      "grad_norm": 5.218397617340088,
+      "learning_rate": 2.668411970275374e-06,
+      "loss": 0.2879,
+      "step": 1662
+    },
+    {
+      "epoch": 0.7862884160756501,
+      "grad_norm": 5.584065914154053,
+      "learning_rate": 2.6571951474013734e-06,
+      "loss": 0.2256,
+      "step": 1663
+    },
+    {
+      "epoch": 0.7867612293144208,
+      "grad_norm": 6.801513671875,
+      "learning_rate": 2.6459983362307263e-06,
+      "loss": 0.2637,
+      "step": 1664
+    },
+    {
+      "epoch": 0.7872340425531915,
+      "grad_norm": 3.2993884086608887,
+      "learning_rate": 2.6348215672786435e-06,
+      "loss": 0.1469,
+      "step": 1665
+    },
+    {
+      "epoch": 0.7877068557919622,
+      "grad_norm": 5.288236141204834,
+      "learning_rate": 2.6236648710057244e-06,
+      "loss": 0.1577,
+      "step": 1666
+    },
+    {
+      "epoch": 0.7881796690307329,
+      "grad_norm": 4.468508243560791,
+      "learning_rate": 2.612528277817853e-06,
+      "loss": 0.188,
+      "step": 1667
+    },
+    {
+      "epoch": 0.7886524822695036,
+      "grad_norm": 5.90925407409668,
+      "learning_rate": 2.6014118180661284e-06,
+      "loss": 0.229,
+      "step": 1668
+    },
+    {
+      "epoch": 0.7891252955082743,
+      "grad_norm": 5.6658124923706055,
+      "learning_rate": 2.590315522046779e-06,
+      "loss": 0.2122,
+      "step": 1669
+    },
+    {
+      "epoch": 0.789598108747045,
+      "grad_norm": 6.5475921630859375,
+      "learning_rate": 2.5792394200010805e-06,
+      "loss": 0.2117,
+      "step": 1670
+    },
+    {
+      "epoch": 0.7900709219858156,
+      "grad_norm": 5.4423112869262695,
+      "learning_rate": 2.5681835421152736e-06,
+      "loss": 0.2646,
+      "step": 1671
+    },
+    {
+      "epoch": 0.7905437352245863,
+      "grad_norm": 4.928060531616211,
+      "learning_rate": 2.5571479185204785e-06,
+      "loss": 0.2074,
+      "step": 1672
+    },
+    {
+      "epoch": 0.791016548463357,
+      "grad_norm": 5.3929243087768555,
+      "learning_rate": 2.546132579292616e-06,
+      "loss": 0.2094,
+      "step": 1673
+    },
+    {
+      "epoch": 0.7914893617021277,
+      "grad_norm": 6.514987468719482,
+      "learning_rate": 2.5351375544523306e-06,
+      "loss": 0.2043,
+      "step": 1674
+    },
+    {
+      "epoch": 0.7919621749408984,
+      "grad_norm": 4.381026268005371,
+      "learning_rate": 2.524162873964896e-06,
+      "loss": 0.2086,
+      "step": 1675
+    },
+    {
+      "epoch": 0.7924349881796691,
+      "grad_norm": 5.139743328094482,
+      "learning_rate": 2.513208567740144e-06,
+      "loss": 0.1823,
+      "step": 1676
+    },
+    {
+      "epoch": 0.7929078014184398,
+      "grad_norm": 4.477554798126221,
+      "learning_rate": 2.502274665632377e-06,
+      "loss": 0.1828,
+      "step": 1677
+    },
+    {
+      "epoch": 0.7933806146572104,
+      "grad_norm": 7.049522399902344,
+      "learning_rate": 2.491361197440291e-06,
+      "loss": 0.2235,
+      "step": 1678
+    },
+    {
+      "epoch": 0.7938534278959811,
+      "grad_norm": 5.63670539855957,
+      "learning_rate": 2.4804681929068907e-06,
+      "loss": 0.2404,
+      "step": 1679
+    },
+    {
+      "epoch": 0.7943262411347518,
+      "grad_norm": 5.916224956512451,
+      "learning_rate": 2.4695956817194134e-06,
+      "loss": 0.2357,
+      "step": 1680
+    },
+    {
+      "epoch": 0.7943262411347518,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7272727272727273,
+      "eval_loss": 0.2966913878917694,
+      "eval_precision": 0.8556149732620321,
+      "eval_recall": 0.6324110671936759,
+      "eval_runtime": 48.6495,
+      "eval_samples_per_second": 5.673,
+      "eval_steps_per_second": 0.185,
+      "step": 1680
+    },
+    {
+      "epoch": 0.7947990543735225,
+      "grad_norm": 6.325137138366699,
+      "learning_rate": 2.4587436935092424e-06,
+      "loss": 0.2087,
+      "step": 1681
+    },
+    {
+      "epoch": 0.7952718676122932,
+      "grad_norm": 4.733521461486816,
+      "learning_rate": 2.4479122578518257e-06,
+      "loss": 0.2256,
+      "step": 1682
+    },
+    {
+      "epoch": 0.7957446808510639,
+      "grad_norm": 4.68524169921875,
+      "learning_rate": 2.4371014042666074e-06,
+      "loss": 0.2188,
+      "step": 1683
+    },
+    {
+      "epoch": 0.7962174940898346,
+      "grad_norm": 6.98213529586792,
+      "learning_rate": 2.42631116221693e-06,
+      "loss": 0.2244,
+      "step": 1684
+    },
+    {
+      "epoch": 0.7966903073286052,
+      "grad_norm": 6.548198223114014,
+      "learning_rate": 2.4155415611099664e-06,
+      "loss": 0.2656,
+      "step": 1685
+    },
+    {
+      "epoch": 0.7971631205673759,
+      "grad_norm": 5.059558391571045,
+      "learning_rate": 2.404792630296633e-06,
+      "loss": 0.1769,
+      "step": 1686
+    },
+    {
+      "epoch": 0.7976359338061466,
+      "grad_norm": 6.935822010040283,
+      "learning_rate": 2.394064399071515e-06,
+      "loss": 0.2474,
+      "step": 1687
+    },
+    {
+      "epoch": 0.7981087470449173,
+      "grad_norm": 6.197619438171387,
+      "learning_rate": 2.3833568966727837e-06,
+      "loss": 0.2132,
+      "step": 1688
+    },
+    {
+      "epoch": 0.798581560283688,
+      "grad_norm": 6.507517337799072,
+      "learning_rate": 2.372670152282114e-06,
+      "loss": 0.2625,
+      "step": 1689
+    },
+    {
+      "epoch": 0.7990543735224587,
+      "grad_norm": 8.216846466064453,
+      "learning_rate": 2.362004195024613e-06,
+      "loss": 0.2823,
+      "step": 1690
+    },
+    {
+      "epoch": 0.7995271867612294,
+      "grad_norm": 5.132957935333252,
+      "learning_rate": 2.351359053968728e-06,
+      "loss": 0.1989,
+      "step": 1691
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 6.100037574768066,
+      "learning_rate": 2.3407347581261863e-06,
+      "loss": 0.2593,
+      "step": 1692
+    },
+    {
+      "epoch": 0.8004728132387707,
+      "grad_norm": 8.391918182373047,
+      "learning_rate": 2.3301313364518964e-06,
+      "loss": 0.3208,
+      "step": 1693
+    },
+    {
+      "epoch": 0.8009456264775414,
+      "grad_norm": 4.401480674743652,
+      "learning_rate": 2.3195488178438785e-06,
+      "loss": 0.1518,
+      "step": 1694
+    },
+    {
+      "epoch": 0.8014184397163121,
+      "grad_norm": 6.447848796844482,
+      "learning_rate": 2.308987231143186e-06,
+      "loss": 0.2173,
+      "step": 1695
+    },
+    {
+      "epoch": 0.8018912529550828,
+      "grad_norm": 6.483435153961182,
+      "learning_rate": 2.298446605133824e-06,
+      "loss": 0.1744,
+      "step": 1696
+    },
+    {
+      "epoch": 0.8023640661938535,
+      "grad_norm": 5.87816858291626,
+      "learning_rate": 2.2879269685426742e-06,
+      "loss": 0.1838,
+      "step": 1697
+    },
+    {
+      "epoch": 0.8028368794326242,
+      "grad_norm": 8.297409057617188,
+      "learning_rate": 2.2774283500394134e-06,
+      "loss": 0.2732,
+      "step": 1698
+    },
+    {
+      "epoch": 0.8033096926713948,
+      "grad_norm": 9.192248344421387,
+      "learning_rate": 2.2669507782364387e-06,
+      "loss": 0.3547,
+      "step": 1699
+    },
+    {
+      "epoch": 0.8037825059101655,
+      "grad_norm": 5.559706211090088,
+      "learning_rate": 2.2564942816887837e-06,
+      "loss": 0.1972,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8037825059101655,
+      "eval_accuracy": 0.8658536585365854,
+      "eval_f1": 0.7218390804597701,
+      "eval_loss": 0.30020540952682495,
+      "eval_precision": 0.8626373626373627,
+      "eval_recall": 0.6205533596837944,
+      "eval_runtime": 48.6466,
+      "eval_samples_per_second": 5.674,
+      "eval_steps_per_second": 0.185,
+      "step": 1700
+    },
+    {
+      "epoch": 0.8042553191489362,
+      "grad_norm": 5.664395332336426,
+      "learning_rate": 2.2460588888940504e-06,
+      "loss": 0.2274,
+      "step": 1701
+    },
+    {
+      "epoch": 0.8047281323877069,
+      "grad_norm": 5.178661346435547,
+      "learning_rate": 2.235644628292323e-06,
+      "loss": 0.2305,
+      "step": 1702
+    },
+    {
+      "epoch": 0.8052009456264776,
+      "grad_norm": 6.627544403076172,
+      "learning_rate": 2.225251528266089e-06,
+      "loss": 0.2816,
+      "step": 1703
+    },
+    {
+      "epoch": 0.8056737588652483,
+      "grad_norm": 5.081453323364258,
+      "learning_rate": 2.214879617140171e-06,
+      "loss": 0.1905,
+      "step": 1704
+    },
+    {
+      "epoch": 0.806146572104019,
+      "grad_norm": 6.601840496063232,
+      "learning_rate": 2.204528923181648e-06,
+      "loss": 0.2067,
+      "step": 1705
+    },
+    {
+      "epoch": 0.8066193853427897,
+      "grad_norm": 5.457771301269531,
+      "learning_rate": 2.194199474599763e-06,
+      "loss": 0.2434,
+      "step": 1706
+    },
+    {
+      "epoch": 0.8070921985815603,
+      "grad_norm": 6.326608657836914,
+      "learning_rate": 2.1838912995458673e-06,
+      "loss": 0.2722,
+      "step": 1707
+    },
+    {
+      "epoch": 0.807565011820331,
+      "grad_norm": 5.285124778747559,
+      "learning_rate": 2.1736044261133305e-06,
+      "loss": 0.2349,
+      "step": 1708
+    },
+    {
+      "epoch": 0.8080378250591016,
+      "grad_norm": 7.561131477355957,
+      "learning_rate": 2.1633388823374722e-06,
+      "loss": 0.2804,
+      "step": 1709
+    },
+    {
+      "epoch": 0.8085106382978723,
+      "grad_norm": 5.1685309410095215,
+      "learning_rate": 2.153094696195478e-06,
+      "loss": 0.179,
+      "step": 1710
+    },
+    {
+      "epoch": 0.808983451536643,
+      "grad_norm": 5.682158470153809,
+      "learning_rate": 2.1428718956063253e-06,
+      "loss": 0.2478,
+      "step": 1711
+    },
+    {
+      "epoch": 0.8094562647754137,
+      "grad_norm": 6.980831146240234,
+      "learning_rate": 2.132670508430711e-06,
+      "loss": 0.1889,
+      "step": 1712
+    },
+    {
+      "epoch": 0.8099290780141843,
+      "grad_norm": 4.640564441680908,
+      "learning_rate": 2.1224905624709692e-06,
+      "loss": 0.1338,
+      "step": 1713
+    },
+    {
+      "epoch": 0.810401891252955,
+      "grad_norm": 5.731657981872559,
+      "learning_rate": 2.112332085471006e-06,
+      "loss": 0.2535,
+      "step": 1714
+    },
+    {
+      "epoch": 0.8108747044917257,
+      "grad_norm": 5.084789276123047,
+      "learning_rate": 2.102195105116215e-06,
+      "loss": 0.271,
+      "step": 1715
+    },
+    {
+      "epoch": 0.8113475177304964,
+      "grad_norm": 5.400364875793457,
+      "learning_rate": 2.092079649033395e-06,
+      "loss": 0.1942,
+      "step": 1716
+    },
+    {
+      "epoch": 0.8118203309692671,
+      "grad_norm": 4.337155818939209,
+      "learning_rate": 2.081985744790691e-06,
+      "loss": 0.1472,
+      "step": 1717
+    },
+    {
+      "epoch": 0.8122931442080378,
+      "grad_norm": 4.337014675140381,
+      "learning_rate": 2.0719134198975187e-06,
+      "loss": 0.1988,
+      "step": 1718
+    },
+    {
+      "epoch": 0.8127659574468085,
+      "grad_norm": 4.499454498291016,
+      "learning_rate": 2.06186270180447e-06,
+      "loss": 0.1955,
+      "step": 1719
+    },
+    {
+      "epoch": 0.8132387706855791,
+      "grad_norm": 6.413626670837402,
+      "learning_rate": 2.051833617903257e-06,
+      "loss": 0.2602,
+      "step": 1720
+    },
+    {
+      "epoch": 0.8132387706855791,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7209302325581395,
+      "eval_loss": 0.30013346672058105,
+      "eval_precision": 0.8757062146892656,
+      "eval_recall": 0.6126482213438735,
+      "eval_runtime": 48.9633,
+      "eval_samples_per_second": 5.637,
+      "eval_steps_per_second": 0.184,
+      "step": 1720
+    },
+    {
+      "epoch": 0.8137115839243498,
+      "grad_norm": 6.098957538604736,
+      "learning_rate": 2.041826195526627e-06,
+      "loss": 0.3015,
+      "step": 1721
+    },
+    {
+      "epoch": 0.8141843971631205,
+      "grad_norm": 6.596921443939209,
+      "learning_rate": 2.031840461948301e-06,
+      "loss": 0.3017,
+      "step": 1722
+    },
+    {
+      "epoch": 0.8146572104018912,
+      "grad_norm": 6.857213497161865,
+      "learning_rate": 2.021876444382882e-06,
+      "loss": 0.2186,
+      "step": 1723
+    },
+    {
+      "epoch": 0.8151300236406619,
+      "grad_norm": 5.775735378265381,
+      "learning_rate": 2.011934169985792e-06,
+      "loss": 0.2594,
+      "step": 1724
+    },
+    {
+      "epoch": 0.8156028368794326,
+      "grad_norm": 6.679286956787109,
+      "learning_rate": 2.0020136658531964e-06,
+      "loss": 0.3236,
+      "step": 1725
+    },
+    {
+      "epoch": 0.8160756501182033,
+      "grad_norm": 5.931440353393555,
+      "learning_rate": 1.9921149590219213e-06,
+      "loss": 0.2564,
+      "step": 1726
+    },
+    {
+      "epoch": 0.816548463356974,
+      "grad_norm": 4.7152581214904785,
+      "learning_rate": 1.9822380764694027e-06,
+      "loss": 0.1825,
+      "step": 1727
+    },
+    {
+      "epoch": 0.8170212765957446,
+      "grad_norm": 5.601851940155029,
+      "learning_rate": 1.972383045113585e-06,
+      "loss": 0.2052,
+      "step": 1728
+    },
+    {
+      "epoch": 0.8174940898345153,
+      "grad_norm": 5.7181501388549805,
+      "learning_rate": 1.962549891812865e-06,
+      "loss": 0.2467,
+      "step": 1729
+    },
+    {
+      "epoch": 0.817966903073286,
+      "grad_norm": 11.021007537841797,
+      "learning_rate": 1.952738643366011e-06,
+      "loss": 0.3405,
+      "step": 1730
+    },
+    {
+      "epoch": 0.8184397163120567,
+      "grad_norm": 5.372162342071533,
+      "learning_rate": 1.9429493265121026e-06,
+      "loss": 0.2504,
+      "step": 1731
+    },
+    {
+      "epoch": 0.8189125295508274,
+      "grad_norm": 4.431525707244873,
+      "learning_rate": 1.9331819679304376e-06,
+      "loss": 0.2204,
+      "step": 1732
+    },
+    {
+      "epoch": 0.8193853427895981,
+      "grad_norm": 4.9457268714904785,
+      "learning_rate": 1.923436594240473e-06,
+      "loss": 0.1216,
+      "step": 1733
+    },
+    {
+      "epoch": 0.8198581560283688,
+      "grad_norm": 6.442474365234375,
+      "learning_rate": 1.9137132320017505e-06,
+      "loss": 0.2644,
+      "step": 1734
+    },
+    {
+      "epoch": 0.8203309692671394,
+      "grad_norm": 4.034286975860596,
+      "learning_rate": 1.904011907713823e-06,
+      "loss": 0.1679,
+      "step": 1735
+    },
+    {
+      "epoch": 0.8208037825059101,
+      "grad_norm": 5.20193338394165,
+      "learning_rate": 1.8943326478161806e-06,
+      "loss": 0.1667,
+      "step": 1736
+    },
+    {
+      "epoch": 0.8212765957446808,
+      "grad_norm": 5.608093738555908,
+      "learning_rate": 1.8846754786881816e-06,
+      "loss": 0.2191,
+      "step": 1737
+    },
+    {
+      "epoch": 0.8217494089834515,
+      "grad_norm": 4.049744606018066,
+      "learning_rate": 1.8750404266489796e-06,
+      "loss": 0.186,
+      "step": 1738
+    },
+    {
+      "epoch": 0.8222222222222222,
+      "grad_norm": 5.3404436111450195,
+      "learning_rate": 1.8654275179574477e-06,
+      "loss": 0.2918,
+      "step": 1739
+    },
+    {
+      "epoch": 0.8226950354609929,
+      "grad_norm": 6.187091827392578,
+      "learning_rate": 1.855836778812118e-06,
+      "loss": 0.1873,
+      "step": 1740
+    },
+    {
+      "epoch": 0.8226950354609929,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7136150234741784,
+      "eval_loss": 0.30463746190071106,
+      "eval_precision": 0.8786127167630058,
+      "eval_recall": 0.6007905138339921,
+      "eval_runtime": 49.7049,
+      "eval_samples_per_second": 5.553,
+      "eval_steps_per_second": 0.181,
+      "step": 1740
+    },
+    {
+      "epoch": 0.8231678486997636,
+      "grad_norm": 5.610990524291992,
+      "learning_rate": 1.8462682353510974e-06,
+      "loss": 0.258,
+      "step": 1741
+    },
+    {
+      "epoch": 0.8236406619385342,
+      "grad_norm": 7.3444318771362305,
+      "learning_rate": 1.836721913652002e-06,
+      "loss": 0.2804,
+      "step": 1742
+    },
+    {
+      "epoch": 0.8241134751773049,
+      "grad_norm": 5.178664684295654,
+      "learning_rate": 1.8271978397318868e-06,
+      "loss": 0.2232,
+      "step": 1743
+    },
+    {
+      "epoch": 0.8245862884160756,
+      "grad_norm": 7.083937168121338,
+      "learning_rate": 1.8176960395471754e-06,
+      "loss": 0.306,
+      "step": 1744
+    },
+    {
+      "epoch": 0.8250591016548463,
+      "grad_norm": 5.501087188720703,
+      "learning_rate": 1.8082165389935836e-06,
+      "loss": 0.2164,
+      "step": 1745
+    },
+    {
+      "epoch": 0.825531914893617,
+      "grad_norm": 6.170442581176758,
+      "learning_rate": 1.7987593639060586e-06,
+      "loss": 0.2403,
+      "step": 1746
+    },
+    {
+      "epoch": 0.8260047281323877,
+      "grad_norm": 6.09285306930542,
+      "learning_rate": 1.7893245400586967e-06,
+      "loss": 0.2852,
+      "step": 1747
+    },
+    {
+      "epoch": 0.8264775413711584,
+      "grad_norm": 5.350312232971191,
+      "learning_rate": 1.7799120931646819e-06,
+      "loss": 0.251,
+      "step": 1748
+    },
+    {
+      "epoch": 0.826950354609929,
+      "grad_norm": 5.884124755859375,
+      "learning_rate": 1.7705220488762187e-06,
+      "loss": 0.2269,
+      "step": 1749
+    },
+    {
+      "epoch": 0.8274231678486997,
+      "grad_norm": 5.945254325866699,
+      "learning_rate": 1.7611544327844487e-06,
+      "loss": 0.206,
+      "step": 1750
+    },
+    {
+      "epoch": 0.8278959810874704,
+      "grad_norm": 4.885594844818115,
+      "learning_rate": 1.7518092704193913e-06,
+      "loss": 0.2674,
+      "step": 1751
+    },
+    {
+      "epoch": 0.8283687943262411,
+      "grad_norm": 5.776851654052734,
+      "learning_rate": 1.742486587249873e-06,
+      "loss": 0.2314,
+      "step": 1752
+    },
+    {
+      "epoch": 0.8288416075650118,
+      "grad_norm": 4.074375629425049,
+      "learning_rate": 1.733186408683456e-06,
+      "loss": 0.1666,
+      "step": 1753
+    },
+    {
+      "epoch": 0.8293144208037825,
+      "grad_norm": 3.9429306983947754,
+      "learning_rate": 1.7239087600663684e-06,
+      "loss": 0.2021,
+      "step": 1754
+    },
+    {
+      "epoch": 0.8297872340425532,
+      "grad_norm": 4.927017688751221,
+      "learning_rate": 1.714653666683439e-06,
+      "loss": 0.2131,
+      "step": 1755
+    },
+    {
+      "epoch": 0.8302600472813239,
+      "grad_norm": 4.356184959411621,
+      "learning_rate": 1.7054211537580201e-06,
+      "loss": 0.1633,
+      "step": 1756
+    },
+    {
+      "epoch": 0.8307328605200945,
+      "grad_norm": 6.772974967956543,
+      "learning_rate": 1.6962112464519343e-06,
+      "loss": 0.2083,
+      "step": 1757
+    },
+    {
+      "epoch": 0.8312056737588652,
+      "grad_norm": 5.196053504943848,
+      "learning_rate": 1.6870239698653879e-06,
+      "loss": 0.2203,
+      "step": 1758
+    },
+    {
+      "epoch": 0.8316784869976359,
+      "grad_norm": 5.340625762939453,
+      "learning_rate": 1.677859349036911e-06,
+      "loss": 0.2047,
+      "step": 1759
+    },
+    {
+      "epoch": 0.8321513002364066,
+      "grad_norm": 3.65738582611084,
+      "learning_rate": 1.6687174089432934e-06,
+      "loss": 0.1663,
+      "step": 1760
+    },
+    {
+      "epoch": 0.8321513002364066,
+      "eval_accuracy": 0.8658536585365854,
+      "eval_f1": 0.717948717948718,
+      "eval_loss": 0.2977656424045563,
+      "eval_precision": 0.875,
+      "eval_recall": 0.6086956521739131,
+      "eval_runtime": 47.9133,
+      "eval_samples_per_second": 5.76,
+      "eval_steps_per_second": 0.188,
+      "step": 1760
+    },
+    {
+      "epoch": 0.8326241134751773,
+      "grad_norm": 5.490943431854248,
+      "learning_rate": 1.659598174499505e-06,
+      "loss": 0.2443,
+      "step": 1761
+    },
+    {
+      "epoch": 0.833096926713948,
+      "grad_norm": 5.882487773895264,
+      "learning_rate": 1.6505016705586475e-06,
+      "loss": 0.2925,
+      "step": 1762
+    },
+    {
+      "epoch": 0.8335697399527187,
+      "grad_norm": 4.92294979095459,
+      "learning_rate": 1.6414279219118568e-06,
+      "loss": 0.1559,
+      "step": 1763
+    },
+    {
+      "epoch": 0.8340425531914893,
+      "grad_norm": 8.455697059631348,
+      "learning_rate": 1.632376953288265e-06,
+      "loss": 0.3205,
+      "step": 1764
+    },
+    {
+      "epoch": 0.83451536643026,
+      "grad_norm": 5.372105598449707,
+      "learning_rate": 1.623348789354916e-06,
+      "loss": 0.2579,
+      "step": 1765
+    },
+    {
+      "epoch": 0.8349881796690307,
+      "grad_norm": 6.4145307540893555,
+      "learning_rate": 1.614343454716707e-06,
+      "loss": 0.2467,
+      "step": 1766
+    },
+    {
+      "epoch": 0.8354609929078014,
+      "grad_norm": 5.563887119293213,
+      "learning_rate": 1.6053609739163134e-06,
+      "loss": 0.192,
+      "step": 1767
+    },
+    {
+      "epoch": 0.8359338061465721,
+      "grad_norm": 4.483576774597168,
+      "learning_rate": 1.5964013714341275e-06,
+      "loss": 0.1964,
+      "step": 1768
+    },
+    {
+      "epoch": 0.8364066193853428,
+      "grad_norm": 6.770689010620117,
+      "learning_rate": 1.587464671688187e-06,
+      "loss": 0.2926,
+      "step": 1769
+    },
+    {
+      "epoch": 0.8368794326241135,
+      "grad_norm": 3.7878456115722656,
+      "learning_rate": 1.5785508990341192e-06,
+      "loss": 0.1907,
+      "step": 1770
+    },
+    {
+      "epoch": 0.8373522458628841,
+      "grad_norm": 7.301677227020264,
+      "learning_rate": 1.5696600777650606e-06,
+      "loss": 0.2305,
+      "step": 1771
+    },
+    {
+      "epoch": 0.8378250591016548,
+      "grad_norm": 3.8965413570404053,
+      "learning_rate": 1.560792232111601e-06,
+      "loss": 0.1244,
+      "step": 1772
+    },
+    {
+      "epoch": 0.8382978723404255,
+      "grad_norm": 5.381835460662842,
+      "learning_rate": 1.551947386241708e-06,
+      "loss": 0.2294,
+      "step": 1773
+    },
+    {
+      "epoch": 0.8387706855791962,
+      "grad_norm": 3.8923234939575195,
+      "learning_rate": 1.543125564260668e-06,
+      "loss": 0.1775,
+      "step": 1774
+    },
+    {
+      "epoch": 0.8392434988179669,
+      "grad_norm": 4.11511754989624,
+      "learning_rate": 1.5343267902110282e-06,
+      "loss": 0.1614,
+      "step": 1775
+    },
+    {
+      "epoch": 0.8397163120567376,
+      "grad_norm": 4.022883892059326,
+      "learning_rate": 1.5255510880725133e-06,
+      "loss": 0.2149,
+      "step": 1776
+    },
+    {
+      "epoch": 0.8401891252955083,
+      "grad_norm": 5.9938836097717285,
+      "learning_rate": 1.5167984817619709e-06,
+      "loss": 0.2138,
+      "step": 1777
+    },
+    {
+      "epoch": 0.840661938534279,
+      "grad_norm": 6.684109210968018,
+      "learning_rate": 1.5080689951333017e-06,
+      "loss": 0.2798,
+      "step": 1778
+    },
+    {
+      "epoch": 0.8411347517730496,
+      "grad_norm": 5.152201175689697,
+      "learning_rate": 1.4993626519774073e-06,
+      "loss": 0.2239,
+      "step": 1779
+    },
+    {
+      "epoch": 0.8416075650118203,
+      "grad_norm": 8.338132858276367,
+      "learning_rate": 1.4906794760221032e-06,
+      "loss": 0.363,
+      "step": 1780
+    },
+    {
+      "epoch": 0.8416075650118203,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7149532710280374,
+      "eval_loss": 0.2977047264575958,
+      "eval_precision": 0.8742857142857143,
+      "eval_recall": 0.6047430830039525,
+      "eval_runtime": 48.5608,
+      "eval_samples_per_second": 5.684,
+      "eval_steps_per_second": 0.185,
+      "step": 1780
+    },
+    {
+      "epoch": 0.842080378250591,
+      "grad_norm": 5.853981971740723,
+      "learning_rate": 1.482019490932074e-06,
+      "loss": 0.2162,
+      "step": 1781
+    },
+    {
+      "epoch": 0.8425531914893617,
+      "grad_norm": 5.779383182525635,
+      "learning_rate": 1.473382720308797e-06,
+      "loss": 0.2139,
+      "step": 1782
+    },
+    {
+      "epoch": 0.8430260047281324,
+      "grad_norm": 5.416299343109131,
+      "learning_rate": 1.4647691876904835e-06,
+      "loss": 0.1742,
+      "step": 1783
+    },
+    {
+      "epoch": 0.8434988179669031,
+      "grad_norm": 6.843648433685303,
+      "learning_rate": 1.4561789165520136e-06,
+      "loss": 0.3138,
+      "step": 1784
+    },
+    {
+      "epoch": 0.8439716312056738,
+      "grad_norm": 6.401846885681152,
+      "learning_rate": 1.4476119303048709e-06,
+      "loss": 0.259,
+      "step": 1785
+    },
+    {
+      "epoch": 0.8444444444444444,
+      "grad_norm": 6.052865982055664,
+      "learning_rate": 1.43906825229708e-06,
+      "loss": 0.2637,
+      "step": 1786
+    },
+    {
+      "epoch": 0.8449172576832151,
+      "grad_norm": 5.836279392242432,
+      "learning_rate": 1.4305479058131389e-06,
+      "loss": 0.2327,
+      "step": 1787
+    },
+    {
+      "epoch": 0.8453900709219858,
+      "grad_norm": 6.56742525100708,
+      "learning_rate": 1.4220509140739692e-06,
+      "loss": 0.2571,
+      "step": 1788
+    },
+    {
+      "epoch": 0.8458628841607565,
+      "grad_norm": 5.522575378417969,
+      "learning_rate": 1.4135773002368314e-06,
+      "loss": 0.1913,
+      "step": 1789
+    },
+    {
+      "epoch": 0.8463356973995272,
+      "grad_norm": 6.318970203399658,
+      "learning_rate": 1.4051270873952794e-06,
+      "loss": 0.2334,
+      "step": 1790
+    },
+    {
+      "epoch": 0.8468085106382979,
+      "grad_norm": 5.222054958343506,
+      "learning_rate": 1.3967002985790878e-06,
+      "loss": 0.2156,
+      "step": 1791
+    },
+    {
+      "epoch": 0.8472813238770686,
+      "grad_norm": 4.232369422912598,
+      "learning_rate": 1.3882969567541959e-06,
+      "loss": 0.2233,
+      "step": 1792
+    },
+    {
+      "epoch": 0.8477541371158392,
+      "grad_norm": 3.875591993331909,
+      "learning_rate": 1.3799170848226395e-06,
+      "loss": 0.1502,
+      "step": 1793
+    },
+    {
+      "epoch": 0.8482269503546099,
+      "grad_norm": 7.354650020599365,
+      "learning_rate": 1.37156070562249e-06,
+      "loss": 0.249,
+      "step": 1794
+    },
+    {
+      "epoch": 0.8486997635933806,
+      "grad_norm": 5.512684345245361,
+      "learning_rate": 1.3632278419277933e-06,
+      "loss": 0.2428,
+      "step": 1795
+    },
+    {
+      "epoch": 0.8491725768321513,
+      "grad_norm": 8.732033729553223,
+      "learning_rate": 1.3549185164485135e-06,
+      "loss": 0.3614,
+      "step": 1796
+    },
+    {
+      "epoch": 0.849645390070922,
+      "grad_norm": 7.074683666229248,
+      "learning_rate": 1.3466327518304555e-06,
+      "loss": 0.3366,
+      "step": 1797
+    },
+    {
+      "epoch": 0.8501182033096927,
+      "grad_norm": 4.561595916748047,
+      "learning_rate": 1.3383705706552174e-06,
+      "loss": 0.2038,
+      "step": 1798
+    },
+    {
+      "epoch": 0.8505910165484634,
+      "grad_norm": 4.308145523071289,
+      "learning_rate": 1.3301319954401248e-06,
+      "loss": 0.2003,
+      "step": 1799
+    },
+    {
+      "epoch": 0.851063829787234,
+      "grad_norm": 4.1011552810668945,
+      "learning_rate": 1.3219170486381671e-06,
+      "loss": 0.1727,
+      "step": 1800
+    },
+    {
+      "epoch": 0.851063829787234,
+      "eval_accuracy": 0.8658536585365854,
+      "eval_f1": 0.717948717948718,
+      "eval_loss": 0.29891377687454224,
+      "eval_precision": 0.875,
+      "eval_recall": 0.6086956521739131,
+      "eval_runtime": 49.9353,
+      "eval_samples_per_second": 5.527,
+      "eval_steps_per_second": 0.18,
+      "step": 1800
+    },
+    {
+      "epoch": 0.8515366430260047,
+      "grad_norm": 4.582373142242432,
+      "learning_rate": 1.3137257526379366e-06,
+      "loss": 0.1734,
+      "step": 1801
+    },
+    {
+      "epoch": 0.8520094562647754,
+      "grad_norm": 5.546594619750977,
+      "learning_rate": 1.3055581297635734e-06,
+      "loss": 0.1714,
+      "step": 1802
+    },
+    {
+      "epoch": 0.8524822695035461,
+      "grad_norm": 6.5857696533203125,
+      "learning_rate": 1.2974142022746971e-06,
+      "loss": 0.2197,
+      "step": 1803
+    },
+    {
+      "epoch": 0.8529550827423168,
+      "grad_norm": 3.709681987762451,
+      "learning_rate": 1.289293992366346e-06,
+      "loss": 0.1239,
+      "step": 1804
+    },
+    {
+      "epoch": 0.8534278959810875,
+      "grad_norm": 3.4839696884155273,
+      "learning_rate": 1.2811975221689289e-06,
+      "loss": 0.1857,
+      "step": 1805
+    },
+    {
+      "epoch": 0.8539007092198582,
+      "grad_norm": 4.332188606262207,
+      "learning_rate": 1.2731248137481468e-06,
+      "loss": 0.2506,
+      "step": 1806
+    },
+    {
+      "epoch": 0.8543735224586289,
+      "grad_norm": 5.268299579620361,
+      "learning_rate": 1.2650758891049464e-06,
+      "loss": 0.2326,
+      "step": 1807
+    },
+    {
+      "epoch": 0.8548463356973995,
+      "grad_norm": 4.915356636047363,
+      "learning_rate": 1.257050770175452e-06,
+      "loss": 0.1439,
+      "step": 1808
+    },
+    {
+      "epoch": 0.8553191489361702,
+      "grad_norm": 5.468433856964111,
+      "learning_rate": 1.2490494788309115e-06,
+      "loss": 0.2292,
+      "step": 1809
+    },
+    {
+      "epoch": 0.8557919621749409,
+      "grad_norm": 8.169693946838379,
+      "learning_rate": 1.241072036877633e-06,
+      "loss": 0.2483,
+      "step": 1810
+    },
+    {
+      "epoch": 0.8562647754137116,
+      "grad_norm": 4.5563063621521,
+      "learning_rate": 1.2331184660569284e-06,
+      "loss": 0.213,
+      "step": 1811
+    },
+    {
+      "epoch": 0.8567375886524823,
+      "grad_norm": 5.421559810638428,
+      "learning_rate": 1.2251887880450498e-06,
+      "loss": 0.2602,
+      "step": 1812
+    },
+    {
+      "epoch": 0.857210401891253,
+      "grad_norm": 5.384074687957764,
+      "learning_rate": 1.217283024453133e-06,
+      "loss": 0.2221,
+      "step": 1813
+    },
+    {
+      "epoch": 0.8576832151300237,
+      "grad_norm": 4.89252233505249,
+      "learning_rate": 1.2094011968271447e-06,
+      "loss": 0.1907,
+      "step": 1814
+    },
+    {
+      "epoch": 0.8581560283687943,
+      "grad_norm": 6.57568359375,
+      "learning_rate": 1.2015433266478105e-06,
+      "loss": 0.2879,
+      "step": 1815
+    },
+    {
+      "epoch": 0.858628841607565,
+      "grad_norm": 7.750422954559326,
+      "learning_rate": 1.1937094353305679e-06,
+      "loss": 0.2787,
+      "step": 1816
+    },
+    {
+      "epoch": 0.8591016548463357,
+      "grad_norm": 5.562004089355469,
+      "learning_rate": 1.1858995442254984e-06,
+      "loss": 0.2327,
+      "step": 1817
+    },
+    {
+      "epoch": 0.8595744680851064,
+      "grad_norm": 5.795264720916748,
+      "learning_rate": 1.178113674617285e-06,
+      "loss": 0.2329,
+      "step": 1818
+    },
+    {
+      "epoch": 0.8600472813238771,
+      "grad_norm": 4.586887836456299,
+      "learning_rate": 1.1703518477251296e-06,
+      "loss": 0.2449,
+      "step": 1819
+    },
+    {
+      "epoch": 0.8605200945626478,
+      "grad_norm": 5.4445648193359375,
+      "learning_rate": 1.1626140847027211e-06,
+      "loss": 0.1995,
+      "step": 1820
+    },
+    {
+      "epoch": 0.8605200945626478,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7089201877934272,
+      "eval_loss": 0.30064335465431213,
+      "eval_precision": 0.8728323699421965,
+      "eval_recall": 0.5968379446640316,
+      "eval_runtime": 47.2996,
+      "eval_samples_per_second": 5.835,
+      "eval_steps_per_second": 0.19,
+      "step": 1820
+    },
+    {
+      "epoch": 0.8609929078014185,
+      "grad_norm": 5.280003070831299,
+      "learning_rate": 1.154900406638161e-06,
+      "loss": 0.2273,
+      "step": 1821
+    },
+    {
+      "epoch": 0.8614657210401891,
+      "grad_norm": 5.873126029968262,
+      "learning_rate": 1.147210834553908e-06,
+      "loss": 0.3027,
+      "step": 1822
+    },
+    {
+      "epoch": 0.8619385342789598,
+      "grad_norm": 5.633482456207275,
+      "learning_rate": 1.1395453894067322e-06,
+      "loss": 0.2282,
+      "step": 1823
+    },
+    {
+      "epoch": 0.8624113475177305,
+      "grad_norm": 7.417043209075928,
+      "learning_rate": 1.1319040920876412e-06,
+      "loss": 0.261,
+      "step": 1824
+    },
+    {
+      "epoch": 0.8628841607565012,
+      "grad_norm": 4.741674900054932,
+      "learning_rate": 1.1242869634218355e-06,
+      "loss": 0.2136,
+      "step": 1825
+    },
+    {
+      "epoch": 0.8633569739952719,
+      "grad_norm": 6.367619037628174,
+      "learning_rate": 1.1166940241686453e-06,
+      "loss": 0.2331,
+      "step": 1826
+    },
+    {
+      "epoch": 0.8638297872340426,
+      "grad_norm": 7.427839756011963,
+      "learning_rate": 1.1091252950214793e-06,
+      "loss": 0.2836,
+      "step": 1827
+    },
+    {
+      "epoch": 0.8643026004728133,
+      "grad_norm": 4.886536598205566,
+      "learning_rate": 1.1015807966077641e-06,
+      "loss": 0.2326,
+      "step": 1828
+    },
+    {
+      "epoch": 0.864775413711584,
+      "grad_norm": 6.093448162078857,
+      "learning_rate": 1.0940605494888856e-06,
+      "loss": 0.1806,
+      "step": 1829
+    },
+    {
+      "epoch": 0.8652482269503546,
+      "grad_norm": 5.902658939361572,
+      "learning_rate": 1.0865645741601372e-06,
+      "loss": 0.2035,
+      "step": 1830
+    },
+    {
+      "epoch": 0.8657210401891253,
+      "grad_norm": 6.096370697021484,
+      "learning_rate": 1.0790928910506705e-06,
+      "loss": 0.1924,
+      "step": 1831
+    },
+    {
+      "epoch": 0.866193853427896,
+      "grad_norm": 6.3981499671936035,
+      "learning_rate": 1.0716455205234244e-06,
+      "loss": 0.2536,
+      "step": 1832
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 5.565893650054932,
+      "learning_rate": 1.0642224828750803e-06,
+      "loss": 0.2512,
+      "step": 1833
+    },
+    {
+      "epoch": 0.8671394799054374,
+      "grad_norm": 6.9906206130981445,
+      "learning_rate": 1.0568237983360041e-06,
+      "loss": 0.2001,
+      "step": 1834
+    },
+    {
+      "epoch": 0.8676122931442081,
+      "grad_norm": 5.311846733093262,
+      "learning_rate": 1.0494494870701889e-06,
+      "loss": 0.2169,
+      "step": 1835
+    },
+    {
+      "epoch": 0.8680851063829788,
+      "grad_norm": 4.316741466522217,
+      "learning_rate": 1.0420995691752079e-06,
+      "loss": 0.2258,
+      "step": 1836
+    },
+    {
+      "epoch": 0.8685579196217494,
+      "grad_norm": 4.4594316482543945,
+      "learning_rate": 1.034774064682148e-06,
+      "loss": 0.1806,
+      "step": 1837
+    },
+    {
+      "epoch": 0.8690307328605201,
+      "grad_norm": 4.841372013092041,
+      "learning_rate": 1.027472993555565e-06,
+      "loss": 0.1777,
+      "step": 1838
+    },
+    {
+      "epoch": 0.8695035460992908,
+      "grad_norm": 4.721182346343994,
+      "learning_rate": 1.0201963756934164e-06,
+      "loss": 0.1937,
+      "step": 1839
+    },
+    {
+      "epoch": 0.8699763593380615,
+      "grad_norm": 5.975325584411621,
+      "learning_rate": 1.012944230927031e-06,
+      "loss": 0.154,
+      "step": 1840
+    },
+    {
+      "epoch": 0.8699763593380615,
+      "eval_accuracy": 0.8680709534368071,
+      "eval_f1": 0.7264367816091954,
+      "eval_loss": 0.29660460352897644,
+      "eval_precision": 0.8681318681318682,
+      "eval_recall": 0.6245059288537549,
+      "eval_runtime": 47.2449,
+      "eval_samples_per_second": 5.842,
+      "eval_steps_per_second": 0.19,
+      "step": 1840
+    },
+    {
+      "epoch": 0.8704491725768322,
+      "grad_norm": 4.6500020027160645,
+      "learning_rate": 1.0057165790210277e-06,
+      "loss": 0.1928,
+      "step": 1841
+    },
+    {
+      "epoch": 0.8709219858156029,
+      "grad_norm": 5.256702423095703,
+      "learning_rate": 9.985134396732798e-07,
+      "loss": 0.2108,
+      "step": 1842
+    },
+    {
+      "epoch": 0.8713947990543736,
+      "grad_norm": 4.254281997680664,
+      "learning_rate": 9.913348325148498e-07,
+      "loss": 0.2064,
+      "step": 1843
+    },
+    {
+      "epoch": 0.8718676122931442,
+      "grad_norm": 4.128483772277832,
+      "learning_rate": 9.841807771099498e-07,
+      "loss": 0.1908,
+      "step": 1844
+    },
+    {
+      "epoch": 0.8723404255319149,
+      "grad_norm": 6.125643730163574,
+      "learning_rate": 9.77051292955873e-07,
+      "loss": 0.2637,
+      "step": 1845
+    },
+    {
+      "epoch": 0.8728132387706856,
+      "grad_norm": 5.453957557678223,
+      "learning_rate": 9.699463994829495e-07,
+      "loss": 0.2566,
+      "step": 1846
+    },
+    {
+      "epoch": 0.8732860520094563,
+      "grad_norm": 15.336091041564941,
+      "learning_rate": 9.628661160544905e-07,
+      "loss": 0.2678,
+      "step": 1847
+    },
+    {
+      "epoch": 0.873758865248227,
+      "grad_norm": 7.426636695861816,
+      "learning_rate": 9.558104619667386e-07,
+      "loss": 0.1946,
+      "step": 1848
+    },
+    {
+      "epoch": 0.8742316784869977,
+      "grad_norm": 6.527373313903809,
+      "learning_rate": 9.487794564488106e-07,
+      "loss": 0.2772,
+      "step": 1849
+    },
+    {
+      "epoch": 0.8747044917257684,
+      "grad_norm": 5.806751728057861,
+      "learning_rate": 9.417731186626466e-07,
+      "loss": 0.1703,
+      "step": 1850
+    },
+    {
+      "epoch": 0.875177304964539,
+      "grad_norm": 5.468467712402344,
+      "learning_rate": 9.347914677029624e-07,
+      "loss": 0.2873,
+      "step": 1851
+    },
+    {
+      "epoch": 0.8756501182033097,
+      "grad_norm": 7.120368957519531,
+      "learning_rate": 9.278345225971863e-07,
+      "loss": 0.296,
+      "step": 1852
+    },
+    {
+      "epoch": 0.8761229314420804,
+      "grad_norm": 6.212596893310547,
+      "learning_rate": 9.209023023054253e-07,
+      "loss": 0.2348,
+      "step": 1853
+    },
+    {
+      "epoch": 0.8765957446808511,
+      "grad_norm": 3.766883373260498,
+      "learning_rate": 9.139948257203934e-07,
+      "loss": 0.1481,
+      "step": 1854
+    },
+    {
+      "epoch": 0.8770685579196218,
+      "grad_norm": 3.6534929275512695,
+      "learning_rate": 9.071121116673731e-07,
+      "loss": 0.1831,
+      "step": 1855
+    },
+    {
+      "epoch": 0.8775413711583925,
+      "grad_norm": 6.801371097564697,
+      "learning_rate": 9.002541789041608e-07,
+      "loss": 0.257,
+      "step": 1856
+    },
+    {
+      "epoch": 0.8780141843971632,
+      "grad_norm": 6.545820713043213,
+      "learning_rate": 8.934210461210136e-07,
+      "loss": 0.2464,
+      "step": 1857
+    },
+    {
+      "epoch": 0.8784869976359339,
+      "grad_norm": 7.8918914794921875,
+      "learning_rate": 8.866127319406004e-07,
+      "loss": 0.2951,
+      "step": 1858
+    },
+    {
+      "epoch": 0.8789598108747045,
+      "grad_norm": 7.128468036651611,
+      "learning_rate": 8.79829254917951e-07,
+      "loss": 0.3351,
+      "step": 1859
+    },
+    {
+      "epoch": 0.8794326241134752,
+      "grad_norm": 7.129080772399902,
+      "learning_rate": 8.73070633540406e-07,
+      "loss": 0.1821,
+      "step": 1860
+    },
+    {
+      "epoch": 0.8794326241134752,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7235023041474654,
+      "eval_loss": 0.29677459597587585,
+      "eval_precision": 0.8674033149171271,
+      "eval_recall": 0.6205533596837944,
+      "eval_runtime": 48.3135,
+      "eval_samples_per_second": 5.713,
+      "eval_steps_per_second": 0.186,
+      "step": 1860
+    },
+    {
+      "epoch": 0.8799054373522459,
+      "grad_norm": 5.813145637512207,
+      "learning_rate": 8.663368862275634e-07,
+      "loss": 0.2184,
+      "step": 1861
+    },
+    {
+      "epoch": 0.8803782505910166,
+      "grad_norm": 4.450648307800293,
+      "learning_rate": 8.596280313312355e-07,
+      "loss": 0.2037,
+      "step": 1862
+    },
+    {
+      "epoch": 0.8808510638297873,
+      "grad_norm": 4.639596939086914,
+      "learning_rate": 8.5294408713539e-07,
+      "loss": 0.2164,
+      "step": 1863
+    },
+    {
+      "epoch": 0.881323877068558,
+      "grad_norm": 5.317780017852783,
+      "learning_rate": 8.462850718561045e-07,
+      "loss": 0.2591,
+      "step": 1864
+    },
+    {
+      "epoch": 0.8817966903073287,
+      "grad_norm": 5.928182125091553,
+      "learning_rate": 8.396510036415173e-07,
+      "loss": 0.2807,
+      "step": 1865
+    },
+    {
+      "epoch": 0.8822695035460993,
+      "grad_norm": 8.71645736694336,
+      "learning_rate": 8.330419005717782e-07,
+      "loss": 0.3168,
+      "step": 1866
+    },
+    {
+      "epoch": 0.88274231678487,
+      "grad_norm": 5.529267311096191,
+      "learning_rate": 8.264577806589968e-07,
+      "loss": 0.2113,
+      "step": 1867
+    },
+    {
+      "epoch": 0.8832151300236407,
+      "grad_norm": 4.838929176330566,
+      "learning_rate": 8.198986618471949e-07,
+      "loss": 0.1428,
+      "step": 1868
+    },
+    {
+      "epoch": 0.8836879432624114,
+      "grad_norm": 5.252522945404053,
+      "learning_rate": 8.133645620122566e-07,
+      "loss": 0.2061,
+      "step": 1869
+    },
+    {
+      "epoch": 0.8841607565011821,
+      "grad_norm": 5.35953950881958,
+      "learning_rate": 8.068554989618871e-07,
+      "loss": 0.1998,
+      "step": 1870
+    },
+    {
+      "epoch": 0.8846335697399527,
+      "grad_norm": 5.610535621643066,
+      "learning_rate": 8.003714904355486e-07,
+      "loss": 0.1787,
+      "step": 1871
+    },
+    {
+      "epoch": 0.8851063829787233,
+      "grad_norm": 7.672272205352783,
+      "learning_rate": 7.939125541044268e-07,
+      "loss": 0.147,
+      "step": 1872
+    },
+    {
+      "epoch": 0.885579196217494,
+      "grad_norm": 6.3421711921691895,
+      "learning_rate": 7.874787075713742e-07,
+      "loss": 0.2605,
+      "step": 1873
+    },
+    {
+      "epoch": 0.8860520094562647,
+      "grad_norm": 6.709553241729736,
+      "learning_rate": 7.810699683708644e-07,
+      "loss": 0.2765,
+      "step": 1874
+    },
+    {
+      "epoch": 0.8865248226950354,
+      "grad_norm": 7.121283531188965,
+      "learning_rate": 7.74686353968952e-07,
+      "loss": 0.2537,
+      "step": 1875
+    },
+    {
+      "epoch": 0.8869976359338061,
+      "grad_norm": 7.508021831512451,
+      "learning_rate": 7.683278817632056e-07,
+      "loss": 0.2712,
+      "step": 1876
+    },
+    {
+      "epoch": 0.8874704491725768,
+      "grad_norm": 6.003512859344482,
+      "learning_rate": 7.619945690826824e-07,
+      "loss": 0.2222,
+      "step": 1877
+    },
+    {
+      "epoch": 0.8879432624113475,
+      "grad_norm": 6.198127746582031,
+      "learning_rate": 7.556864331878633e-07,
+      "loss": 0.2216,
+      "step": 1878
+    },
+    {
+      "epoch": 0.8884160756501182,
+      "grad_norm": 4.947995185852051,
+      "learning_rate": 7.494034912706227e-07,
+      "loss": 0.1685,
+      "step": 1879
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 7.408123016357422,
+      "learning_rate": 7.43145760454167e-07,
+      "loss": 0.2354,
+      "step": 1880
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.726027397260274,
+      "eval_loss": 0.2952026128768921,
+      "eval_precision": 0.8594594594594595,
+      "eval_recall": 0.6284584980237155,
+      "eval_runtime": 47.4827,
+      "eval_samples_per_second": 5.813,
+      "eval_steps_per_second": 0.19,
+      "step": 1880
+    },
+    {
+      "epoch": 0.8893617021276595,
+      "grad_norm": 5.347765922546387,
+      "learning_rate": 7.369132577929938e-07,
+      "loss": 0.2025,
+      "step": 1881
+    },
+    {
+      "epoch": 0.8898345153664302,
+      "grad_norm": 5.331334590911865,
+      "learning_rate": 7.307060002728462e-07,
+      "loss": 0.2239,
+      "step": 1882
+    },
+    {
+      "epoch": 0.8903073286052009,
+      "grad_norm": 4.435246467590332,
+      "learning_rate": 7.245240048106628e-07,
+      "loss": 0.1737,
+      "step": 1883
+    },
+    {
+      "epoch": 0.8907801418439716,
+      "grad_norm": 7.154726028442383,
+      "learning_rate": 7.183672882545401e-07,
+      "loss": 0.2582,
+      "step": 1884
+    },
+    {
+      "epoch": 0.8912529550827423,
+      "grad_norm": 4.464818000793457,
+      "learning_rate": 7.122358673836782e-07,
+      "loss": 0.1574,
+      "step": 1885
+    },
+    {
+      "epoch": 0.891725768321513,
+      "grad_norm": 6.102884769439697,
+      "learning_rate": 7.061297589083327e-07,
+      "loss": 0.2082,
+      "step": 1886
+    },
+    {
+      "epoch": 0.8921985815602836,
+      "grad_norm": 5.337555408477783,
+      "learning_rate": 7.000489794697774e-07,
+      "loss": 0.237,
+      "step": 1887
+    },
+    {
+      "epoch": 0.8926713947990543,
+      "grad_norm": 6.383353233337402,
+      "learning_rate": 6.939935456402613e-07,
+      "loss": 0.2242,
+      "step": 1888
+    },
+    {
+      "epoch": 0.893144208037825,
+      "grad_norm": 5.135204792022705,
+      "learning_rate": 6.879634739229502e-07,
+      "loss": 0.2586,
+      "step": 1889
+    },
+    {
+      "epoch": 0.8936170212765957,
+      "grad_norm": 13.136929512023926,
+      "learning_rate": 6.819587807518924e-07,
+      "loss": 0.3131,
+      "step": 1890
+    },
+    {
+      "epoch": 0.8940898345153664,
+      "grad_norm": 5.313321590423584,
+      "learning_rate": 6.759794824919686e-07,
+      "loss": 0.2519,
+      "step": 1891
+    },
+    {
+      "epoch": 0.8945626477541371,
+      "grad_norm": 5.850648403167725,
+      "learning_rate": 6.700255954388535e-07,
+      "loss": 0.2373,
+      "step": 1892
+    },
+    {
+      "epoch": 0.8950354609929078,
+      "grad_norm": 5.615677356719971,
+      "learning_rate": 6.640971358189651e-07,
+      "loss": 0.1992,
+      "step": 1893
+    },
+    {
+      "epoch": 0.8955082742316784,
+      "grad_norm": 6.700225830078125,
+      "learning_rate": 6.581941197894226e-07,
+      "loss": 0.2574,
+      "step": 1894
+    },
+    {
+      "epoch": 0.8959810874704491,
+      "grad_norm": 9.15202808380127,
+      "learning_rate": 6.523165634380047e-07,
+      "loss": 0.3287,
+      "step": 1895
+    },
+    {
+      "epoch": 0.8964539007092198,
+      "grad_norm": 4.325416564941406,
+      "learning_rate": 6.464644827830945e-07,
+      "loss": 0.1514,
+      "step": 1896
+    },
+    {
+      "epoch": 0.8969267139479905,
+      "grad_norm": 5.760486602783203,
+      "learning_rate": 6.406378937736602e-07,
+      "loss": 0.2557,
+      "step": 1897
+    },
+    {
+      "epoch": 0.8973995271867612,
+      "grad_norm": 5.745640754699707,
+      "learning_rate": 6.348368122891857e-07,
+      "loss": 0.1752,
+      "step": 1898
+    },
+    {
+      "epoch": 0.8978723404255319,
+      "grad_norm": 6.136902809143066,
+      "learning_rate": 6.29061254139639e-07,
+      "loss": 0.2628,
+      "step": 1899
+    },
+    {
+      "epoch": 0.8983451536643026,
+      "grad_norm": 10.711871147155762,
+      "learning_rate": 6.233112350654302e-07,
+      "loss": 0.3563,
+      "step": 1900
+    },
+    {
+      "epoch": 0.8983451536643026,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7272727272727273,
+      "eval_loss": 0.2932513654232025,
+      "eval_precision": 0.8556149732620321,
+      "eval_recall": 0.6324110671936759,
+      "eval_runtime": 47.8382,
+      "eval_samples_per_second": 5.769,
+      "eval_steps_per_second": 0.188,
+      "step": 1900
+    },
+    {
+      "epoch": 0.8988179669030733,
+      "grad_norm": 6.118460178375244,
+      "learning_rate": 6.175867707373695e-07,
+      "loss": 0.2678,
+      "step": 1901
+    },
+    {
+      "epoch": 0.8992907801418439,
+      "grad_norm": 5.572527885437012,
+      "learning_rate": 6.118878767566139e-07,
+      "loss": 0.2428,
+      "step": 1902
+    },
+    {
+      "epoch": 0.8997635933806146,
+      "grad_norm": 6.919821262359619,
+      "learning_rate": 6.062145686546383e-07,
+      "loss": 0.1785,
+      "step": 1903
+    },
+    {
+      "epoch": 0.9002364066193853,
+      "grad_norm": 5.680126667022705,
+      "learning_rate": 6.00566861893186e-07,
+      "loss": 0.2201,
+      "step": 1904
+    },
+    {
+      "epoch": 0.900709219858156,
+      "grad_norm": 5.649215221405029,
+      "learning_rate": 5.949447718642254e-07,
+      "loss": 0.169,
+      "step": 1905
+    },
+    {
+      "epoch": 0.9011820330969267,
+      "grad_norm": 6.076656341552734,
+      "learning_rate": 5.893483138899125e-07,
+      "loss": 0.219,
+      "step": 1906
+    },
+    {
+      "epoch": 0.9016548463356974,
+      "grad_norm": 5.83716344833374,
+      "learning_rate": 5.837775032225479e-07,
+      "loss": 0.2754,
+      "step": 1907
+    },
+    {
+      "epoch": 0.902127659574468,
+      "grad_norm": 4.6485819816589355,
+      "learning_rate": 5.782323550445313e-07,
+      "loss": 0.2558,
+      "step": 1908
+    },
+    {
+      "epoch": 0.9026004728132387,
+      "grad_norm": 5.645073890686035,
+      "learning_rate": 5.727128844683227e-07,
+      "loss": 0.214,
+      "step": 1909
+    },
+    {
+      "epoch": 0.9030732860520094,
+      "grad_norm": 7.4083476066589355,
+      "learning_rate": 5.672191065364097e-07,
+      "loss": 0.2417,
+      "step": 1910
+    },
+    {
+      "epoch": 0.9035460992907801,
+      "grad_norm": 6.812260150909424,
+      "learning_rate": 5.617510362212486e-07,
+      "loss": 0.3103,
+      "step": 1911
+    },
+    {
+      "epoch": 0.9040189125295508,
+      "grad_norm": 5.349275588989258,
+      "learning_rate": 5.563086884252389e-07,
+      "loss": 0.206,
+      "step": 1912
+    },
+    {
+      "epoch": 0.9044917257683215,
+      "grad_norm": 5.61432409286499,
+      "learning_rate": 5.508920779806748e-07,
+      "loss": 0.2645,
+      "step": 1913
+    },
+    {
+      "epoch": 0.9049645390070922,
+      "grad_norm": 8.072186470031738,
+      "learning_rate": 5.455012196497089e-07,
+      "loss": 0.2231,
+      "step": 1914
+    },
+    {
+      "epoch": 0.9054373522458629,
+      "grad_norm": 6.193761348724365,
+      "learning_rate": 5.4013612812431e-07,
+      "loss": 0.2029,
+      "step": 1915
+    },
+    {
+      "epoch": 0.9059101654846335,
+      "grad_norm": 3.4695332050323486,
+      "learning_rate": 5.34796818026222e-07,
+      "loss": 0.194,
+      "step": 1916
+    },
+    {
+      "epoch": 0.9063829787234042,
+      "grad_norm": 4.863160133361816,
+      "learning_rate": 5.294833039069269e-07,
+      "loss": 0.1776,
+      "step": 1917
+    },
+    {
+      "epoch": 0.9068557919621749,
+      "grad_norm": 5.608933448791504,
+      "learning_rate": 5.241956002476045e-07,
+      "loss": 0.2093,
+      "step": 1918
+    },
+    {
+      "epoch": 0.9073286052009456,
+      "grad_norm": 4.8589959144592285,
+      "learning_rate": 5.189337214590895e-07,
+      "loss": 0.1433,
+      "step": 1919
+    },
+    {
+      "epoch": 0.9078014184397163,
+      "grad_norm": 5.196472644805908,
+      "learning_rate": 5.136976818818373e-07,
+      "loss": 0.2716,
+      "step": 1920
+    },
+    {
+      "epoch": 0.9078014184397163,
+      "eval_accuracy": 0.8647450110864745,
+      "eval_f1": 0.7188940092165899,
+      "eval_loss": 0.296786367893219,
+      "eval_precision": 0.861878453038674,
+      "eval_recall": 0.616600790513834,
+      "eval_runtime": 48.2695,
+      "eval_samples_per_second": 5.718,
+      "eval_steps_per_second": 0.186,
+      "step": 1920
+    },
+    {
+      "epoch": 0.908274231678487,
+      "grad_norm": 5.688756942749023,
+      "learning_rate": 5.08487495785881e-07,
+      "loss": 0.1862,
+      "step": 1921
+    },
+    {
+      "epoch": 0.9087470449172577,
+      "grad_norm": 4.335201263427734,
+      "learning_rate": 5.03303177370793e-07,
+      "loss": 0.2051,
+      "step": 1922
+    },
+    {
+      "epoch": 0.9092198581560283,
+      "grad_norm": 5.4330735206604,
+      "learning_rate": 4.981447407656504e-07,
+      "loss": 0.2108,
+      "step": 1923
+    },
+    {
+      "epoch": 0.909692671394799,
+      "grad_norm": 7.466004848480225,
+      "learning_rate": 4.930122000289905e-07,
+      "loss": 0.2334,
+      "step": 1924
+    },
+    {
+      "epoch": 0.9101654846335697,
+      "grad_norm": 8.651698112487793,
+      "learning_rate": 4.879055691487767e-07,
+      "loss": 0.2628,
+      "step": 1925
+    },
+    {
+      "epoch": 0.9106382978723404,
+      "grad_norm": 6.537608623504639,
+      "learning_rate": 4.828248620423559e-07,
+      "loss": 0.2477,
+      "step": 1926
+    },
+    {
+      "epoch": 0.9111111111111111,
+      "grad_norm": 5.294820308685303,
+      "learning_rate": 4.77770092556431e-07,
+      "loss": 0.2558,
+      "step": 1927
+    },
+    {
+      "epoch": 0.9115839243498818,
+      "grad_norm": 6.457219123840332,
+      "learning_rate": 4.72741274467009e-07,
+      "loss": 0.2544,
+      "step": 1928
+    },
+    {
+      "epoch": 0.9120567375886525,
+      "grad_norm": 6.060578346252441,
+      "learning_rate": 4.6773842147937234e-07,
+      "loss": 0.2504,
+      "step": 1929
+    },
+    {
+      "epoch": 0.9125295508274232,
+      "grad_norm": 6.253387928009033,
+      "learning_rate": 4.627615472280389e-07,
+      "loss": 0.2758,
+      "step": 1930
+    },
+    {
+      "epoch": 0.9130023640661938,
+      "grad_norm": 5.318558692932129,
+      "learning_rate": 4.5781066527673003e-07,
+      "loss": 0.1307,
+      "step": 1931
+    },
+    {
+      "epoch": 0.9134751773049645,
+      "grad_norm": 6.5254316329956055,
+      "learning_rate": 4.528857891183214e-07,
+      "loss": 0.2367,
+      "step": 1932
+    },
+    {
+      "epoch": 0.9139479905437352,
+      "grad_norm": 4.4486165046691895,
+      "learning_rate": 4.479869321748187e-07,
+      "loss": 0.1974,
+      "step": 1933
+    },
+    {
+      "epoch": 0.9144208037825059,
+      "grad_norm": 5.705449104309082,
+      "learning_rate": 4.431141077973156e-07,
+      "loss": 0.1546,
+      "step": 1934
+    },
+    {
+      "epoch": 0.9148936170212766,
+      "grad_norm": 6.80421781539917,
+      "learning_rate": 4.382673292659545e-07,
+      "loss": 0.2338,
+      "step": 1935
+    },
+    {
+      "epoch": 0.9153664302600473,
+      "grad_norm": 6.027945518493652,
+      "learning_rate": 4.334466097899015e-07,
+      "loss": 0.2387,
+      "step": 1936
+    },
+    {
+      "epoch": 0.915839243498818,
+      "grad_norm": 7.638448715209961,
+      "learning_rate": 4.28651962507296e-07,
+      "loss": 0.3043,
+      "step": 1937
+    },
+    {
+      "epoch": 0.9163120567375886,
+      "grad_norm": 5.784573078155518,
+      "learning_rate": 4.2388340048522325e-07,
+      "loss": 0.1626,
+      "step": 1938
+    },
+    {
+      "epoch": 0.9167848699763593,
+      "grad_norm": 7.274070739746094,
+      "learning_rate": 4.191409367196753e-07,
+      "loss": 0.3126,
+      "step": 1939
+    },
+    {
+      "epoch": 0.91725768321513,
+      "grad_norm": 4.2528533935546875,
+      "learning_rate": 4.1442458413552324e-07,
+      "loss": 0.1428,
+      "step": 1940
+    },
+    {
+      "epoch": 0.91725768321513,
+      "eval_accuracy": 0.8636363636363636,
+      "eval_f1": 0.7146171693735499,
+      "eval_loss": 0.2969609200954437,
+      "eval_precision": 0.8651685393258427,
+      "eval_recall": 0.6086956521739131,
+      "eval_runtime": 49.0031,
+      "eval_samples_per_second": 5.632,
+      "eval_steps_per_second": 0.184,
+      "step": 1940
+    },
+    {
+      "epoch": 0.9177304964539007,
+      "grad_norm": 6.376473426818848,
+      "learning_rate": 4.097343555864719e-07,
+      "loss": 0.3121,
+      "step": 1941
+    },
+    {
+      "epoch": 0.9182033096926714,
+      "grad_norm": 4.471124172210693,
+      "learning_rate": 4.0507026385502747e-07,
+      "loss": 0.2247,
+      "step": 1942
+    },
+    {
+      "epoch": 0.9186761229314421,
+      "grad_norm": 4.96635103225708,
+      "learning_rate": 4.0043232165246413e-07,
+      "loss": 0.1916,
+      "step": 1943
+    },
+    {
+      "epoch": 0.9191489361702128,
+      "grad_norm": 4.768991947174072,
+      "learning_rate": 3.958205416187966e-07,
+      "loss": 0.1832,
+      "step": 1944
+    },
+    {
+      "epoch": 0.9196217494089834,
+      "grad_norm": 3.4908788204193115,
+      "learning_rate": 3.9123493632272967e-07,
+      "loss": 0.1689,
+      "step": 1945
+    },
+    {
+      "epoch": 0.9200945626477541,
+      "grad_norm": 7.56951379776001,
+      "learning_rate": 3.8667551826163774e-07,
+      "loss": 0.2176,
+      "step": 1946
+    },
+    {
+      "epoch": 0.9205673758865248,
+      "grad_norm": 6.852828502655029,
+      "learning_rate": 3.821422998615254e-07,
+      "loss": 0.2735,
+      "step": 1947
+    },
+    {
+      "epoch": 0.9210401891252955,
+      "grad_norm": 5.238857269287109,
+      "learning_rate": 3.776352934769911e-07,
+      "loss": 0.2495,
+      "step": 1948
+    },
+    {
+      "epoch": 0.9215130023640662,
+      "grad_norm": 6.270791530609131,
+      "learning_rate": 3.731545113912005e-07,
+      "loss": 0.2455,
+      "step": 1949
+    },
+    {
+      "epoch": 0.9219858156028369,
+      "grad_norm": 6.1830034255981445,
+      "learning_rate": 3.6869996581584746e-07,
+      "loss": 0.252,
+      "step": 1950
+    },
+    {
+      "epoch": 0.9224586288416076,
+      "grad_norm": 6.186679840087891,
+      "learning_rate": 3.6427166889112184e-07,
+      "loss": 0.2653,
+      "step": 1951
+    },
+    {
+      "epoch": 0.9229314420803783,
+      "grad_norm": 4.7130126953125,
+      "learning_rate": 3.5986963268567433e-07,
+      "loss": 0.1775,
+      "step": 1952
+    },
+    {
+      "epoch": 0.9234042553191489,
+      "grad_norm": 4.696549892425537,
+      "learning_rate": 3.5549386919659033e-07,
+      "loss": 0.2533,
+      "step": 1953
+    },
+    {
+      "epoch": 0.9238770685579196,
+      "grad_norm": 4.767563819885254,
+      "learning_rate": 3.5114439034935053e-07,
+      "loss": 0.2097,
+      "step": 1954
+    },
+    {
+      "epoch": 0.9243498817966903,
+      "grad_norm": 3.9315848350524902,
+      "learning_rate": 3.468212079978017e-07,
+      "loss": 0.1625,
+      "step": 1955
+    },
+    {
+      "epoch": 0.924822695035461,
+      "grad_norm": 3.269307851791382,
+      "learning_rate": 3.4252433392412244e-07,
+      "loss": 0.1028,
+      "step": 1956
+    },
+    {
+      "epoch": 0.9252955082742317,
+      "grad_norm": 6.641714096069336,
+      "learning_rate": 3.3825377983879195e-07,
+      "loss": 0.2642,
+      "step": 1957
+    },
+    {
+      "epoch": 0.9257683215130024,
+      "grad_norm": 6.65203332901001,
+      "learning_rate": 3.340095573805613e-07,
+      "loss": 0.2346,
+      "step": 1958
+    },
+    {
+      "epoch": 0.926241134751773,
+      "grad_norm": 6.2382025718688965,
+      "learning_rate": 3.2979167811641567e-07,
+      "loss": 0.2514,
+      "step": 1959
+    },
+    {
+      "epoch": 0.9267139479905437,
+      "grad_norm": 4.330326557159424,
+      "learning_rate": 3.256001535415465e-07,
+      "loss": 0.2108,
+      "step": 1960
+    },
+    {
+      "epoch": 0.9267139479905437,
+      "eval_accuracy": 0.8658536585365854,
+      "eval_f1": 0.7205542725173211,
+      "eval_loss": 0.29785633087158203,
+      "eval_precision": 0.8666666666666667,
+      "eval_recall": 0.616600790513834,
+      "eval_runtime": 47.846,
+      "eval_samples_per_second": 5.769,
+      "eval_steps_per_second": 0.188,
+      "step": 1960
+    },
+    {
+      "epoch": 0.9271867612293144,
+      "grad_norm": 4.008530139923096,
+      "learning_rate": 3.214349950793183e-07,
+      "loss": 0.1599,
+      "step": 1961
+    },
+    {
+      "epoch": 0.9276595744680851,
+      "grad_norm": 6.942195415496826,
+      "learning_rate": 3.172962140812419e-07,
+      "loss": 0.3592,
+      "step": 1962
+    },
+    {
+      "epoch": 0.9281323877068558,
+      "grad_norm": 4.6299567222595215,
+      "learning_rate": 3.1318382182693894e-07,
+      "loss": 0.2181,
+      "step": 1963
+    },
+    {
+      "epoch": 0.9286052009456265,
+      "grad_norm": 5.631269454956055,
+      "learning_rate": 3.0909782952410984e-07,
+      "loss": 0.269,
+      "step": 1964
+    },
+    {
+      "epoch": 0.9290780141843972,
+      "grad_norm": 4.7120232582092285,
+      "learning_rate": 3.05038248308509e-07,
+      "loss": 0.1236,
+      "step": 1965
+    },
+    {
+      "epoch": 0.9295508274231679,
+      "grad_norm": 7.05232048034668,
+      "learning_rate": 3.010050892439109e-07,
+      "loss": 0.2494,
+      "step": 1966
+    },
+    {
+      "epoch": 0.9300236406619385,
+      "grad_norm": 4.27794885635376,
+      "learning_rate": 2.9699836332208186e-07,
+      "loss": 0.1902,
+      "step": 1967
+    },
+    {
+      "epoch": 0.9304964539007092,
+      "grad_norm": 5.519193172454834,
+      "learning_rate": 2.930180814627448e-07,
+      "loss": 0.2123,
+      "step": 1968
+    },
+    {
+      "epoch": 0.9309692671394799,
+      "grad_norm": 5.249775409698486,
+      "learning_rate": 2.890642545135569e-07,
+      "loss": 0.2105,
+      "step": 1969
+    },
+    {
+      "epoch": 0.9314420803782506,
+      "grad_norm": 6.687892436981201,
+      "learning_rate": 2.851368932500742e-07,
+      "loss": 0.2725,
+      "step": 1970
+    },
+    {
+      "epoch": 0.9319148936170213,
+      "grad_norm": 5.885591506958008,
+      "learning_rate": 2.8123600837572594e-07,
+      "loss": 0.261,
+      "step": 1971
+    },
+    {
+      "epoch": 0.932387706855792,
+      "grad_norm": 4.598552227020264,
+      "learning_rate": 2.773616105217836e-07,
+      "loss": 0.1995,
+      "step": 1972
+    },
+    {
+      "epoch": 0.9328605200945627,
+      "grad_norm": 6.907764434814453,
+      "learning_rate": 2.7351371024733174e-07,
+      "loss": 0.2393,
+      "step": 1973
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 5.189178466796875,
+      "learning_rate": 2.6969231803923856e-07,
+      "loss": 0.1963,
+      "step": 1974
+    },
+    {
+      "epoch": 0.933806146572104,
+      "grad_norm": 5.675337791442871,
+      "learning_rate": 2.6589744431213313e-07,
+      "loss": 0.2482,
+      "step": 1975
+    },
+    {
+      "epoch": 0.9342789598108747,
+      "grad_norm": 4.176632881164551,
+      "learning_rate": 2.621290994083692e-07,
+      "loss": 0.1704,
+      "step": 1976
+    },
+    {
+      "epoch": 0.9347517730496454,
+      "grad_norm": 4.455401420593262,
+      "learning_rate": 2.5838729359799917e-07,
+      "loss": 0.2635,
+      "step": 1977
+    },
+    {
+      "epoch": 0.9352245862884161,
+      "grad_norm": 5.684086799621582,
+      "learning_rate": 2.546720370787492e-07,
+      "loss": 0.2496,
+      "step": 1978
+    },
+    {
+      "epoch": 0.9356973995271868,
+      "grad_norm": 7.903246879577637,
+      "learning_rate": 2.5098333997598755e-07,
+      "loss": 0.3008,
+      "step": 1979
+    },
+    {
+      "epoch": 0.9361702127659575,
+      "grad_norm": 4.9532270431518555,
+      "learning_rate": 2.4732121234270156e-07,
+      "loss": 0.1501,
+      "step": 1980
+    },
+    {
+      "epoch": 0.9361702127659575,
+      "eval_accuracy": 0.8669623059866962,
+      "eval_f1": 0.7222222222222222,
+      "eval_loss": 0.2986098527908325,
+      "eval_precision": 0.8715083798882681,
+      "eval_recall": 0.616600790513834,
+      "eval_runtime": 49.3058,
+      "eval_samples_per_second": 5.598,
+      "eval_steps_per_second": 0.183,
+      "step": 1980
+    },
+    {
+      "epoch": 0.9366430260047282,
+      "grad_norm": 4.474625110626221,
+      "learning_rate": 2.4368566415946536e-07,
+      "loss": 0.1952,
+      "step": 1981
+    },
+    {
+      "epoch": 0.9371158392434988,
+      "grad_norm": 7.129388809204102,
+      "learning_rate": 2.400767053344144e-07,
+      "loss": 0.2342,
+      "step": 1982
+    },
+    {
+      "epoch": 0.9375886524822695,
+      "grad_norm": 7.6979780197143555,
+      "learning_rate": 2.3649434570321984e-07,
+      "loss": 0.2414,
+      "step": 1983
+    },
+    {
+      "epoch": 0.9380614657210402,
+      "grad_norm": 5.29350471496582,
+      "learning_rate": 2.3293859502906192e-07,
+      "loss": 0.241,
+      "step": 1984
+    },
+    {
+      "epoch": 0.9385342789598109,
+      "grad_norm": 5.01874303817749,
+      "learning_rate": 2.2940946300260113e-07,
+      "loss": 0.2131,
+      "step": 1985
+    },
+    {
+      "epoch": 0.9390070921985816,
+      "grad_norm": 5.676163673400879,
+      "learning_rate": 2.2590695924195048e-07,
+      "loss": 0.3109,
+      "step": 1986
+    },
+    {
+      "epoch": 0.9394799054373523,
+      "grad_norm": 4.1814045906066895,
+      "learning_rate": 2.2243109329265545e-07,
+      "loss": 0.1398,
+      "step": 1987
+    },
+    {
+      "epoch": 0.939952718676123,
+      "grad_norm": 5.860604763031006,
+      "learning_rate": 2.1898187462766395e-07,
+      "loss": 0.2024,
+      "step": 1988
+    },
+    {
+      "epoch": 0.9404255319148936,
+      "grad_norm": 5.536343574523926,
+      "learning_rate": 2.1555931264729657e-07,
+      "loss": 0.2877,
+      "step": 1989
+    },
+    {
+      "epoch": 0.9408983451536643,
+      "grad_norm": 4.574560642242432,
+      "learning_rate": 2.121634166792308e-07,
+      "loss": 0.226,
+      "step": 1990
+    },
+    {
+      "epoch": 0.941371158392435,
+      "grad_norm": 6.119741439819336,
+      "learning_rate": 2.087941959784634e-07,
+      "loss": 0.213,
+      "step": 1991
+    },
+    {
+      "epoch": 0.9418439716312057,
+      "grad_norm": 5.73854398727417,
+      "learning_rate": 2.054516597272993e-07,
+      "loss": 0.2295,
+      "step": 1992
+    },
+    {
+      "epoch": 0.9423167848699764,
+      "grad_norm": 6.395056247711182,
+      "learning_rate": 2.021358170353138e-07,
+      "loss": 0.2884,
+      "step": 1993
+    },
+    {
+      "epoch": 0.9427895981087471,
+      "grad_norm": 6.370244026184082,
+      "learning_rate": 1.988466769393349e-07,
+      "loss": 0.2622,
+      "step": 1994
+    },
+    {
+      "epoch": 0.9432624113475178,
+      "grad_norm": 5.031834125518799,
+      "learning_rate": 1.9558424840341428e-07,
+      "loss": 0.2347,
+      "step": 1995
+    },
+    {
+      "epoch": 0.9437352245862884,
+      "grad_norm": 4.863191604614258,
+      "learning_rate": 1.9234854031880856e-07,
+      "loss": 0.2221,
+      "step": 1996
+    },
+    {
+      "epoch": 0.9442080378250591,
+      "grad_norm": 7.025779724121094,
+      "learning_rate": 1.891395615039504e-07,
+      "loss": 0.2246,
+      "step": 1997
+    },
+    {
+      "epoch": 0.9446808510638298,
+      "grad_norm": 6.635202407836914,
+      "learning_rate": 1.859573207044274e-07,
+      "loss": 0.2719,
+      "step": 1998
+    },
+    {
+      "epoch": 0.9451536643026005,
+      "grad_norm": 7.9684014320373535,
+      "learning_rate": 1.8280182659295321e-07,
+      "loss": 0.3291,
+      "step": 1999
+    },
+    {
+      "epoch": 0.9456264775413712,
+      "grad_norm": 5.251444339752197,
+      "learning_rate": 1.7967308776934755e-07,
+      "loss": 0.2162,
+      "step": 2000
+    },
+    {
+      "epoch": 0.9456264775413712,
+      "eval_accuracy": 0.8625277161862528,
+      "eval_f1": 0.7116279069767442,
+      "eval_loss": 0.29841360449790955,
+      "eval_precision": 0.864406779661017,
+      "eval_recall": 0.6047430830039525,
+      "eval_runtime": 48.9233,
+      "eval_samples_per_second": 5.641,
+      "eval_steps_per_second": 0.184,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 2115,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.1185465136093594e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}