| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 9123, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005480653293872629, | |
| "grad_norm": 9.609877586364746, | |
| "learning_rate": 4.999996294265421e-05, | |
| "loss": 5.868, | |
| "num_input_tokens_seen": 3944, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0010961306587745259, | |
| "grad_norm": 8.435359001159668, | |
| "learning_rate": 4.999985177072669e-05, | |
| "loss": 5.1519, | |
| "num_input_tokens_seen": 7552, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.001644195988161789, | |
| "grad_norm": 4.555312156677246, | |
| "learning_rate": 4.999966648454702e-05, | |
| "loss": 4.5297, | |
| "num_input_tokens_seen": 10552, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0021922613175490518, | |
| "grad_norm": 5.34758186340332, | |
| "learning_rate": 4.9999407084664514e-05, | |
| "loss": 4.1016, | |
| "num_input_tokens_seen": 14720, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.002740326646936315, | |
| "grad_norm": 4.284458160400391, | |
| "learning_rate": 4.999907357184816e-05, | |
| "loss": 4.0075, | |
| "num_input_tokens_seen": 17648, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.003288391976323578, | |
| "grad_norm": 6.062355041503906, | |
| "learning_rate": 4.99986659470867e-05, | |
| "loss": 3.9682, | |
| "num_input_tokens_seen": 21192, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.003836457305710841, | |
| "grad_norm": 3.1782262325286865, | |
| "learning_rate": 4.9998184211588574e-05, | |
| "loss": 3.6158, | |
| "num_input_tokens_seen": 24680, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0043845226350981035, | |
| "grad_norm": 4.492194652557373, | |
| "learning_rate": 4.999762836678192e-05, | |
| "loss": 4.4312, | |
| "num_input_tokens_seen": 27304, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004932587964485367, | |
| "grad_norm": 4.35511589050293, | |
| "learning_rate": 4.99969984143146e-05, | |
| "loss": 4.0391, | |
| "num_input_tokens_seen": 29824, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.00548065329387263, | |
| "grad_norm": 4.070927619934082, | |
| "learning_rate": 4.999629435605416e-05, | |
| "loss": 3.9559, | |
| "num_input_tokens_seen": 32496, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.006028718623259892, | |
| "grad_norm": 3.5581634044647217, | |
| "learning_rate": 4.9995516194087845e-05, | |
| "loss": 3.6342, | |
| "num_input_tokens_seen": 35624, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.006576783952647156, | |
| "grad_norm": 3.646406888961792, | |
| "learning_rate": 4.999466393072258e-05, | |
| "loss": 3.8581, | |
| "num_input_tokens_seen": 38896, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.007124849282034418, | |
| "grad_norm": 3.964329719543457, | |
| "learning_rate": 4.9993737568484967e-05, | |
| "loss": 4.0054, | |
| "num_input_tokens_seen": 42736, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.007672914611421682, | |
| "grad_norm": 4.500335693359375, | |
| "learning_rate": 4.99927371101213e-05, | |
| "loss": 3.3325, | |
| "num_input_tokens_seen": 45256, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.008220979940808944, | |
| "grad_norm": 4.3628315925598145, | |
| "learning_rate": 4.999166255859752e-05, | |
| "loss": 3.5725, | |
| "num_input_tokens_seen": 48576, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.008769045270196207, | |
| "grad_norm": 3.4167840480804443, | |
| "learning_rate": 4.9990513917099225e-05, | |
| "loss": 3.7729, | |
| "num_input_tokens_seen": 52736, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.00931711059958347, | |
| "grad_norm": 4.027678489685059, | |
| "learning_rate": 4.998929118903167e-05, | |
| "loss": 3.7879, | |
| "num_input_tokens_seen": 56256, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.009865175928970734, | |
| "grad_norm": 4.3075056076049805, | |
| "learning_rate": 4.9987994378019746e-05, | |
| "loss": 3.5822, | |
| "num_input_tokens_seen": 59448, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.010413241258357997, | |
| "grad_norm": 3.550978899002075, | |
| "learning_rate": 4.9986623487907955e-05, | |
| "loss": 3.8015, | |
| "num_input_tokens_seen": 63424, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.01096130658774526, | |
| "grad_norm": 3.6582727432250977, | |
| "learning_rate": 4.998517852276042e-05, | |
| "loss": 3.7712, | |
| "num_input_tokens_seen": 66720, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.011509371917132522, | |
| "grad_norm": 5.284353733062744, | |
| "learning_rate": 4.9983659486860865e-05, | |
| "loss": 3.5192, | |
| "num_input_tokens_seen": 69280, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.012057437246519784, | |
| "grad_norm": 3.712407350540161, | |
| "learning_rate": 4.998206638471261e-05, | |
| "loss": 3.9006, | |
| "num_input_tokens_seen": 72488, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.012605502575907049, | |
| "grad_norm": 5.380141258239746, | |
| "learning_rate": 4.9980399221038544e-05, | |
| "loss": 3.7691, | |
| "num_input_tokens_seen": 75728, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.013153567905294311, | |
| "grad_norm": 6.7210693359375, | |
| "learning_rate": 4.997865800078112e-05, | |
| "loss": 3.4306, | |
| "num_input_tokens_seen": 78456, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.013701633234681574, | |
| "grad_norm": 3.6822457313537598, | |
| "learning_rate": 4.997684272910233e-05, | |
| "loss": 3.7098, | |
| "num_input_tokens_seen": 81912, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.014249698564068837, | |
| "grad_norm": 4.587904453277588, | |
| "learning_rate": 4.997495341138373e-05, | |
| "loss": 3.7503, | |
| "num_input_tokens_seen": 85768, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0147977638934561, | |
| "grad_norm": 4.4221510887146, | |
| "learning_rate": 4.997299005322634e-05, | |
| "loss": 3.6916, | |
| "num_input_tokens_seen": 89744, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.015345829222843364, | |
| "grad_norm": 4.955567359924316, | |
| "learning_rate": 4.9970952660450734e-05, | |
| "loss": 3.8345, | |
| "num_input_tokens_seen": 93584, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.015893894552230625, | |
| "grad_norm": 3.8360307216644287, | |
| "learning_rate": 4.996884123909692e-05, | |
| "loss": 3.8622, | |
| "num_input_tokens_seen": 96880, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.01644195988161789, | |
| "grad_norm": 4.293831825256348, | |
| "learning_rate": 4.996665579542439e-05, | |
| "loss": 3.6978, | |
| "num_input_tokens_seen": 99736, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.016990025211005153, | |
| "grad_norm": 3.8615922927856445, | |
| "learning_rate": 4.99643963359121e-05, | |
| "loss": 3.7886, | |
| "num_input_tokens_seen": 102768, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.017538090540392414, | |
| "grad_norm": 4.592337608337402, | |
| "learning_rate": 4.996206286725841e-05, | |
| "loss": 3.4776, | |
| "num_input_tokens_seen": 107960, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.01808615586977968, | |
| "grad_norm": 5.695650577545166, | |
| "learning_rate": 4.995965539638108e-05, | |
| "loss": 3.9904, | |
| "num_input_tokens_seen": 110712, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.01863422119916694, | |
| "grad_norm": 6.341024398803711, | |
| "learning_rate": 4.995717393041729e-05, | |
| "loss": 3.727, | |
| "num_input_tokens_seen": 114496, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.019182286528554204, | |
| "grad_norm": 5.523504734039307, | |
| "learning_rate": 4.995461847672354e-05, | |
| "loss": 3.5366, | |
| "num_input_tokens_seen": 118408, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.019730351857941468, | |
| "grad_norm": 4.576908111572266, | |
| "learning_rate": 4.995198904287572e-05, | |
| "loss": 3.4552, | |
| "num_input_tokens_seen": 122024, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.02027841718732873, | |
| "grad_norm": 4.912643909454346, | |
| "learning_rate": 4.9949285636669e-05, | |
| "loss": 3.878, | |
| "num_input_tokens_seen": 125680, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.020826482516715993, | |
| "grad_norm": 3.790379047393799, | |
| "learning_rate": 4.994650826611787e-05, | |
| "loss": 3.7852, | |
| "num_input_tokens_seen": 129056, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.021374547846103254, | |
| "grad_norm": 4.877086162567139, | |
| "learning_rate": 4.9943656939456094e-05, | |
| "loss": 3.7977, | |
| "num_input_tokens_seen": 132072, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.02192261317549052, | |
| "grad_norm": 4.675802230834961, | |
| "learning_rate": 4.994073166513667e-05, | |
| "loss": 3.6024, | |
| "num_input_tokens_seen": 134448, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.022470678504877783, | |
| "grad_norm": 9.45524787902832, | |
| "learning_rate": 4.9937732451831845e-05, | |
| "loss": 3.9247, | |
| "num_input_tokens_seen": 137808, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.023018743834265044, | |
| "grad_norm": 4.349103927612305, | |
| "learning_rate": 4.9934659308433024e-05, | |
| "loss": 3.5971, | |
| "num_input_tokens_seen": 140752, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.023566809163652308, | |
| "grad_norm": 3.90029239654541, | |
| "learning_rate": 4.993151224405084e-05, | |
| "loss": 3.656, | |
| "num_input_tokens_seen": 143328, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.02411487449303957, | |
| "grad_norm": 3.4128267765045166, | |
| "learning_rate": 4.992829126801502e-05, | |
| "loss": 3.7457, | |
| "num_input_tokens_seen": 146792, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.024662939822426833, | |
| "grad_norm": 5.266091346740723, | |
| "learning_rate": 4.9924996389874435e-05, | |
| "loss": 3.3972, | |
| "num_input_tokens_seen": 150352, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.025211005151814098, | |
| "grad_norm": 3.7570605278015137, | |
| "learning_rate": 4.992162761939704e-05, | |
| "loss": 2.8386, | |
| "num_input_tokens_seen": 153688, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.02575907048120136, | |
| "grad_norm": 3.587785243988037, | |
| "learning_rate": 4.991818496656986e-05, | |
| "loss": 3.909, | |
| "num_input_tokens_seen": 156824, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.026307135810588623, | |
| "grad_norm": 4.7243757247924805, | |
| "learning_rate": 4.991466844159893e-05, | |
| "loss": 3.7806, | |
| "num_input_tokens_seen": 159728, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.026855201139975884, | |
| "grad_norm": 4.537757396697998, | |
| "learning_rate": 4.99110780549093e-05, | |
| "loss": 3.7949, | |
| "num_input_tokens_seen": 162456, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.027403266469363148, | |
| "grad_norm": 5.187793731689453, | |
| "learning_rate": 4.990741381714498e-05, | |
| "loss": 3.7304, | |
| "num_input_tokens_seen": 165176, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.027951331798750412, | |
| "grad_norm": 5.144887447357178, | |
| "learning_rate": 4.990367573916894e-05, | |
| "loss": 3.7232, | |
| "num_input_tokens_seen": 168824, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.028499397128137673, | |
| "grad_norm": 5.238748550415039, | |
| "learning_rate": 4.989986383206302e-05, | |
| "loss": 3.5484, | |
| "num_input_tokens_seen": 172512, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.029047462457524938, | |
| "grad_norm": 4.251674652099609, | |
| "learning_rate": 4.9895978107127975e-05, | |
| "loss": 3.3929, | |
| "num_input_tokens_seen": 175544, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.0295955277869122, | |
| "grad_norm": 7.541206359863281, | |
| "learning_rate": 4.9892018575883354e-05, | |
| "loss": 3.5038, | |
| "num_input_tokens_seen": 178784, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.030143593116299463, | |
| "grad_norm": 3.8806400299072266, | |
| "learning_rate": 4.988798525006755e-05, | |
| "loss": 3.9488, | |
| "num_input_tokens_seen": 181112, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.030691658445686727, | |
| "grad_norm": 3.7918715476989746, | |
| "learning_rate": 4.988387814163771e-05, | |
| "loss": 3.4375, | |
| "num_input_tokens_seen": 185416, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.031239723775073988, | |
| "grad_norm": 4.9953813552856445, | |
| "learning_rate": 4.9879697262769706e-05, | |
| "loss": 3.7866, | |
| "num_input_tokens_seen": 188528, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.03178778910446125, | |
| "grad_norm": 4.683384418487549, | |
| "learning_rate": 4.9875442625858125e-05, | |
| "loss": 3.4738, | |
| "num_input_tokens_seen": 191472, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.03233585443384852, | |
| "grad_norm": 3.5414726734161377, | |
| "learning_rate": 4.987111424351622e-05, | |
| "loss": 3.6306, | |
| "num_input_tokens_seen": 195416, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.03288391976323578, | |
| "grad_norm": 6.5463547706604, | |
| "learning_rate": 4.9866712128575855e-05, | |
| "loss": 3.6409, | |
| "num_input_tokens_seen": 198576, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.03343198509262304, | |
| "grad_norm": 4.8504180908203125, | |
| "learning_rate": 4.9862236294087485e-05, | |
| "loss": 3.9698, | |
| "num_input_tokens_seen": 201432, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.033980050422010306, | |
| "grad_norm": 4.2637739181518555, | |
| "learning_rate": 4.98576867533201e-05, | |
| "loss": 3.4978, | |
| "num_input_tokens_seen": 204776, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.03452811575139757, | |
| "grad_norm": 6.201929569244385, | |
| "learning_rate": 4.9853063519761234e-05, | |
| "loss": 3.5306, | |
| "num_input_tokens_seen": 207984, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.03507618108078483, | |
| "grad_norm": 5.745614528656006, | |
| "learning_rate": 4.984836660711686e-05, | |
| "loss": 3.4114, | |
| "num_input_tokens_seen": 211304, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.035624246410172096, | |
| "grad_norm": 7.258711338043213, | |
| "learning_rate": 4.9843596029311386e-05, | |
| "loss": 3.5909, | |
| "num_input_tokens_seen": 214680, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.03617231173955936, | |
| "grad_norm": 5.421024799346924, | |
| "learning_rate": 4.9838751800487606e-05, | |
| "loss": 3.9625, | |
| "num_input_tokens_seen": 217472, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.03672037706894662, | |
| "grad_norm": 4.33311653137207, | |
| "learning_rate": 4.983383393500667e-05, | |
| "loss": 3.1581, | |
| "num_input_tokens_seen": 220824, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.03726844239833388, | |
| "grad_norm": 3.667479991912842, | |
| "learning_rate": 4.982884244744801e-05, | |
| "loss": 3.6578, | |
| "num_input_tokens_seen": 224464, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.037816507727721146, | |
| "grad_norm": 4.797352313995361, | |
| "learning_rate": 4.982377735260933e-05, | |
| "loss": 3.4615, | |
| "num_input_tokens_seen": 228120, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.03836457305710841, | |
| "grad_norm": 6.432485103607178, | |
| "learning_rate": 4.981863866550656e-05, | |
| "loss": 3.7862, | |
| "num_input_tokens_seen": 231112, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.03891263838649567, | |
| "grad_norm": 5.501232624053955, | |
| "learning_rate": 4.981342640137377e-05, | |
| "loss": 3.5962, | |
| "num_input_tokens_seen": 234456, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.039460703715882936, | |
| "grad_norm": 4.993545055389404, | |
| "learning_rate": 4.9808140575663186e-05, | |
| "loss": 3.4178, | |
| "num_input_tokens_seen": 237744, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.0400087690452702, | |
| "grad_norm": 4.6652421951293945, | |
| "learning_rate": 4.98027812040451e-05, | |
| "loss": 3.3215, | |
| "num_input_tokens_seen": 240240, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.04055683437465746, | |
| "grad_norm": 7.660661220550537, | |
| "learning_rate": 4.979734830240784e-05, | |
| "loss": 3.4482, | |
| "num_input_tokens_seen": 243344, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.041104899704044726, | |
| "grad_norm": 5.362435340881348, | |
| "learning_rate": 4.979184188685772e-05, | |
| "loss": 3.6152, | |
| "num_input_tokens_seen": 246928, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.041652965033431986, | |
| "grad_norm": 4.019466876983643, | |
| "learning_rate": 4.9786261973718984e-05, | |
| "loss": 3.4659, | |
| "num_input_tokens_seen": 250592, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.04220103036281925, | |
| "grad_norm": 3.5128304958343506, | |
| "learning_rate": 4.9780608579533774e-05, | |
| "loss": 3.369, | |
| "num_input_tokens_seen": 254136, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.04274909569220651, | |
| "grad_norm": 5.328804969787598, | |
| "learning_rate": 4.9774881721062083e-05, | |
| "loss": 3.396, | |
| "num_input_tokens_seen": 257000, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.043297161021593776, | |
| "grad_norm": 3.9344732761383057, | |
| "learning_rate": 4.976908141528168e-05, | |
| "loss": 3.5748, | |
| "num_input_tokens_seen": 259544, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.04384522635098104, | |
| "grad_norm": 6.34092903137207, | |
| "learning_rate": 4.976320767938808e-05, | |
| "loss": 3.2784, | |
| "num_input_tokens_seen": 262648, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0443932916803683, | |
| "grad_norm": 6.228747367858887, | |
| "learning_rate": 4.975726053079448e-05, | |
| "loss": 3.7733, | |
| "num_input_tokens_seen": 265800, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.044941357009755566, | |
| "grad_norm": 6.360103130340576, | |
| "learning_rate": 4.9751239987131735e-05, | |
| "loss": 3.3795, | |
| "num_input_tokens_seen": 268352, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.045489422339142827, | |
| "grad_norm": 5.080907821655273, | |
| "learning_rate": 4.9745146066248275e-05, | |
| "loss": 3.4467, | |
| "num_input_tokens_seen": 271416, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.04603748766853009, | |
| "grad_norm": 4.075165271759033, | |
| "learning_rate": 4.973897878621005e-05, | |
| "loss": 3.4581, | |
| "num_input_tokens_seen": 274912, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.046585552997917355, | |
| "grad_norm": 4.517000675201416, | |
| "learning_rate": 4.973273816530051e-05, | |
| "loss": 3.3681, | |
| "num_input_tokens_seen": 279184, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.047133618327304616, | |
| "grad_norm": 5.66272497177124, | |
| "learning_rate": 4.9726424222020527e-05, | |
| "loss": 3.8983, | |
| "num_input_tokens_seen": 283008, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.04768168365669188, | |
| "grad_norm": 5.277008056640625, | |
| "learning_rate": 4.9720036975088334e-05, | |
| "loss": 3.8482, | |
| "num_input_tokens_seen": 285408, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.04822974898607914, | |
| "grad_norm": 5.911515235900879, | |
| "learning_rate": 4.971357644343948e-05, | |
| "loss": 3.7086, | |
| "num_input_tokens_seen": 287672, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.048777814315466406, | |
| "grad_norm": 5.71356725692749, | |
| "learning_rate": 4.9707042646226784e-05, | |
| "loss": 3.7235, | |
| "num_input_tokens_seen": 290608, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.04932587964485367, | |
| "grad_norm": 4.606592178344727, | |
| "learning_rate": 4.9700435602820276e-05, | |
| "loss": 3.5481, | |
| "num_input_tokens_seen": 293688, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04987394497424093, | |
| "grad_norm": 5.814152240753174, | |
| "learning_rate": 4.969375533280708e-05, | |
| "loss": 3.38, | |
| "num_input_tokens_seen": 297160, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.050422010303628195, | |
| "grad_norm": 5.669627666473389, | |
| "learning_rate": 4.968700185599147e-05, | |
| "loss": 3.5052, | |
| "num_input_tokens_seen": 300608, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.050970075633015456, | |
| "grad_norm": 4.943079471588135, | |
| "learning_rate": 4.96801751923947e-05, | |
| "loss": 3.5689, | |
| "num_input_tokens_seen": 303680, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.05151814096240272, | |
| "grad_norm": 5.5774664878845215, | |
| "learning_rate": 4.9673275362255035e-05, | |
| "loss": 3.1872, | |
| "num_input_tokens_seen": 306664, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.052066206291789985, | |
| "grad_norm": 5.742215633392334, | |
| "learning_rate": 4.966630238602761e-05, | |
| "loss": 3.873, | |
| "num_input_tokens_seen": 310024, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.052614271621177246, | |
| "grad_norm": 5.4475507736206055, | |
| "learning_rate": 4.9659256284384434e-05, | |
| "loss": 3.5306, | |
| "num_input_tokens_seen": 313296, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.05316233695056451, | |
| "grad_norm": 5.270495414733887, | |
| "learning_rate": 4.965213707821428e-05, | |
| "loss": 3.3911, | |
| "num_input_tokens_seen": 317528, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.05371040227995177, | |
| "grad_norm": 4.345836639404297, | |
| "learning_rate": 4.964494478862267e-05, | |
| "loss": 3.338, | |
| "num_input_tokens_seen": 320224, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.054258467609339035, | |
| "grad_norm": 8.715791702270508, | |
| "learning_rate": 4.963767943693178e-05, | |
| "loss": 3.6676, | |
| "num_input_tokens_seen": 323576, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.054806532938726296, | |
| "grad_norm": 6.43541955947876, | |
| "learning_rate": 4.9630341044680375e-05, | |
| "loss": 3.4779, | |
| "num_input_tokens_seen": 326840, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05535459826811356, | |
| "grad_norm": 5.299740314483643, | |
| "learning_rate": 4.962292963362376e-05, | |
| "loss": 3.0794, | |
| "num_input_tokens_seen": 330400, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.055902663597500825, | |
| "grad_norm": 5.377191543579102, | |
| "learning_rate": 4.9615445225733714e-05, | |
| "loss": 3.3778, | |
| "num_input_tokens_seen": 334264, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.056450728926888086, | |
| "grad_norm": 4.671337127685547, | |
| "learning_rate": 4.9607887843198417e-05, | |
| "loss": 3.2423, | |
| "num_input_tokens_seen": 338632, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.05699879425627535, | |
| "grad_norm": 4.917747497558594, | |
| "learning_rate": 4.960025750842241e-05, | |
| "loss": 3.2912, | |
| "num_input_tokens_seen": 341576, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.057546859585662614, | |
| "grad_norm": 5.633148670196533, | |
| "learning_rate": 4.959255424402647e-05, | |
| "loss": 3.9649, | |
| "num_input_tokens_seen": 343752, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.058094924915049875, | |
| "grad_norm": 5.843842506408691, | |
| "learning_rate": 4.9584778072847605e-05, | |
| "loss": 3.5301, | |
| "num_input_tokens_seen": 346768, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.058642990244437136, | |
| "grad_norm": 6.019566059112549, | |
| "learning_rate": 4.957692901793896e-05, | |
| "loss": 3.7123, | |
| "num_input_tokens_seen": 349488, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.0591910555738244, | |
| "grad_norm": 5.83019495010376, | |
| "learning_rate": 4.9569007102569746e-05, | |
| "loss": 4.0987, | |
| "num_input_tokens_seen": 353448, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.059739120903211665, | |
| "grad_norm": 7.744917392730713, | |
| "learning_rate": 4.9561012350225174e-05, | |
| "loss": 3.4271, | |
| "num_input_tokens_seen": 357336, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.060287186232598926, | |
| "grad_norm": 6.845799922943115, | |
| "learning_rate": 4.955294478460638e-05, | |
| "loss": 3.7176, | |
| "num_input_tokens_seen": 361272, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.06083525156198619, | |
| "grad_norm": 7.8909592628479, | |
| "learning_rate": 4.954480442963038e-05, | |
| "loss": 3.3092, | |
| "num_input_tokens_seen": 364048, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.061383316891373454, | |
| "grad_norm": 6.57379674911499, | |
| "learning_rate": 4.953659130942997e-05, | |
| "loss": 4.0073, | |
| "num_input_tokens_seen": 368336, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.061931382220760715, | |
| "grad_norm": 5.875579833984375, | |
| "learning_rate": 4.952830544835366e-05, | |
| "loss": 3.4651, | |
| "num_input_tokens_seen": 370824, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.062479447550147976, | |
| "grad_norm": 5.310330867767334, | |
| "learning_rate": 4.951994687096562e-05, | |
| "loss": 3.8036, | |
| "num_input_tokens_seen": 374104, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.06302751287953524, | |
| "grad_norm": 6.611202239990234, | |
| "learning_rate": 4.9511515602045563e-05, | |
| "loss": 3.2939, | |
| "num_input_tokens_seen": 376176, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.0635755782089225, | |
| "grad_norm": 4.5933451652526855, | |
| "learning_rate": 4.950301166658875e-05, | |
| "loss": 3.529, | |
| "num_input_tokens_seen": 378600, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.06412364353830977, | |
| "grad_norm": 5.080543518066406, | |
| "learning_rate": 4.9494435089805835e-05, | |
| "loss": 4.0958, | |
| "num_input_tokens_seen": 382584, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.06467170886769703, | |
| "grad_norm": 4.658755779266357, | |
| "learning_rate": 4.948578589712283e-05, | |
| "loss": 3.3213, | |
| "num_input_tokens_seen": 386376, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.06521977419708429, | |
| "grad_norm": 5.556814670562744, | |
| "learning_rate": 4.9477064114181026e-05, | |
| "loss": 3.5986, | |
| "num_input_tokens_seen": 390784, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.06576783952647156, | |
| "grad_norm": 6.1433491706848145, | |
| "learning_rate": 4.946826976683691e-05, | |
| "loss": 3.4305, | |
| "num_input_tokens_seen": 395104, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.06631590485585882, | |
| "grad_norm": 4.176370143890381, | |
| "learning_rate": 4.9459402881162095e-05, | |
| "loss": 3.6053, | |
| "num_input_tokens_seen": 398072, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.06686397018524608, | |
| "grad_norm": 4.746314525604248, | |
| "learning_rate": 4.945046348344325e-05, | |
| "loss": 3.4613, | |
| "num_input_tokens_seen": 401112, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.06741203551463334, | |
| "grad_norm": 6.04541015625, | |
| "learning_rate": 4.9441451600182e-05, | |
| "loss": 3.3843, | |
| "num_input_tokens_seen": 404728, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.06796010084402061, | |
| "grad_norm": 4.687957763671875, | |
| "learning_rate": 4.943236725809485e-05, | |
| "loss": 3.6494, | |
| "num_input_tokens_seen": 407824, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.06850816617340787, | |
| "grad_norm": 5.392053604125977, | |
| "learning_rate": 4.942321048411314e-05, | |
| "loss": 3.7716, | |
| "num_input_tokens_seen": 410064, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.06905623150279513, | |
| "grad_norm": 5.196096420288086, | |
| "learning_rate": 4.9413981305382936e-05, | |
| "loss": 3.7037, | |
| "num_input_tokens_seen": 413664, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.0696042968321824, | |
| "grad_norm": 4.464987754821777, | |
| "learning_rate": 4.940467974926493e-05, | |
| "loss": 3.0886, | |
| "num_input_tokens_seen": 416752, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.07015236216156966, | |
| "grad_norm": 4.81376838684082, | |
| "learning_rate": 4.939530584333441e-05, | |
| "loss": 3.11, | |
| "num_input_tokens_seen": 420552, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.07070042749095692, | |
| "grad_norm": 5.184936046600342, | |
| "learning_rate": 4.938585961538115e-05, | |
| "loss": 3.1776, | |
| "num_input_tokens_seen": 423200, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.07124849282034419, | |
| "grad_norm": 7.05800724029541, | |
| "learning_rate": 4.9376341093409305e-05, | |
| "loss": 3.2882, | |
| "num_input_tokens_seen": 426840, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.07179655814973145, | |
| "grad_norm": 7.437703609466553, | |
| "learning_rate": 4.9366750305637385e-05, | |
| "loss": 3.3796, | |
| "num_input_tokens_seen": 430168, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.07234462347911871, | |
| "grad_norm": 7.665436744689941, | |
| "learning_rate": 4.9357087280498105e-05, | |
| "loss": 3.6646, | |
| "num_input_tokens_seen": 433080, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.07289268880850597, | |
| "grad_norm": 7.2700324058532715, | |
| "learning_rate": 4.934735204663835e-05, | |
| "loss": 3.4558, | |
| "num_input_tokens_seen": 436600, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.07344075413789324, | |
| "grad_norm": 4.932444095611572, | |
| "learning_rate": 4.9337544632919085e-05, | |
| "loss": 3.1135, | |
| "num_input_tokens_seen": 439552, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.0739888194672805, | |
| "grad_norm": 6.515824794769287, | |
| "learning_rate": 4.9327665068415254e-05, | |
| "loss": 3.3952, | |
| "num_input_tokens_seen": 442776, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.07453688479666776, | |
| "grad_norm": 6.392978668212891, | |
| "learning_rate": 4.931771338241566e-05, | |
| "loss": 3.5728, | |
| "num_input_tokens_seen": 445344, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.07508495012605503, | |
| "grad_norm": 5.692570209503174, | |
| "learning_rate": 4.930768960442299e-05, | |
| "loss": 3.3921, | |
| "num_input_tokens_seen": 449360, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.07563301545544229, | |
| "grad_norm": 10.294317245483398, | |
| "learning_rate": 4.929759376415358e-05, | |
| "loss": 3.6814, | |
| "num_input_tokens_seen": 452736, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.07618108078482955, | |
| "grad_norm": 7.613968849182129, | |
| "learning_rate": 4.9287425891537454e-05, | |
| "loss": 3.5298, | |
| "num_input_tokens_seen": 455648, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.07672914611421681, | |
| "grad_norm": 5.538883209228516, | |
| "learning_rate": 4.927718601671816e-05, | |
| "loss": 3.4538, | |
| "num_input_tokens_seen": 458256, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.07727721144360408, | |
| "grad_norm": 5.105963706970215, | |
| "learning_rate": 4.926687417005268e-05, | |
| "loss": 3.3759, | |
| "num_input_tokens_seen": 461984, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.07782527677299134, | |
| "grad_norm": 5.424991130828857, | |
| "learning_rate": 4.925649038211142e-05, | |
| "loss": 3.4941, | |
| "num_input_tokens_seen": 465216, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.0783733421023786, | |
| "grad_norm": 6.287330627441406, | |
| "learning_rate": 4.924603468367801e-05, | |
| "loss": 3.3536, | |
| "num_input_tokens_seen": 468496, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.07892140743176587, | |
| "grad_norm": 7.270327568054199, | |
| "learning_rate": 4.923550710574929e-05, | |
| "loss": 3.1898, | |
| "num_input_tokens_seen": 471784, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.07946947276115313, | |
| "grad_norm": 5.402751922607422, | |
| "learning_rate": 4.922490767953519e-05, | |
| "loss": 3.7645, | |
| "num_input_tokens_seen": 474928, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.0800175380905404, | |
| "grad_norm": 5.472609996795654, | |
| "learning_rate": 4.921423643645863e-05, | |
| "loss": 3.5023, | |
| "num_input_tokens_seen": 479376, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.08056560341992766, | |
| "grad_norm": 4.318566799163818, | |
| "learning_rate": 4.9203493408155455e-05, | |
| "loss": 3.1444, | |
| "num_input_tokens_seen": 482328, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.08111366874931492, | |
| "grad_norm": 6.903258800506592, | |
| "learning_rate": 4.919267862647431e-05, | |
| "loss": 3.8837, | |
| "num_input_tokens_seen": 486248, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.08166173407870218, | |
| "grad_norm": 4.821303844451904, | |
| "learning_rate": 4.918179212347657e-05, | |
| "loss": 3.7363, | |
| "num_input_tokens_seen": 489736, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.08220979940808945, | |
| "grad_norm": 4.108252048492432, | |
| "learning_rate": 4.917083393143621e-05, | |
| "loss": 3.0709, | |
| "num_input_tokens_seen": 492784, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.0827578647374767, | |
| "grad_norm": 6.259218215942383, | |
| "learning_rate": 4.915980408283977e-05, | |
| "loss": 3.4733, | |
| "num_input_tokens_seen": 496528, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.08330593006686397, | |
| "grad_norm": 5.9338531494140625, | |
| "learning_rate": 4.91487026103862e-05, | |
| "loss": 3.8987, | |
| "num_input_tokens_seen": 500832, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.08385399539625123, | |
| "grad_norm": 5.397777557373047, | |
| "learning_rate": 4.913752954698677e-05, | |
| "loss": 3.3764, | |
| "num_input_tokens_seen": 503744, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.0844020607256385, | |
| "grad_norm": 5.536934852600098, | |
| "learning_rate": 4.912628492576503e-05, | |
| "loss": 3.7953, | |
| "num_input_tokens_seen": 507656, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.08495012605502576, | |
| "grad_norm": 5.932541847229004, | |
| "learning_rate": 4.9114968780056635e-05, | |
| "loss": 3.4254, | |
| "num_input_tokens_seen": 511216, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.08549819138441302, | |
| "grad_norm": 5.971353530883789, | |
| "learning_rate": 4.910358114340929e-05, | |
| "loss": 3.6466, | |
| "num_input_tokens_seen": 514328, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.08604625671380028, | |
| "grad_norm": 8.010024070739746, | |
| "learning_rate": 4.9092122049582636e-05, | |
| "loss": 3.9475, | |
| "num_input_tokens_seen": 518200, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.08659432204318755, | |
| "grad_norm": 6.520806312561035, | |
| "learning_rate": 4.9080591532548175e-05, | |
| "loss": 3.4056, | |
| "num_input_tokens_seen": 521704, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.0871423873725748, | |
| "grad_norm": 5.646440029144287, | |
| "learning_rate": 4.9068989626489126e-05, | |
| "loss": 3.5912, | |
| "num_input_tokens_seen": 524456, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.08769045270196207, | |
| "grad_norm": 4.937885284423828, | |
| "learning_rate": 4.9057316365800366e-05, | |
| "loss": 3.4854, | |
| "num_input_tokens_seen": 526920, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.08823851803134934, | |
| "grad_norm": 6.204067230224609, | |
| "learning_rate": 4.904557178508829e-05, | |
| "loss": 3.3649, | |
| "num_input_tokens_seen": 530544, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.0887865833607366, | |
| "grad_norm": 6.427296161651611, | |
| "learning_rate": 4.9033755919170733e-05, | |
| "loss": 3.8582, | |
| "num_input_tokens_seen": 532832, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.08933464869012386, | |
| "grad_norm": 7.1010589599609375, | |
| "learning_rate": 4.9021868803076875e-05, | |
| "loss": 3.5353, | |
| "num_input_tokens_seen": 536056, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.08988271401951113, | |
| "grad_norm": 4.813199043273926, | |
| "learning_rate": 4.900991047204712e-05, | |
| "loss": 3.2529, | |
| "num_input_tokens_seen": 539248, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.09043077934889839, | |
| "grad_norm": 7.545267581939697, | |
| "learning_rate": 4.899788096153297e-05, | |
| "loss": 3.0758, | |
| "num_input_tokens_seen": 543584, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.09097884467828565, | |
| "grad_norm": 5.574884414672852, | |
| "learning_rate": 4.898578030719698e-05, | |
| "loss": 3.0291, | |
| "num_input_tokens_seen": 546792, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.09152691000767292, | |
| "grad_norm": 5.587398529052734, | |
| "learning_rate": 4.897360854491259e-05, | |
| "loss": 3.2747, | |
| "num_input_tokens_seen": 549296, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.09207497533706017, | |
| "grad_norm": 6.558215618133545, | |
| "learning_rate": 4.896136571076406e-05, | |
| "loss": 3.4765, | |
| "num_input_tokens_seen": 551784, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.09262304066644744, | |
| "grad_norm": 5.221803188323975, | |
| "learning_rate": 4.894905184104634e-05, | |
| "loss": 3.3299, | |
| "num_input_tokens_seen": 555608, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.09317110599583471, | |
| "grad_norm": null, | |
| "learning_rate": 4.8939149624187016e-05, | |
| "loss": 3.5208, | |
| "num_input_tokens_seen": 558848, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.09371917132522196, | |
| "grad_norm": 5.915983200073242, | |
| "learning_rate": 4.8926707982580194e-05, | |
| "loss": 3.5031, | |
| "num_input_tokens_seen": 562384, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.09426723665460923, | |
| "grad_norm": 6.868443965911865, | |
| "learning_rate": 4.891419540815006e-05, | |
| "loss": 3.5194, | |
| "num_input_tokens_seen": 565648, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.09481530198399649, | |
| "grad_norm": 6.696837902069092, | |
| "learning_rate": 4.8901611937991244e-05, | |
| "loss": 3.4405, | |
| "num_input_tokens_seen": 568384, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.09536336731338375, | |
| "grad_norm": 6.879650592803955, | |
| "learning_rate": 4.8888957609408535e-05, | |
| "loss": 3.2062, | |
| "num_input_tokens_seen": 571184, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.09591143264277102, | |
| "grad_norm": 5.235931396484375, | |
| "learning_rate": 4.8876232459916805e-05, | |
| "loss": 3.351, | |
| "num_input_tokens_seen": 575328, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.09645949797215828, | |
| "grad_norm": 6.496284008026123, | |
| "learning_rate": 4.886343652724088e-05, | |
| "loss": 3.3753, | |
| "num_input_tokens_seen": 578520, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.09700756330154554, | |
| "grad_norm": 8.708456039428711, | |
| "learning_rate": 4.8850569849315414e-05, | |
| "loss": 3.4456, | |
| "num_input_tokens_seen": 581688, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.09755562863093281, | |
| "grad_norm": 5.558722496032715, | |
| "learning_rate": 4.883763246428481e-05, | |
| "loss": 3.3753, | |
| "num_input_tokens_seen": 584736, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.09810369396032007, | |
| "grad_norm": 6.443663597106934, | |
| "learning_rate": 4.882462441050308e-05, | |
| "loss": 3.5381, | |
| "num_input_tokens_seen": 587952, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.09865175928970733, | |
| "grad_norm": 6.3144073486328125, | |
| "learning_rate": 4.881154572653373e-05, | |
| "loss": 3.5416, | |
| "num_input_tokens_seen": 590704, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0991998246190946, | |
| "grad_norm": 5.615172386169434, | |
| "learning_rate": 4.8798396451149676e-05, | |
| "loss": 3.5944, | |
| "num_input_tokens_seen": 593056, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.09974788994848185, | |
| "grad_norm": 6.011329174041748, | |
| "learning_rate": 4.8785176623333094e-05, | |
| "loss": 3.2378, | |
| "num_input_tokens_seen": 596584, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.10029595527786912, | |
| "grad_norm": 5.445102214813232, | |
| "learning_rate": 4.8771886282275324e-05, | |
| "loss": 3.6375, | |
| "num_input_tokens_seen": 600080, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.10084402060725639, | |
| "grad_norm": 6.635453701019287, | |
| "learning_rate": 4.875852546737675e-05, | |
| "loss": 3.5498, | |
| "num_input_tokens_seen": 602696, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.10139208593664364, | |
| "grad_norm": 5.236489772796631, | |
| "learning_rate": 4.874509421824667e-05, | |
| "loss": 3.4216, | |
| "num_input_tokens_seen": 606200, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.10194015126603091, | |
| "grad_norm": 6.734245300292969, | |
| "learning_rate": 4.87315925747032e-05, | |
| "loss": 3.3747, | |
| "num_input_tokens_seen": 609848, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.10248821659541818, | |
| "grad_norm": 6.802552223205566, | |
| "learning_rate": 4.871802057677315e-05, | |
| "loss": 3.2441, | |
| "num_input_tokens_seen": 613440, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.10303628192480543, | |
| "grad_norm": 6.780172824859619, | |
| "learning_rate": 4.8704378264691894e-05, | |
| "loss": 3.4606, | |
| "num_input_tokens_seen": 617088, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.1035843472541927, | |
| "grad_norm": 6.527922630310059, | |
| "learning_rate": 4.869066567890327e-05, | |
| "loss": 3.4019, | |
| "num_input_tokens_seen": 619952, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.10413241258357997, | |
| "grad_norm": 6.2412214279174805, | |
| "learning_rate": 4.867688286005944e-05, | |
| "loss": 3.2408, | |
| "num_input_tokens_seen": 623088, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.10468047791296722, | |
| "grad_norm": 6.477228164672852, | |
| "learning_rate": 4.8663029849020775e-05, | |
| "loss": 3.2491, | |
| "num_input_tokens_seen": 626376, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.10522854324235449, | |
| "grad_norm": 5.359529495239258, | |
| "learning_rate": 4.864910668685574e-05, | |
| "loss": 3.1534, | |
| "num_input_tokens_seen": 628800, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.10577660857174175, | |
| "grad_norm": 5.2979960441589355, | |
| "learning_rate": 4.863511341484077e-05, | |
| "loss": 3.4653, | |
| "num_input_tokens_seen": 631312, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.10632467390112901, | |
| "grad_norm": 12.67263126373291, | |
| "learning_rate": 4.8621050074460136e-05, | |
| "loss": 3.8407, | |
| "num_input_tokens_seen": 634144, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.10687273923051628, | |
| "grad_norm": 4.020299434661865, | |
| "learning_rate": 4.860691670740587e-05, | |
| "loss": 3.6273, | |
| "num_input_tokens_seen": 637568, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.10742080455990353, | |
| "grad_norm": 5.12907075881958, | |
| "learning_rate": 4.8592713355577555e-05, | |
| "loss": 2.9803, | |
| "num_input_tokens_seen": 640368, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.1079688698892908, | |
| "grad_norm": 5.088891983032227, | |
| "learning_rate": 4.8578440061082275e-05, | |
| "loss": 3.0532, | |
| "num_input_tokens_seen": 643928, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.10851693521867807, | |
| "grad_norm": 6.150454521179199, | |
| "learning_rate": 4.856409686623447e-05, | |
| "loss": 3.5733, | |
| "num_input_tokens_seen": 648192, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.10906500054806532, | |
| "grad_norm": 6.601188659667969, | |
| "learning_rate": 4.85496838135558e-05, | |
| "loss": 3.4824, | |
| "num_input_tokens_seen": 652272, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.10961306587745259, | |
| "grad_norm": 6.9974141120910645, | |
| "learning_rate": 4.8535200945775016e-05, | |
| "loss": 3.516, | |
| "num_input_tokens_seen": 655696, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.11016113120683986, | |
| "grad_norm": 7.116706371307373, | |
| "learning_rate": 4.8520648305827855e-05, | |
| "loss": 3.4208, | |
| "num_input_tokens_seen": 658560, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.11070919653622711, | |
| "grad_norm": 5.209189414978027, | |
| "learning_rate": 4.850602593685689e-05, | |
| "loss": 3.353, | |
| "num_input_tokens_seen": 662152, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.11125726186561438, | |
| "grad_norm": 5.9092278480529785, | |
| "learning_rate": 4.8491333882211416e-05, | |
| "loss": 3.2833, | |
| "num_input_tokens_seen": 665968, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.11180532719500165, | |
| "grad_norm": 7.026948928833008, | |
| "learning_rate": 4.847657218544732e-05, | |
| "loss": 3.291, | |
| "num_input_tokens_seen": 668808, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.1123533925243889, | |
| "grad_norm": 6.154213905334473, | |
| "learning_rate": 4.8461740890326936e-05, | |
| "loss": 3.3035, | |
| "num_input_tokens_seen": 672280, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.11290145785377617, | |
| "grad_norm": 6.6929521560668945, | |
| "learning_rate": 4.844684004081895e-05, | |
| "loss": 3.6387, | |
| "num_input_tokens_seen": 675184, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.11344952318316344, | |
| "grad_norm": 5.449969291687012, | |
| "learning_rate": 4.843186968109823e-05, | |
| "loss": 3.1393, | |
| "num_input_tokens_seen": 677824, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.1139975885125507, | |
| "grad_norm": 3.6720149517059326, | |
| "learning_rate": 4.841682985554573e-05, | |
| "loss": 3.2646, | |
| "num_input_tokens_seen": 682856, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.11454565384193796, | |
| "grad_norm": 5.606584072113037, | |
| "learning_rate": 4.8401720608748324e-05, | |
| "loss": 3.3697, | |
| "num_input_tokens_seen": 687680, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.11509371917132523, | |
| "grad_norm": 5.044498920440674, | |
| "learning_rate": 4.83865419854987e-05, | |
| "loss": 3.3275, | |
| "num_input_tokens_seen": 690616, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.11564178450071248, | |
| "grad_norm": 5.938497543334961, | |
| "learning_rate": 4.83712940307952e-05, | |
| "loss": 3.1055, | |
| "num_input_tokens_seen": 693808, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.11618984983009975, | |
| "grad_norm": 7.216318607330322, | |
| "learning_rate": 4.8355976789841754e-05, | |
| "loss": 3.5388, | |
| "num_input_tokens_seen": 696992, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.116737915159487, | |
| "grad_norm": 5.2063164710998535, | |
| "learning_rate": 4.834059030804764e-05, | |
| "loss": 3.3436, | |
| "num_input_tokens_seen": 700448, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.11728598048887427, | |
| "grad_norm": 6.457626819610596, | |
| "learning_rate": 4.832513463102745e-05, | |
| "loss": 3.281, | |
| "num_input_tokens_seen": 702928, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.11783404581826154, | |
| "grad_norm": 5.837212562561035, | |
| "learning_rate": 4.8309609804600886e-05, | |
| "loss": 3.3414, | |
| "num_input_tokens_seen": 707064, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.1183821111476488, | |
| "grad_norm": 5.227325439453125, | |
| "learning_rate": 4.829401587479265e-05, | |
| "loss": 3.0907, | |
| "num_input_tokens_seen": 711056, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.11893017647703606, | |
| "grad_norm": 7.185408115386963, | |
| "learning_rate": 4.8278352887832326e-05, | |
| "loss": 3.159, | |
| "num_input_tokens_seen": 714472, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.11947824180642333, | |
| "grad_norm": 7.311601638793945, | |
| "learning_rate": 4.82626208901542e-05, | |
| "loss": 3.5405, | |
| "num_input_tokens_seen": 717400, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.12002630713581058, | |
| "grad_norm": 4.9710693359375, | |
| "learning_rate": 4.824681992839717e-05, | |
| "loss": 3.3058, | |
| "num_input_tokens_seen": 720472, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.12057437246519785, | |
| "grad_norm": 4.5781779289245605, | |
| "learning_rate": 4.823095004940456e-05, | |
| "loss": 3.1374, | |
| "num_input_tokens_seen": 723808, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.12112243779458512, | |
| "grad_norm": 6.077118396759033, | |
| "learning_rate": 4.8215011300224027e-05, | |
| "loss": 3.1628, | |
| "num_input_tokens_seen": 727576, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.12167050312397237, | |
| "grad_norm": 6.6747870445251465, | |
| "learning_rate": 4.819900372810739e-05, | |
| "loss": 3.5095, | |
| "num_input_tokens_seen": 730536, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.12221856845335964, | |
| "grad_norm": 5.468014240264893, | |
| "learning_rate": 4.818292738051049e-05, | |
| "loss": 3.521, | |
| "num_input_tokens_seen": 733024, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.12276663378274691, | |
| "grad_norm": 6.263638019561768, | |
| "learning_rate": 4.816678230509308e-05, | |
| "loss": 3.2318, | |
| "num_input_tokens_seen": 736048, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.12331469911213416, | |
| "grad_norm": 5.998656272888184, | |
| "learning_rate": 4.8150568549718655e-05, | |
| "loss": 3.0286, | |
| "num_input_tokens_seen": 739264, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.12386276444152143, | |
| "grad_norm": 6.395206928253174, | |
| "learning_rate": 4.81342861624543e-05, | |
| "loss": 3.4223, | |
| "num_input_tokens_seen": 742008, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.1244108297709087, | |
| "grad_norm": 6.199779510498047, | |
| "learning_rate": 4.811793519157059e-05, | |
| "loss": 3.5237, | |
| "num_input_tokens_seen": 745064, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.12495889510029595, | |
| "grad_norm": 6.504228115081787, | |
| "learning_rate": 4.81015156855414e-05, | |
| "loss": 3.4249, | |
| "num_input_tokens_seen": 748104, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.1255069604296832, | |
| "grad_norm": 6.280592441558838, | |
| "learning_rate": 4.80850276930438e-05, | |
| "loss": 3.0411, | |
| "num_input_tokens_seen": 752032, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.1260550257590705, | |
| "grad_norm": 8.529096603393555, | |
| "learning_rate": 4.806847126295789e-05, | |
| "loss": 3.1457, | |
| "num_input_tokens_seen": 755400, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.12660309108845774, | |
| "grad_norm": 6.454196453094482, | |
| "learning_rate": 4.8051846444366676e-05, | |
| "loss": 3.0008, | |
| "num_input_tokens_seen": 758392, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.127151156417845, | |
| "grad_norm": 6.862017631530762, | |
| "learning_rate": 4.803515328655586e-05, | |
| "loss": 3.3972, | |
| "num_input_tokens_seen": 760824, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.12769922174723228, | |
| "grad_norm": 6.56373929977417, | |
| "learning_rate": 4.8018391839013784e-05, | |
| "loss": 3.4338, | |
| "num_input_tokens_seen": 763680, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.12824728707661953, | |
| "grad_norm": 5.431229114532471, | |
| "learning_rate": 4.800156215143124e-05, | |
| "loss": 3.2619, | |
| "num_input_tokens_seen": 767352, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.12879535240600679, | |
| "grad_norm": 5.761483192443848, | |
| "learning_rate": 4.7984664273701305e-05, | |
| "loss": 3.3616, | |
| "num_input_tokens_seen": 771096, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.12934341773539407, | |
| "grad_norm": 7.804869651794434, | |
| "learning_rate": 4.796769825591921e-05, | |
| "loss": 3.2658, | |
| "num_input_tokens_seen": 774192, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.12989148306478132, | |
| "grad_norm": 5.688300609588623, | |
| "learning_rate": 4.7950664148382205e-05, | |
| "loss": 3.7069, | |
| "num_input_tokens_seen": 777712, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.13043954839416858, | |
| "grad_norm": 4.980658054351807, | |
| "learning_rate": 4.793356200158941e-05, | |
| "loss": 3.0386, | |
| "num_input_tokens_seen": 780680, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.13098761372355586, | |
| "grad_norm": 6.9450249671936035, | |
| "learning_rate": 4.791639186624162e-05, | |
| "loss": 3.4293, | |
| "num_input_tokens_seen": 783664, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.1315356790529431, | |
| "grad_norm": 6.7938408851623535, | |
| "learning_rate": 4.789915379324121e-05, | |
| "loss": 3.2908, | |
| "num_input_tokens_seen": 787480, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.13208374438233036, | |
| "grad_norm": 5.833454608917236, | |
| "learning_rate": 4.788184783369196e-05, | |
| "loss": 3.3431, | |
| "num_input_tokens_seen": 791560, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.13263180971171765, | |
| "grad_norm": 6.020946502685547, | |
| "learning_rate": 4.786447403889891e-05, | |
| "loss": 3.1235, | |
| "num_input_tokens_seen": 794600, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.1331798750411049, | |
| "grad_norm": 9.639689445495605, | |
| "learning_rate": 4.78470324603682e-05, | |
| "loss": 3.357, | |
| "num_input_tokens_seen": 796976, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.13372794037049215, | |
| "grad_norm": 5.102296829223633, | |
| "learning_rate": 4.782952314980691e-05, | |
| "loss": 3.4762, | |
| "num_input_tokens_seen": 801208, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.13427600569987944, | |
| "grad_norm": 6.015713214874268, | |
| "learning_rate": 4.781194615912292e-05, | |
| "loss": 3.2738, | |
| "num_input_tokens_seen": 804472, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.1348240710292667, | |
| "grad_norm": 7.88398551940918, | |
| "learning_rate": 4.7794301540424774e-05, | |
| "loss": 3.3333, | |
| "num_input_tokens_seen": 807568, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.13537213635865394, | |
| "grad_norm": 6.841670989990234, | |
| "learning_rate": 4.7776589346021486e-05, | |
| "loss": 3.5167, | |
| "num_input_tokens_seen": 811016, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.13592020168804123, | |
| "grad_norm": 6.089728355407715, | |
| "learning_rate": 4.775880962842241e-05, | |
| "loss": 3.703, | |
| "num_input_tokens_seen": 814536, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.13646826701742848, | |
| "grad_norm": 6.35260009765625, | |
| "learning_rate": 4.774096244033707e-05, | |
| "loss": 3.1131, | |
| "num_input_tokens_seen": 817496, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.13701633234681573, | |
| "grad_norm": 5.8579254150390625, | |
| "learning_rate": 4.772304783467503e-05, | |
| "loss": 3.2992, | |
| "num_input_tokens_seen": 821712, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.13756439767620302, | |
| "grad_norm": 5.486454963684082, | |
| "learning_rate": 4.7705065864545695e-05, | |
| "loss": 3.1721, | |
| "num_input_tokens_seen": 824688, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.13811246300559027, | |
| "grad_norm": 6.544208526611328, | |
| "learning_rate": 4.7687016583258203e-05, | |
| "loss": 3.4493, | |
| "num_input_tokens_seen": 828400, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.13866052833497752, | |
| "grad_norm": 4.948637008666992, | |
| "learning_rate": 4.7668900044321236e-05, | |
| "loss": 3.0927, | |
| "num_input_tokens_seen": 831936, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.1392085936643648, | |
| "grad_norm": 6.64813756942749, | |
| "learning_rate": 4.7650716301442856e-05, | |
| "loss": 3.6065, | |
| "num_input_tokens_seen": 834912, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.13975665899375206, | |
| "grad_norm": 7.289310455322266, | |
| "learning_rate": 4.763246540853035e-05, | |
| "loss": 3.3871, | |
| "num_input_tokens_seen": 839072, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.1403047243231393, | |
| "grad_norm": 5.887922763824463, | |
| "learning_rate": 4.761414741969011e-05, | |
| "loss": 3.1424, | |
| "num_input_tokens_seen": 842568, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.1408527896525266, | |
| "grad_norm": 6.820570468902588, | |
| "learning_rate": 4.7595762389227406e-05, | |
| "loss": 3.0197, | |
| "num_input_tokens_seen": 845808, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.14140085498191385, | |
| "grad_norm": 6.593437671661377, | |
| "learning_rate": 4.757731037164628e-05, | |
| "loss": 3.2013, | |
| "num_input_tokens_seen": 849184, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.1419489203113011, | |
| "grad_norm": 8.89852523803711, | |
| "learning_rate": 4.7558791421649354e-05, | |
| "loss": 3.5085, | |
| "num_input_tokens_seen": 852392, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.14249698564068838, | |
| "grad_norm": 7.368271827697754, | |
| "learning_rate": 4.754020559413768e-05, | |
| "loss": 3.3167, | |
| "num_input_tokens_seen": 855376, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.14304505097007564, | |
| "grad_norm": 5.54932975769043, | |
| "learning_rate": 4.752155294421056e-05, | |
| "loss": 3.0516, | |
| "num_input_tokens_seen": 858720, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.1435931162994629, | |
| "grad_norm": 8.180092811584473, | |
| "learning_rate": 4.750283352716543e-05, | |
| "loss": 3.4647, | |
| "num_input_tokens_seen": 861312, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.14414118162885015, | |
| "grad_norm": 6.608414173126221, | |
| "learning_rate": 4.748404739849763e-05, | |
| "loss": 3.3686, | |
| "num_input_tokens_seen": 864368, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.14468924695823743, | |
| "grad_norm": 6.880706787109375, | |
| "learning_rate": 4.746519461390029e-05, | |
| "loss": 3.0061, | |
| "num_input_tokens_seen": 868000, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.14523731228762468, | |
| "grad_norm": 4.034643650054932, | |
| "learning_rate": 4.744627522926414e-05, | |
| "loss": 3.3709, | |
| "num_input_tokens_seen": 871648, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.14578537761701194, | |
| "grad_norm": 5.335696220397949, | |
| "learning_rate": 4.742728930067736e-05, | |
| "loss": 3.0955, | |
| "num_input_tokens_seen": 875440, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.14633344294639922, | |
| "grad_norm": 8.005532264709473, | |
| "learning_rate": 4.7408236884425396e-05, | |
| "loss": 3.6277, | |
| "num_input_tokens_seen": 879208, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.14688150827578647, | |
| "grad_norm": 7.770083904266357, | |
| "learning_rate": 4.7389118036990795e-05, | |
| "loss": 3.5794, | |
| "num_input_tokens_seen": 882040, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.14742957360517372, | |
| "grad_norm": 6.539053916931152, | |
| "learning_rate": 4.736993281505307e-05, | |
| "loss": 3.2326, | |
| "num_input_tokens_seen": 884984, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.147977638934561, | |
| "grad_norm": 7.831300258636475, | |
| "learning_rate": 4.73506812754885e-05, | |
| "loss": 3.2767, | |
| "num_input_tokens_seen": 888128, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.14852570426394826, | |
| "grad_norm": 5.242404937744141, | |
| "learning_rate": 4.733136347536995e-05, | |
| "loss": 3.4698, | |
| "num_input_tokens_seen": 890520, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.14907376959333551, | |
| "grad_norm": 5.803912162780762, | |
| "learning_rate": 4.731197947196673e-05, | |
| "loss": 3.4711, | |
| "num_input_tokens_seen": 893464, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.1496218349227228, | |
| "grad_norm": 8.300127983093262, | |
| "learning_rate": 4.7292529322744416e-05, | |
| "loss": 3.2302, | |
| "num_input_tokens_seen": 897520, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.15016990025211005, | |
| "grad_norm": 5.02566385269165, | |
| "learning_rate": 4.7273013085364694e-05, | |
| "loss": 3.2959, | |
| "num_input_tokens_seen": 901416, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.1507179655814973, | |
| "grad_norm": 4.600845813751221, | |
| "learning_rate": 4.725343081768514e-05, | |
| "loss": 3.3303, | |
| "num_input_tokens_seen": 906432, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.15126603091088459, | |
| "grad_norm": 6.849578380584717, | |
| "learning_rate": 4.723378257775912e-05, | |
| "loss": 3.1125, | |
| "num_input_tokens_seen": 909264, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.15181409624027184, | |
| "grad_norm": 7.15298318862915, | |
| "learning_rate": 4.7214068423835566e-05, | |
| "loss": 3.2795, | |
| "num_input_tokens_seen": 912464, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.1523621615696591, | |
| "grad_norm": 5.415898323059082, | |
| "learning_rate": 4.7194288414358804e-05, | |
| "loss": 3.1385, | |
| "num_input_tokens_seen": 915960, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.15291022689904638, | |
| "grad_norm": 6.559721946716309, | |
| "learning_rate": 4.717444260796841e-05, | |
| "loss": 3.4027, | |
| "num_input_tokens_seen": 918984, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.15345829222843363, | |
| "grad_norm": 5.312758922576904, | |
| "learning_rate": 4.715453106349902e-05, | |
| "loss": 3.4349, | |
| "num_input_tokens_seen": 921912, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.15400635755782088, | |
| "grad_norm": 6.985774040222168, | |
| "learning_rate": 4.7134553839980143e-05, | |
| "loss": 3.7019, | |
| "num_input_tokens_seen": 925848, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.15455442288720816, | |
| "grad_norm": 6.191575527191162, | |
| "learning_rate": 4.711451099663603e-05, | |
| "loss": 3.4276, | |
| "num_input_tokens_seen": 929792, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.15510248821659542, | |
| "grad_norm": 6.040350437164307, | |
| "learning_rate": 4.709440259288542e-05, | |
| "loss": 2.9173, | |
| "num_input_tokens_seen": 932400, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.15565055354598267, | |
| "grad_norm": 6.164414405822754, | |
| "learning_rate": 4.707422868834146e-05, | |
| "loss": 3.1684, | |
| "num_input_tokens_seen": 935408, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.15619861887536995, | |
| "grad_norm": 7.248453140258789, | |
| "learning_rate": 4.705398934281145e-05, | |
| "loss": 3.6365, | |
| "num_input_tokens_seen": 938184, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.1567466842047572, | |
| "grad_norm": 5.813863754272461, | |
| "learning_rate": 4.70336846162967e-05, | |
| "loss": 3.405, | |
| "num_input_tokens_seen": 941272, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.15729474953414446, | |
| "grad_norm": 6.239504337310791, | |
| "learning_rate": 4.701331456899236e-05, | |
| "loss": 3.0722, | |
| "num_input_tokens_seen": 944728, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.15784281486353174, | |
| "grad_norm": 9.224727630615234, | |
| "learning_rate": 4.6992879261287226e-05, | |
| "loss": 3.2262, | |
| "num_input_tokens_seen": 947528, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.158390880192919, | |
| "grad_norm": 7.570671558380127, | |
| "learning_rate": 4.6972378753763545e-05, | |
| "loss": 3.2116, | |
| "num_input_tokens_seen": 950128, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.15893894552230625, | |
| "grad_norm": 4.781320095062256, | |
| "learning_rate": 4.6951813107196874e-05, | |
| "loss": 3.2953, | |
| "num_input_tokens_seen": 954336, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.15948701085169353, | |
| "grad_norm": 7.117349147796631, | |
| "learning_rate": 4.693118238255587e-05, | |
| "loss": 3.2755, | |
| "num_input_tokens_seen": 957704, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.1600350761810808, | |
| "grad_norm": 6.41115665435791, | |
| "learning_rate": 4.6910486641002136e-05, | |
| "loss": 3.2523, | |
| "num_input_tokens_seen": 960184, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.16058314151046804, | |
| "grad_norm": 8.865285873413086, | |
| "learning_rate": 4.688972594389001e-05, | |
| "loss": 3.3998, | |
| "num_input_tokens_seen": 963264, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.16113120683985532, | |
| "grad_norm": 4.722679615020752, | |
| "learning_rate": 4.6868900352766394e-05, | |
| "loss": 3.0958, | |
| "num_input_tokens_seen": 966536, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.16167927216924258, | |
| "grad_norm": 8.334817886352539, | |
| "learning_rate": 4.6848009929370575e-05, | |
| "loss": 3.2969, | |
| "num_input_tokens_seen": 969008, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.16222733749862983, | |
| "grad_norm": 6.063559055328369, | |
| "learning_rate": 4.682705473563406e-05, | |
| "loss": 3.0186, | |
| "num_input_tokens_seen": 972168, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.1627754028280171, | |
| "grad_norm": 6.434414386749268, | |
| "learning_rate": 4.680603483368033e-05, | |
| "loss": 3.4689, | |
| "num_input_tokens_seen": 976096, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.16332346815740437, | |
| "grad_norm": 8.82730770111084, | |
| "learning_rate": 4.678495028582476e-05, | |
| "loss": 3.2562, | |
| "num_input_tokens_seen": 979080, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.16387153348679162, | |
| "grad_norm": 6.3244171142578125, | |
| "learning_rate": 4.676380115457431e-05, | |
| "loss": 3.0127, | |
| "num_input_tokens_seen": 981896, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.1644195988161789, | |
| "grad_norm": 6.033606052398682, | |
| "learning_rate": 4.674258750262745e-05, | |
| "loss": 3.1823, | |
| "num_input_tokens_seen": 985072, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16496766414556616, | |
| "grad_norm": 4.211119174957275, | |
| "learning_rate": 4.6721309392873926e-05, | |
| "loss": 3.1351, | |
| "num_input_tokens_seen": 987448, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.1655157294749534, | |
| "grad_norm": 6.105933666229248, | |
| "learning_rate": 4.669996688839453e-05, | |
| "loss": 3.2884, | |
| "num_input_tokens_seen": 990840, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.16606379480434066, | |
| "grad_norm": 8.247055053710938, | |
| "learning_rate": 4.6678560052460994e-05, | |
| "loss": 3.1378, | |
| "num_input_tokens_seen": 994768, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.16661186013372795, | |
| "grad_norm": 5.653783798217773, | |
| "learning_rate": 4.6657088948535776e-05, | |
| "loss": 3.7376, | |
| "num_input_tokens_seen": 997840, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.1671599254631152, | |
| "grad_norm": 5.42575216293335, | |
| "learning_rate": 4.6635553640271835e-05, | |
| "loss": 3.4831, | |
| "num_input_tokens_seen": 1000536, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.16770799079250245, | |
| "grad_norm": 7.640921115875244, | |
| "learning_rate": 4.6613954191512474e-05, | |
| "loss": 3.5714, | |
| "num_input_tokens_seen": 1003952, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.16825605612188974, | |
| "grad_norm": 5.931758880615234, | |
| "learning_rate": 4.6592290666291163e-05, | |
| "loss": 3.4493, | |
| "num_input_tokens_seen": 1006544, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.168804121451277, | |
| "grad_norm": 4.96866512298584, | |
| "learning_rate": 4.657056312883132e-05, | |
| "loss": 3.0963, | |
| "num_input_tokens_seen": 1009920, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.16935218678066424, | |
| "grad_norm": 7.009856224060059, | |
| "learning_rate": 4.6548771643546134e-05, | |
| "loss": 3.0819, | |
| "num_input_tokens_seen": 1012544, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.16990025211005153, | |
| "grad_norm": 6.719354629516602, | |
| "learning_rate": 4.652691627503837e-05, | |
| "loss": 3.3187, | |
| "num_input_tokens_seen": 1015248, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.17044831743943878, | |
| "grad_norm": 7.1751837730407715, | |
| "learning_rate": 4.650499708810018e-05, | |
| "loss": 3.6579, | |
| "num_input_tokens_seen": 1018720, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.17099638276882603, | |
| "grad_norm": 11.277824401855469, | |
| "learning_rate": 4.648301414771293e-05, | |
| "loss": 3.5192, | |
| "num_input_tokens_seen": 1021424, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.17154444809821331, | |
| "grad_norm": 9.307093620300293, | |
| "learning_rate": 4.646096751904696e-05, | |
| "loss": 3.2431, | |
| "num_input_tokens_seen": 1024192, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.17209251342760057, | |
| "grad_norm": 6.657312393188477, | |
| "learning_rate": 4.643885726746143e-05, | |
| "loss": 3.1878, | |
| "num_input_tokens_seen": 1027600, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.17264057875698782, | |
| "grad_norm": 5.908510208129883, | |
| "learning_rate": 4.641668345850414e-05, | |
| "loss": 3.67, | |
| "num_input_tokens_seen": 1030168, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.1731886440863751, | |
| "grad_norm": 6.540554046630859, | |
| "learning_rate": 4.639444615791128e-05, | |
| "loss": 2.9285, | |
| "num_input_tokens_seen": 1034472, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.17373670941576236, | |
| "grad_norm": 6.857239723205566, | |
| "learning_rate": 4.6372145431607264e-05, | |
| "loss": 3.3879, | |
| "num_input_tokens_seen": 1038520, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.1742847747451496, | |
| "grad_norm": 5.343799591064453, | |
| "learning_rate": 4.634978134570456e-05, | |
| "loss": 3.3824, | |
| "num_input_tokens_seen": 1041864, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.1748328400745369, | |
| "grad_norm": 5.971281051635742, | |
| "learning_rate": 4.632735396650346e-05, | |
| "loss": 3.5344, | |
| "num_input_tokens_seen": 1045192, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.17538090540392415, | |
| "grad_norm": 5.474274158477783, | |
| "learning_rate": 4.6304863360491906e-05, | |
| "loss": 3.0682, | |
| "num_input_tokens_seen": 1048680, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.1759289707333114, | |
| "grad_norm": 6.720623970031738, | |
| "learning_rate": 4.6282309594345266e-05, | |
| "loss": 3.0808, | |
| "num_input_tokens_seen": 1051776, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.17647703606269868, | |
| "grad_norm": 6.88260555267334, | |
| "learning_rate": 4.625969273492614e-05, | |
| "loss": 3.5346, | |
| "num_input_tokens_seen": 1054256, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.17702510139208594, | |
| "grad_norm": 6.154021263122559, | |
| "learning_rate": 4.623701284928421e-05, | |
| "loss": 3.2947, | |
| "num_input_tokens_seen": 1057536, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.1775731667214732, | |
| "grad_norm": 6.108212471008301, | |
| "learning_rate": 4.6214270004655985e-05, | |
| "loss": 3.3287, | |
| "num_input_tokens_seen": 1060872, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.17812123205086047, | |
| "grad_norm": 4.82647705078125, | |
| "learning_rate": 4.6191464268464614e-05, | |
| "loss": 3.3231, | |
| "num_input_tokens_seen": 1063536, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.17866929738024773, | |
| "grad_norm": 6.965377330780029, | |
| "learning_rate": 4.61685957083197e-05, | |
| "loss": 3.5096, | |
| "num_input_tokens_seen": 1066392, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.17921736270963498, | |
| "grad_norm": 7.133657455444336, | |
| "learning_rate": 4.6145664392017096e-05, | |
| "loss": 3.2534, | |
| "num_input_tokens_seen": 1068920, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.17976542803902226, | |
| "grad_norm": 8.859077453613281, | |
| "learning_rate": 4.6122670387538704e-05, | |
| "loss": 3.2012, | |
| "num_input_tokens_seen": 1071696, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.18031349336840952, | |
| "grad_norm": 6.119090557098389, | |
| "learning_rate": 4.6099613763052264e-05, | |
| "loss": 3.6088, | |
| "num_input_tokens_seen": 1074720, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.18086155869779677, | |
| "grad_norm": 6.804201126098633, | |
| "learning_rate": 4.607649458691115e-05, | |
| "loss": 3.2794, | |
| "num_input_tokens_seen": 1077944, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.18140962402718405, | |
| "grad_norm": 7.389477729797363, | |
| "learning_rate": 4.60533129276542e-05, | |
| "loss": 3.4432, | |
| "num_input_tokens_seen": 1080792, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.1819576893565713, | |
| "grad_norm": 5.930356502532959, | |
| "learning_rate": 4.6030068854005476e-05, | |
| "loss": 3.2158, | |
| "num_input_tokens_seen": 1083520, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.18250575468595856, | |
| "grad_norm": 6.847218036651611, | |
| "learning_rate": 4.6006762434874065e-05, | |
| "loss": 3.4395, | |
| "num_input_tokens_seen": 1086128, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.18305382001534584, | |
| "grad_norm": 9.511390686035156, | |
| "learning_rate": 4.598339373935389e-05, | |
| "loss": 3.2795, | |
| "num_input_tokens_seen": 1088560, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.1836018853447331, | |
| "grad_norm": 4.90114688873291, | |
| "learning_rate": 4.595996283672349e-05, | |
| "loss": 3.2474, | |
| "num_input_tokens_seen": 1091832, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.18414995067412035, | |
| "grad_norm": 9.29576301574707, | |
| "learning_rate": 4.5936469796445854e-05, | |
| "loss": 3.3011, | |
| "num_input_tokens_seen": 1095048, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.18469801600350763, | |
| "grad_norm": 6.643434524536133, | |
| "learning_rate": 4.5912914688168134e-05, | |
| "loss": 3.4029, | |
| "num_input_tokens_seen": 1097704, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.18524608133289489, | |
| "grad_norm": 4.961350440979004, | |
| "learning_rate": 4.5889297581721526e-05, | |
| "loss": 3.0958, | |
| "num_input_tokens_seen": 1100736, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.18579414666228214, | |
| "grad_norm": 7.057353496551514, | |
| "learning_rate": 4.5865618547121016e-05, | |
| "loss": 3.1003, | |
| "num_input_tokens_seen": 1104184, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.18634221199166942, | |
| "grad_norm": 3.688004970550537, | |
| "learning_rate": 4.584187765456516e-05, | |
| "loss": 3.5992, | |
| "num_input_tokens_seen": 1107880, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.18689027732105667, | |
| "grad_norm": 6.79044246673584, | |
| "learning_rate": 4.5818074974435935e-05, | |
| "loss": 3.5112, | |
| "num_input_tokens_seen": 1110728, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.18743834265044393, | |
| "grad_norm": 5.125957489013672, | |
| "learning_rate": 4.579421057729846e-05, | |
| "loss": 3.4606, | |
| "num_input_tokens_seen": 1113632, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.18798640797983118, | |
| "grad_norm": 6.708007335662842, | |
| "learning_rate": 4.577028453390084e-05, | |
| "loss": 3.4139, | |
| "num_input_tokens_seen": 1117248, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.18853447330921846, | |
| "grad_norm": 4.76835298538208, | |
| "learning_rate": 4.5746296915173924e-05, | |
| "loss": 3.4408, | |
| "num_input_tokens_seen": 1120600, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.18908253863860572, | |
| "grad_norm": 6.29659366607666, | |
| "learning_rate": 4.572224779223111e-05, | |
| "loss": 3.4817, | |
| "num_input_tokens_seen": 1123856, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.18963060396799297, | |
| "grad_norm": 9.75003433227539, | |
| "learning_rate": 4.569813723636813e-05, | |
| "loss": 3.5152, | |
| "num_input_tokens_seen": 1127872, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.19017866929738025, | |
| "grad_norm": 6.846242427825928, | |
| "learning_rate": 4.567396531906285e-05, | |
| "loss": 3.4197, | |
| "num_input_tokens_seen": 1131656, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.1907267346267675, | |
| "grad_norm": 6.956099033355713, | |
| "learning_rate": 4.564973211197503e-05, | |
| "loss": 3.5098, | |
| "num_input_tokens_seen": 1135160, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.19127479995615476, | |
| "grad_norm": 5.187982559204102, | |
| "learning_rate": 4.562543768694614e-05, | |
| "loss": 3.2708, | |
| "num_input_tokens_seen": 1137640, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.19182286528554204, | |
| "grad_norm": 6.0655035972595215, | |
| "learning_rate": 4.5601082115999126e-05, | |
| "loss": 3.1415, | |
| "num_input_tokens_seen": 1140624, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.1923709306149293, | |
| "grad_norm": 7.111659049987793, | |
| "learning_rate": 4.557666547133822e-05, | |
| "loss": 3.419, | |
| "num_input_tokens_seen": 1143352, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.19291899594431655, | |
| "grad_norm": 5.601785659790039, | |
| "learning_rate": 4.55521878253487e-05, | |
| "loss": 3.1537, | |
| "num_input_tokens_seen": 1146552, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.19346706127370383, | |
| "grad_norm": 5.885753154754639, | |
| "learning_rate": 4.5527649250596705e-05, | |
| "loss": 3.1606, | |
| "num_input_tokens_seen": 1150064, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.1940151266030911, | |
| "grad_norm": 7.787903785705566, | |
| "learning_rate": 4.5503049819828975e-05, | |
| "loss": 3.5314, | |
| "num_input_tokens_seen": 1152720, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.19456319193247834, | |
| "grad_norm": 6.6935133934021, | |
| "learning_rate": 4.5478389605972695e-05, | |
| "loss": 3.2798, | |
| "num_input_tokens_seen": 1155704, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.19511125726186562, | |
| "grad_norm": 5.613322734832764, | |
| "learning_rate": 4.545366868213521e-05, | |
| "loss": 2.9432, | |
| "num_input_tokens_seen": 1159064, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.19565932259125288, | |
| "grad_norm": 5.332114219665527, | |
| "learning_rate": 4.542888712160389e-05, | |
| "loss": 3.417, | |
| "num_input_tokens_seen": 1162384, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.19620738792064013, | |
| "grad_norm": 5.810116291046143, | |
| "learning_rate": 4.540404499784582e-05, | |
| "loss": 3.4744, | |
| "num_input_tokens_seen": 1165168, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.1967554532500274, | |
| "grad_norm": 6.959201335906982, | |
| "learning_rate": 4.537914238450768e-05, | |
| "loss": 3.6205, | |
| "num_input_tokens_seen": 1168288, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.19730351857941467, | |
| "grad_norm": 7.266166687011719, | |
| "learning_rate": 4.535417935541543e-05, | |
| "loss": 3.5834, | |
| "num_input_tokens_seen": 1170536, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.19785158390880192, | |
| "grad_norm": 6.565328598022461, | |
| "learning_rate": 4.5329155984574154e-05, | |
| "loss": 3.094, | |
| "num_input_tokens_seen": 1174016, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.1983996492381892, | |
| "grad_norm": 6.1436944007873535, | |
| "learning_rate": 4.5304072346167846e-05, | |
| "loss": 3.6874, | |
| "num_input_tokens_seen": 1177584, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.19894771456757646, | |
| "grad_norm": 6.344284534454346, | |
| "learning_rate": 4.527892851455915e-05, | |
| "loss": 3.5916, | |
| "num_input_tokens_seen": 1180544, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.1994957798969637, | |
| "grad_norm": 6.047328472137451, | |
| "learning_rate": 4.5253724564289144e-05, | |
| "loss": 3.1019, | |
| "num_input_tokens_seen": 1184376, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.200043845226351, | |
| "grad_norm": 5.976099491119385, | |
| "learning_rate": 4.522846057007716e-05, | |
| "loss": 3.0793, | |
| "num_input_tokens_seen": 1187280, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.20059191055573825, | |
| "grad_norm": 6.050201892852783, | |
| "learning_rate": 4.5203136606820515e-05, | |
| "loss": 3.1914, | |
| "num_input_tokens_seen": 1190952, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.2011399758851255, | |
| "grad_norm": 5.573675632476807, | |
| "learning_rate": 4.517775274959434e-05, | |
| "loss": 3.3849, | |
| "num_input_tokens_seen": 1194568, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.20168804121451278, | |
| "grad_norm": 10.978282928466797, | |
| "learning_rate": 4.5152309073651266e-05, | |
| "loss": 3.3821, | |
| "num_input_tokens_seen": 1197992, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.20223610654390003, | |
| "grad_norm": 6.215994358062744, | |
| "learning_rate": 4.512680565442133e-05, | |
| "loss": 2.9822, | |
| "num_input_tokens_seen": 1201456, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.2027841718732873, | |
| "grad_norm": 5.15269660949707, | |
| "learning_rate": 4.510124256751166e-05, | |
| "loss": 3.0034, | |
| "num_input_tokens_seen": 1205552, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.20333223720267457, | |
| "grad_norm": 8.590337753295898, | |
| "learning_rate": 4.507561988870624e-05, | |
| "loss": 3.3385, | |
| "num_input_tokens_seen": 1208496, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.20388030253206182, | |
| "grad_norm": 6.038626194000244, | |
| "learning_rate": 4.5049937693965764e-05, | |
| "loss": 3.3063, | |
| "num_input_tokens_seen": 1211856, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.20442836786144908, | |
| "grad_norm": 6.621918678283691, | |
| "learning_rate": 4.502419605942735e-05, | |
| "loss": 3.2243, | |
| "num_input_tokens_seen": 1216152, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.20497643319083636, | |
| "grad_norm": 6.029962062835693, | |
| "learning_rate": 4.499839506140433e-05, | |
| "loss": 3.4138, | |
| "num_input_tokens_seen": 1219840, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.20552449852022361, | |
| "grad_norm": 7.1330952644348145, | |
| "learning_rate": 4.497253477638602e-05, | |
| "loss": 3.3366, | |
| "num_input_tokens_seen": 1222888, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.20607256384961087, | |
| "grad_norm": 7.775686264038086, | |
| "learning_rate": 4.494661528103751e-05, | |
| "loss": 3.1706, | |
| "num_input_tokens_seen": 1227096, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.20662062917899815, | |
| "grad_norm": 8.789952278137207, | |
| "learning_rate": 4.492063665219941e-05, | |
| "loss": 3.4648, | |
| "num_input_tokens_seen": 1230856, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.2071686945083854, | |
| "grad_norm": 7.492274284362793, | |
| "learning_rate": 4.489459896688764e-05, | |
| "loss": 3.6099, | |
| "num_input_tokens_seen": 1234160, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.20771675983777266, | |
| "grad_norm": 6.971865177154541, | |
| "learning_rate": 4.48685023022932e-05, | |
| "loss": 3.037, | |
| "num_input_tokens_seen": 1236904, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.20826482516715994, | |
| "grad_norm": 9.107683181762695, | |
| "learning_rate": 4.484234673578196e-05, | |
| "loss": 3.435, | |
| "num_input_tokens_seen": 1239936, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.2088128904965472, | |
| "grad_norm": 6.467232704162598, | |
| "learning_rate": 4.4816132344894354e-05, | |
| "loss": 3.6629, | |
| "num_input_tokens_seen": 1242952, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.20936095582593445, | |
| "grad_norm": 6.295756816864014, | |
| "learning_rate": 4.4789859207345274e-05, | |
| "loss": 3.1083, | |
| "num_input_tokens_seen": 1246560, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.2099090211553217, | |
| "grad_norm": 5.817240238189697, | |
| "learning_rate": 4.4763527401023724e-05, | |
| "loss": 3.2389, | |
| "num_input_tokens_seen": 1249904, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.21045708648470898, | |
| "grad_norm": 7.3531317710876465, | |
| "learning_rate": 4.473713700399266e-05, | |
| "loss": 3.1022, | |
| "num_input_tokens_seen": 1252272, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.21100515181409624, | |
| "grad_norm": 7.078802108764648, | |
| "learning_rate": 4.471068809448872e-05, | |
| "loss": 3.2372, | |
| "num_input_tokens_seen": 1255904, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.2115532171434835, | |
| "grad_norm": 5.776179313659668, | |
| "learning_rate": 4.468418075092201e-05, | |
| "loss": 3.2817, | |
| "num_input_tokens_seen": 1259024, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.21210128247287077, | |
| "grad_norm": 9.986640930175781, | |
| "learning_rate": 4.465761505187589e-05, | |
| "loss": 3.349, | |
| "num_input_tokens_seen": 1262584, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.21264934780225803, | |
| "grad_norm": 8.421146392822266, | |
| "learning_rate": 4.463099107610669e-05, | |
| "loss": 3.2711, | |
| "num_input_tokens_seen": 1266072, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.21319741313164528, | |
| "grad_norm": 8.646468162536621, | |
| "learning_rate": 4.460430890254353e-05, | |
| "loss": 3.264, | |
| "num_input_tokens_seen": 1269528, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.21374547846103256, | |
| "grad_norm": 6.439562797546387, | |
| "learning_rate": 4.457756861028804e-05, | |
| "loss": 3.2899, | |
| "num_input_tokens_seen": 1272200, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.21429354379041982, | |
| "grad_norm": 8.170503616333008, | |
| "learning_rate": 4.455077027861417e-05, | |
| "loss": 3.3649, | |
| "num_input_tokens_seen": 1275360, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.21484160911980707, | |
| "grad_norm": 6.329521179199219, | |
| "learning_rate": 4.452391398696794e-05, | |
| "loss": 3.4714, | |
| "num_input_tokens_seen": 1278480, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.21538967444919435, | |
| "grad_norm": 7.618672847747803, | |
| "learning_rate": 4.449699981496714e-05, | |
| "loss": 3.1889, | |
| "num_input_tokens_seen": 1281312, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.2159377397785816, | |
| "grad_norm": 5.937787055969238, | |
| "learning_rate": 4.447002784240122e-05, | |
| "loss": 3.2998, | |
| "num_input_tokens_seen": 1284456, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.21648580510796886, | |
| "grad_norm": 6.004344463348389, | |
| "learning_rate": 4.444299814923096e-05, | |
| "loss": 3.5535, | |
| "num_input_tokens_seen": 1287512, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.21703387043735614, | |
| "grad_norm": 6.512199878692627, | |
| "learning_rate": 4.4415910815588235e-05, | |
| "loss": 3.4036, | |
| "num_input_tokens_seen": 1290336, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.2175819357667434, | |
| "grad_norm": 6.4987616539001465, | |
| "learning_rate": 4.438876592177584e-05, | |
| "loss": 3.6318, | |
| "num_input_tokens_seen": 1292832, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.21813000109613065, | |
| "grad_norm": 5.955297946929932, | |
| "learning_rate": 4.4361563548267186e-05, | |
| "loss": 3.4087, | |
| "num_input_tokens_seen": 1296336, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.21867806642551793, | |
| "grad_norm": 9.001585960388184, | |
| "learning_rate": 4.4334303775706087e-05, | |
| "loss": 3.0256, | |
| "num_input_tokens_seen": 1299928, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.21922613175490518, | |
| "grad_norm": 8.543002128601074, | |
| "learning_rate": 4.4306986684906534e-05, | |
| "loss": 3.0983, | |
| "num_input_tokens_seen": 1303344, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21977419708429244, | |
| "grad_norm": 5.445712089538574, | |
| "learning_rate": 4.427961235685245e-05, | |
| "loss": 3.5193, | |
| "num_input_tokens_seen": 1306536, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.22032226241367972, | |
| "grad_norm": 4.273796558380127, | |
| "learning_rate": 4.4252180872697403e-05, | |
| "loss": 3.036, | |
| "num_input_tokens_seen": 1311056, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.22087032774306697, | |
| "grad_norm": 5.357060432434082, | |
| "learning_rate": 4.422469231376445e-05, | |
| "loss": 3.2927, | |
| "num_input_tokens_seen": 1314432, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.22141839307245423, | |
| "grad_norm": 6.554574012756348, | |
| "learning_rate": 4.4197146761545825e-05, | |
| "loss": 3.4088, | |
| "num_input_tokens_seen": 1317568, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.2219664584018415, | |
| "grad_norm": 5.920197486877441, | |
| "learning_rate": 4.4169544297702745e-05, | |
| "loss": 3.1075, | |
| "num_input_tokens_seen": 1321288, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.22251452373122876, | |
| "grad_norm": 5.399965763092041, | |
| "learning_rate": 4.414188500406513e-05, | |
| "loss": 3.023, | |
| "num_input_tokens_seen": 1324832, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.22306258906061602, | |
| "grad_norm": 4.449610710144043, | |
| "learning_rate": 4.411416896263137e-05, | |
| "loss": 3.2649, | |
| "num_input_tokens_seen": 1327992, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.2236106543900033, | |
| "grad_norm": 5.2429304122924805, | |
| "learning_rate": 4.408639625556812e-05, | |
| "loss": 3.2027, | |
| "num_input_tokens_seen": 1331448, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.22415871971939055, | |
| "grad_norm": 5.563135623931885, | |
| "learning_rate": 4.405856696520998e-05, | |
| "loss": 3.0106, | |
| "num_input_tokens_seen": 1334672, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.2247067850487778, | |
| "grad_norm": 9.401083946228027, | |
| "learning_rate": 4.403068117405933e-05, | |
| "loss": 3.5604, | |
| "num_input_tokens_seen": 1338664, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.2252548503781651, | |
| "grad_norm": 6.381105899810791, | |
| "learning_rate": 4.4002738964786047e-05, | |
| "loss": 3.1456, | |
| "num_input_tokens_seen": 1341320, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.22580291570755234, | |
| "grad_norm": 8.379097938537598, | |
| "learning_rate": 4.397474042022727e-05, | |
| "loss": 3.7295, | |
| "num_input_tokens_seen": 1344712, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.2263509810369396, | |
| "grad_norm": 5.414994239807129, | |
| "learning_rate": 4.394668562338711e-05, | |
| "loss": 3.2339, | |
| "num_input_tokens_seen": 1348704, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.22689904636632688, | |
| "grad_norm": 6.6783447265625, | |
| "learning_rate": 4.391857465743649e-05, | |
| "loss": 3.1633, | |
| "num_input_tokens_seen": 1352136, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.22744711169571413, | |
| "grad_norm": 6.781215667724609, | |
| "learning_rate": 4.389040760571284e-05, | |
| "loss": 3.2454, | |
| "num_input_tokens_seen": 1355704, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.2279951770251014, | |
| "grad_norm": 8.376158714294434, | |
| "learning_rate": 4.386218455171984e-05, | |
| "loss": 3.2688, | |
| "num_input_tokens_seen": 1358224, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.22854324235448867, | |
| "grad_norm": 6.815377712249756, | |
| "learning_rate": 4.383390557912722e-05, | |
| "loss": 3.2047, | |
| "num_input_tokens_seen": 1361624, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.22909130768387592, | |
| "grad_norm": 9.893330574035645, | |
| "learning_rate": 4.380557077177046e-05, | |
| "loss": 3.3861, | |
| "num_input_tokens_seen": 1365672, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.22963937301326318, | |
| "grad_norm": 5.984465599060059, | |
| "learning_rate": 4.3777180213650587e-05, | |
| "loss": 3.2901, | |
| "num_input_tokens_seen": 1368440, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.23018743834265046, | |
| "grad_norm": 8.21902847290039, | |
| "learning_rate": 4.37487339889339e-05, | |
| "loss": 3.135, | |
| "num_input_tokens_seen": 1370736, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2307355036720377, | |
| "grad_norm": 7.617781639099121, | |
| "learning_rate": 4.3720232181951726e-05, | |
| "loss": 3.2967, | |
| "num_input_tokens_seen": 1373632, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.23128356900142497, | |
| "grad_norm": 5.901704788208008, | |
| "learning_rate": 4.3691674877200164e-05, | |
| "loss": 3.0304, | |
| "num_input_tokens_seen": 1376840, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.23183163433081222, | |
| "grad_norm": 7.1147074699401855, | |
| "learning_rate": 4.3663062159339855e-05, | |
| "loss": 3.2797, | |
| "num_input_tokens_seen": 1380024, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.2323796996601995, | |
| "grad_norm": 6.9793243408203125, | |
| "learning_rate": 4.363439411319571e-05, | |
| "loss": 3.6079, | |
| "num_input_tokens_seen": 1382992, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.23292776498958676, | |
| "grad_norm": 5.454427242279053, | |
| "learning_rate": 4.360567082375666e-05, | |
| "loss": 3.1035, | |
| "num_input_tokens_seen": 1385936, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.233475830318974, | |
| "grad_norm": 9.776113510131836, | |
| "learning_rate": 4.3576892376175414e-05, | |
| "loss": 3.1049, | |
| "num_input_tokens_seen": 1389176, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.2340238956483613, | |
| "grad_norm": 5.588262557983398, | |
| "learning_rate": 4.3553829961575053e-05, | |
| "loss": 3.0589, | |
| "num_input_tokens_seen": 1392080, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.23457196097774854, | |
| "grad_norm": 7.208589553833008, | |
| "learning_rate": 4.352495244444449e-05, | |
| "loss": 3.3501, | |
| "num_input_tokens_seen": 1395360, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.2351200263071358, | |
| "grad_norm": 5.150116920471191, | |
| "learning_rate": 4.349602000846844e-05, | |
| "loss": 3.4204, | |
| "num_input_tokens_seen": 1398760, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.23566809163652308, | |
| "grad_norm": 7.456035137176514, | |
| "learning_rate": 4.346703273941965e-05, | |
| "loss": 2.9937, | |
| "num_input_tokens_seen": 1402384, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.23621615696591033, | |
| "grad_norm": 5.8624067306518555, | |
| "learning_rate": 4.3437990723233416e-05, | |
| "loss": 3.233, | |
| "num_input_tokens_seen": 1406152, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.2367642222952976, | |
| "grad_norm": 5.129085063934326, | |
| "learning_rate": 4.3408894046007354e-05, | |
| "loss": 3.3833, | |
| "num_input_tokens_seen": 1409704, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.23731228762468487, | |
| "grad_norm": 7.074642658233643, | |
| "learning_rate": 4.337974279400111e-05, | |
| "loss": 3.2288, | |
| "num_input_tokens_seen": 1412984, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.23786035295407212, | |
| "grad_norm": 7.073869228363037, | |
| "learning_rate": 4.335053705363611e-05, | |
| "loss": 3.1338, | |
| "num_input_tokens_seen": 1416232, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.23840841828345938, | |
| "grad_norm": 6.7071990966796875, | |
| "learning_rate": 4.332127691149535e-05, | |
| "loss": 3.1272, | |
| "num_input_tokens_seen": 1419904, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.23895648361284666, | |
| "grad_norm": 8.463297843933105, | |
| "learning_rate": 4.3291962454323076e-05, | |
| "loss": 3.3227, | |
| "num_input_tokens_seen": 1423048, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.2395045489422339, | |
| "grad_norm": 7.098794460296631, | |
| "learning_rate": 4.3262593769024576e-05, | |
| "loss": 3.1422, | |
| "num_input_tokens_seen": 1425568, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.24005261427162117, | |
| "grad_norm": 5.919711589813232, | |
| "learning_rate": 4.323317094266589e-05, | |
| "loss": 3.0584, | |
| "num_input_tokens_seen": 1429464, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.24060067960100845, | |
| "grad_norm": 5.311784267425537, | |
| "learning_rate": 4.320369406247356e-05, | |
| "loss": 2.8391, | |
| "num_input_tokens_seen": 1432832, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.2411487449303957, | |
| "grad_norm": 6.239211559295654, | |
| "learning_rate": 4.317416321583437e-05, | |
| "loss": 3.1701, | |
| "num_input_tokens_seen": 1435960, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.24169681025978296, | |
| "grad_norm": 9.268356323242188, | |
| "learning_rate": 4.314457849029513e-05, | |
| "loss": 3.3796, | |
| "num_input_tokens_seen": 1439752, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.24224487558917024, | |
| "grad_norm": 7.6005449295043945, | |
| "learning_rate": 4.311493997356234e-05, | |
| "loss": 3.189, | |
| "num_input_tokens_seen": 1442488, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.2427929409185575, | |
| "grad_norm": 6.128123760223389, | |
| "learning_rate": 4.308524775350198e-05, | |
| "loss": 3.2867, | |
| "num_input_tokens_seen": 1445800, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.24334100624794475, | |
| "grad_norm": 6.555956840515137, | |
| "learning_rate": 4.305550191813923e-05, | |
| "loss": 3.1985, | |
| "num_input_tokens_seen": 1448992, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.24388907157733203, | |
| "grad_norm": 6.0009446144104, | |
| "learning_rate": 4.302570255565825e-05, | |
| "loss": 3.1752, | |
| "num_input_tokens_seen": 1452104, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.24443713690671928, | |
| "grad_norm": 5.329344749450684, | |
| "learning_rate": 4.299584975440184e-05, | |
| "loss": 2.9533, | |
| "num_input_tokens_seen": 1457016, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.24498520223610654, | |
| "grad_norm": 4.869180202484131, | |
| "learning_rate": 4.296594360287126e-05, | |
| "loss": 2.9869, | |
| "num_input_tokens_seen": 1459624, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.24553326756549382, | |
| "grad_norm": 6.4714202880859375, | |
| "learning_rate": 4.293598418972592e-05, | |
| "loss": 3.2594, | |
| "num_input_tokens_seen": 1462696, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.24608133289488107, | |
| "grad_norm": 10.35406494140625, | |
| "learning_rate": 4.2905971603783116e-05, | |
| "loss": 3.164, | |
| "num_input_tokens_seen": 1466832, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.24662939822426833, | |
| "grad_norm": 5.773983001708984, | |
| "learning_rate": 4.287590593401778e-05, | |
| "loss": 3.2342, | |
| "num_input_tokens_seen": 1470288, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.2471774635536556, | |
| "grad_norm": 5.758610248565674, | |
| "learning_rate": 4.284578726956225e-05, | |
| "loss": 3.38, | |
| "num_input_tokens_seen": 1473032, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.24772552888304286, | |
| "grad_norm": 7.092349529266357, | |
| "learning_rate": 4.2815615699705943e-05, | |
| "loss": 3.1884, | |
| "num_input_tokens_seen": 1476104, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.24827359421243012, | |
| "grad_norm": 8.047478675842285, | |
| "learning_rate": 4.2785391313895103e-05, | |
| "loss": 3.3215, | |
| "num_input_tokens_seen": 1479376, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.2488216595418174, | |
| "grad_norm": 7.5882439613342285, | |
| "learning_rate": 4.27551142017326e-05, | |
| "loss": 3.0476, | |
| "num_input_tokens_seen": 1482248, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.24936972487120465, | |
| "grad_norm": 5.922421932220459, | |
| "learning_rate": 4.2724784452977565e-05, | |
| "loss": 3.3373, | |
| "num_input_tokens_seen": 1485232, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.2499177902005919, | |
| "grad_norm": 6.161900520324707, | |
| "learning_rate": 4.26944021575452e-05, | |
| "loss": 3.0011, | |
| "num_input_tokens_seen": 1488896, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.2504658555299792, | |
| "grad_norm": 7.3562397956848145, | |
| "learning_rate": 4.2663967405506486e-05, | |
| "loss": 2.9991, | |
| "num_input_tokens_seen": 1492072, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.2510139208593664, | |
| "grad_norm": 6.788776397705078, | |
| "learning_rate": 4.263348028708792e-05, | |
| "loss": 2.9735, | |
| "num_input_tokens_seen": 1495224, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.2515619861887537, | |
| "grad_norm": 8.632386207580566, | |
| "learning_rate": 4.260294089267123e-05, | |
| "loss": 3.2221, | |
| "num_input_tokens_seen": 1498256, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.252110051518141, | |
| "grad_norm": 6.462652683258057, | |
| "learning_rate": 4.257234931279313e-05, | |
| "loss": 2.8929, | |
| "num_input_tokens_seen": 1501824, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.2526581168475282, | |
| "grad_norm": 7.380079746246338, | |
| "learning_rate": 4.254170563814505e-05, | |
| "loss": 3.2545, | |
| "num_input_tokens_seen": 1504768, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.2532061821769155, | |
| "grad_norm": 5.370420455932617, | |
| "learning_rate": 4.2511009959572826e-05, | |
| "loss": 3.4558, | |
| "num_input_tokens_seen": 1508056, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.25375424750630277, | |
| "grad_norm": 5.953249454498291, | |
| "learning_rate": 4.2480262368076504e-05, | |
| "loss": 3.2177, | |
| "num_input_tokens_seen": 1511920, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.25430231283569, | |
| "grad_norm": 5.694786548614502, | |
| "learning_rate": 4.244946295481001e-05, | |
| "loss": 3.2378, | |
| "num_input_tokens_seen": 1514936, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.2548503781650773, | |
| "grad_norm": 7.257277965545654, | |
| "learning_rate": 4.241861181108092e-05, | |
| "loss": 3.616, | |
| "num_input_tokens_seen": 1518416, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.25539844349446456, | |
| "grad_norm": 6.388315200805664, | |
| "learning_rate": 4.238770902835013e-05, | |
| "loss": 3.2898, | |
| "num_input_tokens_seen": 1521960, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.2559465088238518, | |
| "grad_norm": 8.813338279724121, | |
| "learning_rate": 4.235675469823166e-05, | |
| "loss": 3.4491, | |
| "num_input_tokens_seen": 1525312, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.25649457415323906, | |
| "grad_norm": 6.0403947830200195, | |
| "learning_rate": 4.232574891249234e-05, | |
| "loss": 3.0747, | |
| "num_input_tokens_seen": 1528632, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.25704263948262634, | |
| "grad_norm": 6.77452278137207, | |
| "learning_rate": 4.229469176305153e-05, | |
| "loss": 3.2356, | |
| "num_input_tokens_seen": 1532200, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.25759070481201357, | |
| "grad_norm": 6.781161785125732, | |
| "learning_rate": 4.2263583341980885e-05, | |
| "loss": 3.1273, | |
| "num_input_tokens_seen": 1535624, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.25813877014140085, | |
| "grad_norm": 6.070975303649902, | |
| "learning_rate": 4.223242374150402e-05, | |
| "loss": 3.0905, | |
| "num_input_tokens_seen": 1538504, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.25868683547078813, | |
| "grad_norm": 6.770239353179932, | |
| "learning_rate": 4.220121305399634e-05, | |
| "loss": 3.2115, | |
| "num_input_tokens_seen": 1541520, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.25923490080017536, | |
| "grad_norm": 6.523434638977051, | |
| "learning_rate": 4.216995137198463e-05, | |
| "loss": 3.2605, | |
| "num_input_tokens_seen": 1545656, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.25978296612956264, | |
| "grad_norm": 6.475868225097656, | |
| "learning_rate": 4.213863878814691e-05, | |
| "loss": 3.2498, | |
| "num_input_tokens_seen": 1549464, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.2603310314589499, | |
| "grad_norm": 7.743395805358887, | |
| "learning_rate": 4.210727539531206e-05, | |
| "loss": 3.0166, | |
| "num_input_tokens_seen": 1553408, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.26087909678833715, | |
| "grad_norm": 6.206083297729492, | |
| "learning_rate": 4.207586128645963e-05, | |
| "loss": 3.2151, | |
| "num_input_tokens_seen": 1557112, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.26142716211772443, | |
| "grad_norm": 7.58196496963501, | |
| "learning_rate": 4.204439655471949e-05, | |
| "loss": 3.5573, | |
| "num_input_tokens_seen": 1560984, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.2619752274471117, | |
| "grad_norm": 8.101637840270996, | |
| "learning_rate": 4.201288129337158e-05, | |
| "loss": 3.4451, | |
| "num_input_tokens_seen": 1563808, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.26252329277649894, | |
| "grad_norm": 9.19637680053711, | |
| "learning_rate": 4.1981315595845684e-05, | |
| "loss": 3.191, | |
| "num_input_tokens_seen": 1567344, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.2630713581058862, | |
| "grad_norm": 7.602110862731934, | |
| "learning_rate": 4.194969955572105e-05, | |
| "loss": 3.7303, | |
| "num_input_tokens_seen": 1570104, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2636194234352735, | |
| "grad_norm": 10.502030372619629, | |
| "learning_rate": 4.191803326672622e-05, | |
| "loss": 3.2205, | |
| "num_input_tokens_seen": 1572864, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.26416748876466073, | |
| "grad_norm": 5.903884410858154, | |
| "learning_rate": 4.188631682273868e-05, | |
| "loss": 3.5156, | |
| "num_input_tokens_seen": 1575720, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.264715554094048, | |
| "grad_norm": 5.067075729370117, | |
| "learning_rate": 4.1854550317784604e-05, | |
| "loss": 3.1053, | |
| "num_input_tokens_seen": 1579008, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.2652636194234353, | |
| "grad_norm": 6.393657207489014, | |
| "learning_rate": 4.1822733846038584e-05, | |
| "loss": 3.1813, | |
| "num_input_tokens_seen": 1582216, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.2658116847528225, | |
| "grad_norm": 10.575018882751465, | |
| "learning_rate": 4.1790867501823345e-05, | |
| "loss": 3.7197, | |
| "num_input_tokens_seen": 1585440, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.2663597500822098, | |
| "grad_norm": 7.280240535736084, | |
| "learning_rate": 4.175895137960945e-05, | |
| "loss": 3.0196, | |
| "num_input_tokens_seen": 1588248, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.2669078154115971, | |
| "grad_norm": 6.695456504821777, | |
| "learning_rate": 4.172698557401503e-05, | |
| "loss": 2.9587, | |
| "num_input_tokens_seen": 1591288, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.2674558807409843, | |
| "grad_norm": 6.2725653648376465, | |
| "learning_rate": 4.169497017980555e-05, | |
| "loss": 3.3583, | |
| "num_input_tokens_seen": 1595056, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.2680039460703716, | |
| "grad_norm": 6.505600929260254, | |
| "learning_rate": 4.166290529189342e-05, | |
| "loss": 3.474, | |
| "num_input_tokens_seen": 1598096, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.26855201139975887, | |
| "grad_norm": 7.131421089172363, | |
| "learning_rate": 4.163079100533783e-05, | |
| "loss": 3.2172, | |
| "num_input_tokens_seen": 1602648, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.2691000767291461, | |
| "grad_norm": 5.818497657775879, | |
| "learning_rate": 4.1598627415344394e-05, | |
| "loss": 3.2497, | |
| "num_input_tokens_seen": 1605776, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.2696481420585334, | |
| "grad_norm": 8.350225448608398, | |
| "learning_rate": 4.156641461726489e-05, | |
| "loss": 3.2372, | |
| "num_input_tokens_seen": 1609960, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.27019620738792066, | |
| "grad_norm": 10.619945526123047, | |
| "learning_rate": 4.153415270659699e-05, | |
| "loss": 3.0958, | |
| "num_input_tokens_seen": 1612808, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.2707442727173079, | |
| "grad_norm": 6.475553035736084, | |
| "learning_rate": 4.150184177898394e-05, | |
| "loss": 3.4121, | |
| "num_input_tokens_seen": 1616104, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.27129233804669517, | |
| "grad_norm": 9.670978546142578, | |
| "learning_rate": 4.1469481930214335e-05, | |
| "loss": 3.1002, | |
| "num_input_tokens_seen": 1618920, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.27184040337608245, | |
| "grad_norm": 5.271237850189209, | |
| "learning_rate": 4.1437073256221784e-05, | |
| "loss": 3.1366, | |
| "num_input_tokens_seen": 1622272, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.2723884687054697, | |
| "grad_norm": 6.107699394226074, | |
| "learning_rate": 4.1404615853084626e-05, | |
| "loss": 3.5266, | |
| "num_input_tokens_seen": 1624928, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.27293653403485696, | |
| "grad_norm": 8.945226669311523, | |
| "learning_rate": 4.137210981702568e-05, | |
| "loss": 3.627, | |
| "num_input_tokens_seen": 1628632, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.27348459936424424, | |
| "grad_norm": 5.393161296844482, | |
| "learning_rate": 4.133955524441196e-05, | |
| "loss": 3.6371, | |
| "num_input_tokens_seen": 1631272, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.27403266469363147, | |
| "grad_norm": 7.735115051269531, | |
| "learning_rate": 4.130695223175434e-05, | |
| "loss": 3.4529, | |
| "num_input_tokens_seen": 1634272, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.27458073002301875, | |
| "grad_norm": 9.375452041625977, | |
| "learning_rate": 4.1274300875707295e-05, | |
| "loss": 3.2474, | |
| "num_input_tokens_seen": 1638000, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.27512879535240603, | |
| "grad_norm": 6.957891464233398, | |
| "learning_rate": 4.124160127306864e-05, | |
| "loss": 3.0279, | |
| "num_input_tokens_seen": 1641896, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.27567686068179326, | |
| "grad_norm": 6.637111663818359, | |
| "learning_rate": 4.120885352077922e-05, | |
| "loss": 3.5516, | |
| "num_input_tokens_seen": 1645288, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.27622492601118054, | |
| "grad_norm": 6.921294212341309, | |
| "learning_rate": 4.1176057715922624e-05, | |
| "loss": 3.2415, | |
| "num_input_tokens_seen": 1648800, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.2767729913405678, | |
| "grad_norm": 6.21347713470459, | |
| "learning_rate": 4.114321395572488e-05, | |
| "loss": 3.3217, | |
| "num_input_tokens_seen": 1652416, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.27732105666995505, | |
| "grad_norm": 7.985599040985107, | |
| "learning_rate": 4.111032233755418e-05, | |
| "loss": 3.0362, | |
| "num_input_tokens_seen": 1655720, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.27786912199934233, | |
| "grad_norm": 6.855371952056885, | |
| "learning_rate": 4.107738295892063e-05, | |
| "loss": 3.0962, | |
| "num_input_tokens_seen": 1659440, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.2784171873287296, | |
| "grad_norm": 7.123937129974365, | |
| "learning_rate": 4.104439591747591e-05, | |
| "loss": 3.102, | |
| "num_input_tokens_seen": 1662400, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.27896525265811684, | |
| "grad_norm": 6.53096866607666, | |
| "learning_rate": 4.101136131101297e-05, | |
| "loss": 2.9064, | |
| "num_input_tokens_seen": 1665336, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.2795133179875041, | |
| "grad_norm": 8.0481538772583, | |
| "learning_rate": 4.0978279237465825e-05, | |
| "loss": 3.103, | |
| "num_input_tokens_seen": 1668288, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.2800613833168914, | |
| "grad_norm": 4.704191207885742, | |
| "learning_rate": 4.094514979490917e-05, | |
| "loss": 2.9912, | |
| "num_input_tokens_seen": 1671840, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.2806094486462786, | |
| "grad_norm": 6.396568775177002, | |
| "learning_rate": 4.091197308155814e-05, | |
| "loss": 3.0125, | |
| "num_input_tokens_seen": 1675512, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.2811575139756659, | |
| "grad_norm": 6.377243518829346, | |
| "learning_rate": 4.087874919576801e-05, | |
| "loss": 2.9588, | |
| "num_input_tokens_seen": 1679232, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.2817055793050532, | |
| "grad_norm": 7.850512981414795, | |
| "learning_rate": 4.084547823603391e-05, | |
| "loss": 3.1181, | |
| "num_input_tokens_seen": 1682432, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.2822536446344404, | |
| "grad_norm": 7.351206302642822, | |
| "learning_rate": 4.08121603009905e-05, | |
| "loss": 3.2493, | |
| "num_input_tokens_seen": 1686064, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.2828017099638277, | |
| "grad_norm": 6.765766620635986, | |
| "learning_rate": 4.077879548941172e-05, | |
| "loss": 2.9447, | |
| "num_input_tokens_seen": 1689312, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.283349775293215, | |
| "grad_norm": 6.162474155426025, | |
| "learning_rate": 4.0745383900210514e-05, | |
| "loss": 3.0923, | |
| "num_input_tokens_seen": 1692976, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.2838978406226022, | |
| "grad_norm": 6.094540119171143, | |
| "learning_rate": 4.071192563243843e-05, | |
| "loss": 3.4034, | |
| "num_input_tokens_seen": 1695344, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.2844459059519895, | |
| "grad_norm": 9.006319999694824, | |
| "learning_rate": 4.0678420785285446e-05, | |
| "loss": 3.3876, | |
| "num_input_tokens_seen": 1698336, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.28499397128137677, | |
| "grad_norm": 7.306302070617676, | |
| "learning_rate": 4.064486945807963e-05, | |
| "loss": 2.9591, | |
| "num_input_tokens_seen": 1703912, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.285542036610764, | |
| "grad_norm": 5.706150054931641, | |
| "learning_rate": 4.0611271750286805e-05, | |
| "loss": 3.0137, | |
| "num_input_tokens_seen": 1707664, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.2860901019401513, | |
| "grad_norm": 7.290525436401367, | |
| "learning_rate": 4.057762776151035e-05, | |
| "loss": 3.4755, | |
| "num_input_tokens_seen": 1710832, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.2866381672695385, | |
| "grad_norm": 7.548462867736816, | |
| "learning_rate": 4.054393759149081e-05, | |
| "loss": 3.1482, | |
| "num_input_tokens_seen": 1713616, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.2871862325989258, | |
| "grad_norm": 7.191598415374756, | |
| "learning_rate": 4.051020134010564e-05, | |
| "loss": 3.5189, | |
| "num_input_tokens_seen": 1717328, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.28773429792831307, | |
| "grad_norm": 5.576016426086426, | |
| "learning_rate": 4.0476419107368924e-05, | |
| "loss": 3.1058, | |
| "num_input_tokens_seen": 1720976, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.2882823632577003, | |
| "grad_norm": 5.512149333953857, | |
| "learning_rate": 4.044259099343104e-05, | |
| "loss": 3.3606, | |
| "num_input_tokens_seen": 1723840, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.2888304285870876, | |
| "grad_norm": 6.475109100341797, | |
| "learning_rate": 4.040871709857842e-05, | |
| "loss": 3.2876, | |
| "num_input_tokens_seen": 1726944, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.28937849391647485, | |
| "grad_norm": 6.24223518371582, | |
| "learning_rate": 4.037479752323317e-05, | |
| "loss": 3.2583, | |
| "num_input_tokens_seen": 1730056, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.2899265592458621, | |
| "grad_norm": 7.499751091003418, | |
| "learning_rate": 4.034083236795286e-05, | |
| "loss": 3.6548, | |
| "num_input_tokens_seen": 1733800, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.29047462457524936, | |
| "grad_norm": 5.272352695465088, | |
| "learning_rate": 4.030682173343016e-05, | |
| "loss": 3.345, | |
| "num_input_tokens_seen": 1738176, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.29102268990463664, | |
| "grad_norm": 4.747354030609131, | |
| "learning_rate": 4.027276572049259e-05, | |
| "loss": 2.8691, | |
| "num_input_tokens_seen": 1742088, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.29157075523402387, | |
| "grad_norm": 4.695064544677734, | |
| "learning_rate": 4.0238664430102175e-05, | |
| "loss": 3.3259, | |
| "num_input_tokens_seen": 1746032, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.29211882056341115, | |
| "grad_norm": 5.169468402862549, | |
| "learning_rate": 4.020451796335518e-05, | |
| "loss": 3.193, | |
| "num_input_tokens_seen": 1749336, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.29266688589279843, | |
| "grad_norm": 6.7505340576171875, | |
| "learning_rate": 4.017032642148181e-05, | |
| "loss": 3.1603, | |
| "num_input_tokens_seen": 1752808, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.29321495122218566, | |
| "grad_norm": 8.776106834411621, | |
| "learning_rate": 4.0136089905845874e-05, | |
| "loss": 3.065, | |
| "num_input_tokens_seen": 1756768, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.29376301655157294, | |
| "grad_norm": 5.4388203620910645, | |
| "learning_rate": 4.010180851794453e-05, | |
| "loss": 3.3523, | |
| "num_input_tokens_seen": 1759960, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.2943110818809602, | |
| "grad_norm": 7.309511661529541, | |
| "learning_rate": 4.006748235940796e-05, | |
| "loss": 3.1897, | |
| "num_input_tokens_seen": 1763848, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.29485914721034745, | |
| "grad_norm": 7.108086109161377, | |
| "learning_rate": 4.003311153199908e-05, | |
| "loss": 3.2525, | |
| "num_input_tokens_seen": 1767224, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.29540721253973473, | |
| "grad_norm": 6.940639495849609, | |
| "learning_rate": 3.99986961376132e-05, | |
| "loss": 3.0928, | |
| "num_input_tokens_seen": 1770816, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.295955277869122, | |
| "grad_norm": 8.109939575195312, | |
| "learning_rate": 3.996423627827778e-05, | |
| "loss": 3.2992, | |
| "num_input_tokens_seen": 1775144, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.29650334319850924, | |
| "grad_norm": 8.848753929138184, | |
| "learning_rate": 3.9929732056152104e-05, | |
| "loss": 3.1256, | |
| "num_input_tokens_seen": 1777888, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.2970514085278965, | |
| "grad_norm": 6.489472389221191, | |
| "learning_rate": 3.989518357352695e-05, | |
| "loss": 3.0047, | |
| "num_input_tokens_seen": 1782160, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.2975994738572838, | |
| "grad_norm": 7.247778415679932, | |
| "learning_rate": 3.986059093282433e-05, | |
| "loss": 3.075, | |
| "num_input_tokens_seen": 1784824, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.29814753918667103, | |
| "grad_norm": 7.691065788269043, | |
| "learning_rate": 3.982595423659716e-05, | |
| "loss": 3.4486, | |
| "num_input_tokens_seen": 1788072, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.2986956045160583, | |
| "grad_norm": 7.700766086578369, | |
| "learning_rate": 3.979127358752897e-05, | |
| "loss": 3.4979, | |
| "num_input_tokens_seen": 1790944, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.2992436698454456, | |
| "grad_norm": 5.059070110321045, | |
| "learning_rate": 3.975654908843356e-05, | |
| "loss": 3.305, | |
| "num_input_tokens_seen": 1794368, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.2997917351748328, | |
| "grad_norm": 6.1541595458984375, | |
| "learning_rate": 3.972178084225478e-05, | |
| "loss": 3.2146, | |
| "num_input_tokens_seen": 1798760, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.3003398005042201, | |
| "grad_norm": 8.040989875793457, | |
| "learning_rate": 3.968696895206613e-05, | |
| "loss": 3.482, | |
| "num_input_tokens_seen": 1801512, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.3008878658336074, | |
| "grad_norm": 5.050278186798096, | |
| "learning_rate": 3.9652113521070513e-05, | |
| "loss": 3.3143, | |
| "num_input_tokens_seen": 1805240, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.3014359311629946, | |
| "grad_norm": 5.1891279220581055, | |
| "learning_rate": 3.9617214652599904e-05, | |
| "loss": 2.8368, | |
| "num_input_tokens_seen": 1809040, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.3019839964923819, | |
| "grad_norm": 6.89003849029541, | |
| "learning_rate": 3.958227245011506e-05, | |
| "loss": 3.3205, | |
| "num_input_tokens_seen": 1812536, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.30253206182176917, | |
| "grad_norm": 6.001296043395996, | |
| "learning_rate": 3.954728701720521e-05, | |
| "loss": 3.4753, | |
| "num_input_tokens_seen": 1816296, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.3030801271511564, | |
| "grad_norm": 4.202249050140381, | |
| "learning_rate": 3.951225845758773e-05, | |
| "loss": 3.3659, | |
| "num_input_tokens_seen": 1819896, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.3036281924805437, | |
| "grad_norm": 6.209683418273926, | |
| "learning_rate": 3.9477186875107865e-05, | |
| "loss": 3.5706, | |
| "num_input_tokens_seen": 1823960, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.30417625780993096, | |
| "grad_norm": 5.219339847564697, | |
| "learning_rate": 3.944207237373838e-05, | |
| "loss": 3.121, | |
| "num_input_tokens_seen": 1827176, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.3047243231393182, | |
| "grad_norm": 6.556133270263672, | |
| "learning_rate": 3.940691505757931e-05, | |
| "loss": 3.1289, | |
| "num_input_tokens_seen": 1830016, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.30527238846870547, | |
| "grad_norm": 5.480815887451172, | |
| "learning_rate": 3.9371715030857595e-05, | |
| "loss": 2.8851, | |
| "num_input_tokens_seen": 1833280, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.30582045379809275, | |
| "grad_norm": 4.781624794006348, | |
| "learning_rate": 3.933647239792679e-05, | |
| "loss": 3.066, | |
| "num_input_tokens_seen": 1836784, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.30636851912748, | |
| "grad_norm": 5.901027202606201, | |
| "learning_rate": 3.930118726326678e-05, | |
| "loss": 3.0618, | |
| "num_input_tokens_seen": 1840600, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.30691658445686726, | |
| "grad_norm": 4.3098649978637695, | |
| "learning_rate": 3.926585973148344e-05, | |
| "loss": 3.0273, | |
| "num_input_tokens_seen": 1844456, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.30746464978625454, | |
| "grad_norm": 7.2452521324157715, | |
| "learning_rate": 3.923048990730832e-05, | |
| "loss": 3.3328, | |
| "num_input_tokens_seen": 1847648, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.30801271511564177, | |
| "grad_norm": 9.102137565612793, | |
| "learning_rate": 3.9195077895598385e-05, | |
| "loss": 3.4577, | |
| "num_input_tokens_seen": 1851080, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.30856078044502905, | |
| "grad_norm": 7.165421009063721, | |
| "learning_rate": 3.9159623801335635e-05, | |
| "loss": 3.2345, | |
| "num_input_tokens_seen": 1854544, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.30910884577441633, | |
| "grad_norm": 6.918674468994141, | |
| "learning_rate": 3.912412772962685e-05, | |
| "loss": 3.3151, | |
| "num_input_tokens_seen": 1857488, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.30965691110380356, | |
| "grad_norm": 7.7270660400390625, | |
| "learning_rate": 3.908858978570324e-05, | |
| "loss": 3.0722, | |
| "num_input_tokens_seen": 1859744, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.31020497643319084, | |
| "grad_norm": 5.471165657043457, | |
| "learning_rate": 3.905301007492016e-05, | |
| "loss": 3.3752, | |
| "num_input_tokens_seen": 1862520, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.3107530417625781, | |
| "grad_norm": 8.547778129577637, | |
| "learning_rate": 3.9017388702756766e-05, | |
| "loss": 3.4572, | |
| "num_input_tokens_seen": 1865688, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.31130110709196535, | |
| "grad_norm": 5.8289289474487305, | |
| "learning_rate": 3.898172577481577e-05, | |
| "loss": 3.0442, | |
| "num_input_tokens_seen": 1869008, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.3118491724213526, | |
| "grad_norm": 5.646442413330078, | |
| "learning_rate": 3.894602139682301e-05, | |
| "loss": 3.3365, | |
| "num_input_tokens_seen": 1872200, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.3123972377507399, | |
| "grad_norm": 5.7611565589904785, | |
| "learning_rate": 3.891027567462727e-05, | |
| "loss": 3.0501, | |
| "num_input_tokens_seen": 1874936, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.31294530308012714, | |
| "grad_norm": 6.07964563369751, | |
| "learning_rate": 3.8874488714199874e-05, | |
| "loss": 3.1584, | |
| "num_input_tokens_seen": 1877880, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.3134933684095144, | |
| "grad_norm": 6.76899528503418, | |
| "learning_rate": 3.883866062163439e-05, | |
| "loss": 3.2215, | |
| "num_input_tokens_seen": 1880632, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.3140414337389017, | |
| "grad_norm": 9.11755657196045, | |
| "learning_rate": 3.880279150314636e-05, | |
| "loss": 3.4992, | |
| "num_input_tokens_seen": 1883792, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.3145894990682889, | |
| "grad_norm": 4.672335147857666, | |
| "learning_rate": 3.876688146507291e-05, | |
| "loss": 3.2378, | |
| "num_input_tokens_seen": 1887984, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.3151375643976762, | |
| "grad_norm": 8.21897029876709, | |
| "learning_rate": 3.873093061387251e-05, | |
| "loss": 3.4215, | |
| "num_input_tokens_seen": 1890952, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.3156856297270635, | |
| "grad_norm": 6.4296674728393555, | |
| "learning_rate": 3.869493905612461e-05, | |
| "loss": 3.1436, | |
| "num_input_tokens_seen": 1894376, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.3162336950564507, | |
| "grad_norm": 6.088110446929932, | |
| "learning_rate": 3.8658906898529325e-05, | |
| "loss": 3.1597, | |
| "num_input_tokens_seen": 1897632, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.316781760385838, | |
| "grad_norm": 7.144382953643799, | |
| "learning_rate": 3.8622834247907155e-05, | |
| "loss": 3.3071, | |
| "num_input_tokens_seen": 1899992, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.3173298257152253, | |
| "grad_norm": 5.95371675491333, | |
| "learning_rate": 3.858672121119863e-05, | |
| "loss": 3.1272, | |
| "num_input_tokens_seen": 1902928, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.3178778910446125, | |
| "grad_norm": 5.033254623413086, | |
| "learning_rate": 3.855056789546402e-05, | |
| "loss": 3.5104, | |
| "num_input_tokens_seen": 1905872, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.3184259563739998, | |
| "grad_norm": 9.2310209274292, | |
| "learning_rate": 3.8514374407883e-05, | |
| "loss": 3.22, | |
| "num_input_tokens_seen": 1910456, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.31897402170338707, | |
| "grad_norm": 13.305641174316406, | |
| "learning_rate": 3.847814085575432e-05, | |
| "loss": 3.5537, | |
| "num_input_tokens_seen": 1914432, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.3195220870327743, | |
| "grad_norm": 4.90524959564209, | |
| "learning_rate": 3.844186734649554e-05, | |
| "loss": 3.1428, | |
| "num_input_tokens_seen": 1917176, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.3200701523621616, | |
| "grad_norm": 7.605042457580566, | |
| "learning_rate": 3.840555398764265e-05, | |
| "loss": 2.6933, | |
| "num_input_tokens_seen": 1919488, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.32061821769154886, | |
| "grad_norm": 6.435617923736572, | |
| "learning_rate": 3.836920088684979e-05, | |
| "loss": 3.1942, | |
| "num_input_tokens_seen": 1922184, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.3211662830209361, | |
| "grad_norm": 5.5276288986206055, | |
| "learning_rate": 3.8332808151888906e-05, | |
| "loss": 3.3987, | |
| "num_input_tokens_seen": 1925760, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.32171434835032336, | |
| "grad_norm": 7.981554985046387, | |
| "learning_rate": 3.829637589064946e-05, | |
| "loss": 3.107, | |
| "num_input_tokens_seen": 1928024, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.32226241367971065, | |
| "grad_norm": 6.667475700378418, | |
| "learning_rate": 3.8259904211138074e-05, | |
| "loss": 2.8259, | |
| "num_input_tokens_seen": 1931992, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.3228104790090979, | |
| "grad_norm": 6.904677867889404, | |
| "learning_rate": 3.8223393221478257e-05, | |
| "loss": 3.3099, | |
| "num_input_tokens_seen": 1934432, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.32335854433848515, | |
| "grad_norm": 6.4357008934021, | |
| "learning_rate": 3.818684302991001e-05, | |
| "loss": 3.5156, | |
| "num_input_tokens_seen": 1938288, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.32390660966787244, | |
| "grad_norm": 6.910282611846924, | |
| "learning_rate": 3.8150253744789624e-05, | |
| "loss": 3.7432, | |
| "num_input_tokens_seen": 1941552, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.32445467499725966, | |
| "grad_norm": 6.355223178863525, | |
| "learning_rate": 3.811362547458919e-05, | |
| "loss": 3.3951, | |
| "num_input_tokens_seen": 1944848, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.32500274032664694, | |
| "grad_norm": 5.630364418029785, | |
| "learning_rate": 3.807695832789646e-05, | |
| "loss": 3.1733, | |
| "num_input_tokens_seen": 1947576, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.3255508056560342, | |
| "grad_norm": 7.782848358154297, | |
| "learning_rate": 3.80402524134144e-05, | |
| "loss": 2.9549, | |
| "num_input_tokens_seen": 1950920, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.32609887098542145, | |
| "grad_norm": 6.886142730712891, | |
| "learning_rate": 3.8003507839960895e-05, | |
| "loss": 3.1884, | |
| "num_input_tokens_seen": 1954424, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.32664693631480873, | |
| "grad_norm": 6.035950660705566, | |
| "learning_rate": 3.796672471646848e-05, | |
| "loss": 2.9874, | |
| "num_input_tokens_seen": 1957928, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.327195001644196, | |
| "grad_norm": 8.303248405456543, | |
| "learning_rate": 3.7929903151983934e-05, | |
| "loss": 3.4268, | |
| "num_input_tokens_seen": 1961240, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.32774306697358324, | |
| "grad_norm": 6.161063194274902, | |
| "learning_rate": 3.789304325566801e-05, | |
| "loss": 2.8965, | |
| "num_input_tokens_seen": 1963864, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.3282911323029705, | |
| "grad_norm": 5.629215717315674, | |
| "learning_rate": 3.7856145136795104e-05, | |
| "loss": 3.0241, | |
| "num_input_tokens_seen": 1967656, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.3288391976323578, | |
| "grad_norm": 9.494491577148438, | |
| "learning_rate": 3.781920890475294e-05, | |
| "loss": 3.2297, | |
| "num_input_tokens_seen": 1970608, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.32938726296174503, | |
| "grad_norm": 4.975097179412842, | |
| "learning_rate": 3.7782234669042186e-05, | |
| "loss": 3.1757, | |
| "num_input_tokens_seen": 1973664, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.3299353282911323, | |
| "grad_norm": 7.1082258224487305, | |
| "learning_rate": 3.7745222539276224e-05, | |
| "loss": 3.1921, | |
| "num_input_tokens_seen": 1976944, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.33048339362051954, | |
| "grad_norm": 11.492435455322266, | |
| "learning_rate": 3.770817262518076e-05, | |
| "loss": 3.1751, | |
| "num_input_tokens_seen": 1980160, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.3310314589499068, | |
| "grad_norm": 6.560080051422119, | |
| "learning_rate": 3.76710850365935e-05, | |
| "loss": 3.0906, | |
| "num_input_tokens_seen": 1983576, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.3315795242792941, | |
| "grad_norm": 7.438432216644287, | |
| "learning_rate": 3.763395988346386e-05, | |
| "loss": 3.1074, | |
| "num_input_tokens_seen": 1985784, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.33212758960868133, | |
| "grad_norm": 7.6575164794921875, | |
| "learning_rate": 3.759679727585262e-05, | |
| "loss": 3.1625, | |
| "num_input_tokens_seen": 1989344, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.3326756549380686, | |
| "grad_norm": 6.756874084472656, | |
| "learning_rate": 3.7559597323931566e-05, | |
| "loss": 3.2758, | |
| "num_input_tokens_seen": 1992304, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.3332237202674559, | |
| "grad_norm": 5.427942276000977, | |
| "learning_rate": 3.7522360137983235e-05, | |
| "loss": 3.1905, | |
| "num_input_tokens_seen": 1996120, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.3337717855968431, | |
| "grad_norm": 5.814554691314697, | |
| "learning_rate": 3.748508582840052e-05, | |
| "loss": 2.8693, | |
| "num_input_tokens_seen": 1999176, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.3343198509262304, | |
| "grad_norm": 7.720613956451416, | |
| "learning_rate": 3.744777450568638e-05, | |
| "loss": 3.3644, | |
| "num_input_tokens_seen": 2002112, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.3348679162556177, | |
| "grad_norm": 5.780377388000488, | |
| "learning_rate": 3.7410426280453505e-05, | |
| "loss": 2.8918, | |
| "num_input_tokens_seen": 2005800, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.3354159815850049, | |
| "grad_norm": 5.939544677734375, | |
| "learning_rate": 3.737304126342398e-05, | |
| "loss": 3.0217, | |
| "num_input_tokens_seen": 2009192, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.3359640469143922, | |
| "grad_norm": 6.661081314086914, | |
| "learning_rate": 3.7335619565428964e-05, | |
| "loss": 3.2056, | |
| "num_input_tokens_seen": 2012280, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.33651211224377947, | |
| "grad_norm": 4.9228620529174805, | |
| "learning_rate": 3.729816129740836e-05, | |
| "loss": 3.106, | |
| "num_input_tokens_seen": 2014984, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.3370601775731667, | |
| "grad_norm": 6.285070896148682, | |
| "learning_rate": 3.726066657041051e-05, | |
| "loss": 3.1639, | |
| "num_input_tokens_seen": 2019048, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.337608242902554, | |
| "grad_norm": 6.625104904174805, | |
| "learning_rate": 3.7223135495591776e-05, | |
| "loss": 3.2258, | |
| "num_input_tokens_seen": 2022776, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.33815630823194126, | |
| "grad_norm": 8.347160339355469, | |
| "learning_rate": 3.718556818421636e-05, | |
| "loss": 3.4006, | |
| "num_input_tokens_seen": 2026304, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.3387043735613285, | |
| "grad_norm": 9.37065601348877, | |
| "learning_rate": 3.7147964747655836e-05, | |
| "loss": 3.2778, | |
| "num_input_tokens_seen": 2030200, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.33925243889071577, | |
| "grad_norm": 6.341724872589111, | |
| "learning_rate": 3.711032529738887e-05, | |
| "loss": 3.5654, | |
| "num_input_tokens_seen": 2033656, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.33980050422010305, | |
| "grad_norm": 6.54714298248291, | |
| "learning_rate": 3.7072649945000936e-05, | |
| "loss": 3.0664, | |
| "num_input_tokens_seen": 2037328, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.3403485695494903, | |
| "grad_norm": 6.289731979370117, | |
| "learning_rate": 3.703493880218391e-05, | |
| "loss": 2.8214, | |
| "num_input_tokens_seen": 2040488, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.34089663487887756, | |
| "grad_norm": 8.150530815124512, | |
| "learning_rate": 3.699719198073578e-05, | |
| "loss": 3.2654, | |
| "num_input_tokens_seen": 2043256, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.34144470020826484, | |
| "grad_norm": 7.053910255432129, | |
| "learning_rate": 3.6959409592560304e-05, | |
| "loss": 3.3008, | |
| "num_input_tokens_seen": 2046064, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.34199276553765207, | |
| "grad_norm": 5.083940505981445, | |
| "learning_rate": 3.69215917496667e-05, | |
| "loss": 3.0999, | |
| "num_input_tokens_seen": 2049568, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.34254083086703935, | |
| "grad_norm": 5.558229446411133, | |
| "learning_rate": 3.6883738564169254e-05, | |
| "loss": 3.4491, | |
| "num_input_tokens_seen": 2052400, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.34308889619642663, | |
| "grad_norm": 7.365407466888428, | |
| "learning_rate": 3.684585014828708e-05, | |
| "loss": 3.1569, | |
| "num_input_tokens_seen": 2055864, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.34363696152581386, | |
| "grad_norm": 7.316169738769531, | |
| "learning_rate": 3.680792661434368e-05, | |
| "loss": 3.1274, | |
| "num_input_tokens_seen": 2058856, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.34418502685520114, | |
| "grad_norm": 8.32957935333252, | |
| "learning_rate": 3.676996807476671e-05, | |
| "loss": 2.9842, | |
| "num_input_tokens_seen": 2062056, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.3447330921845884, | |
| "grad_norm": 7.238974094390869, | |
| "learning_rate": 3.673197464208759e-05, | |
| "loss": 3.1055, | |
| "num_input_tokens_seen": 2064760, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.34528115751397565, | |
| "grad_norm": 8.2353515625, | |
| "learning_rate": 3.669394642894118e-05, | |
| "loss": 2.7765, | |
| "num_input_tokens_seen": 2068440, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.3458292228433629, | |
| "grad_norm": 7.214339256286621, | |
| "learning_rate": 3.665588354806545e-05, | |
| "loss": 3.0102, | |
| "num_input_tokens_seen": 2072136, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.3463772881727502, | |
| "grad_norm": 6.484249114990234, | |
| "learning_rate": 3.661778611230114e-05, | |
| "loss": 3.2456, | |
| "num_input_tokens_seen": 2074560, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.34692535350213743, | |
| "grad_norm": 6.298303604125977, | |
| "learning_rate": 3.657965423459145e-05, | |
| "loss": 3.3588, | |
| "num_input_tokens_seen": 2077248, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.3474734188315247, | |
| "grad_norm": 8.595486640930176, | |
| "learning_rate": 3.6541488027981675e-05, | |
| "loss": 2.9303, | |
| "num_input_tokens_seen": 2080160, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.348021484160912, | |
| "grad_norm": 7.8414740562438965, | |
| "learning_rate": 3.650328760561887e-05, | |
| "loss": 3.5767, | |
| "num_input_tokens_seen": 2082320, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.3485695494902992, | |
| "grad_norm": 5.1522908210754395, | |
| "learning_rate": 3.646505308075154e-05, | |
| "loss": 3.1739, | |
| "num_input_tokens_seen": 2085104, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.3491176148196865, | |
| "grad_norm": 9.065922737121582, | |
| "learning_rate": 3.642678456672929e-05, | |
| "loss": 3.3567, | |
| "num_input_tokens_seen": 2087800, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.3496656801490738, | |
| "grad_norm": 11.175498962402344, | |
| "learning_rate": 3.638848217700248e-05, | |
| "loss": 3.3376, | |
| "num_input_tokens_seen": 2090776, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.350213745478461, | |
| "grad_norm": 7.90383768081665, | |
| "learning_rate": 3.63501460251219e-05, | |
| "loss": 2.9388, | |
| "num_input_tokens_seen": 2093152, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.3507618108078483, | |
| "grad_norm": 7.013014316558838, | |
| "learning_rate": 3.6311776224738435e-05, | |
| "loss": 3.0298, | |
| "num_input_tokens_seen": 2096192, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.3513098761372356, | |
| "grad_norm": 4.87260103225708, | |
| "learning_rate": 3.627337288960272e-05, | |
| "loss": 3.3596, | |
| "num_input_tokens_seen": 2100256, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.3518579414666228, | |
| "grad_norm": 7.644909858703613, | |
| "learning_rate": 3.6234936133564823e-05, | |
| "loss": 3.1154, | |
| "num_input_tokens_seen": 2102928, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.3524060067960101, | |
| "grad_norm": 5.678354263305664, | |
| "learning_rate": 3.619646607057386e-05, | |
| "loss": 2.8941, | |
| "num_input_tokens_seen": 2106944, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.35295407212539737, | |
| "grad_norm": 5.123593330383301, | |
| "learning_rate": 3.61579628146777e-05, | |
| "loss": 3.1417, | |
| "num_input_tokens_seen": 2111496, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.3535021374547846, | |
| "grad_norm": 5.542695999145508, | |
| "learning_rate": 3.611942648002265e-05, | |
| "loss": 3.1733, | |
| "num_input_tokens_seen": 2114960, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.3540502027841719, | |
| "grad_norm": 8.204092025756836, | |
| "learning_rate": 3.6080857180853025e-05, | |
| "loss": 3.4422, | |
| "num_input_tokens_seen": 2117528, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.35459826811355916, | |
| "grad_norm": 6.3048014640808105, | |
| "learning_rate": 3.6042255031510895e-05, | |
| "loss": 3.3049, | |
| "num_input_tokens_seen": 2121312, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.3551463334429464, | |
| "grad_norm": 8.287495613098145, | |
| "learning_rate": 3.600362014643573e-05, | |
| "loss": 3.2349, | |
| "num_input_tokens_seen": 2125296, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.35569439877233366, | |
| "grad_norm": 7.690340995788574, | |
| "learning_rate": 3.5964952640164016e-05, | |
| "loss": 3.4982, | |
| "num_input_tokens_seen": 2127944, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.35624246410172095, | |
| "grad_norm": 5.382369518280029, | |
| "learning_rate": 3.592625262732898e-05, | |
| "loss": 3.3248, | |
| "num_input_tokens_seen": 2131200, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.35679052943110817, | |
| "grad_norm": 7.964527606964111, | |
| "learning_rate": 3.58875202226602e-05, | |
| "loss": 3.2188, | |
| "num_input_tokens_seen": 2133648, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.35733859476049545, | |
| "grad_norm": 5.458812236785889, | |
| "learning_rate": 3.5848755540983286e-05, | |
| "loss": 3.3385, | |
| "num_input_tokens_seen": 2136960, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.35788666008988274, | |
| "grad_norm": 7.087930679321289, | |
| "learning_rate": 3.580995869721953e-05, | |
| "loss": 3.0703, | |
| "num_input_tokens_seen": 2140656, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.35843472541926996, | |
| "grad_norm": 6.762202262878418, | |
| "learning_rate": 3.577112980638557e-05, | |
| "loss": 2.9214, | |
| "num_input_tokens_seen": 2143360, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.35898279074865724, | |
| "grad_norm": 6.3621649742126465, | |
| "learning_rate": 3.573226898359308e-05, | |
| "loss": 3.4276, | |
| "num_input_tokens_seen": 2146456, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.3595308560780445, | |
| "grad_norm": 8.797203063964844, | |
| "learning_rate": 3.5693376344048344e-05, | |
| "loss": 3.0474, | |
| "num_input_tokens_seen": 2149336, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.36007892140743175, | |
| "grad_norm": 7.268299579620361, | |
| "learning_rate": 3.5654452003052033e-05, | |
| "loss": 2.8497, | |
| "num_input_tokens_seen": 2152960, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.36062698673681903, | |
| "grad_norm": 8.053544044494629, | |
| "learning_rate": 3.5615496075998744e-05, | |
| "loss": 3.6495, | |
| "num_input_tokens_seen": 2157104, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.3611750520662063, | |
| "grad_norm": 6.6186604499816895, | |
| "learning_rate": 3.5576508678376743e-05, | |
| "loss": 2.9909, | |
| "num_input_tokens_seen": 2159576, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.36172311739559354, | |
| "grad_norm": 6.244167327880859, | |
| "learning_rate": 3.55374899257676e-05, | |
| "loss": 3.064, | |
| "num_input_tokens_seen": 2163112, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.3622711827249808, | |
| "grad_norm": 7.658557891845703, | |
| "learning_rate": 3.549843993384582e-05, | |
| "loss": 3.1039, | |
| "num_input_tokens_seen": 2166048, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.3628192480543681, | |
| "grad_norm": 5.7698140144348145, | |
| "learning_rate": 3.545935881837852e-05, | |
| "loss": 2.9442, | |
| "num_input_tokens_seen": 2169192, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.36336731338375533, | |
| "grad_norm": 6.534774303436279, | |
| "learning_rate": 3.542024669522511e-05, | |
| "loss": 2.9845, | |
| "num_input_tokens_seen": 2172544, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.3639153787131426, | |
| "grad_norm": 5.373234748840332, | |
| "learning_rate": 3.538110368033689e-05, | |
| "loss": 3.0865, | |
| "num_input_tokens_seen": 2176280, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.3644634440425299, | |
| "grad_norm": 6.9778547286987305, | |
| "learning_rate": 3.5341929889756775e-05, | |
| "loss": 3.1341, | |
| "num_input_tokens_seen": 2179792, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.3650115093719171, | |
| "grad_norm": 10.10000991821289, | |
| "learning_rate": 3.530272543961888e-05, | |
| "loss": 3.3558, | |
| "num_input_tokens_seen": 2182776, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.3655595747013044, | |
| "grad_norm": 6.022150993347168, | |
| "learning_rate": 3.526349044614826e-05, | |
| "loss": 3.1005, | |
| "num_input_tokens_seen": 2186112, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.3661076400306917, | |
| "grad_norm": 6.781782150268555, | |
| "learning_rate": 3.522422502566047e-05, | |
| "loss": 3.3438, | |
| "num_input_tokens_seen": 2188600, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.3666557053600789, | |
| "grad_norm": 4.399787425994873, | |
| "learning_rate": 3.51849292945613e-05, | |
| "loss": 3.0477, | |
| "num_input_tokens_seen": 2191600, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.3672037706894662, | |
| "grad_norm": 6.852601528167725, | |
| "learning_rate": 3.51456033693464e-05, | |
| "loss": 2.8756, | |
| "num_input_tokens_seen": 2194544, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.3677518360188535, | |
| "grad_norm": 7.015017509460449, | |
| "learning_rate": 3.510624736660091e-05, | |
| "loss": 3.6253, | |
| "num_input_tokens_seen": 2198296, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.3682999013482407, | |
| "grad_norm": 4.540085792541504, | |
| "learning_rate": 3.506686140299915e-05, | |
| "loss": 2.9568, | |
| "num_input_tokens_seen": 2201384, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.368847966677628, | |
| "grad_norm": 9.393879890441895, | |
| "learning_rate": 3.502744559530426e-05, | |
| "loss": 3.1794, | |
| "num_input_tokens_seen": 2205720, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.36939603200701526, | |
| "grad_norm": 7.7508344650268555, | |
| "learning_rate": 3.498800006036788e-05, | |
| "loss": 3.0188, | |
| "num_input_tokens_seen": 2210344, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.3699440973364025, | |
| "grad_norm": 5.801796913146973, | |
| "learning_rate": 3.4948524915129726e-05, | |
| "loss": 3.1028, | |
| "num_input_tokens_seen": 2213264, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.37049216266578977, | |
| "grad_norm": 6.9859938621521, | |
| "learning_rate": 3.490902027661734e-05, | |
| "loss": 3.5774, | |
| "num_input_tokens_seen": 2216560, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.37104022799517705, | |
| "grad_norm": 5.871939659118652, | |
| "learning_rate": 3.4869486261945695e-05, | |
| "loss": 3.3648, | |
| "num_input_tokens_seen": 2219376, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.3715882933245643, | |
| "grad_norm": 6.051314830780029, | |
| "learning_rate": 3.482992298831682e-05, | |
| "loss": 3.2641, | |
| "num_input_tokens_seen": 2222568, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.37213635865395156, | |
| "grad_norm": 7.149409294128418, | |
| "learning_rate": 3.4790330573019524e-05, | |
| "loss": 3.0127, | |
| "num_input_tokens_seen": 2225232, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.37268442398333884, | |
| "grad_norm": 5.8362650871276855, | |
| "learning_rate": 3.4750709133429e-05, | |
| "loss": 3.2417, | |
| "num_input_tokens_seen": 2228360, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.37323248931272607, | |
| "grad_norm": 6.061380386352539, | |
| "learning_rate": 3.471105878700646e-05, | |
| "loss": 3.4256, | |
| "num_input_tokens_seen": 2231864, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.37378055464211335, | |
| "grad_norm": 7.543921947479248, | |
| "learning_rate": 3.467137965129884e-05, | |
| "loss": 3.1154, | |
| "num_input_tokens_seen": 2234400, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.3743286199715006, | |
| "grad_norm": 4.8110151290893555, | |
| "learning_rate": 3.463167184393843e-05, | |
| "loss": 3.1221, | |
| "num_input_tokens_seen": 2238056, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.37487668530088786, | |
| "grad_norm": 7.194852352142334, | |
| "learning_rate": 3.459193548264248e-05, | |
| "loss": 3.4609, | |
| "num_input_tokens_seen": 2240472, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.37542475063027514, | |
| "grad_norm": 7.457151889801025, | |
| "learning_rate": 3.4552170685212936e-05, | |
| "loss": 3.1907, | |
| "num_input_tokens_seen": 2243944, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.37597281595966237, | |
| "grad_norm": 8.671926498413086, | |
| "learning_rate": 3.4512377569536025e-05, | |
| "loss": 3.0142, | |
| "num_input_tokens_seen": 2246376, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.37652088128904965, | |
| "grad_norm": 6.243984222412109, | |
| "learning_rate": 3.447255625358191e-05, | |
| "loss": 3.094, | |
| "num_input_tokens_seen": 2249288, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.37706894661843693, | |
| "grad_norm": 7.37971830368042, | |
| "learning_rate": 3.443270685540439e-05, | |
| "loss": 3.4606, | |
| "num_input_tokens_seen": 2252536, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.37761701194782415, | |
| "grad_norm": 6.270237445831299, | |
| "learning_rate": 3.43928294931405e-05, | |
| "loss": 3.1928, | |
| "num_input_tokens_seen": 2255576, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.37816507727721144, | |
| "grad_norm": 5.272236347198486, | |
| "learning_rate": 3.435292428501016e-05, | |
| "loss": 3.4196, | |
| "num_input_tokens_seen": 2258456, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.3787131426065987, | |
| "grad_norm": 6.378783226013184, | |
| "learning_rate": 3.431299134931587e-05, | |
| "loss": 3.3069, | |
| "num_input_tokens_seen": 2261160, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.37926120793598594, | |
| "grad_norm": 7.296474456787109, | |
| "learning_rate": 3.427303080444232e-05, | |
| "loss": 3.3306, | |
| "num_input_tokens_seen": 2263808, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.3798092732653732, | |
| "grad_norm": 6.654740333557129, | |
| "learning_rate": 3.423304276885605e-05, | |
| "loss": 2.871, | |
| "num_input_tokens_seen": 2267280, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.3803573385947605, | |
| "grad_norm": 7.27192497253418, | |
| "learning_rate": 3.419302736110508e-05, | |
| "loss": 3.3171, | |
| "num_input_tokens_seen": 2270632, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.38090540392414773, | |
| "grad_norm": 5.948354721069336, | |
| "learning_rate": 3.4152984699818614e-05, | |
| "loss": 3.4794, | |
| "num_input_tokens_seen": 2273960, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.381453469253535, | |
| "grad_norm": 6.537465572357178, | |
| "learning_rate": 3.4112914903706616e-05, | |
| "loss": 3.1609, | |
| "num_input_tokens_seen": 2277568, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.3820015345829223, | |
| "grad_norm": 13.15424919128418, | |
| "learning_rate": 3.4072818091559524e-05, | |
| "loss": 3.0777, | |
| "num_input_tokens_seen": 2279976, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 0.3825495999123095, | |
| "grad_norm": 5.581765174865723, | |
| "learning_rate": 3.403269438224784e-05, | |
| "loss": 3.1242, | |
| "num_input_tokens_seen": 2282912, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.3830976652416968, | |
| "grad_norm": 5.730728626251221, | |
| "learning_rate": 3.3992543894721825e-05, | |
| "loss": 3.2418, | |
| "num_input_tokens_seen": 2286272, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.3836457305710841, | |
| "grad_norm": 9.713155746459961, | |
| "learning_rate": 3.3952366748011114e-05, | |
| "loss": 3.17, | |
| "num_input_tokens_seen": 2289944, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.3841937959004713, | |
| "grad_norm": 6.645389556884766, | |
| "learning_rate": 3.391216306122439e-05, | |
| "loss": 3.3796, | |
| "num_input_tokens_seen": 2292688, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 0.3847418612298586, | |
| "grad_norm": 7.148984432220459, | |
| "learning_rate": 3.3871932953549005e-05, | |
| "loss": 3.282, | |
| "num_input_tokens_seen": 2295584, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.3852899265592459, | |
| "grad_norm": 5.25370979309082, | |
| "learning_rate": 3.3831676544250616e-05, | |
| "loss": 2.9293, | |
| "num_input_tokens_seen": 2298440, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 0.3858379918886331, | |
| "grad_norm": 5.668978214263916, | |
| "learning_rate": 3.3791393952672915e-05, | |
| "loss": 3.0635, | |
| "num_input_tokens_seen": 2301024, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.3863860572180204, | |
| "grad_norm": 4.52470064163208, | |
| "learning_rate": 3.375108529823715e-05, | |
| "loss": 3.0398, | |
| "num_input_tokens_seen": 2304392, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.38693412254740767, | |
| "grad_norm": 5.700072288513184, | |
| "learning_rate": 3.371075070044186e-05, | |
| "loss": 3.0855, | |
| "num_input_tokens_seen": 2307688, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.3874821878767949, | |
| "grad_norm": 5.35679292678833, | |
| "learning_rate": 3.367039027886252e-05, | |
| "loss": 3.2953, | |
| "num_input_tokens_seen": 2312384, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.3880302532061822, | |
| "grad_norm": 6.735170841217041, | |
| "learning_rate": 3.363000415315111e-05, | |
| "loss": 3.1434, | |
| "num_input_tokens_seen": 2315864, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.38857831853556946, | |
| "grad_norm": 6.647335052490234, | |
| "learning_rate": 3.358959244303585e-05, | |
| "loss": 3.2033, | |
| "num_input_tokens_seen": 2319744, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 0.3891263838649567, | |
| "grad_norm": 6.841831684112549, | |
| "learning_rate": 3.354915526832082e-05, | |
| "loss": 3.3414, | |
| "num_input_tokens_seen": 2322856, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.38967444919434396, | |
| "grad_norm": 7.023780822753906, | |
| "learning_rate": 3.350869274888554e-05, | |
| "loss": 3.1525, | |
| "num_input_tokens_seen": 2326016, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.39022251452373125, | |
| "grad_norm": 8.96906852722168, | |
| "learning_rate": 3.3468205004684695e-05, | |
| "loss": 3.2852, | |
| "num_input_tokens_seen": 2330120, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.39077057985311847, | |
| "grad_norm": 7.874572277069092, | |
| "learning_rate": 3.3427692155747766e-05, | |
| "loss": 2.9457, | |
| "num_input_tokens_seen": 2332776, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 0.39131864518250575, | |
| "grad_norm": 6.962822914123535, | |
| "learning_rate": 3.338715432217865e-05, | |
| "loss": 3.0687, | |
| "num_input_tokens_seen": 2336856, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.39186671051189303, | |
| "grad_norm": 6.802676200866699, | |
| "learning_rate": 3.334659162415529e-05, | |
| "loss": 3.6562, | |
| "num_input_tokens_seen": 2339768, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.39241477584128026, | |
| "grad_norm": 7.828624725341797, | |
| "learning_rate": 3.3306004181929375e-05, | |
| "loss": 3.2111, | |
| "num_input_tokens_seen": 2342920, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.39296284117066754, | |
| "grad_norm": 7.1746320724487305, | |
| "learning_rate": 3.326539211582592e-05, | |
| "loss": 3.2333, | |
| "num_input_tokens_seen": 2346656, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.3935109065000548, | |
| "grad_norm": 7.000988006591797, | |
| "learning_rate": 3.3224755546242967e-05, | |
| "loss": 3.3291, | |
| "num_input_tokens_seen": 2351008, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.39405897182944205, | |
| "grad_norm": 6.557620048522949, | |
| "learning_rate": 3.3184094593651196e-05, | |
| "loss": 2.7686, | |
| "num_input_tokens_seen": 2354160, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 0.39460703715882933, | |
| "grad_norm": 7.011937618255615, | |
| "learning_rate": 3.314340937859356e-05, | |
| "loss": 3.4913, | |
| "num_input_tokens_seen": 2357464, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.3951551024882166, | |
| "grad_norm": 6.284838676452637, | |
| "learning_rate": 3.310270002168493e-05, | |
| "loss": 2.835, | |
| "num_input_tokens_seen": 2360488, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 0.39570316781760384, | |
| "grad_norm": 7.415198802947998, | |
| "learning_rate": 3.306196664361178e-05, | |
| "loss": 2.9347, | |
| "num_input_tokens_seen": 2363448, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.3962512331469911, | |
| "grad_norm": 7.382150650024414, | |
| "learning_rate": 3.302120936513177e-05, | |
| "loss": 3.3669, | |
| "num_input_tokens_seen": 2365800, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.3967992984763784, | |
| "grad_norm": 5.894745349884033, | |
| "learning_rate": 3.2980428307073435e-05, | |
| "loss": 2.8094, | |
| "num_input_tokens_seen": 2369016, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.39734736380576563, | |
| "grad_norm": 6.539662837982178, | |
| "learning_rate": 3.29396235903358e-05, | |
| "loss": 3.1544, | |
| "num_input_tokens_seen": 2372144, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.3978954291351529, | |
| "grad_norm": 6.1463799476623535, | |
| "learning_rate": 3.2898795335888005e-05, | |
| "loss": 3.2679, | |
| "num_input_tokens_seen": 2374656, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.3984434944645402, | |
| "grad_norm": 8.810948371887207, | |
| "learning_rate": 3.2857943664769e-05, | |
| "loss": 3.394, | |
| "num_input_tokens_seen": 2378056, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 0.3989915597939274, | |
| "grad_norm": 10.048519134521484, | |
| "learning_rate": 3.2817068698087164e-05, | |
| "loss": 3.4094, | |
| "num_input_tokens_seen": 2380792, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.3995396251233147, | |
| "grad_norm": 8.441570281982422, | |
| "learning_rate": 3.277617055701989e-05, | |
| "loss": 2.9142, | |
| "num_input_tokens_seen": 2383912, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.400087690452702, | |
| "grad_norm": 5.723228931427002, | |
| "learning_rate": 3.273524936281331e-05, | |
| "loss": 3.2162, | |
| "num_input_tokens_seen": 2386592, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.4006357557820892, | |
| "grad_norm": 5.869374752044678, | |
| "learning_rate": 3.2694305236781904e-05, | |
| "loss": 3.301, | |
| "num_input_tokens_seen": 2390144, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 0.4011838211114765, | |
| "grad_norm": 6.342257499694824, | |
| "learning_rate": 3.26533383003081e-05, | |
| "loss": 3.2055, | |
| "num_input_tokens_seen": 2393872, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.4017318864408638, | |
| "grad_norm": 6.534188270568848, | |
| "learning_rate": 3.2612348674841995e-05, | |
| "loss": 3.0935, | |
| "num_input_tokens_seen": 2396648, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 0.402279951770251, | |
| "grad_norm": 7.0050272941589355, | |
| "learning_rate": 3.2571336481900926e-05, | |
| "loss": 3.2582, | |
| "num_input_tokens_seen": 2400328, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.4028280170996383, | |
| "grad_norm": 8.4814453125, | |
| "learning_rate": 3.253030184306912e-05, | |
| "loss": 3.3026, | |
| "num_input_tokens_seen": 2403080, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.40337608242902556, | |
| "grad_norm": 7.716960906982422, | |
| "learning_rate": 3.248924487999737e-05, | |
| "loss": 3.052, | |
| "num_input_tokens_seen": 2406352, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.4039241477584128, | |
| "grad_norm": 6.716127395629883, | |
| "learning_rate": 3.244816571440265e-05, | |
| "loss": 3.2428, | |
| "num_input_tokens_seen": 2409496, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 0.40447221308780007, | |
| "grad_norm": 8.213761329650879, | |
| "learning_rate": 3.240706446806773e-05, | |
| "loss": 2.9107, | |
| "num_input_tokens_seen": 2414032, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.40502027841718735, | |
| "grad_norm": 6.492610931396484, | |
| "learning_rate": 3.236594126284086e-05, | |
| "loss": 3.293, | |
| "num_input_tokens_seen": 2417472, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 0.4055683437465746, | |
| "grad_norm": 6.562194347381592, | |
| "learning_rate": 3.23247962206354e-05, | |
| "loss": 3.4693, | |
| "num_input_tokens_seen": 2420224, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.40611640907596186, | |
| "grad_norm": 6.379699230194092, | |
| "learning_rate": 3.228362946342942e-05, | |
| "loss": 3.2036, | |
| "num_input_tokens_seen": 2425376, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.40666447440534914, | |
| "grad_norm": 8.669161796569824, | |
| "learning_rate": 3.2242441113265395e-05, | |
| "loss": 3.3417, | |
| "num_input_tokens_seen": 2429616, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.40721253973473637, | |
| "grad_norm": 4.813148021697998, | |
| "learning_rate": 3.220123129224979e-05, | |
| "loss": 2.9484, | |
| "num_input_tokens_seen": 2433168, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 0.40776060506412365, | |
| "grad_norm": 6.526965141296387, | |
| "learning_rate": 3.216000012255273e-05, | |
| "loss": 3.5202, | |
| "num_input_tokens_seen": 2435880, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.40830867039351093, | |
| "grad_norm": 7.899510860443115, | |
| "learning_rate": 3.211874772640765e-05, | |
| "loss": 3.2844, | |
| "num_input_tokens_seen": 2439232, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.40885673572289816, | |
| "grad_norm": 6.932427406311035, | |
| "learning_rate": 3.2077474226110866e-05, | |
| "loss": 3.5213, | |
| "num_input_tokens_seen": 2443400, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.40940480105228544, | |
| "grad_norm": 6.4443793296813965, | |
| "learning_rate": 3.203617974402131e-05, | |
| "loss": 3.4504, | |
| "num_input_tokens_seen": 2446448, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.4099528663816727, | |
| "grad_norm": 6.693415641784668, | |
| "learning_rate": 3.199486440256009e-05, | |
| "loss": 3.6388, | |
| "num_input_tokens_seen": 2450016, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.41050093171105995, | |
| "grad_norm": 6.27035665512085, | |
| "learning_rate": 3.195352832421015e-05, | |
| "loss": 3.4589, | |
| "num_input_tokens_seen": 2452584, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 0.41104899704044723, | |
| "grad_norm": 6.987046241760254, | |
| "learning_rate": 3.191217163151593e-05, | |
| "loss": 3.484, | |
| "num_input_tokens_seen": 2455440, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.4115970623698345, | |
| "grad_norm": 5.9024200439453125, | |
| "learning_rate": 3.187079444708296e-05, | |
| "loss": 2.9859, | |
| "num_input_tokens_seen": 2459048, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 0.41214512769922174, | |
| "grad_norm": 5.624914646148682, | |
| "learning_rate": 3.182939689357753e-05, | |
| "loss": 3.317, | |
| "num_input_tokens_seen": 2463488, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.412693193028609, | |
| "grad_norm": 5.933727264404297, | |
| "learning_rate": 3.1787979093726314e-05, | |
| "loss": 3.1318, | |
| "num_input_tokens_seen": 2466560, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.4132412583579963, | |
| "grad_norm": 8.507558822631836, | |
| "learning_rate": 3.1746541170316036e-05, | |
| "loss": 3.5896, | |
| "num_input_tokens_seen": 2469072, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.4137893236873835, | |
| "grad_norm": 6.940069198608398, | |
| "learning_rate": 3.1705083246193015e-05, | |
| "loss": 3.5636, | |
| "num_input_tokens_seen": 2471528, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.4143373890167708, | |
| "grad_norm": 7.710633277893066, | |
| "learning_rate": 3.166360544426293e-05, | |
| "loss": 3.373, | |
| "num_input_tokens_seen": 2474672, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.4148854543461581, | |
| "grad_norm": 6.710258960723877, | |
| "learning_rate": 3.1622107887490354e-05, | |
| "loss": 2.9773, | |
| "num_input_tokens_seen": 2478184, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 0.4154335196755453, | |
| "grad_norm": 6.593062400817871, | |
| "learning_rate": 3.158059069889843e-05, | |
| "loss": 3.1045, | |
| "num_input_tokens_seen": 2481016, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.4159815850049326, | |
| "grad_norm": 8.369247436523438, | |
| "learning_rate": 3.1539054001568493e-05, | |
| "loss": 2.7624, | |
| "num_input_tokens_seen": 2483976, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.4165296503343199, | |
| "grad_norm": 5.184842586517334, | |
| "learning_rate": 3.149749791863974e-05, | |
| "loss": 3.2427, | |
| "num_input_tokens_seen": 2486960, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.4170777156637071, | |
| "grad_norm": 5.449498653411865, | |
| "learning_rate": 3.145592257330881e-05, | |
| "loss": 3.3931, | |
| "num_input_tokens_seen": 2490928, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 0.4176257809930944, | |
| "grad_norm": 7.610599994659424, | |
| "learning_rate": 3.141432808882946e-05, | |
| "loss": 3.3562, | |
| "num_input_tokens_seen": 2494760, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.4181738463224816, | |
| "grad_norm": 6.789968490600586, | |
| "learning_rate": 3.13727145885122e-05, | |
| "loss": 2.823, | |
| "num_input_tokens_seen": 2498352, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 0.4187219116518689, | |
| "grad_norm": 6.654449462890625, | |
| "learning_rate": 3.133108219572388e-05, | |
| "loss": 3.2867, | |
| "num_input_tokens_seen": 2501440, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.4192699769812562, | |
| "grad_norm": 6.487675189971924, | |
| "learning_rate": 3.1289431033887386e-05, | |
| "loss": 3.3113, | |
| "num_input_tokens_seen": 2504560, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.4198180423106434, | |
| "grad_norm": 7.911233901977539, | |
| "learning_rate": 3.1247761226481244e-05, | |
| "loss": 2.8476, | |
| "num_input_tokens_seen": 2507984, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.4203661076400307, | |
| "grad_norm": 7.292878150939941, | |
| "learning_rate": 3.120607289703925e-05, | |
| "loss": 2.9229, | |
| "num_input_tokens_seen": 2511632, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 0.42091417296941797, | |
| "grad_norm": 7.699312686920166, | |
| "learning_rate": 3.11643661691501e-05, | |
| "loss": 3.2728, | |
| "num_input_tokens_seen": 2514512, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.4214622382988052, | |
| "grad_norm": 7.424167156219482, | |
| "learning_rate": 3.112264116645705e-05, | |
| "loss": 3.0013, | |
| "num_input_tokens_seen": 2517840, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 0.4220103036281925, | |
| "grad_norm": 6.991738796234131, | |
| "learning_rate": 3.1080898012657536e-05, | |
| "loss": 2.9434, | |
| "num_input_tokens_seen": 2521296, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.42255836895757976, | |
| "grad_norm": 6.644684314727783, | |
| "learning_rate": 3.103913683150278e-05, | |
| "loss": 3.4346, | |
| "num_input_tokens_seen": 2523800, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 0.423106434286967, | |
| "grad_norm": 6.666325092315674, | |
| "learning_rate": 3.099735774679749e-05, | |
| "loss": 3.2123, | |
| "num_input_tokens_seen": 2526096, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.42365449961635426, | |
| "grad_norm": 9.987031936645508, | |
| "learning_rate": 3.09555608823994e-05, | |
| "loss": 3.2205, | |
| "num_input_tokens_seen": 2528464, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 0.42420256494574154, | |
| "grad_norm": 8.114043235778809, | |
| "learning_rate": 3.091374636221899e-05, | |
| "loss": 3.1648, | |
| "num_input_tokens_seen": 2530808, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.42475063027512877, | |
| "grad_norm": 7.4291229248046875, | |
| "learning_rate": 3.087191431021908e-05, | |
| "loss": 2.874, | |
| "num_input_tokens_seen": 2534400, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.42529869560451605, | |
| "grad_norm": 6.414401054382324, | |
| "learning_rate": 3.083006485041444e-05, | |
| "loss": 3.0927, | |
| "num_input_tokens_seen": 2538584, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.42584676093390333, | |
| "grad_norm": 12.14594554901123, | |
| "learning_rate": 3.078819810687147e-05, | |
| "loss": 3.1133, | |
| "num_input_tokens_seen": 2542184, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.42639482626329056, | |
| "grad_norm": 6.391221046447754, | |
| "learning_rate": 3.074631420370779e-05, | |
| "loss": 3.0244, | |
| "num_input_tokens_seen": 2545592, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.42694289159267784, | |
| "grad_norm": 6.802542686462402, | |
| "learning_rate": 3.0704413265091916e-05, | |
| "loss": 3.2812, | |
| "num_input_tokens_seen": 2548816, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 0.4274909569220651, | |
| "grad_norm": 7.281493186950684, | |
| "learning_rate": 3.066249541524285e-05, | |
| "loss": 3.3321, | |
| "num_input_tokens_seen": 2552352, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.42803902225145235, | |
| "grad_norm": 6.2967047691345215, | |
| "learning_rate": 3.0620560778429736e-05, | |
| "loss": 3.1571, | |
| "num_input_tokens_seen": 2556072, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 0.42858708758083963, | |
| "grad_norm": 5.46196174621582, | |
| "learning_rate": 3.0578609478971474e-05, | |
| "loss": 2.9312, | |
| "num_input_tokens_seen": 2559680, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.4291351529102269, | |
| "grad_norm": 6.703193664550781, | |
| "learning_rate": 3.0536641641236366e-05, | |
| "loss": 3.1173, | |
| "num_input_tokens_seen": 2564072, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.42968321823961414, | |
| "grad_norm": 6.250140190124512, | |
| "learning_rate": 3.0494657389641763e-05, | |
| "loss": 2.8173, | |
| "num_input_tokens_seen": 2567848, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.4302312835690014, | |
| "grad_norm": 8.19283676147461, | |
| "learning_rate": 3.0452656848653643e-05, | |
| "loss": 3.1555, | |
| "num_input_tokens_seen": 2570760, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.4307793488983887, | |
| "grad_norm": 4.393120288848877, | |
| "learning_rate": 3.041064014278629e-05, | |
| "loss": 3.3082, | |
| "num_input_tokens_seen": 2574112, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.43132741422777593, | |
| "grad_norm": 7.910434246063232, | |
| "learning_rate": 3.036860739660193e-05, | |
| "loss": 3.0528, | |
| "num_input_tokens_seen": 2578144, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 0.4318754795571632, | |
| "grad_norm": 8.536887168884277, | |
| "learning_rate": 3.0326558734710304e-05, | |
| "loss": 3.224, | |
| "num_input_tokens_seen": 2581008, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.4324235448865505, | |
| "grad_norm": 5.810432434082031, | |
| "learning_rate": 3.028449428176836e-05, | |
| "loss": 3.2157, | |
| "num_input_tokens_seen": 2583616, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 0.4329716102159377, | |
| "grad_norm": 7.819321632385254, | |
| "learning_rate": 3.024241416247987e-05, | |
| "loss": 3.3845, | |
| "num_input_tokens_seen": 2587680, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.433519675545325, | |
| "grad_norm": 7.583765506744385, | |
| "learning_rate": 3.0200318501595028e-05, | |
| "loss": 3.4347, | |
| "num_input_tokens_seen": 2590536, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 0.4340677408747123, | |
| "grad_norm": 6.201939105987549, | |
| "learning_rate": 3.01582074239101e-05, | |
| "loss": 3.0368, | |
| "num_input_tokens_seen": 2593560, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.4346158062040995, | |
| "grad_norm": 6.4165425300598145, | |
| "learning_rate": 3.0116081054267086e-05, | |
| "loss": 3.1866, | |
| "num_input_tokens_seen": 2597464, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 0.4351638715334868, | |
| "grad_norm": 5.670197486877441, | |
| "learning_rate": 3.007393951755329e-05, | |
| "loss": 3.1721, | |
| "num_input_tokens_seen": 2600616, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.43571193686287407, | |
| "grad_norm": 6.542341709136963, | |
| "learning_rate": 3.0031782938701004e-05, | |
| "loss": 3.1902, | |
| "num_input_tokens_seen": 2603832, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.4362600021922613, | |
| "grad_norm": 11.36231803894043, | |
| "learning_rate": 2.9989611442687087e-05, | |
| "loss": 3.1505, | |
| "num_input_tokens_seen": 2607032, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.4368080675216486, | |
| "grad_norm": 8.223766326904297, | |
| "learning_rate": 2.994742515453264e-05, | |
| "loss": 3.2596, | |
| "num_input_tokens_seen": 2609848, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 0.43735613285103586, | |
| "grad_norm": 6.220792770385742, | |
| "learning_rate": 2.9905224199302612e-05, | |
| "loss": 3.105, | |
| "num_input_tokens_seen": 2613072, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.4379041981804231, | |
| "grad_norm": 9.295598983764648, | |
| "learning_rate": 2.9863008702105444e-05, | |
| "loss": 3.5309, | |
| "num_input_tokens_seen": 2617216, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 0.43845226350981037, | |
| "grad_norm": 7.482667446136475, | |
| "learning_rate": 2.9820778788092662e-05, | |
| "loss": 3.0894, | |
| "num_input_tokens_seen": 2620440, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.43900032883919765, | |
| "grad_norm": 8.263635635375977, | |
| "learning_rate": 2.9778534582458563e-05, | |
| "loss": 3.2592, | |
| "num_input_tokens_seen": 2624136, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 0.4395483941685849, | |
| "grad_norm": 6.1141180992126465, | |
| "learning_rate": 2.973627621043979e-05, | |
| "loss": 2.9611, | |
| "num_input_tokens_seen": 2628416, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.44009645949797216, | |
| "grad_norm": 5.068775653839111, | |
| "learning_rate": 2.969400379731499e-05, | |
| "loss": 3.2408, | |
| "num_input_tokens_seen": 2632360, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 0.44064452482735944, | |
| "grad_norm": 4.8074049949646, | |
| "learning_rate": 2.965171746840445e-05, | |
| "loss": 3.3503, | |
| "num_input_tokens_seen": 2635144, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.44119259015674667, | |
| "grad_norm": 5.924848556518555, | |
| "learning_rate": 2.9609417349069685e-05, | |
| "loss": 2.8347, | |
| "num_input_tokens_seen": 2638880, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.44174065548613395, | |
| "grad_norm": 6.371955871582031, | |
| "learning_rate": 2.9567103564713107e-05, | |
| "loss": 3.0076, | |
| "num_input_tokens_seen": 2642200, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.44228872081552123, | |
| "grad_norm": 6.616983890533447, | |
| "learning_rate": 2.952477624077764e-05, | |
| "loss": 3.1063, | |
| "num_input_tokens_seen": 2647008, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 0.44283678614490846, | |
| "grad_norm": 6.057950973510742, | |
| "learning_rate": 2.9482435502746363e-05, | |
| "loss": 2.9816, | |
| "num_input_tokens_seen": 2649824, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.44338485147429574, | |
| "grad_norm": 5.292036533355713, | |
| "learning_rate": 2.944008147614208e-05, | |
| "loss": 2.9774, | |
| "num_input_tokens_seen": 2652424, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 0.443932916803683, | |
| "grad_norm": 6.374473571777344, | |
| "learning_rate": 2.9397714286527034e-05, | |
| "loss": 2.9106, | |
| "num_input_tokens_seen": 2655792, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.44448098213307025, | |
| "grad_norm": 5.729962348937988, | |
| "learning_rate": 2.9355334059502472e-05, | |
| "loss": 3.1529, | |
| "num_input_tokens_seen": 2658608, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 0.4450290474624575, | |
| "grad_norm": 8.748932838439941, | |
| "learning_rate": 2.9312940920708277e-05, | |
| "loss": 3.236, | |
| "num_input_tokens_seen": 2661312, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.4455771127918448, | |
| "grad_norm": 8.778289794921875, | |
| "learning_rate": 2.927053499582264e-05, | |
| "loss": 3.1197, | |
| "num_input_tokens_seen": 2665256, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 0.44612517812123204, | |
| "grad_norm": 8.748550415039062, | |
| "learning_rate": 2.922811641056164e-05, | |
| "loss": 3.2486, | |
| "num_input_tokens_seen": 2669288, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.4466732434506193, | |
| "grad_norm": 5.559131145477295, | |
| "learning_rate": 2.9185685290678888e-05, | |
| "loss": 2.9932, | |
| "num_input_tokens_seen": 2672312, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.4472213087800066, | |
| "grad_norm": 5.6860575675964355, | |
| "learning_rate": 2.9143241761965155e-05, | |
| "loss": 3.1337, | |
| "num_input_tokens_seen": 2676312, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.4477693741093938, | |
| "grad_norm": 7.295080184936523, | |
| "learning_rate": 2.9100785950248015e-05, | |
| "loss": 2.9724, | |
| "num_input_tokens_seen": 2679592, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 0.4483174394387811, | |
| "grad_norm": 9.514237403869629, | |
| "learning_rate": 2.9058317981391437e-05, | |
| "loss": 3.1765, | |
| "num_input_tokens_seen": 2682472, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.4488655047681684, | |
| "grad_norm": 7.216882705688477, | |
| "learning_rate": 2.901583798129543e-05, | |
| "loss": 3.3707, | |
| "num_input_tokens_seen": 2685328, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.4494135700975556, | |
| "grad_norm": 7.9535298347473145, | |
| "learning_rate": 2.8973346075895695e-05, | |
| "loss": 3.4585, | |
| "num_input_tokens_seen": 2688080, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.4499616354269429, | |
| "grad_norm": 7.782059669494629, | |
| "learning_rate": 2.8930842391163192e-05, | |
| "loss": 2.9516, | |
| "num_input_tokens_seen": 2691112, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 0.4505097007563302, | |
| "grad_norm": 6.065903186798096, | |
| "learning_rate": 2.8888327053103836e-05, | |
| "loss": 3.0919, | |
| "num_input_tokens_seen": 2694328, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.4510577660857174, | |
| "grad_norm": 6.912715435028076, | |
| "learning_rate": 2.884580018775807e-05, | |
| "loss": 2.9052, | |
| "num_input_tokens_seen": 2696856, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 0.4516058314151047, | |
| "grad_norm": 8.30929946899414, | |
| "learning_rate": 2.8803261921200503e-05, | |
| "loss": 3.3268, | |
| "num_input_tokens_seen": 2699968, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.45215389674449197, | |
| "grad_norm": 8.51347541809082, | |
| "learning_rate": 2.8760712379539567e-05, | |
| "loss": 3.3617, | |
| "num_input_tokens_seen": 2702416, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.4527019620738792, | |
| "grad_norm": 6.167294979095459, | |
| "learning_rate": 2.8718151688917105e-05, | |
| "loss": 3.1805, | |
| "num_input_tokens_seen": 2705440, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.4532500274032665, | |
| "grad_norm": 8.299149513244629, | |
| "learning_rate": 2.867557997550801e-05, | |
| "loss": 3.2122, | |
| "num_input_tokens_seen": 2708248, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 0.45379809273265376, | |
| "grad_norm": 8.19796085357666, | |
| "learning_rate": 2.8632997365519877e-05, | |
| "loss": 3.0817, | |
| "num_input_tokens_seen": 2712464, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.454346158062041, | |
| "grad_norm": 6.964700698852539, | |
| "learning_rate": 2.859040398519256e-05, | |
| "loss": 3.4051, | |
| "num_input_tokens_seen": 2715048, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 0.45489422339142827, | |
| "grad_norm": 6.310876846313477, | |
| "learning_rate": 2.8547799960797883e-05, | |
| "loss": 2.7846, | |
| "num_input_tokens_seen": 2718192, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.45544228872081555, | |
| "grad_norm": 6.786360263824463, | |
| "learning_rate": 2.8505185418639212e-05, | |
| "loss": 2.829, | |
| "num_input_tokens_seen": 2722064, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.4559903540502028, | |
| "grad_norm": 7.1503520011901855, | |
| "learning_rate": 2.8462560485051098e-05, | |
| "loss": 2.9883, | |
| "num_input_tokens_seen": 2725640, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.45653841937959005, | |
| "grad_norm": 5.350907802581787, | |
| "learning_rate": 2.841992528639888e-05, | |
| "loss": 3.0743, | |
| "num_input_tokens_seen": 2729992, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 0.45708648470897734, | |
| "grad_norm": 5.482122421264648, | |
| "learning_rate": 2.837727994907835e-05, | |
| "loss": 3.2459, | |
| "num_input_tokens_seen": 2733424, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.45763455003836456, | |
| "grad_norm": 4.941489219665527, | |
| "learning_rate": 2.833462459951534e-05, | |
| "loss": 3.2963, | |
| "num_input_tokens_seen": 2736656, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.45818261536775184, | |
| "grad_norm": 10.229253768920898, | |
| "learning_rate": 2.8291959364165387e-05, | |
| "loss": 3.2607, | |
| "num_input_tokens_seen": 2739808, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.4587306806971391, | |
| "grad_norm": 5.911849498748779, | |
| "learning_rate": 2.824928436951332e-05, | |
| "loss": 3.3887, | |
| "num_input_tokens_seen": 2742752, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.45927874602652635, | |
| "grad_norm": 6.14879846572876, | |
| "learning_rate": 2.8206599742072883e-05, | |
| "loss": 3.0095, | |
| "num_input_tokens_seen": 2746256, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.45982681135591363, | |
| "grad_norm": 6.8150529861450195, | |
| "learning_rate": 2.8163905608386415e-05, | |
| "loss": 3.0599, | |
| "num_input_tokens_seen": 2750736, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 0.4603748766853009, | |
| "grad_norm": 5.578204154968262, | |
| "learning_rate": 2.812120209502441e-05, | |
| "loss": 3.4177, | |
| "num_input_tokens_seen": 2753832, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.46092294201468814, | |
| "grad_norm": 7.075170040130615, | |
| "learning_rate": 2.8078489328585184e-05, | |
| "loss": 3.2787, | |
| "num_input_tokens_seen": 2757176, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 0.4614710073440754, | |
| "grad_norm": 7.633877754211426, | |
| "learning_rate": 2.803576743569447e-05, | |
| "loss": 3.2838, | |
| "num_input_tokens_seen": 2760632, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.46201907267346265, | |
| "grad_norm": 7.296063423156738, | |
| "learning_rate": 2.7993036543005073e-05, | |
| "loss": 3.2533, | |
| "num_input_tokens_seen": 2763160, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 0.46256713800284993, | |
| "grad_norm": 9.778048515319824, | |
| "learning_rate": 2.7950296777196454e-05, | |
| "loss": 3.2876, | |
| "num_input_tokens_seen": 2766304, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.4631152033322372, | |
| "grad_norm": 6.1279826164245605, | |
| "learning_rate": 2.7907548264974408e-05, | |
| "loss": 3.3613, | |
| "num_input_tokens_seen": 2769112, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.46366326866162444, | |
| "grad_norm": 7.0411458015441895, | |
| "learning_rate": 2.7864791133070655e-05, | |
| "loss": 2.9218, | |
| "num_input_tokens_seen": 2773120, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.4642113339910117, | |
| "grad_norm": 7.575366497039795, | |
| "learning_rate": 2.782202550824244e-05, | |
| "loss": 2.7816, | |
| "num_input_tokens_seen": 2775712, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 0.464759399320399, | |
| "grad_norm": 4.21223258972168, | |
| "learning_rate": 2.777925151727222e-05, | |
| "loss": 2.913, | |
| "num_input_tokens_seen": 2778872, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.46530746464978623, | |
| "grad_norm": 7.198635101318359, | |
| "learning_rate": 2.7736469286967244e-05, | |
| "loss": 3.3944, | |
| "num_input_tokens_seen": 2783424, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 0.4658555299791735, | |
| "grad_norm": 6.785750389099121, | |
| "learning_rate": 2.7693678944159168e-05, | |
| "loss": 3.0493, | |
| "num_input_tokens_seen": 2787720, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.4664035953085608, | |
| "grad_norm": 5.799097061157227, | |
| "learning_rate": 2.7650880615703735e-05, | |
| "loss": 3.043, | |
| "num_input_tokens_seen": 2790528, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 0.466951660637948, | |
| "grad_norm": 5.558688163757324, | |
| "learning_rate": 2.760807442848033e-05, | |
| "loss": 3.0476, | |
| "num_input_tokens_seen": 2794088, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.4674997259673353, | |
| "grad_norm": 7.959995269775391, | |
| "learning_rate": 2.7565260509391644e-05, | |
| "loss": 3.3705, | |
| "num_input_tokens_seen": 2797168, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 0.4680477912967226, | |
| "grad_norm": 5.836214542388916, | |
| "learning_rate": 2.7522438985363297e-05, | |
| "loss": 3.1173, | |
| "num_input_tokens_seen": 2799752, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.4685958566261098, | |
| "grad_norm": 5.6099348068237305, | |
| "learning_rate": 2.7479609983343457e-05, | |
| "loss": 3.4298, | |
| "num_input_tokens_seen": 2803560, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.4691439219554971, | |
| "grad_norm": 6.971024513244629, | |
| "learning_rate": 2.7436773630302448e-05, | |
| "loss": 3.4299, | |
| "num_input_tokens_seen": 2806360, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.46969198728488437, | |
| "grad_norm": 5.738091945648193, | |
| "learning_rate": 2.7393930053232393e-05, | |
| "loss": 3.0872, | |
| "num_input_tokens_seen": 2809408, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 0.4702400526142716, | |
| "grad_norm": 10.746182441711426, | |
| "learning_rate": 2.7351079379146844e-05, | |
| "loss": 3.5487, | |
| "num_input_tokens_seen": 2812752, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.4707881179436589, | |
| "grad_norm": 6.557742595672607, | |
| "learning_rate": 2.7308221735080363e-05, | |
| "loss": 3.1006, | |
| "num_input_tokens_seen": 2816432, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 0.47133618327304616, | |
| "grad_norm": 7.124549865722656, | |
| "learning_rate": 2.726535724808821e-05, | |
| "loss": 3.2491, | |
| "num_input_tokens_seen": 2819608, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.4718842486024334, | |
| "grad_norm": 8.328391075134277, | |
| "learning_rate": 2.7222486045245905e-05, | |
| "loss": 2.9571, | |
| "num_input_tokens_seen": 2822304, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.47243231393182067, | |
| "grad_norm": 8.121037483215332, | |
| "learning_rate": 2.717960825364888e-05, | |
| "loss": 3.0946, | |
| "num_input_tokens_seen": 2826112, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.47298037926120795, | |
| "grad_norm": 7.5214715003967285, | |
| "learning_rate": 2.7136724000412122e-05, | |
| "loss": 3.2682, | |
| "num_input_tokens_seen": 2829640, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 0.4735284445905952, | |
| "grad_norm": 5.765413761138916, | |
| "learning_rate": 2.709383341266975e-05, | |
| "loss": 3.3871, | |
| "num_input_tokens_seen": 2832536, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.47407650991998246, | |
| "grad_norm": 7.573315143585205, | |
| "learning_rate": 2.7050936617574674e-05, | |
| "loss": 3.0505, | |
| "num_input_tokens_seen": 2835312, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.47462457524936974, | |
| "grad_norm": 5.444807052612305, | |
| "learning_rate": 2.70080337422982e-05, | |
| "loss": 3.1385, | |
| "num_input_tokens_seen": 2839520, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.47517264057875697, | |
| "grad_norm": 5.842774868011475, | |
| "learning_rate": 2.696512491402967e-05, | |
| "loss": 3.0295, | |
| "num_input_tokens_seen": 2842096, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 0.47572070590814425, | |
| "grad_norm": 6.1106157302856445, | |
| "learning_rate": 2.692221025997606e-05, | |
| "loss": 3.0393, | |
| "num_input_tokens_seen": 2845424, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.47626877123753153, | |
| "grad_norm": 7.988515377044678, | |
| "learning_rate": 2.687928990736163e-05, | |
| "loss": 3.3657, | |
| "num_input_tokens_seen": 2847648, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 0.47681683656691876, | |
| "grad_norm": 7.0514655113220215, | |
| "learning_rate": 2.683636398342753e-05, | |
| "loss": 3.4438, | |
| "num_input_tokens_seen": 2850432, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.47736490189630604, | |
| "grad_norm": 5.54784631729126, | |
| "learning_rate": 2.6793432615431406e-05, | |
| "loss": 2.9583, | |
| "num_input_tokens_seen": 2854176, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 0.4779129672256933, | |
| "grad_norm": 6.001830577850342, | |
| "learning_rate": 2.6750495930647083e-05, | |
| "loss": 3.4694, | |
| "num_input_tokens_seen": 2857368, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.47846103255508055, | |
| "grad_norm": 7.455556392669678, | |
| "learning_rate": 2.670755405636412e-05, | |
| "loss": 3.0839, | |
| "num_input_tokens_seen": 2860064, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 0.4790090978844678, | |
| "grad_norm": 6.409590721130371, | |
| "learning_rate": 2.6664607119887462e-05, | |
| "loss": 3.0962, | |
| "num_input_tokens_seen": 2863128, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.4795571632138551, | |
| "grad_norm": 5.903439044952393, | |
| "learning_rate": 2.6621655248537075e-05, | |
| "loss": 3.0613, | |
| "num_input_tokens_seen": 2866720, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.48010522854324233, | |
| "grad_norm": 7.286397457122803, | |
| "learning_rate": 2.657869856964754e-05, | |
| "loss": 2.9673, | |
| "num_input_tokens_seen": 2869568, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.4806532938726296, | |
| "grad_norm": 7.941439151763916, | |
| "learning_rate": 2.6535737210567707e-05, | |
| "loss": 3.3656, | |
| "num_input_tokens_seen": 2874584, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 0.4812013592020169, | |
| "grad_norm": 3.8733413219451904, | |
| "learning_rate": 2.6492771298660286e-05, | |
| "loss": 2.8012, | |
| "num_input_tokens_seen": 2879248, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.4817494245314041, | |
| "grad_norm": 4.492478370666504, | |
| "learning_rate": 2.6449800961301485e-05, | |
| "loss": 2.9495, | |
| "num_input_tokens_seen": 2882824, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 0.4822974898607914, | |
| "grad_norm": 7.726132392883301, | |
| "learning_rate": 2.640682632588064e-05, | |
| "loss": 3.1087, | |
| "num_input_tokens_seen": 2886440, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.4828455551901787, | |
| "grad_norm": 6.549642562866211, | |
| "learning_rate": 2.6363847519799822e-05, | |
| "loss": 2.985, | |
| "num_input_tokens_seen": 2889808, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 0.4833936205195659, | |
| "grad_norm": 8.789740562438965, | |
| "learning_rate": 2.632086467047348e-05, | |
| "loss": 3.1352, | |
| "num_input_tokens_seen": 2893680, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.4839416858489532, | |
| "grad_norm": 8.024590492248535, | |
| "learning_rate": 2.6277877905328023e-05, | |
| "loss": 3.3008, | |
| "num_input_tokens_seen": 2895872, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 0.4844897511783405, | |
| "grad_norm": 6.235259532928467, | |
| "learning_rate": 2.623488735180149e-05, | |
| "loss": 3.1758, | |
| "num_input_tokens_seen": 2898680, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.4850378165077277, | |
| "grad_norm": 7.674651145935059, | |
| "learning_rate": 2.619189313734316e-05, | |
| "loss": 2.9519, | |
| "num_input_tokens_seen": 2903496, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.485585881837115, | |
| "grad_norm": 5.884274959564209, | |
| "learning_rate": 2.614889538941313e-05, | |
| "loss": 3.3259, | |
| "num_input_tokens_seen": 2906248, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.48613394716650227, | |
| "grad_norm": 5.681421279907227, | |
| "learning_rate": 2.610589423548201e-05, | |
| "loss": 3.4432, | |
| "num_input_tokens_seen": 2909352, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 0.4866820124958895, | |
| "grad_norm": 8.08205795288086, | |
| "learning_rate": 2.6062889803030477e-05, | |
| "loss": 3.6165, | |
| "num_input_tokens_seen": 2911960, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.4872300778252768, | |
| "grad_norm": 7.7329277992248535, | |
| "learning_rate": 2.601988221954894e-05, | |
| "loss": 3.2172, | |
| "num_input_tokens_seen": 2915256, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 0.48777814315466406, | |
| "grad_norm": 6.208625793457031, | |
| "learning_rate": 2.5976871612537164e-05, | |
| "loss": 3.2373, | |
| "num_input_tokens_seen": 2919040, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.4883262084840513, | |
| "grad_norm": 8.127032279968262, | |
| "learning_rate": 2.593385810950386e-05, | |
| "loss": 2.9402, | |
| "num_input_tokens_seen": 2922272, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.48887427381343856, | |
| "grad_norm": 6.481329441070557, | |
| "learning_rate": 2.589084183796632e-05, | |
| "loss": 3.0208, | |
| "num_input_tokens_seen": 2926072, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.48942233914282585, | |
| "grad_norm": 6.350535869598389, | |
| "learning_rate": 2.5847822925450055e-05, | |
| "loss": 3.1026, | |
| "num_input_tokens_seen": 2928760, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 0.4899704044722131, | |
| "grad_norm": 7.3511457443237305, | |
| "learning_rate": 2.5804801499488407e-05, | |
| "loss": 2.9358, | |
| "num_input_tokens_seen": 2932088, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.49051846980160035, | |
| "grad_norm": 5.9759521484375, | |
| "learning_rate": 2.576177768762216e-05, | |
| "loss": 3.1564, | |
| "num_input_tokens_seen": 2935272, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.49106653513098764, | |
| "grad_norm": 7.138418674468994, | |
| "learning_rate": 2.5718751617399182e-05, | |
| "loss": 3.0998, | |
| "num_input_tokens_seen": 2938280, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.49161460046037486, | |
| "grad_norm": 10.551050186157227, | |
| "learning_rate": 2.5675723416374026e-05, | |
| "loss": 3.1874, | |
| "num_input_tokens_seen": 2941648, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 0.49216266578976214, | |
| "grad_norm": 6.085887432098389, | |
| "learning_rate": 2.5632693212107567e-05, | |
| "loss": 2.8506, | |
| "num_input_tokens_seen": 2944680, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.4927107311191494, | |
| "grad_norm": 6.314172267913818, | |
| "learning_rate": 2.5589661132166613e-05, | |
| "loss": 2.8206, | |
| "num_input_tokens_seen": 2948744, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 0.49325879644853665, | |
| "grad_norm": 6.3680853843688965, | |
| "learning_rate": 2.5546627304123545e-05, | |
| "loss": 2.85, | |
| "num_input_tokens_seen": 2951256, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.49380686177792393, | |
| "grad_norm": 6.314942359924316, | |
| "learning_rate": 2.5503591855555908e-05, | |
| "loss": 3.2021, | |
| "num_input_tokens_seen": 2954536, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 0.4943549271073112, | |
| "grad_norm": 6.349035739898682, | |
| "learning_rate": 2.546055491404607e-05, | |
| "loss": 2.9022, | |
| "num_input_tokens_seen": 2958112, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.49490299243669844, | |
| "grad_norm": 6.812668800354004, | |
| "learning_rate": 2.5417516607180825e-05, | |
| "loss": 3.2304, | |
| "num_input_tokens_seen": 2961024, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.4954510577660857, | |
| "grad_norm": 4.483590126037598, | |
| "learning_rate": 2.5374477062550984e-05, | |
| "loss": 2.8489, | |
| "num_input_tokens_seen": 2964344, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.495999123095473, | |
| "grad_norm": 6.769683837890625, | |
| "learning_rate": 2.5331436407751074e-05, | |
| "loss": 3.1946, | |
| "num_input_tokens_seen": 2967608, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.49654718842486023, | |
| "grad_norm": 9.059048652648926, | |
| "learning_rate": 2.528839477037887e-05, | |
| "loss": 3.2895, | |
| "num_input_tokens_seen": 2970488, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.4970952537542475, | |
| "grad_norm": 9.555692672729492, | |
| "learning_rate": 2.5245352278035095e-05, | |
| "loss": 3.0595, | |
| "num_input_tokens_seen": 2973200, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 0.4976433190836348, | |
| "grad_norm": 8.808011054992676, | |
| "learning_rate": 2.520230905832298e-05, | |
| "loss": 3.1939, | |
| "num_input_tokens_seen": 2976576, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.498191384413022, | |
| "grad_norm": 7.059693336486816, | |
| "learning_rate": 2.515926523884792e-05, | |
| "loss": 3.3154, | |
| "num_input_tokens_seen": 2980624, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 0.4987394497424093, | |
| "grad_norm": 5.0204973220825195, | |
| "learning_rate": 2.5116220947217107e-05, | |
| "loss": 3.2012, | |
| "num_input_tokens_seen": 2983328, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.4992875150717966, | |
| "grad_norm": 8.473772048950195, | |
| "learning_rate": 2.507317631103911e-05, | |
| "loss": 3.3448, | |
| "num_input_tokens_seen": 2986664, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 0.4998355804011838, | |
| "grad_norm": 5.891829490661621, | |
| "learning_rate": 2.5030131457923512e-05, | |
| "loss": 3.0624, | |
| "num_input_tokens_seen": 2990088, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.500383645730571, | |
| "grad_norm": 8.812019348144531, | |
| "learning_rate": 2.498708651548057e-05, | |
| "loss": 3.1606, | |
| "num_input_tokens_seen": 2993152, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 0.5009317110599584, | |
| "grad_norm": 6.772736549377441, | |
| "learning_rate": 2.494404161132079e-05, | |
| "loss": 2.6401, | |
| "num_input_tokens_seen": 2996104, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.5014797763893456, | |
| "grad_norm": 6.640130996704102, | |
| "learning_rate": 2.490099687305455e-05, | |
| "loss": 2.8047, | |
| "num_input_tokens_seen": 3000664, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.5020278417187328, | |
| "grad_norm": 8.050363540649414, | |
| "learning_rate": 2.485795242829177e-05, | |
| "loss": 2.9757, | |
| "num_input_tokens_seen": 3004312, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.5025759070481202, | |
| "grad_norm": 7.689075469970703, | |
| "learning_rate": 2.481490840464147e-05, | |
| "loss": 3.6823, | |
| "num_input_tokens_seen": 3008056, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 0.5031239723775074, | |
| "grad_norm": 7.890453815460205, | |
| "learning_rate": 2.4771864929711414e-05, | |
| "loss": 3.5555, | |
| "num_input_tokens_seen": 3010640, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.5036720377068946, | |
| "grad_norm": 8.07981014251709, | |
| "learning_rate": 2.4728822131107784e-05, | |
| "loss": 2.9504, | |
| "num_input_tokens_seen": 3013752, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 0.504220103036282, | |
| "grad_norm": 5.753955364227295, | |
| "learning_rate": 2.468578013643472e-05, | |
| "loss": 3.1703, | |
| "num_input_tokens_seen": 3016248, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.5047681683656692, | |
| "grad_norm": 5.296700954437256, | |
| "learning_rate": 2.4642739073293978e-05, | |
| "loss": 2.8482, | |
| "num_input_tokens_seen": 3019256, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 0.5053162336950564, | |
| "grad_norm": 11.357376098632812, | |
| "learning_rate": 2.459969906928458e-05, | |
| "loss": 2.8125, | |
| "num_input_tokens_seen": 3021936, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.5058642990244437, | |
| "grad_norm": 9.2806396484375, | |
| "learning_rate": 2.4556660252002384e-05, | |
| "loss": 3.1294, | |
| "num_input_tokens_seen": 3025888, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 0.506412364353831, | |
| "grad_norm": 7.156399250030518, | |
| "learning_rate": 2.451362274903973e-05, | |
| "loss": 3.202, | |
| "num_input_tokens_seen": 3029752, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.5069604296832182, | |
| "grad_norm": 7.298778533935547, | |
| "learning_rate": 2.4470586687985077e-05, | |
| "loss": 3.2958, | |
| "num_input_tokens_seen": 3033576, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.5075084950126055, | |
| "grad_norm": 7.478179454803467, | |
| "learning_rate": 2.4427552196422602e-05, | |
| "loss": 3.1416, | |
| "num_input_tokens_seen": 3037016, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.5080565603419928, | |
| "grad_norm": 8.109244346618652, | |
| "learning_rate": 2.438451940193181e-05, | |
| "loss": 2.7633, | |
| "num_input_tokens_seen": 3040640, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 0.50860462567138, | |
| "grad_norm": 6.991682052612305, | |
| "learning_rate": 2.434148843208722e-05, | |
| "loss": 2.9995, | |
| "num_input_tokens_seen": 3043424, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.5091526910007673, | |
| "grad_norm": 5.315702438354492, | |
| "learning_rate": 2.4298459414457896e-05, | |
| "loss": 2.9122, | |
| "num_input_tokens_seen": 3046672, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 0.5097007563301545, | |
| "grad_norm": 8.090765953063965, | |
| "learning_rate": 2.425543247660713e-05, | |
| "loss": 3.3741, | |
| "num_input_tokens_seen": 3049736, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.5102488216595418, | |
| "grad_norm": 9.288080215454102, | |
| "learning_rate": 2.4212407746092066e-05, | |
| "loss": 3.4609, | |
| "num_input_tokens_seen": 3053656, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 0.5107968869889291, | |
| "grad_norm": 5.754721164703369, | |
| "learning_rate": 2.4169385350463282e-05, | |
| "loss": 2.9946, | |
| "num_input_tokens_seen": 3056144, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.5113449523183163, | |
| "grad_norm": 6.588372230529785, | |
| "learning_rate": 2.412636541726444e-05, | |
| "loss": 3.0074, | |
| "num_input_tokens_seen": 3059712, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 0.5118930176477036, | |
| "grad_norm": 7.401770114898682, | |
| "learning_rate": 2.4083348074031904e-05, | |
| "loss": 3.4029, | |
| "num_input_tokens_seen": 3062288, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.5124410829770909, | |
| "grad_norm": 5.612600803375244, | |
| "learning_rate": 2.4040333448294364e-05, | |
| "loss": 3.2012, | |
| "num_input_tokens_seen": 3065728, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.5129891483064781, | |
| "grad_norm": 5.925127983093262, | |
| "learning_rate": 2.399732166757243e-05, | |
| "loss": 3.0461, | |
| "num_input_tokens_seen": 3068632, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.5135372136358654, | |
| "grad_norm": 8.738677978515625, | |
| "learning_rate": 2.3954312859378325e-05, | |
| "loss": 3.4782, | |
| "num_input_tokens_seen": 3070968, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 0.5140852789652527, | |
| "grad_norm": 9.27092170715332, | |
| "learning_rate": 2.3911307151215413e-05, | |
| "loss": 3.2625, | |
| "num_input_tokens_seen": 3074696, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.5146333442946399, | |
| "grad_norm": 5.855086326599121, | |
| "learning_rate": 2.3868304670577886e-05, | |
| "loss": 3.045, | |
| "num_input_tokens_seen": 3078584, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 0.5151814096240271, | |
| "grad_norm": 8.794078826904297, | |
| "learning_rate": 2.3825305544950374e-05, | |
| "loss": 2.7209, | |
| "num_input_tokens_seen": 3081624, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.5157294749534145, | |
| "grad_norm": 7.675835132598877, | |
| "learning_rate": 2.3782309901807555e-05, | |
| "loss": 3.3431, | |
| "num_input_tokens_seen": 3084152, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 0.5162775402828017, | |
| "grad_norm": 7.583930969238281, | |
| "learning_rate": 2.3739317868613776e-05, | |
| "loss": 3.1141, | |
| "num_input_tokens_seen": 3087040, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.5168256056121889, | |
| "grad_norm": 7.561563968658447, | |
| "learning_rate": 2.369632957282269e-05, | |
| "loss": 3.4023, | |
| "num_input_tokens_seen": 3090352, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 0.5173736709415763, | |
| "grad_norm": 6.868551254272461, | |
| "learning_rate": 2.365334514187687e-05, | |
| "loss": 3.0766, | |
| "num_input_tokens_seen": 3093552, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.5179217362709635, | |
| "grad_norm": 5.663219928741455, | |
| "learning_rate": 2.3610364703207432e-05, | |
| "loss": 3.1136, | |
| "num_input_tokens_seen": 3097168, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.5184698016003507, | |
| "grad_norm": 7.611098766326904, | |
| "learning_rate": 2.3567388384233648e-05, | |
| "loss": 3.0911, | |
| "num_input_tokens_seen": 3101648, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.5190178669297381, | |
| "grad_norm": 6.850576877593994, | |
| "learning_rate": 2.352441631236259e-05, | |
| "loss": 2.9311, | |
| "num_input_tokens_seen": 3105888, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 0.5195659322591253, | |
| "grad_norm": 5.57901668548584, | |
| "learning_rate": 2.348144861498873e-05, | |
| "loss": 3.0239, | |
| "num_input_tokens_seen": 3110648, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.5201139975885125, | |
| "grad_norm": 6.950675010681152, | |
| "learning_rate": 2.343848541949356e-05, | |
| "loss": 3.053, | |
| "num_input_tokens_seen": 3113400, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 0.5206620629178998, | |
| "grad_norm": 5.661995887756348, | |
| "learning_rate": 2.3395526853245264e-05, | |
| "loss": 3.2619, | |
| "num_input_tokens_seen": 3117000, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.5212101282472871, | |
| "grad_norm": 6.956995010375977, | |
| "learning_rate": 2.3352573043598267e-05, | |
| "loss": 3.6572, | |
| "num_input_tokens_seen": 3121664, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 0.5217581935766743, | |
| "grad_norm": 4.707006454467773, | |
| "learning_rate": 2.3309624117892885e-05, | |
| "loss": 2.9066, | |
| "num_input_tokens_seen": 3124872, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.5223062589060616, | |
| "grad_norm": 5.503338813781738, | |
| "learning_rate": 2.3266680203455004e-05, | |
| "loss": 3.2066, | |
| "num_input_tokens_seen": 3128760, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 0.5228543242354489, | |
| "grad_norm": 7.054602146148682, | |
| "learning_rate": 2.322374142759561e-05, | |
| "loss": 2.8683, | |
| "num_input_tokens_seen": 3131480, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.5234023895648361, | |
| "grad_norm": 8.06494140625, | |
| "learning_rate": 2.318080791761046e-05, | |
| "loss": 3.2634, | |
| "num_input_tokens_seen": 3135040, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.5239504548942234, | |
| "grad_norm": 8.718894958496094, | |
| "learning_rate": 2.313787980077972e-05, | |
| "loss": 3.3735, | |
| "num_input_tokens_seen": 3137816, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.5244985202236107, | |
| "grad_norm": 6.601426124572754, | |
| "learning_rate": 2.309495720436755e-05, | |
| "loss": 3.0622, | |
| "num_input_tokens_seen": 3141752, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 0.5250465855529979, | |
| "grad_norm": 7.08184814453125, | |
| "learning_rate": 2.305204025562174e-05, | |
| "loss": 2.6361, | |
| "num_input_tokens_seen": 3144792, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.5255946508823852, | |
| "grad_norm": 8.298012733459473, | |
| "learning_rate": 2.3009129081773366e-05, | |
| "loss": 2.8071, | |
| "num_input_tokens_seen": 3147904, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 0.5261427162117724, | |
| "grad_norm": 7.070413589477539, | |
| "learning_rate": 2.2966223810036357e-05, | |
| "loss": 3.2667, | |
| "num_input_tokens_seen": 3150344, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.5266907815411597, | |
| "grad_norm": 8.037806510925293, | |
| "learning_rate": 2.292332456760714e-05, | |
| "loss": 3.3148, | |
| "num_input_tokens_seen": 3154328, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 0.527238846870547, | |
| "grad_norm": 5.284430980682373, | |
| "learning_rate": 2.2880431481664306e-05, | |
| "loss": 2.6196, | |
| "num_input_tokens_seen": 3157392, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.5277869121999342, | |
| "grad_norm": 7.804793357849121, | |
| "learning_rate": 2.283754467936815e-05, | |
| "loss": 2.9899, | |
| "num_input_tokens_seen": 3160304, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 0.5283349775293215, | |
| "grad_norm": 8.394335746765137, | |
| "learning_rate": 2.279466428786035e-05, | |
| "loss": 3.2071, | |
| "num_input_tokens_seen": 3163736, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.5288830428587088, | |
| "grad_norm": 6.269372463226318, | |
| "learning_rate": 2.2751790434263608e-05, | |
| "loss": 3.1003, | |
| "num_input_tokens_seen": 3166368, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 0.529431108188096, | |
| "grad_norm": 7.112332820892334, | |
| "learning_rate": 2.2708923245681203e-05, | |
| "loss": 3.2725, | |
| "num_input_tokens_seen": 3169960, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.5299791735174832, | |
| "grad_norm": 8.58667278289795, | |
| "learning_rate": 2.266606284919667e-05, | |
| "loss": 2.7479, | |
| "num_input_tokens_seen": 3172744, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 0.5305272388468706, | |
| "grad_norm": 7.745898723602295, | |
| "learning_rate": 2.262320937187344e-05, | |
| "loss": 3.4911, | |
| "num_input_tokens_seen": 3175984, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.5310753041762578, | |
| "grad_norm": 6.885601997375488, | |
| "learning_rate": 2.258036294075438e-05, | |
| "loss": 2.8831, | |
| "num_input_tokens_seen": 3178800, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 0.531623369505645, | |
| "grad_norm": 6.387146472930908, | |
| "learning_rate": 2.2537523682861484e-05, | |
| "loss": 3.0745, | |
| "num_input_tokens_seen": 3182328, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.5321714348350324, | |
| "grad_norm": 4.868107795715332, | |
| "learning_rate": 2.249469172519551e-05, | |
| "loss": 3.0048, | |
| "num_input_tokens_seen": 3185912, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 0.5327195001644196, | |
| "grad_norm": 8.075777053833008, | |
| "learning_rate": 2.2451867194735542e-05, | |
| "loss": 3.3234, | |
| "num_input_tokens_seen": 3189352, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.5332675654938068, | |
| "grad_norm": 5.830811500549316, | |
| "learning_rate": 2.2409050218438645e-05, | |
| "loss": 3.0588, | |
| "num_input_tokens_seen": 3193072, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 0.5338156308231942, | |
| "grad_norm": 5.349551200866699, | |
| "learning_rate": 2.2366240923239514e-05, | |
| "loss": 2.7223, | |
| "num_input_tokens_seen": 3196104, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.5343636961525814, | |
| "grad_norm": 8.454142570495605, | |
| "learning_rate": 2.2323439436050054e-05, | |
| "loss": 3.1157, | |
| "num_input_tokens_seen": 3198648, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.5349117614819686, | |
| "grad_norm": 7.110290050506592, | |
| "learning_rate": 2.2280645883759006e-05, | |
| "loss": 3.0379, | |
| "num_input_tokens_seen": 3201056, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.535459826811356, | |
| "grad_norm": 5.1915154457092285, | |
| "learning_rate": 2.2237860393231634e-05, | |
| "loss": 3.575, | |
| "num_input_tokens_seen": 3203712, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 0.5360078921407432, | |
| "grad_norm": 8.497429847717285, | |
| "learning_rate": 2.219508309130927e-05, | |
| "loss": 2.9379, | |
| "num_input_tokens_seen": 3206288, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.5365559574701304, | |
| "grad_norm": 8.26462173461914, | |
| "learning_rate": 2.2152314104808956e-05, | |
| "loss": 3.1587, | |
| "num_input_tokens_seen": 3209928, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 0.5371040227995177, | |
| "grad_norm": 6.499933242797852, | |
| "learning_rate": 2.210955356052313e-05, | |
| "loss": 2.9181, | |
| "num_input_tokens_seen": 3213336, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.537652088128905, | |
| "grad_norm": 5.8398590087890625, | |
| "learning_rate": 2.2066801585219156e-05, | |
| "loss": 2.8303, | |
| "num_input_tokens_seen": 3216464, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 0.5382001534582922, | |
| "grad_norm": 6.813495635986328, | |
| "learning_rate": 2.2024058305639015e-05, | |
| "loss": 2.9079, | |
| "num_input_tokens_seen": 3221256, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.5387482187876795, | |
| "grad_norm": 8.064513206481934, | |
| "learning_rate": 2.198132384849891e-05, | |
| "loss": 3.2373, | |
| "num_input_tokens_seen": 3224320, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 0.5392962841170668, | |
| "grad_norm": 7.14154577255249, | |
| "learning_rate": 2.1938598340488886e-05, | |
| "loss": 3.0737, | |
| "num_input_tokens_seen": 3227128, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.539844349446454, | |
| "grad_norm": 6.514719009399414, | |
| "learning_rate": 2.1895881908272446e-05, | |
| "loss": 2.8825, | |
| "num_input_tokens_seen": 3230352, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 0.5403924147758413, | |
| "grad_norm": 7.076175212860107, | |
| "learning_rate": 2.1853174678486213e-05, | |
| "loss": 2.8721, | |
| "num_input_tokens_seen": 3234440, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.5409404801052285, | |
| "grad_norm": 5.526149749755859, | |
| "learning_rate": 2.1810476777739508e-05, | |
| "loss": 3.1112, | |
| "num_input_tokens_seen": 3238176, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 0.5414885454346158, | |
| "grad_norm": 8.458449363708496, | |
| "learning_rate": 2.176778833261399e-05, | |
| "loss": 3.2798, | |
| "num_input_tokens_seen": 3241728, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.5420366107640031, | |
| "grad_norm": 7.216832160949707, | |
| "learning_rate": 2.1725109469663318e-05, | |
| "loss": 3.1847, | |
| "num_input_tokens_seen": 3244416, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 0.5425846760933903, | |
| "grad_norm": 5.6720147132873535, | |
| "learning_rate": 2.168244031541271e-05, | |
| "loss": 3.4552, | |
| "num_input_tokens_seen": 3247816, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.5431327414227776, | |
| "grad_norm": 7.452066898345947, | |
| "learning_rate": 2.163978099635861e-05, | |
| "loss": 2.958, | |
| "num_input_tokens_seen": 3250432, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 0.5436808067521649, | |
| "grad_norm": 6.589701175689697, | |
| "learning_rate": 2.159713163896832e-05, | |
| "loss": 3.4633, | |
| "num_input_tokens_seen": 3253376, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.5442288720815521, | |
| "grad_norm": 4.926830768585205, | |
| "learning_rate": 2.1554492369679598e-05, | |
| "loss": 3.0458, | |
| "num_input_tokens_seen": 3257640, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 0.5447769374109394, | |
| "grad_norm": 8.084177017211914, | |
| "learning_rate": 2.1511863314900275e-05, | |
| "loss": 2.992, | |
| "num_input_tokens_seen": 3261952, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.5453250027403267, | |
| "grad_norm": 5.291374683380127, | |
| "learning_rate": 2.146924460100795e-05, | |
| "loss": 2.5116, | |
| "num_input_tokens_seen": 3265912, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 0.5458730680697139, | |
| "grad_norm": 9.101826667785645, | |
| "learning_rate": 2.1426636354349523e-05, | |
| "loss": 3.0809, | |
| "num_input_tokens_seen": 3269624, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.5464211333991011, | |
| "grad_norm": 9.933355331420898, | |
| "learning_rate": 2.1384038701240865e-05, | |
| "loss": 2.6956, | |
| "num_input_tokens_seen": 3273112, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 0.5469691987284885, | |
| "grad_norm": 8.288704872131348, | |
| "learning_rate": 2.1341451767966475e-05, | |
| "loss": 3.319, | |
| "num_input_tokens_seen": 3275624, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.5475172640578757, | |
| "grad_norm": 6.39847469329834, | |
| "learning_rate": 2.129887568077904e-05, | |
| "loss": 3.0552, | |
| "num_input_tokens_seen": 3279792, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 0.5480653293872629, | |
| "grad_norm": 6.739533424377441, | |
| "learning_rate": 2.12563105658991e-05, | |
| "loss": 3.1218, | |
| "num_input_tokens_seen": 3283560, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5486133947166503, | |
| "grad_norm": 7.888918399810791, | |
| "learning_rate": 2.1213756549514674e-05, | |
| "loss": 3.0369, | |
| "num_input_tokens_seen": 3286504, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 0.5491614600460375, | |
| "grad_norm": 6.957367897033691, | |
| "learning_rate": 2.1171213757780873e-05, | |
| "loss": 2.9968, | |
| "num_input_tokens_seen": 3289512, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.5497095253754247, | |
| "grad_norm": 6.351596355438232, | |
| "learning_rate": 2.1128682316819522e-05, | |
| "loss": 3.0657, | |
| "num_input_tokens_seen": 3293512, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 0.5502575907048121, | |
| "grad_norm": 7.056116104125977, | |
| "learning_rate": 2.1086162352718825e-05, | |
| "loss": 3.029, | |
| "num_input_tokens_seen": 3298024, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.5508056560341993, | |
| "grad_norm": 6.343071937561035, | |
| "learning_rate": 2.1043653991532934e-05, | |
| "loss": 2.8398, | |
| "num_input_tokens_seen": 3301000, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.5513537213635865, | |
| "grad_norm": 8.5012788772583, | |
| "learning_rate": 2.1001157359281605e-05, | |
| "loss": 3.1406, | |
| "num_input_tokens_seen": 3304064, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.5519017866929739, | |
| "grad_norm": 5.8669819831848145, | |
| "learning_rate": 2.095867258194984e-05, | |
| "loss": 2.7844, | |
| "num_input_tokens_seen": 3308616, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 0.5524498520223611, | |
| "grad_norm": 6.373290061950684, | |
| "learning_rate": 2.0916199785487488e-05, | |
| "loss": 3.346, | |
| "num_input_tokens_seen": 3312128, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.5529979173517483, | |
| "grad_norm": 7.038343906402588, | |
| "learning_rate": 2.0873739095808865e-05, | |
| "loss": 3.1385, | |
| "num_input_tokens_seen": 3315040, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 0.5535459826811356, | |
| "grad_norm": 7.340169429779053, | |
| "learning_rate": 2.083129063879242e-05, | |
| "loss": 2.9194, | |
| "num_input_tokens_seen": 3319432, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.5540940480105229, | |
| "grad_norm": 5.199733734130859, | |
| "learning_rate": 2.0788854540280315e-05, | |
| "loss": 3.5487, | |
| "num_input_tokens_seen": 3322568, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 0.5546421133399101, | |
| "grad_norm": 7.935201168060303, | |
| "learning_rate": 2.0746430926078086e-05, | |
| "loss": 2.8886, | |
| "num_input_tokens_seen": 3325536, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.5551901786692974, | |
| "grad_norm": 7.43034029006958, | |
| "learning_rate": 2.0704019921954264e-05, | |
| "loss": 3.0405, | |
| "num_input_tokens_seen": 3329312, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 0.5557382439986847, | |
| "grad_norm": 5.411002159118652, | |
| "learning_rate": 2.0661621653639987e-05, | |
| "loss": 3.1599, | |
| "num_input_tokens_seen": 3333232, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.5562863093280719, | |
| "grad_norm": 8.897222518920898, | |
| "learning_rate": 2.0619236246828622e-05, | |
| "loss": 2.8413, | |
| "num_input_tokens_seen": 3336312, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.5568343746574592, | |
| "grad_norm": 8.512425422668457, | |
| "learning_rate": 2.0576863827175447e-05, | |
| "loss": 2.9528, | |
| "num_input_tokens_seen": 3339344, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.5573824399868464, | |
| "grad_norm": 7.003962516784668, | |
| "learning_rate": 2.0534504520297203e-05, | |
| "loss": 3.3579, | |
| "num_input_tokens_seen": 3342520, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 0.5579305053162337, | |
| "grad_norm": 6.14302396774292, | |
| "learning_rate": 2.0492158451771767e-05, | |
| "loss": 3.3721, | |
| "num_input_tokens_seen": 3346272, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.558478570645621, | |
| "grad_norm": 8.199108123779297, | |
| "learning_rate": 2.0449825747137778e-05, | |
| "loss": 2.9852, | |
| "num_input_tokens_seen": 3350232, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 0.5590266359750082, | |
| "grad_norm": 7.849426746368408, | |
| "learning_rate": 2.0407506531894245e-05, | |
| "loss": 3.1338, | |
| "num_input_tokens_seen": 3353144, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.5595747013043955, | |
| "grad_norm": 6.752470016479492, | |
| "learning_rate": 2.0365200931500177e-05, | |
| "loss": 2.9589, | |
| "num_input_tokens_seen": 3356952, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 0.5601227666337828, | |
| "grad_norm": 7.846312046051025, | |
| "learning_rate": 2.0322909071374265e-05, | |
| "loss": 3.2629, | |
| "num_input_tokens_seen": 3360424, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.56067083196317, | |
| "grad_norm": 6.629732131958008, | |
| "learning_rate": 2.028063107689442e-05, | |
| "loss": 3.2232, | |
| "num_input_tokens_seen": 3363544, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 0.5612188972925573, | |
| "grad_norm": 7.26005220413208, | |
| "learning_rate": 2.023836707339745e-05, | |
| "loss": 3.2771, | |
| "num_input_tokens_seen": 3366664, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.5617669626219446, | |
| "grad_norm": 7.383485317230225, | |
| "learning_rate": 2.0196117186178727e-05, | |
| "loss": 2.8273, | |
| "num_input_tokens_seen": 3369848, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.5623150279513318, | |
| "grad_norm": 7.374210357666016, | |
| "learning_rate": 2.015388154049173e-05, | |
| "loss": 3.2708, | |
| "num_input_tokens_seen": 3373208, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.562863093280719, | |
| "grad_norm": 6.803157329559326, | |
| "learning_rate": 2.0111660261547728e-05, | |
| "loss": 3.1036, | |
| "num_input_tokens_seen": 3376872, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 0.5634111586101064, | |
| "grad_norm": 6.192258358001709, | |
| "learning_rate": 2.006945347451541e-05, | |
| "loss": 3.0572, | |
| "num_input_tokens_seen": 3382136, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.5639592239394936, | |
| "grad_norm": 9.468875885009766, | |
| "learning_rate": 2.00272613045205e-05, | |
| "loss": 3.2346, | |
| "num_input_tokens_seen": 3385456, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 0.5645072892688808, | |
| "grad_norm": 6.274002552032471, | |
| "learning_rate": 1.9985083876645368e-05, | |
| "loss": 3.1731, | |
| "num_input_tokens_seen": 3388976, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.5650553545982682, | |
| "grad_norm": 5.550570487976074, | |
| "learning_rate": 1.994292131592872e-05, | |
| "loss": 3.2257, | |
| "num_input_tokens_seen": 3392736, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 0.5656034199276554, | |
| "grad_norm": 8.218210220336914, | |
| "learning_rate": 1.990077374736515e-05, | |
| "loss": 3.0855, | |
| "num_input_tokens_seen": 3396128, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.5661514852570426, | |
| "grad_norm": 7.721156597137451, | |
| "learning_rate": 1.9858641295904813e-05, | |
| "loss": 2.9721, | |
| "num_input_tokens_seen": 3399376, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 0.56669955058643, | |
| "grad_norm": 6.2414231300354, | |
| "learning_rate": 1.981652408645307e-05, | |
| "loss": 3.3822, | |
| "num_input_tokens_seen": 3401928, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.5672476159158172, | |
| "grad_norm": 8.496658325195312, | |
| "learning_rate": 1.9774422243870078e-05, | |
| "loss": 3.0474, | |
| "num_input_tokens_seen": 3404744, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.5677956812452044, | |
| "grad_norm": 7.224369049072266, | |
| "learning_rate": 1.9732335892970427e-05, | |
| "loss": 3.259, | |
| "num_input_tokens_seen": 3407824, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.5683437465745917, | |
| "grad_norm": 9.386946678161621, | |
| "learning_rate": 1.969026515852281e-05, | |
| "loss": 3.0473, | |
| "num_input_tokens_seen": 3410608, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 0.568891811903979, | |
| "grad_norm": 8.189655303955078, | |
| "learning_rate": 1.96482101652496e-05, | |
| "loss": 3.3926, | |
| "num_input_tokens_seen": 3413592, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.5694398772333662, | |
| "grad_norm": 6.405150890350342, | |
| "learning_rate": 1.9606171037826502e-05, | |
| "loss": 2.9921, | |
| "num_input_tokens_seen": 3417320, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 0.5699879425627535, | |
| "grad_norm": 6.89292573928833, | |
| "learning_rate": 1.9564147900882213e-05, | |
| "loss": 2.9261, | |
| "num_input_tokens_seen": 3420888, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.5705360078921408, | |
| "grad_norm": 6.517080307006836, | |
| "learning_rate": 1.9522140878997995e-05, | |
| "loss": 3.3255, | |
| "num_input_tokens_seen": 3424336, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 0.571084073221528, | |
| "grad_norm": 8.910572052001953, | |
| "learning_rate": 1.9480150096707344e-05, | |
| "loss": 2.9723, | |
| "num_input_tokens_seen": 3428120, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.5716321385509152, | |
| "grad_norm": 8.455070495605469, | |
| "learning_rate": 1.943817567849563e-05, | |
| "loss": 3.0703, | |
| "num_input_tokens_seen": 3430880, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 0.5721802038803026, | |
| "grad_norm": 6.948888778686523, | |
| "learning_rate": 1.9396217748799682e-05, | |
| "loss": 2.9862, | |
| "num_input_tokens_seen": 3435560, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.5727282692096898, | |
| "grad_norm": 6.147201061248779, | |
| "learning_rate": 1.935427643200746e-05, | |
| "loss": 3.0719, | |
| "num_input_tokens_seen": 3438352, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 0.573276334539077, | |
| "grad_norm": 7.213772773742676, | |
| "learning_rate": 1.9312351852457686e-05, | |
| "loss": 2.9474, | |
| "num_input_tokens_seen": 3441216, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.5738243998684643, | |
| "grad_norm": 6.16003942489624, | |
| "learning_rate": 1.9270444134439434e-05, | |
| "loss": 3.0849, | |
| "num_input_tokens_seen": 3444944, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 0.5743724651978516, | |
| "grad_norm": 7.64081335067749, | |
| "learning_rate": 1.9228553402191822e-05, | |
| "loss": 3.0799, | |
| "num_input_tokens_seen": 3449568, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.5749205305272388, | |
| "grad_norm": 7.353094577789307, | |
| "learning_rate": 1.91866797799036e-05, | |
| "loss": 3.3501, | |
| "num_input_tokens_seen": 3452544, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 0.5754685958566261, | |
| "grad_norm": 7.696213722229004, | |
| "learning_rate": 1.9144823391712785e-05, | |
| "loss": 3.2286, | |
| "num_input_tokens_seen": 3455600, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.5760166611860134, | |
| "grad_norm": 9.90982723236084, | |
| "learning_rate": 1.91029843617063e-05, | |
| "loss": 3.3799, | |
| "num_input_tokens_seen": 3458728, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 0.5765647265154006, | |
| "grad_norm": 6.676484107971191, | |
| "learning_rate": 1.9061162813919637e-05, | |
| "loss": 3.2611, | |
| "num_input_tokens_seen": 3461888, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.5771127918447879, | |
| "grad_norm": 6.546321868896484, | |
| "learning_rate": 1.9019358872336428e-05, | |
| "loss": 2.9518, | |
| "num_input_tokens_seen": 3464880, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 0.5776608571741751, | |
| "grad_norm": 5.9848151206970215, | |
| "learning_rate": 1.8977572660888122e-05, | |
| "loss": 3.1144, | |
| "num_input_tokens_seen": 3467712, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.5782089225035624, | |
| "grad_norm": 6.030148506164551, | |
| "learning_rate": 1.8935804303453612e-05, | |
| "loss": 3.0001, | |
| "num_input_tokens_seen": 3471760, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 0.5787569878329497, | |
| "grad_norm": 9.319378852844238, | |
| "learning_rate": 1.8894053923858857e-05, | |
| "loss": 2.7935, | |
| "num_input_tokens_seen": 3475928, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.5793050531623369, | |
| "grad_norm": 7.607476711273193, | |
| "learning_rate": 1.8852321645876507e-05, | |
| "loss": 2.9319, | |
| "num_input_tokens_seen": 3478968, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 0.5798531184917242, | |
| "grad_norm": 7.065295219421387, | |
| "learning_rate": 1.8810607593225567e-05, | |
| "loss": 2.9655, | |
| "num_input_tokens_seen": 3482160, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.5804011838211115, | |
| "grad_norm": 6.528260707855225, | |
| "learning_rate": 1.8768911889571002e-05, | |
| "loss": 3.0625, | |
| "num_input_tokens_seen": 3486016, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 0.5809492491504987, | |
| "grad_norm": 8.56631851196289, | |
| "learning_rate": 1.8727234658523368e-05, | |
| "loss": 3.1642, | |
| "num_input_tokens_seen": 3488552, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.581497314479886, | |
| "grad_norm": 6.70935583114624, | |
| "learning_rate": 1.8685576023638495e-05, | |
| "loss": 2.908, | |
| "num_input_tokens_seen": 3492192, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 0.5820453798092733, | |
| "grad_norm": 9.139800071716309, | |
| "learning_rate": 1.864393610841704e-05, | |
| "loss": 3.0694, | |
| "num_input_tokens_seen": 3495032, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.5825934451386605, | |
| "grad_norm": 6.343008041381836, | |
| "learning_rate": 1.8602315036304175e-05, | |
| "loss": 2.939, | |
| "num_input_tokens_seen": 3498288, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 0.5831415104680477, | |
| "grad_norm": 6.961386203765869, | |
| "learning_rate": 1.8560712930689238e-05, | |
| "loss": 2.7722, | |
| "num_input_tokens_seen": 3501112, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.5836895757974351, | |
| "grad_norm": 8.582582473754883, | |
| "learning_rate": 1.851912991490531e-05, | |
| "loss": 3.0957, | |
| "num_input_tokens_seen": 3504384, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.5842376411268223, | |
| "grad_norm": 6.227029800415039, | |
| "learning_rate": 1.8477566112228878e-05, | |
| "loss": 3.2204, | |
| "num_input_tokens_seen": 3508024, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.5847857064562095, | |
| "grad_norm": 6.587297439575195, | |
| "learning_rate": 1.8436021645879494e-05, | |
| "loss": 3.1471, | |
| "num_input_tokens_seen": 3511392, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 0.5853337717855969, | |
| "grad_norm": 5.520746231079102, | |
| "learning_rate": 1.839449663901936e-05, | |
| "loss": 2.9406, | |
| "num_input_tokens_seen": 3514568, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.5858818371149841, | |
| "grad_norm": 5.80632209777832, | |
| "learning_rate": 1.8352991214752983e-05, | |
| "loss": 2.9652, | |
| "num_input_tokens_seen": 3517672, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 0.5864299024443713, | |
| "grad_norm": 4.704535484313965, | |
| "learning_rate": 1.8311505496126868e-05, | |
| "loss": 2.7212, | |
| "num_input_tokens_seen": 3522392, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.5869779677737587, | |
| "grad_norm": 12.650748252868652, | |
| "learning_rate": 1.8270039606129045e-05, | |
| "loss": 3.7118, | |
| "num_input_tokens_seen": 3526336, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 0.5875260331031459, | |
| "grad_norm": 9.578808784484863, | |
| "learning_rate": 1.8228593667688772e-05, | |
| "loss": 3.2441, | |
| "num_input_tokens_seen": 3530656, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.5880740984325331, | |
| "grad_norm": 6.2789812088012695, | |
| "learning_rate": 1.818716780367618e-05, | |
| "loss": 2.7651, | |
| "num_input_tokens_seen": 3533184, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 0.5886221637619204, | |
| "grad_norm": 8.422161102294922, | |
| "learning_rate": 1.8145762136901874e-05, | |
| "loss": 3.3134, | |
| "num_input_tokens_seen": 3536976, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.5891702290913077, | |
| "grad_norm": 7.674281597137451, | |
| "learning_rate": 1.8104376790116572e-05, | |
| "loss": 3.1223, | |
| "num_input_tokens_seen": 3540496, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.5897182944206949, | |
| "grad_norm": 7.617640495300293, | |
| "learning_rate": 1.8063011886010777e-05, | |
| "loss": 3.4106, | |
| "num_input_tokens_seen": 3542952, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.5902663597500822, | |
| "grad_norm": 6.847158908843994, | |
| "learning_rate": 1.8021667547214367e-05, | |
| "loss": 3.4031, | |
| "num_input_tokens_seen": 3545952, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 0.5908144250794695, | |
| "grad_norm": 7.656712532043457, | |
| "learning_rate": 1.7980343896296243e-05, | |
| "loss": 3.1261, | |
| "num_input_tokens_seen": 3548960, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.5913624904088567, | |
| "grad_norm": 6.854838848114014, | |
| "learning_rate": 1.7939041055764015e-05, | |
| "loss": 2.8715, | |
| "num_input_tokens_seen": 3552888, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 0.591910555738244, | |
| "grad_norm": 7.809703350067139, | |
| "learning_rate": 1.789775914806357e-05, | |
| "loss": 3.0002, | |
| "num_input_tokens_seen": 3556448, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.5924586210676313, | |
| "grad_norm": 9.405502319335938, | |
| "learning_rate": 1.785649829557873e-05, | |
| "loss": 3.4519, | |
| "num_input_tokens_seen": 3560392, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 0.5930066863970185, | |
| "grad_norm": 9.429394721984863, | |
| "learning_rate": 1.781525862063092e-05, | |
| "loss": 3.2288, | |
| "num_input_tokens_seen": 3563680, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.5935547517264058, | |
| "grad_norm": 6.114898204803467, | |
| "learning_rate": 1.7774040245478767e-05, | |
| "loss": 3.3265, | |
| "num_input_tokens_seen": 3567200, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 0.594102817055793, | |
| "grad_norm": 6.565958499908447, | |
| "learning_rate": 1.7732843292317757e-05, | |
| "loss": 3.0318, | |
| "num_input_tokens_seen": 3570120, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.5946508823851803, | |
| "grad_norm": 7.470787048339844, | |
| "learning_rate": 1.7691667883279877e-05, | |
| "loss": 2.9758, | |
| "num_input_tokens_seen": 3573704, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 0.5951989477145676, | |
| "grad_norm": 6.305603504180908, | |
| "learning_rate": 1.7650514140433226e-05, | |
| "loss": 2.8946, | |
| "num_input_tokens_seen": 3577472, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.5957470130439548, | |
| "grad_norm": 7.486173629760742, | |
| "learning_rate": 1.760938218578168e-05, | |
| "loss": 3.0453, | |
| "num_input_tokens_seen": 3579928, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 0.5962950783733421, | |
| "grad_norm": 5.27332067489624, | |
| "learning_rate": 1.7568272141264542e-05, | |
| "loss": 3.0027, | |
| "num_input_tokens_seen": 3582744, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.5968431437027294, | |
| "grad_norm": 5.261857986450195, | |
| "learning_rate": 1.752718412875613e-05, | |
| "loss": 3.373, | |
| "num_input_tokens_seen": 3586344, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 0.5973912090321166, | |
| "grad_norm": 7.151644706726074, | |
| "learning_rate": 1.748611827006545e-05, | |
| "loss": 3.0059, | |
| "num_input_tokens_seen": 3590696, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.5979392743615038, | |
| "grad_norm": 6.867771148681641, | |
| "learning_rate": 1.7445074686935865e-05, | |
| "loss": 2.9594, | |
| "num_input_tokens_seen": 3593960, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 0.5984873396908912, | |
| "grad_norm": 10.243605613708496, | |
| "learning_rate": 1.740405350104466e-05, | |
| "loss": 3.1614, | |
| "num_input_tokens_seen": 3597248, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.5990354050202784, | |
| "grad_norm": 7.2442827224731445, | |
| "learning_rate": 1.736305483400273e-05, | |
| "loss": 3.444, | |
| "num_input_tokens_seen": 3600048, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 0.5995834703496656, | |
| "grad_norm": 8.634395599365234, | |
| "learning_rate": 1.7322078807354232e-05, | |
| "loss": 3.6502, | |
| "num_input_tokens_seen": 3603160, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.600131535679053, | |
| "grad_norm": 7.339416027069092, | |
| "learning_rate": 1.728112554257618e-05, | |
| "loss": 2.9444, | |
| "num_input_tokens_seen": 3606976, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.6006796010084402, | |
| "grad_norm": 6.438117027282715, | |
| "learning_rate": 1.7240195161078112e-05, | |
| "loss": 2.7825, | |
| "num_input_tokens_seen": 3610368, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.6012276663378274, | |
| "grad_norm": 8.13581657409668, | |
| "learning_rate": 1.7199287784201752e-05, | |
| "loss": 3.1469, | |
| "num_input_tokens_seen": 3613240, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 0.6017757316672148, | |
| "grad_norm": 9.25243854522705, | |
| "learning_rate": 1.715840353322059e-05, | |
| "loss": 3.1494, | |
| "num_input_tokens_seen": 3616384, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.602323796996602, | |
| "grad_norm": 6.846777439117432, | |
| "learning_rate": 1.7117542529339564e-05, | |
| "loss": 3.0651, | |
| "num_input_tokens_seen": 3620600, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 0.6028718623259892, | |
| "grad_norm": 9.576505661010742, | |
| "learning_rate": 1.7076704893694725e-05, | |
| "loss": 3.2062, | |
| "num_input_tokens_seen": 3624184, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.6034199276553766, | |
| "grad_norm": 5.831842422485352, | |
| "learning_rate": 1.7035890747352812e-05, | |
| "loss": 2.9302, | |
| "num_input_tokens_seen": 3628160, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 0.6039679929847638, | |
| "grad_norm": 6.526121139526367, | |
| "learning_rate": 1.699510021131093e-05, | |
| "loss": 3.0619, | |
| "num_input_tokens_seen": 3632144, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.604516058314151, | |
| "grad_norm": 8.087743759155273, | |
| "learning_rate": 1.695433340649622e-05, | |
| "loss": 3.1402, | |
| "num_input_tokens_seen": 3635512, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 0.6050641236435383, | |
| "grad_norm": 4.840604305267334, | |
| "learning_rate": 1.6913590453765436e-05, | |
| "loss": 3.0223, | |
| "num_input_tokens_seen": 3638824, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.6056121889729256, | |
| "grad_norm": 7.919428825378418, | |
| "learning_rate": 1.687287147390463e-05, | |
| "loss": 2.7976, | |
| "num_input_tokens_seen": 3642704, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 0.6061602543023128, | |
| "grad_norm": 5.97782039642334, | |
| "learning_rate": 1.6832176587628784e-05, | |
| "loss": 2.9795, | |
| "num_input_tokens_seen": 3645432, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.6067083196317001, | |
| "grad_norm": 7.9558539390563965, | |
| "learning_rate": 1.6791505915581474e-05, | |
| "loss": 3.0965, | |
| "num_input_tokens_seen": 3647912, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 0.6072563849610874, | |
| "grad_norm": 7.399658203125, | |
| "learning_rate": 1.675085957833446e-05, | |
| "loss": 3.0064, | |
| "num_input_tokens_seen": 3651176, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.6078044502904746, | |
| "grad_norm": 5.475082874298096, | |
| "learning_rate": 1.6710237696387364e-05, | |
| "loss": 3.0204, | |
| "num_input_tokens_seen": 3653864, | |
| "step": 5545 | |
| }, | |
| { | |
| "epoch": 0.6083525156198619, | |
| "grad_norm": 7.328055381774902, | |
| "learning_rate": 1.666964039016734e-05, | |
| "loss": 3.4209, | |
| "num_input_tokens_seen": 3656896, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.6089005809492491, | |
| "grad_norm": 6.844607353210449, | |
| "learning_rate": 1.6629067780028643e-05, | |
| "loss": 2.8587, | |
| "num_input_tokens_seen": 3660032, | |
| "step": 5555 | |
| }, | |
| { | |
| "epoch": 0.6094486462786364, | |
| "grad_norm": 8.957280158996582, | |
| "learning_rate": 1.6588519986252334e-05, | |
| "loss": 3.3932, | |
| "num_input_tokens_seen": 3662592, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.6099967116080237, | |
| "grad_norm": 6.236993789672852, | |
| "learning_rate": 1.6547997129045907e-05, | |
| "loss": 2.8217, | |
| "num_input_tokens_seen": 3665480, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 0.6105447769374109, | |
| "grad_norm": 6.7575201988220215, | |
| "learning_rate": 1.6507499328542926e-05, | |
| "loss": 3.1285, | |
| "num_input_tokens_seen": 3668296, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.6110928422667982, | |
| "grad_norm": 6.297115802764893, | |
| "learning_rate": 1.6467026704802652e-05, | |
| "loss": 3.0519, | |
| "num_input_tokens_seen": 3671088, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 0.6116409075961855, | |
| "grad_norm": 5.6386003494262695, | |
| "learning_rate": 1.6426579377809755e-05, | |
| "loss": 3.0005, | |
| "num_input_tokens_seen": 3674856, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.6121889729255727, | |
| "grad_norm": 5.507198333740234, | |
| "learning_rate": 1.6386157467473867e-05, | |
| "loss": 3.0995, | |
| "num_input_tokens_seen": 3677256, | |
| "step": 5585 | |
| }, | |
| { | |
| "epoch": 0.61273703825496, | |
| "grad_norm": 6.467530250549316, | |
| "learning_rate": 1.6345761093629276e-05, | |
| "loss": 3.1279, | |
| "num_input_tokens_seen": 3680248, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.6132851035843473, | |
| "grad_norm": 6.12019681930542, | |
| "learning_rate": 1.630539037603459e-05, | |
| "loss": 3.0768, | |
| "num_input_tokens_seen": 3683464, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 0.6138331689137345, | |
| "grad_norm": 6.198227882385254, | |
| "learning_rate": 1.626504543437234e-05, | |
| "loss": 3.1144, | |
| "num_input_tokens_seen": 3686448, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.6143812342431217, | |
| "grad_norm": 8.729185104370117, | |
| "learning_rate": 1.6224726388248622e-05, | |
| "loss": 3.2992, | |
| "num_input_tokens_seen": 3690360, | |
| "step": 5605 | |
| }, | |
| { | |
| "epoch": 0.6149292995725091, | |
| "grad_norm": 8.366303443908691, | |
| "learning_rate": 1.618443335719281e-05, | |
| "loss": 3.1796, | |
| "num_input_tokens_seen": 3693344, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.6154773649018963, | |
| "grad_norm": 5.997150897979736, | |
| "learning_rate": 1.614416646065711e-05, | |
| "loss": 3.0782, | |
| "num_input_tokens_seen": 3696488, | |
| "step": 5615 | |
| }, | |
| { | |
| "epoch": 0.6160254302312835, | |
| "grad_norm": 6.210281848907471, | |
| "learning_rate": 1.6103925818016257e-05, | |
| "loss": 3.0592, | |
| "num_input_tokens_seen": 3700080, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.6165734955606709, | |
| "grad_norm": 10.414953231811523, | |
| "learning_rate": 1.606371154856719e-05, | |
| "loss": 2.9467, | |
| "num_input_tokens_seen": 3703264, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.6171215608900581, | |
| "grad_norm": 6.666655540466309, | |
| "learning_rate": 1.6023523771528623e-05, | |
| "loss": 3.3406, | |
| "num_input_tokens_seen": 3706232, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.6176696262194453, | |
| "grad_norm": 6.776188373565674, | |
| "learning_rate": 1.5983362606040733e-05, | |
| "loss": 2.9584, | |
| "num_input_tokens_seen": 3709728, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 0.6182176915488327, | |
| "grad_norm": 6.977499008178711, | |
| "learning_rate": 1.5943228171164837e-05, | |
| "loss": 3.607, | |
| "num_input_tokens_seen": 3713824, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.6187657568782199, | |
| "grad_norm": 6.040121555328369, | |
| "learning_rate": 1.5903120585882974e-05, | |
| "loss": 3.4444, | |
| "num_input_tokens_seen": 3718048, | |
| "step": 5645 | |
| }, | |
| { | |
| "epoch": 0.6193138222076071, | |
| "grad_norm": 7.120656967163086, | |
| "learning_rate": 1.5863039969097592e-05, | |
| "loss": 3.3153, | |
| "num_input_tokens_seen": 3720360, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.6198618875369944, | |
| "grad_norm": 10.212481498718262, | |
| "learning_rate": 1.5822986439631207e-05, | |
| "loss": 3.0222, | |
| "num_input_tokens_seen": 3723136, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 0.6204099528663817, | |
| "grad_norm": 6.770248889923096, | |
| "learning_rate": 1.5782960116226007e-05, | |
| "loss": 2.9785, | |
| "num_input_tokens_seen": 3726064, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.6209580181957689, | |
| "grad_norm": 5.595423221588135, | |
| "learning_rate": 1.574296111754353e-05, | |
| "loss": 3.03, | |
| "num_input_tokens_seen": 3729800, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 0.6215060835251562, | |
| "grad_norm": 6.7276225090026855, | |
| "learning_rate": 1.5702989562164337e-05, | |
| "loss": 3.2465, | |
| "num_input_tokens_seen": 3733608, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.6220541488545435, | |
| "grad_norm": 7.501856327056885, | |
| "learning_rate": 1.5663045568587592e-05, | |
| "loss": 2.8702, | |
| "num_input_tokens_seen": 3736928, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 0.6226022141839307, | |
| "grad_norm": 4.790249824523926, | |
| "learning_rate": 1.562312925523076e-05, | |
| "loss": 3.0023, | |
| "num_input_tokens_seen": 3740256, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.623150279513318, | |
| "grad_norm": 6.182326316833496, | |
| "learning_rate": 1.5583240740429266e-05, | |
| "loss": 2.9844, | |
| "num_input_tokens_seen": 3743504, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 0.6236983448427053, | |
| "grad_norm": 8.316134452819824, | |
| "learning_rate": 1.5543380142436108e-05, | |
| "loss": 3.1194, | |
| "num_input_tokens_seen": 3746976, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.6242464101720925, | |
| "grad_norm": 4.825036525726318, | |
| "learning_rate": 1.5503547579421507e-05, | |
| "loss": 2.9029, | |
| "num_input_tokens_seen": 3749736, | |
| "step": 5695 | |
| }, | |
| { | |
| "epoch": 0.6247944755014798, | |
| "grad_norm": 5.379034996032715, | |
| "learning_rate": 1.5463743169472604e-05, | |
| "loss": 2.813, | |
| "num_input_tokens_seen": 3754312, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.625342540830867, | |
| "grad_norm": 7.649238586425781, | |
| "learning_rate": 1.5423967030593054e-05, | |
| "loss": 2.9726, | |
| "num_input_tokens_seen": 3757320, | |
| "step": 5705 | |
| }, | |
| { | |
| "epoch": 0.6258906061602543, | |
| "grad_norm": 8.456625938415527, | |
| "learning_rate": 1.5384219280702707e-05, | |
| "loss": 2.9852, | |
| "num_input_tokens_seen": 3761320, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.6264386714896416, | |
| "grad_norm": 5.238711833953857, | |
| "learning_rate": 1.534450003763726e-05, | |
| "loss": 2.8722, | |
| "num_input_tokens_seen": 3764536, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 0.6269867368190288, | |
| "grad_norm": 7.77496337890625, | |
| "learning_rate": 1.5304809419147885e-05, | |
| "loss": 3.0119, | |
| "num_input_tokens_seen": 3766832, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.6275348021484161, | |
| "grad_norm": 6.092039108276367, | |
| "learning_rate": 1.526514754290089e-05, | |
| "loss": 3.1644, | |
| "num_input_tokens_seen": 3770960, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 0.6280828674778034, | |
| "grad_norm": 8.289813995361328, | |
| "learning_rate": 1.5225514526477408e-05, | |
| "loss": 3.0392, | |
| "num_input_tokens_seen": 3774184, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.6286309328071906, | |
| "grad_norm": 7.361676216125488, | |
| "learning_rate": 1.5185910487372973e-05, | |
| "loss": 2.9171, | |
| "num_input_tokens_seen": 3778784, | |
| "step": 5735 | |
| }, | |
| { | |
| "epoch": 0.6291789981365778, | |
| "grad_norm": 6.253126621246338, | |
| "learning_rate": 1.514633554299723e-05, | |
| "loss": 2.9294, | |
| "num_input_tokens_seen": 3781568, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.6297270634659652, | |
| "grad_norm": 10.453216552734375, | |
| "learning_rate": 1.5106789810673578e-05, | |
| "loss": 3.2064, | |
| "num_input_tokens_seen": 3784152, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 0.6302751287953524, | |
| "grad_norm": 7.798788547515869, | |
| "learning_rate": 1.506727340763881e-05, | |
| "loss": 2.9679, | |
| "num_input_tokens_seen": 3786864, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.6308231941247396, | |
| "grad_norm": 7.438601493835449, | |
| "learning_rate": 1.5027786451042758e-05, | |
| "loss": 2.9835, | |
| "num_input_tokens_seen": 3790360, | |
| "step": 5755 | |
| }, | |
| { | |
| "epoch": 0.631371259454127, | |
| "grad_norm": 8.202717781066895, | |
| "learning_rate": 1.498832905794797e-05, | |
| "loss": 3.1209, | |
| "num_input_tokens_seen": 3793160, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.6319193247835142, | |
| "grad_norm": 7.448530673980713, | |
| "learning_rate": 1.4948901345329352e-05, | |
| "loss": 3.1779, | |
| "num_input_tokens_seen": 3797568, | |
| "step": 5765 | |
| }, | |
| { | |
| "epoch": 0.6324673901129014, | |
| "grad_norm": 5.029766082763672, | |
| "learning_rate": 1.4909503430073796e-05, | |
| "loss": 2.8519, | |
| "num_input_tokens_seen": 3801096, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.6330154554422888, | |
| "grad_norm": 5.234902858734131, | |
| "learning_rate": 1.48701354289799e-05, | |
| "loss": 3.1461, | |
| "num_input_tokens_seen": 3806256, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 0.633563520771676, | |
| "grad_norm": 8.089512825012207, | |
| "learning_rate": 1.4830797458757544e-05, | |
| "loss": 3.12, | |
| "num_input_tokens_seen": 3808880, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.6341115861010632, | |
| "grad_norm": 5.7707839012146, | |
| "learning_rate": 1.4791489636027583e-05, | |
| "loss": 2.7087, | |
| "num_input_tokens_seen": 3813584, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 0.6346596514304506, | |
| "grad_norm": 6.020088195800781, | |
| "learning_rate": 1.475221207732151e-05, | |
| "loss": 2.9224, | |
| "num_input_tokens_seen": 3816848, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.6352077167598378, | |
| "grad_norm": 6.976149082183838, | |
| "learning_rate": 1.4712964899081093e-05, | |
| "loss": 3.0359, | |
| "num_input_tokens_seen": 3820368, | |
| "step": 5795 | |
| }, | |
| { | |
| "epoch": 0.635755782089225, | |
| "grad_norm": 7.066904544830322, | |
| "learning_rate": 1.4673748217658026e-05, | |
| "loss": 3.0753, | |
| "num_input_tokens_seen": 3823064, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.6363038474186123, | |
| "grad_norm": 5.929400444030762, | |
| "learning_rate": 1.4634562149313607e-05, | |
| "loss": 3.1222, | |
| "num_input_tokens_seen": 3826048, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 0.6368519127479996, | |
| "grad_norm": 6.900379657745361, | |
| "learning_rate": 1.459540681021836e-05, | |
| "loss": 3.4275, | |
| "num_input_tokens_seen": 3829584, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.6373999780773868, | |
| "grad_norm": 6.451569080352783, | |
| "learning_rate": 1.4556282316451733e-05, | |
| "loss": 3.0381, | |
| "num_input_tokens_seen": 3832848, | |
| "step": 5815 | |
| }, | |
| { | |
| "epoch": 0.6379480434067741, | |
| "grad_norm": 6.459670066833496, | |
| "learning_rate": 1.4517188784001712e-05, | |
| "loss": 2.9231, | |
| "num_input_tokens_seen": 3835392, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.6384961087361614, | |
| "grad_norm": 9.6491117477417, | |
| "learning_rate": 1.4478126328764496e-05, | |
| "loss": 3.1121, | |
| "num_input_tokens_seen": 3839016, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 0.6390441740655486, | |
| "grad_norm": 6.9248552322387695, | |
| "learning_rate": 1.4439095066544154e-05, | |
| "loss": 3.0439, | |
| "num_input_tokens_seen": 3841424, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.6395922393949359, | |
| "grad_norm": 8.927162170410156, | |
| "learning_rate": 1.44000951130523e-05, | |
| "loss": 2.9511, | |
| "num_input_tokens_seen": 3843624, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 0.6401403047243232, | |
| "grad_norm": 7.547786712646484, | |
| "learning_rate": 1.4361126583907708e-05, | |
| "loss": 3.2556, | |
| "num_input_tokens_seen": 3846024, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.6406883700537104, | |
| "grad_norm": 9.325125694274902, | |
| "learning_rate": 1.432218959463599e-05, | |
| "loss": 3.2518, | |
| "num_input_tokens_seen": 3849176, | |
| "step": 5845 | |
| }, | |
| { | |
| "epoch": 0.6412364353830977, | |
| "grad_norm": 7.831711292266846, | |
| "learning_rate": 1.4283284260669282e-05, | |
| "loss": 3.3252, | |
| "num_input_tokens_seen": 3851496, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.6417845007124849, | |
| "grad_norm": 5.674088001251221, | |
| "learning_rate": 1.4244410697345845e-05, | |
| "loss": 3.1402, | |
| "num_input_tokens_seen": 3854384, | |
| "step": 5855 | |
| }, | |
| { | |
| "epoch": 0.6423325660418722, | |
| "grad_norm": 5.759450912475586, | |
| "learning_rate": 1.4205569019909759e-05, | |
| "loss": 3.2573, | |
| "num_input_tokens_seen": 3857336, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.6428806313712595, | |
| "grad_norm": 6.425468921661377, | |
| "learning_rate": 1.4166759343510599e-05, | |
| "loss": 2.994, | |
| "num_input_tokens_seen": 3860008, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 0.6434286967006467, | |
| "grad_norm": 8.979571342468262, | |
| "learning_rate": 1.4127981783203049e-05, | |
| "loss": 2.8518, | |
| "num_input_tokens_seen": 3863232, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.643976762030034, | |
| "grad_norm": 7.848270416259766, | |
| "learning_rate": 1.4089236453946563e-05, | |
| "loss": 3.312, | |
| "num_input_tokens_seen": 3867768, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 0.6445248273594213, | |
| "grad_norm": 6.893942832946777, | |
| "learning_rate": 1.4050523470605099e-05, | |
| "loss": 3.0278, | |
| "num_input_tokens_seen": 3870384, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.6450728926888085, | |
| "grad_norm": 6.547880172729492, | |
| "learning_rate": 1.4011842947946674e-05, | |
| "loss": 2.7762, | |
| "num_input_tokens_seen": 3873064, | |
| "step": 5885 | |
| }, | |
| { | |
| "epoch": 0.6456209580181957, | |
| "grad_norm": 8.624503135681152, | |
| "learning_rate": 1.397319500064308e-05, | |
| "loss": 2.8362, | |
| "num_input_tokens_seen": 3876656, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.6461690233475831, | |
| "grad_norm": 7.134870529174805, | |
| "learning_rate": 1.3934579743269561e-05, | |
| "loss": 2.6202, | |
| "num_input_tokens_seen": 3880296, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 0.6467170886769703, | |
| "grad_norm": 7.61886739730835, | |
| "learning_rate": 1.389599729030443e-05, | |
| "loss": 2.9104, | |
| "num_input_tokens_seen": 3883280, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.6472651540063575, | |
| "grad_norm": 6.761881351470947, | |
| "learning_rate": 1.3857447756128744e-05, | |
| "loss": 2.9658, | |
| "num_input_tokens_seen": 3885848, | |
| "step": 5905 | |
| }, | |
| { | |
| "epoch": 0.6478132193357449, | |
| "grad_norm": 9.020877838134766, | |
| "learning_rate": 1.381893125502598e-05, | |
| "loss": 3.1887, | |
| "num_input_tokens_seen": 3889168, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.6483612846651321, | |
| "grad_norm": 7.6226091384887695, | |
| "learning_rate": 1.3780447901181681e-05, | |
| "loss": 3.2913, | |
| "num_input_tokens_seen": 3892368, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 0.6489093499945193, | |
| "grad_norm": 6.327563285827637, | |
| "learning_rate": 1.374199780868311e-05, | |
| "loss": 2.868, | |
| "num_input_tokens_seen": 3895192, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.6494574153239067, | |
| "grad_norm": 7.200982093811035, | |
| "learning_rate": 1.3703581091518964e-05, | |
| "loss": 2.9841, | |
| "num_input_tokens_seen": 3899104, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 0.6500054806532939, | |
| "grad_norm": 7.297597885131836, | |
| "learning_rate": 1.3665197863578954e-05, | |
| "loss": 3.1225, | |
| "num_input_tokens_seen": 3901696, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.6505535459826811, | |
| "grad_norm": 6.203746318817139, | |
| "learning_rate": 1.3626848238653516e-05, | |
| "loss": 3.082, | |
| "num_input_tokens_seen": 3905192, | |
| "step": 5935 | |
| }, | |
| { | |
| "epoch": 0.6511016113120685, | |
| "grad_norm": 7.677253246307373, | |
| "learning_rate": 1.358853233043349e-05, | |
| "loss": 3.2795, | |
| "num_input_tokens_seen": 3908456, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.6516496766414557, | |
| "grad_norm": 6.703474044799805, | |
| "learning_rate": 1.3550250252509744e-05, | |
| "loss": 3.123, | |
| "num_input_tokens_seen": 3910504, | |
| "step": 5945 | |
| }, | |
| { | |
| "epoch": 0.6521977419708429, | |
| "grad_norm": 7.855628967285156, | |
| "learning_rate": 1.3512002118372835e-05, | |
| "loss": 2.8393, | |
| "num_input_tokens_seen": 3913032, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.6527458073002302, | |
| "grad_norm": 7.922531604766846, | |
| "learning_rate": 1.3473788041412732e-05, | |
| "loss": 2.7007, | |
| "num_input_tokens_seen": 3916392, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 0.6532938726296175, | |
| "grad_norm": 10.957340240478516, | |
| "learning_rate": 1.3435608134918412e-05, | |
| "loss": 2.9213, | |
| "num_input_tokens_seen": 3919248, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.6538419379590047, | |
| "grad_norm": 5.184296607971191, | |
| "learning_rate": 1.3397462512077535e-05, | |
| "loss": 3.203, | |
| "num_input_tokens_seen": 3922528, | |
| "step": 5965 | |
| }, | |
| { | |
| "epoch": 0.654390003288392, | |
| "grad_norm": 8.037724494934082, | |
| "learning_rate": 1.3359351285976174e-05, | |
| "loss": 3.1737, | |
| "num_input_tokens_seen": 3925200, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.6549380686177793, | |
| "grad_norm": 7.275876045227051, | |
| "learning_rate": 1.3321274569598382e-05, | |
| "loss": 2.848, | |
| "num_input_tokens_seen": 3928128, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 0.6554861339471665, | |
| "grad_norm": 5.043073654174805, | |
| "learning_rate": 1.3283232475825916e-05, | |
| "loss": 2.8843, | |
| "num_input_tokens_seen": 3931696, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.6560341992765538, | |
| "grad_norm": 8.235861778259277, | |
| "learning_rate": 1.3245225117437918e-05, | |
| "loss": 3.3592, | |
| "num_input_tokens_seen": 3934656, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 0.656582264605941, | |
| "grad_norm": 7.135794162750244, | |
| "learning_rate": 1.3207252607110521e-05, | |
| "loss": 3.263, | |
| "num_input_tokens_seen": 3937536, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.6571303299353283, | |
| "grad_norm": 8.360773086547852, | |
| "learning_rate": 1.3169315057416564e-05, | |
| "loss": 3.1673, | |
| "num_input_tokens_seen": 3940200, | |
| "step": 5995 | |
| }, | |
| { | |
| "epoch": 0.6576783952647156, | |
| "grad_norm": 9.115818977355957, | |
| "learning_rate": 1.3131412580825236e-05, | |
| "loss": 3.1802, | |
| "num_input_tokens_seen": 3942688, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.6582264605941028, | |
| "grad_norm": 8.476052284240723, | |
| "learning_rate": 1.3093545289701747e-05, | |
| "loss": 3.1919, | |
| "num_input_tokens_seen": 3945760, | |
| "step": 6005 | |
| }, | |
| { | |
| "epoch": 0.6587745259234901, | |
| "grad_norm": 6.621984004974365, | |
| "learning_rate": 1.3055713296307016e-05, | |
| "loss": 2.8701, | |
| "num_input_tokens_seen": 3948512, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.6593225912528773, | |
| "grad_norm": 8.03313159942627, | |
| "learning_rate": 1.3017916712797293e-05, | |
| "loss": 3.3227, | |
| "num_input_tokens_seen": 3951520, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 0.6598706565822646, | |
| "grad_norm": 7.0439677238464355, | |
| "learning_rate": 1.2980155651223867e-05, | |
| "loss": 2.8738, | |
| "num_input_tokens_seen": 3955392, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.6604187219116519, | |
| "grad_norm": 7.3785529136657715, | |
| "learning_rate": 1.2942430223532703e-05, | |
| "loss": 3.3427, | |
| "num_input_tokens_seen": 3959592, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 0.6609667872410391, | |
| "grad_norm": 5.641672134399414, | |
| "learning_rate": 1.2904740541564159e-05, | |
| "loss": 3.0156, | |
| "num_input_tokens_seen": 3963064, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.6615148525704264, | |
| "grad_norm": 6.209802150726318, | |
| "learning_rate": 1.286708671705259e-05, | |
| "loss": 3.0553, | |
| "num_input_tokens_seen": 3965552, | |
| "step": 6035 | |
| }, | |
| { | |
| "epoch": 0.6620629178998136, | |
| "grad_norm": 6.092316627502441, | |
| "learning_rate": 1.2829468861626052e-05, | |
| "loss": 2.9092, | |
| "num_input_tokens_seen": 3968480, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.6626109832292009, | |
| "grad_norm": 10.323710441589355, | |
| "learning_rate": 1.2791887086805993e-05, | |
| "loss": 3.4687, | |
| "num_input_tokens_seen": 3971464, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 0.6631590485585882, | |
| "grad_norm": 6.506869792938232, | |
| "learning_rate": 1.2754341504006872e-05, | |
| "loss": 3.0349, | |
| "num_input_tokens_seen": 3975640, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.6637071138879754, | |
| "grad_norm": 6.929319381713867, | |
| "learning_rate": 1.2716832224535847e-05, | |
| "loss": 3.1761, | |
| "num_input_tokens_seen": 3978928, | |
| "step": 6055 | |
| }, | |
| { | |
| "epoch": 0.6642551792173627, | |
| "grad_norm": 6.731025218963623, | |
| "learning_rate": 1.2679359359592488e-05, | |
| "loss": 2.7582, | |
| "num_input_tokens_seen": 3984016, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.66480324454675, | |
| "grad_norm": 7.775283336639404, | |
| "learning_rate": 1.2641923020268377e-05, | |
| "loss": 3.222, | |
| "num_input_tokens_seen": 3986544, | |
| "step": 6065 | |
| }, | |
| { | |
| "epoch": 0.6653513098761372, | |
| "grad_norm": 9.189234733581543, | |
| "learning_rate": 1.2604523317546813e-05, | |
| "loss": 2.7329, | |
| "num_input_tokens_seen": 3989440, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.6658993752055244, | |
| "grad_norm": 6.482409954071045, | |
| "learning_rate": 1.2567160362302515e-05, | |
| "loss": 3.0355, | |
| "num_input_tokens_seen": 3993928, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 0.6664474405349118, | |
| "grad_norm": 6.9843878746032715, | |
| "learning_rate": 1.2529834265301227e-05, | |
| "loss": 3.1331, | |
| "num_input_tokens_seen": 3997312, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.666995505864299, | |
| "grad_norm": 7.9999308586120605, | |
| "learning_rate": 1.2492545137199426e-05, | |
| "loss": 3.2756, | |
| "num_input_tokens_seen": 4000160, | |
| "step": 6085 | |
| }, | |
| { | |
| "epoch": 0.6675435711936862, | |
| "grad_norm": 5.13596773147583, | |
| "learning_rate": 1.2455293088544023e-05, | |
| "loss": 3.382, | |
| "num_input_tokens_seen": 4003720, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.6680916365230736, | |
| "grad_norm": 6.42021369934082, | |
| "learning_rate": 1.2418078229771973e-05, | |
| "loss": 2.9692, | |
| "num_input_tokens_seen": 4006680, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 0.6686397018524608, | |
| "grad_norm": 9.268325805664062, | |
| "learning_rate": 1.2380900671209984e-05, | |
| "loss": 2.9399, | |
| "num_input_tokens_seen": 4009632, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.669187767181848, | |
| "grad_norm": 5.049006938934326, | |
| "learning_rate": 1.2343760523074186e-05, | |
| "loss": 3.0858, | |
| "num_input_tokens_seen": 4012552, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 0.6697358325112354, | |
| "grad_norm": 6.255411148071289, | |
| "learning_rate": 1.2306657895469809e-05, | |
| "loss": 3.16, | |
| "num_input_tokens_seen": 4016240, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.6702838978406226, | |
| "grad_norm": 10.016054153442383, | |
| "learning_rate": 1.2269592898390833e-05, | |
| "loss": 3.0065, | |
| "num_input_tokens_seen": 4019680, | |
| "step": 6115 | |
| }, | |
| { | |
| "epoch": 0.6708319631700098, | |
| "grad_norm": 7.499462604522705, | |
| "learning_rate": 1.223256564171971e-05, | |
| "loss": 3.3602, | |
| "num_input_tokens_seen": 4022288, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.6713800284993972, | |
| "grad_norm": 7.838258266448975, | |
| "learning_rate": 1.2195576235226977e-05, | |
| "loss": 2.7866, | |
| "num_input_tokens_seen": 4025216, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.6719280938287844, | |
| "grad_norm": 7.931380271911621, | |
| "learning_rate": 1.2158624788570965e-05, | |
| "loss": 3.4889, | |
| "num_input_tokens_seen": 4029376, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.6724761591581716, | |
| "grad_norm": 5.675364971160889, | |
| "learning_rate": 1.2121711411297498e-05, | |
| "loss": 3.3344, | |
| "num_input_tokens_seen": 4031616, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 0.6730242244875589, | |
| "grad_norm": 5.3835577964782715, | |
| "learning_rate": 1.2084836212839507e-05, | |
| "loss": 3.1429, | |
| "num_input_tokens_seen": 4034840, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.6735722898169462, | |
| "grad_norm": 7.542428016662598, | |
| "learning_rate": 1.2047999302516737e-05, | |
| "loss": 2.9853, | |
| "num_input_tokens_seen": 4037792, | |
| "step": 6145 | |
| }, | |
| { | |
| "epoch": 0.6741203551463334, | |
| "grad_norm": 7.841860771179199, | |
| "learning_rate": 1.2011200789535464e-05, | |
| "loss": 3.011, | |
| "num_input_tokens_seen": 4041272, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.6746684204757207, | |
| "grad_norm": 10.116206169128418, | |
| "learning_rate": 1.1974440782988094e-05, | |
| "loss": 3.1755, | |
| "num_input_tokens_seen": 4044360, | |
| "step": 6155 | |
| }, | |
| { | |
| "epoch": 0.675216485805108, | |
| "grad_norm": 6.566442489624023, | |
| "learning_rate": 1.1937719391852877e-05, | |
| "loss": 3.0532, | |
| "num_input_tokens_seen": 4047544, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.6757645511344952, | |
| "grad_norm": 6.767369747161865, | |
| "learning_rate": 1.1901036724993616e-05, | |
| "loss": 2.9114, | |
| "num_input_tokens_seen": 4050584, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 0.6763126164638825, | |
| "grad_norm": 5.782663822174072, | |
| "learning_rate": 1.1864392891159284e-05, | |
| "loss": 3.4902, | |
| "num_input_tokens_seen": 4053392, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.6768606817932697, | |
| "grad_norm": 7.807350158691406, | |
| "learning_rate": 1.1827787998983731e-05, | |
| "loss": 3.1896, | |
| "num_input_tokens_seen": 4056184, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 0.677408747122657, | |
| "grad_norm": 8.840995788574219, | |
| "learning_rate": 1.1791222156985382e-05, | |
| "loss": 3.4261, | |
| "num_input_tokens_seen": 4060616, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.6779568124520443, | |
| "grad_norm": 5.441840171813965, | |
| "learning_rate": 1.1754695473566877e-05, | |
| "loss": 2.8645, | |
| "num_input_tokens_seen": 4065008, | |
| "step": 6185 | |
| }, | |
| { | |
| "epoch": 0.6785048777814315, | |
| "grad_norm": 7.820642471313477, | |
| "learning_rate": 1.1718208057014768e-05, | |
| "loss": 3.1664, | |
| "num_input_tokens_seen": 4068872, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.6790529431108188, | |
| "grad_norm": 7.290872573852539, | |
| "learning_rate": 1.1681760015499201e-05, | |
| "loss": 3.4087, | |
| "num_input_tokens_seen": 4071376, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 0.6796010084402061, | |
| "grad_norm": 5.5174360275268555, | |
| "learning_rate": 1.1645351457073594e-05, | |
| "loss": 3.3074, | |
| "num_input_tokens_seen": 4074528, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.6801490737695933, | |
| "grad_norm": 6.114542484283447, | |
| "learning_rate": 1.1608982489674295e-05, | |
| "loss": 3.0535, | |
| "num_input_tokens_seen": 4077600, | |
| "step": 6205 | |
| }, | |
| { | |
| "epoch": 0.6806971390989806, | |
| "grad_norm": 8.515054702758789, | |
| "learning_rate": 1.1572653221120316e-05, | |
| "loss": 3.2291, | |
| "num_input_tokens_seen": 4080664, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.6812452044283679, | |
| "grad_norm": 8.11023235321045, | |
| "learning_rate": 1.1536363759112952e-05, | |
| "loss": 3.1448, | |
| "num_input_tokens_seen": 4083256, | |
| "step": 6215 | |
| }, | |
| { | |
| "epoch": 0.6817932697577551, | |
| "grad_norm": 7.834672927856445, | |
| "learning_rate": 1.1500114211235482e-05, | |
| "loss": 3.1213, | |
| "num_input_tokens_seen": 4085568, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.6823413350871423, | |
| "grad_norm": 6.758762836456299, | |
| "learning_rate": 1.146390468495289e-05, | |
| "loss": 3.0515, | |
| "num_input_tokens_seen": 4088248, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 0.6828894004165297, | |
| "grad_norm": 6.3487372398376465, | |
| "learning_rate": 1.1427735287611477e-05, | |
| "loss": 2.5775, | |
| "num_input_tokens_seen": 4090848, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.6834374657459169, | |
| "grad_norm": 5.81227445602417, | |
| "learning_rate": 1.1391606126438586e-05, | |
| "loss": 3.0297, | |
| "num_input_tokens_seen": 4094232, | |
| "step": 6235 | |
| }, | |
| { | |
| "epoch": 0.6839855310753041, | |
| "grad_norm": 7.857996463775635, | |
| "learning_rate": 1.1355517308542301e-05, | |
| "loss": 3.0582, | |
| "num_input_tokens_seen": 4097096, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.6845335964046915, | |
| "grad_norm": 5.819544792175293, | |
| "learning_rate": 1.1319468940911079e-05, | |
| "loss": 2.8814, | |
| "num_input_tokens_seen": 4099912, | |
| "step": 6245 | |
| }, | |
| { | |
| "epoch": 0.6850816617340787, | |
| "grad_norm": 9.14799976348877, | |
| "learning_rate": 1.1283461130413453e-05, | |
| "loss": 3.3229, | |
| "num_input_tokens_seen": 4102320, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.6856297270634659, | |
| "grad_norm": 7.087406158447266, | |
| "learning_rate": 1.1247493983797754e-05, | |
| "loss": 2.8581, | |
| "num_input_tokens_seen": 4106480, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 0.6861777923928533, | |
| "grad_norm": 7.298010349273682, | |
| "learning_rate": 1.1218749616158092e-05, | |
| "loss": 3.1186, | |
| "num_input_tokens_seen": 4110064, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.6867258577222405, | |
| "grad_norm": 6.6678290367126465, | |
| "learning_rate": 1.1182855933150582e-05, | |
| "loss": 2.971, | |
| "num_input_tokens_seen": 4113304, | |
| "step": 6265 | |
| }, | |
| { | |
| "epoch": 0.6872739230516277, | |
| "grad_norm": 8.044167518615723, | |
| "learning_rate": 1.1147003212277912e-05, | |
| "loss": 3.3036, | |
| "num_input_tokens_seen": 4115752, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.687821988381015, | |
| "grad_norm": 6.803138256072998, | |
| "learning_rate": 1.1111191559828627e-05, | |
| "loss": 2.7812, | |
| "num_input_tokens_seen": 4119488, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 0.6883700537104023, | |
| "grad_norm": 5.070322513580322, | |
| "learning_rate": 1.1075421081969502e-05, | |
| "loss": 3.152, | |
| "num_input_tokens_seen": 4122168, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.6889181190397895, | |
| "grad_norm": 6.463720321655273, | |
| "learning_rate": 1.1039691884745252e-05, | |
| "loss": 2.9657, | |
| "num_input_tokens_seen": 4125704, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 0.6894661843691768, | |
| "grad_norm": 9.405960083007812, | |
| "learning_rate": 1.1004004074078223e-05, | |
| "loss": 3.5484, | |
| "num_input_tokens_seen": 4128608, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.6900142496985641, | |
| "grad_norm": 6.504082679748535, | |
| "learning_rate": 1.0968357755768051e-05, | |
| "loss": 2.7744, | |
| "num_input_tokens_seen": 4131416, | |
| "step": 6295 | |
| }, | |
| { | |
| "epoch": 0.6905623150279513, | |
| "grad_norm": 7.679104804992676, | |
| "learning_rate": 1.093275303549137e-05, | |
| "loss": 3.1396, | |
| "num_input_tokens_seen": 4135168, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.6911103803573386, | |
| "grad_norm": 10.499975204467773, | |
| "learning_rate": 1.0897190018801503e-05, | |
| "loss": 3.4244, | |
| "num_input_tokens_seen": 4138320, | |
| "step": 6305 | |
| }, | |
| { | |
| "epoch": 0.6916584456867259, | |
| "grad_norm": 5.967805862426758, | |
| "learning_rate": 1.0861668811128129e-05, | |
| "loss": 3.0676, | |
| "num_input_tokens_seen": 4140880, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.6922065110161131, | |
| "grad_norm": 6.552985668182373, | |
| "learning_rate": 1.0826189517776975e-05, | |
| "loss": 3.0805, | |
| "num_input_tokens_seen": 4143912, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 0.6927545763455004, | |
| "grad_norm": 8.34593677520752, | |
| "learning_rate": 1.0790752243929523e-05, | |
| "loss": 3.2587, | |
| "num_input_tokens_seen": 4147320, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.6933026416748876, | |
| "grad_norm": 6.536946773529053, | |
| "learning_rate": 1.0755357094642674e-05, | |
| "loss": 3.0053, | |
| "num_input_tokens_seen": 4150928, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 0.6938507070042749, | |
| "grad_norm": 7.138943672180176, | |
| "learning_rate": 1.0720004174848444e-05, | |
| "loss": 2.9898, | |
| "num_input_tokens_seen": 4154120, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.6943987723336622, | |
| "grad_norm": 9.60561466217041, | |
| "learning_rate": 1.0684693589353678e-05, | |
| "loss": 3.4849, | |
| "num_input_tokens_seen": 4156832, | |
| "step": 6335 | |
| }, | |
| { | |
| "epoch": 0.6949468376630494, | |
| "grad_norm": 8.691582679748535, | |
| "learning_rate": 1.0649425442839697e-05, | |
| "loss": 3.1178, | |
| "num_input_tokens_seen": 4159704, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.6954949029924367, | |
| "grad_norm": 8.004415512084961, | |
| "learning_rate": 1.0614199839862002e-05, | |
| "loss": 3.0848, | |
| "num_input_tokens_seen": 4162168, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 0.696042968321824, | |
| "grad_norm": 12.674962043762207, | |
| "learning_rate": 1.0579016884849999e-05, | |
| "loss": 3.4026, | |
| "num_input_tokens_seen": 4165384, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.6965910336512112, | |
| "grad_norm": 7.9511284828186035, | |
| "learning_rate": 1.0543876682106632e-05, | |
| "loss": 3.0329, | |
| "num_input_tokens_seen": 4168128, | |
| "step": 6355 | |
| }, | |
| { | |
| "epoch": 0.6971390989805984, | |
| "grad_norm": 9.268970489501953, | |
| "learning_rate": 1.0508779335808105e-05, | |
| "loss": 3.1994, | |
| "num_input_tokens_seen": 4171888, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.6976871643099858, | |
| "grad_norm": 6.21211051940918, | |
| "learning_rate": 1.04737249500036e-05, | |
| "loss": 3.1242, | |
| "num_input_tokens_seen": 4174896, | |
| "step": 6365 | |
| }, | |
| { | |
| "epoch": 0.698235229639373, | |
| "grad_norm": 7.668500900268555, | |
| "learning_rate": 1.04387136286149e-05, | |
| "loss": 3.0467, | |
| "num_input_tokens_seen": 4178504, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.6987832949687602, | |
| "grad_norm": 5.02815580368042, | |
| "learning_rate": 1.040374547543613e-05, | |
| "loss": 2.9279, | |
| "num_input_tokens_seen": 4182040, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 0.6993313602981476, | |
| "grad_norm": 5.940211772918701, | |
| "learning_rate": 1.0368820594133466e-05, | |
| "loss": 2.968, | |
| "num_input_tokens_seen": 4185880, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.6998794256275348, | |
| "grad_norm": 6.044907093048096, | |
| "learning_rate": 1.0333939088244771e-05, | |
| "loss": 3.3093, | |
| "num_input_tokens_seen": 4189000, | |
| "step": 6385 | |
| }, | |
| { | |
| "epoch": 0.700427490956922, | |
| "grad_norm": 6.427306652069092, | |
| "learning_rate": 1.0299101061179317e-05, | |
| "loss": 3.2814, | |
| "num_input_tokens_seen": 4191736, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.7009755562863094, | |
| "grad_norm": 7.336453914642334, | |
| "learning_rate": 1.0264306616217507e-05, | |
| "loss": 2.8437, | |
| "num_input_tokens_seen": 4194360, | |
| "step": 6395 | |
| }, | |
| { | |
| "epoch": 0.7015236216156966, | |
| "grad_norm": 7.562320709228516, | |
| "learning_rate": 1.0229555856510512e-05, | |
| "loss": 2.828, | |
| "num_input_tokens_seen": 4197920, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.7020716869450838, | |
| "grad_norm": 7.142042636871338, | |
| "learning_rate": 1.0194848885080011e-05, | |
| "loss": 3.1228, | |
| "num_input_tokens_seen": 4201984, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 0.7026197522744712, | |
| "grad_norm": 6.18742036819458, | |
| "learning_rate": 1.0160185804817859e-05, | |
| "loss": 2.8393, | |
| "num_input_tokens_seen": 4205328, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.7031678176038584, | |
| "grad_norm": 7.195977687835693, | |
| "learning_rate": 1.0125566718485788e-05, | |
| "loss": 2.9868, | |
| "num_input_tokens_seen": 4208312, | |
| "step": 6415 | |
| }, | |
| { | |
| "epoch": 0.7037158829332456, | |
| "grad_norm": 10.329099655151367, | |
| "learning_rate": 1.0090991728715132e-05, | |
| "loss": 2.829, | |
| "num_input_tokens_seen": 4211312, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.7042639482626329, | |
| "grad_norm": 6.6712236404418945, | |
| "learning_rate": 1.0056460938006473e-05, | |
| "loss": 2.9549, | |
| "num_input_tokens_seen": 4213800, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 0.7048120135920202, | |
| "grad_norm": 4.803092002868652, | |
| "learning_rate": 1.0021974448729365e-05, | |
| "loss": 3.3355, | |
| "num_input_tokens_seen": 4217200, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.7053600789214074, | |
| "grad_norm": 6.527164459228516, | |
| "learning_rate": 9.987532363122018e-06, | |
| "loss": 2.9652, | |
| "num_input_tokens_seen": 4220768, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 0.7059081442507947, | |
| "grad_norm": 7.362782955169678, | |
| "learning_rate": 9.953134783291036e-06, | |
| "loss": 2.8684, | |
| "num_input_tokens_seen": 4224224, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.706456209580182, | |
| "grad_norm": 9.984780311584473, | |
| "learning_rate": 9.918781811211045e-06, | |
| "loss": 2.8968, | |
| "num_input_tokens_seen": 4229272, | |
| "step": 6445 | |
| }, | |
| { | |
| "epoch": 0.7070042749095692, | |
| "grad_norm": 6.219121932983398, | |
| "learning_rate": 9.884473548724441e-06, | |
| "loss": 3.1832, | |
| "num_input_tokens_seen": 4232096, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.7075523402389565, | |
| "grad_norm": 6.208556652069092, | |
| "learning_rate": 9.850210097541085e-06, | |
| "loss": 3.108, | |
| "num_input_tokens_seen": 4235496, | |
| "step": 6455 | |
| }, | |
| { | |
| "epoch": 0.7081004055683437, | |
| "grad_norm": 7.7808003425598145, | |
| "learning_rate": 9.81599155923798e-06, | |
| "loss": 3.0694, | |
| "num_input_tokens_seen": 4238320, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.708648470897731, | |
| "grad_norm": 8.587124824523926, | |
| "learning_rate": 9.781818035258972e-06, | |
| "loss": 3.1773, | |
| "num_input_tokens_seen": 4240792, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 0.7091965362271183, | |
| "grad_norm": 11.057994842529297, | |
| "learning_rate": 9.747689626914483e-06, | |
| "loss": 3.4154, | |
| "num_input_tokens_seen": 4244904, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.7097446015565055, | |
| "grad_norm": 6.430279731750488, | |
| "learning_rate": 9.713606435381165e-06, | |
| "loss": 3.1772, | |
| "num_input_tokens_seen": 4247632, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 0.7102926668858928, | |
| "grad_norm": 7.846237659454346, | |
| "learning_rate": 9.679568561701615e-06, | |
| "loss": 2.9962, | |
| "num_input_tokens_seen": 4250768, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.7108407322152801, | |
| "grad_norm": 8.467151641845703, | |
| "learning_rate": 9.645576106784118e-06, | |
| "loss": 2.8687, | |
| "num_input_tokens_seen": 4253904, | |
| "step": 6485 | |
| }, | |
| { | |
| "epoch": 0.7113887975446673, | |
| "grad_norm": 16.991235733032227, | |
| "learning_rate": 9.611629171402273e-06, | |
| "loss": 3.1696, | |
| "num_input_tokens_seen": 4256768, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.7119368628740546, | |
| "grad_norm": 7.091182231903076, | |
| "learning_rate": 9.577727856194746e-06, | |
| "loss": 2.7567, | |
| "num_input_tokens_seen": 4260192, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 0.7124849282034419, | |
| "grad_norm": 7.963916778564453, | |
| "learning_rate": 9.543872261664952e-06, | |
| "loss": 2.9586, | |
| "num_input_tokens_seen": 4263560, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.7130329935328291, | |
| "grad_norm": 6.632905006408691, | |
| "learning_rate": 9.510062488180781e-06, | |
| "loss": 2.8122, | |
| "num_input_tokens_seen": 4266624, | |
| "step": 6505 | |
| }, | |
| { | |
| "epoch": 0.7135810588622163, | |
| "grad_norm": 8.157563209533691, | |
| "learning_rate": 9.476298635974265e-06, | |
| "loss": 2.9458, | |
| "num_input_tokens_seen": 4269488, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.7141291241916037, | |
| "grad_norm": 7.982326507568359, | |
| "learning_rate": 9.442580805141305e-06, | |
| "loss": 3.172, | |
| "num_input_tokens_seen": 4272592, | |
| "step": 6515 | |
| }, | |
| { | |
| "epoch": 0.7146771895209909, | |
| "grad_norm": 5.6351423263549805, | |
| "learning_rate": 9.408909095641363e-06, | |
| "loss": 3.139, | |
| "num_input_tokens_seen": 4275552, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.7152252548503781, | |
| "grad_norm": 7.883710861206055, | |
| "learning_rate": 9.375283607297175e-06, | |
| "loss": 3.3458, | |
| "num_input_tokens_seen": 4277912, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 0.7157733201797655, | |
| "grad_norm": 5.036897659301758, | |
| "learning_rate": 9.341704439794441e-06, | |
| "loss": 2.9759, | |
| "num_input_tokens_seen": 4280520, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.7163213855091527, | |
| "grad_norm": 6.539727687835693, | |
| "learning_rate": 9.308171692681565e-06, | |
| "loss": 2.7201, | |
| "num_input_tokens_seen": 4284248, | |
| "step": 6535 | |
| }, | |
| { | |
| "epoch": 0.7168694508385399, | |
| "grad_norm": 7.108365058898926, | |
| "learning_rate": 9.274685465369303e-06, | |
| "loss": 3.1882, | |
| "num_input_tokens_seen": 4288664, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.7174175161679273, | |
| "grad_norm": 5.567689418792725, | |
| "learning_rate": 9.241245857130507e-06, | |
| "loss": 3.3889, | |
| "num_input_tokens_seen": 4292104, | |
| "step": 6545 | |
| }, | |
| { | |
| "epoch": 0.7179655814973145, | |
| "grad_norm": 7.539772033691406, | |
| "learning_rate": 9.207852967099841e-06, | |
| "loss": 3.2677, | |
| "num_input_tokens_seen": 4296664, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.7185136468267017, | |
| "grad_norm": 11.019807815551758, | |
| "learning_rate": 9.174506894273448e-06, | |
| "loss": 3.2587, | |
| "num_input_tokens_seen": 4298936, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 0.719061712156089, | |
| "grad_norm": 4.87662935256958, | |
| "learning_rate": 9.141207737508677e-06, | |
| "loss": 3.4056, | |
| "num_input_tokens_seen": 4301872, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.7196097774854763, | |
| "grad_norm": 7.396250247955322, | |
| "learning_rate": 9.107955595523812e-06, | |
| "loss": 3.0741, | |
| "num_input_tokens_seen": 4305096, | |
| "step": 6565 | |
| }, | |
| { | |
| "epoch": 0.7201578428148635, | |
| "grad_norm": 9.769874572753906, | |
| "learning_rate": 9.074750566897733e-06, | |
| "loss": 2.8083, | |
| "num_input_tokens_seen": 4309576, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.7207059081442508, | |
| "grad_norm": 7.023451805114746, | |
| "learning_rate": 9.041592750069652e-06, | |
| "loss": 3.162, | |
| "num_input_tokens_seen": 4313728, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 0.7212539734736381, | |
| "grad_norm": 7.67805814743042, | |
| "learning_rate": 9.008482243338841e-06, | |
| "loss": 3.1487, | |
| "num_input_tokens_seen": 4316864, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.7218020388030253, | |
| "grad_norm": 5.812924385070801, | |
| "learning_rate": 8.975419144864292e-06, | |
| "loss": 2.6071, | |
| "num_input_tokens_seen": 4320688, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 0.7223501041324126, | |
| "grad_norm": 9.005423545837402, | |
| "learning_rate": 8.94240355266445e-06, | |
| "loss": 3.2333, | |
| "num_input_tokens_seen": 4323184, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.7228981694617999, | |
| "grad_norm": 5.683709144592285, | |
| "learning_rate": 8.909435564616944e-06, | |
| "loss": 2.9484, | |
| "num_input_tokens_seen": 4326304, | |
| "step": 6595 | |
| }, | |
| { | |
| "epoch": 0.7234462347911871, | |
| "grad_norm": 9.263490676879883, | |
| "learning_rate": 8.876515278458265e-06, | |
| "loss": 3.2337, | |
| "num_input_tokens_seen": 4329120, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.7239943001205744, | |
| "grad_norm": 6.478157997131348, | |
| "learning_rate": 8.84364279178348e-06, | |
| "loss": 3.0925, | |
| "num_input_tokens_seen": 4332440, | |
| "step": 6605 | |
| }, | |
| { | |
| "epoch": 0.7245423654499616, | |
| "grad_norm": 8.741613388061523, | |
| "learning_rate": 8.810818202045962e-06, | |
| "loss": 3.3093, | |
| "num_input_tokens_seen": 4335440, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.7250904307793489, | |
| "grad_norm": 7.031724452972412, | |
| "learning_rate": 8.77804160655708e-06, | |
| "loss": 3.3767, | |
| "num_input_tokens_seen": 4337912, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 0.7256384961087362, | |
| "grad_norm": 8.763786315917969, | |
| "learning_rate": 8.745313102485923e-06, | |
| "loss": 3.201, | |
| "num_input_tokens_seen": 4341472, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.7261865614381234, | |
| "grad_norm": 5.877601623535156, | |
| "learning_rate": 8.712632786859021e-06, | |
| "loss": 2.7422, | |
| "num_input_tokens_seen": 4345304, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 0.7267346267675107, | |
| "grad_norm": 7.608758926391602, | |
| "learning_rate": 8.68000075656003e-06, | |
| "loss": 3.2688, | |
| "num_input_tokens_seen": 4348264, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.727282692096898, | |
| "grad_norm": 6.207149982452393, | |
| "learning_rate": 8.647417108329454e-06, | |
| "loss": 3.1522, | |
| "num_input_tokens_seen": 4352144, | |
| "step": 6635 | |
| }, | |
| { | |
| "epoch": 0.7278307574262852, | |
| "grad_norm": 6.543735504150391, | |
| "learning_rate": 8.61488193876439e-06, | |
| "loss": 2.968, | |
| "num_input_tokens_seen": 4355840, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.7283788227556725, | |
| "grad_norm": 7.882357597351074, | |
| "learning_rate": 8.582395344318197e-06, | |
| "loss": 2.8674, | |
| "num_input_tokens_seen": 4358640, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 0.7289268880850598, | |
| "grad_norm": 10.999910354614258, | |
| "learning_rate": 8.54995742130022e-06, | |
| "loss": 3.2327, | |
| "num_input_tokens_seen": 4361656, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.729474953414447, | |
| "grad_norm": 8.629473686218262, | |
| "learning_rate": 8.517568265875541e-06, | |
| "loss": 3.1042, | |
| "num_input_tokens_seen": 4363968, | |
| "step": 6655 | |
| }, | |
| { | |
| "epoch": 0.7300230187438342, | |
| "grad_norm": 8.353252410888672, | |
| "learning_rate": 8.485227974064647e-06, | |
| "loss": 2.7692, | |
| "num_input_tokens_seen": 4367200, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.7305710840732216, | |
| "grad_norm": 7.927604675292969, | |
| "learning_rate": 8.452936641743156e-06, | |
| "loss": 3.2321, | |
| "num_input_tokens_seen": 4370096, | |
| "step": 6665 | |
| }, | |
| { | |
| "epoch": 0.7311191494026088, | |
| "grad_norm": 5.507778644561768, | |
| "learning_rate": 8.42069436464157e-06, | |
| "loss": 3.1024, | |
| "num_input_tokens_seen": 4374264, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.731667214731996, | |
| "grad_norm": 6.3533172607421875, | |
| "learning_rate": 8.38850123834494e-06, | |
| "loss": 2.7559, | |
| "num_input_tokens_seen": 4378824, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 0.7322152800613834, | |
| "grad_norm": 6.395352840423584, | |
| "learning_rate": 8.356357358292601e-06, | |
| "loss": 3.243, | |
| "num_input_tokens_seen": 4382616, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.7327633453907706, | |
| "grad_norm": 8.324797630310059, | |
| "learning_rate": 8.32426281977792e-06, | |
| "loss": 3.6588, | |
| "num_input_tokens_seen": 4385488, | |
| "step": 6685 | |
| }, | |
| { | |
| "epoch": 0.7333114107201578, | |
| "grad_norm": 6.711746692657471, | |
| "learning_rate": 8.292217717947962e-06, | |
| "loss": 3.1062, | |
| "num_input_tokens_seen": 4388592, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.7338594760495452, | |
| "grad_norm": 11.369217872619629, | |
| "learning_rate": 8.26022214780324e-06, | |
| "loss": 3.0253, | |
| "num_input_tokens_seen": 4391640, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 0.7344075413789324, | |
| "grad_norm": 7.522586822509766, | |
| "learning_rate": 8.228276204197427e-06, | |
| "loss": 3.3273, | |
| "num_input_tokens_seen": 4394456, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.7349556067083196, | |
| "grad_norm": 7.1993207931518555, | |
| "learning_rate": 8.196379981837071e-06, | |
| "loss": 2.9679, | |
| "num_input_tokens_seen": 4397352, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 0.735503672037707, | |
| "grad_norm": 9.711231231689453, | |
| "learning_rate": 8.164533575281316e-06, | |
| "loss": 3.5035, | |
| "num_input_tokens_seen": 4400744, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.7360517373670942, | |
| "grad_norm": 8.696206092834473, | |
| "learning_rate": 8.132737078941642e-06, | |
| "loss": 2.8264, | |
| "num_input_tokens_seen": 4404712, | |
| "step": 6715 | |
| }, | |
| { | |
| "epoch": 0.7365998026964814, | |
| "grad_norm": 8.558262825012207, | |
| "learning_rate": 8.100990587081536e-06, | |
| "loss": 3.0127, | |
| "num_input_tokens_seen": 4407448, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.7371478680258687, | |
| "grad_norm": 7.874935626983643, | |
| "learning_rate": 8.069294193816252e-06, | |
| "loss": 2.9852, | |
| "num_input_tokens_seen": 4410096, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 0.737695933355256, | |
| "grad_norm": 10.938785552978516, | |
| "learning_rate": 8.037647993112543e-06, | |
| "loss": 2.8523, | |
| "num_input_tokens_seen": 4413248, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.7382439986846432, | |
| "grad_norm": 6.2363786697387695, | |
| "learning_rate": 8.006052078788335e-06, | |
| "loss": 3.5423, | |
| "num_input_tokens_seen": 4417016, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 0.7387920640140305, | |
| "grad_norm": 7.439382553100586, | |
| "learning_rate": 7.974506544512478e-06, | |
| "loss": 3.0829, | |
| "num_input_tokens_seen": 4420144, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.7393401293434178, | |
| "grad_norm": 8.05595588684082, | |
| "learning_rate": 7.943011483804494e-06, | |
| "loss": 2.8291, | |
| "num_input_tokens_seen": 4422672, | |
| "step": 6745 | |
| }, | |
| { | |
| "epoch": 0.739888194672805, | |
| "grad_norm": 7.396727561950684, | |
| "learning_rate": 7.91156699003424e-06, | |
| "loss": 3.1015, | |
| "num_input_tokens_seen": 4425368, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.7404362600021923, | |
| "grad_norm": 5.773197650909424, | |
| "learning_rate": 7.880173156421661e-06, | |
| "loss": 3.0124, | |
| "num_input_tokens_seen": 4427720, | |
| "step": 6755 | |
| }, | |
| { | |
| "epoch": 0.7409843253315795, | |
| "grad_norm": 7.078009128570557, | |
| "learning_rate": 7.848830076036556e-06, | |
| "loss": 3.007, | |
| "num_input_tokens_seen": 4430872, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.7415323906609668, | |
| "grad_norm": 6.219594478607178, | |
| "learning_rate": 7.817537841798216e-06, | |
| "loss": 3.0966, | |
| "num_input_tokens_seen": 4434816, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 0.7420804559903541, | |
| "grad_norm": 7.2829365730285645, | |
| "learning_rate": 7.786296546475213e-06, | |
| "loss": 3.4504, | |
| "num_input_tokens_seen": 4437960, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.7426285213197413, | |
| "grad_norm": 7.280004978179932, | |
| "learning_rate": 7.755106282685118e-06, | |
| "loss": 3.0042, | |
| "num_input_tokens_seen": 4440624, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 0.7431765866491286, | |
| "grad_norm": 6.213809490203857, | |
| "learning_rate": 7.723967142894195e-06, | |
| "loss": 3.0603, | |
| "num_input_tokens_seen": 4444120, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.7437246519785159, | |
| "grad_norm": 6.277675628662109, | |
| "learning_rate": 7.69287921941715e-06, | |
| "loss": 2.9716, | |
| "num_input_tokens_seen": 4447152, | |
| "step": 6785 | |
| }, | |
| { | |
| "epoch": 0.7442727173079031, | |
| "grad_norm": 8.690731048583984, | |
| "learning_rate": 7.661842604416863e-06, | |
| "loss": 3.2242, | |
| "num_input_tokens_seen": 4450720, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.7448207826372903, | |
| "grad_norm": 6.518171787261963, | |
| "learning_rate": 7.630857389904095e-06, | |
| "loss": 2.8793, | |
| "num_input_tokens_seen": 4454448, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 0.7453688479666777, | |
| "grad_norm": 10.606318473815918, | |
| "learning_rate": 7.599923667737227e-06, | |
| "loss": 2.9673, | |
| "num_input_tokens_seen": 4457816, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.7459169132960649, | |
| "grad_norm": 10.472159385681152, | |
| "learning_rate": 7.5690415296220035e-06, | |
| "loss": 3.0352, | |
| "num_input_tokens_seen": 4460936, | |
| "step": 6805 | |
| }, | |
| { | |
| "epoch": 0.7464649786254521, | |
| "grad_norm": 7.0004496574401855, | |
| "learning_rate": 7.538211067111223e-06, | |
| "loss": 3.165, | |
| "num_input_tokens_seen": 4463688, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.7470130439548394, | |
| "grad_norm": 7.692315101623535, | |
| "learning_rate": 7.5074323716044835e-06, | |
| "loss": 3.3064, | |
| "num_input_tokens_seen": 4466616, | |
| "step": 6815 | |
| }, | |
| { | |
| "epoch": 0.7475611092842267, | |
| "grad_norm": 5.7364702224731445, | |
| "learning_rate": 7.476705534347947e-06, | |
| "loss": 3.2443, | |
| "num_input_tokens_seen": 4470464, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.7481091746136139, | |
| "grad_norm": 6.589802265167236, | |
| "learning_rate": 7.446030646434008e-06, | |
| "loss": 2.9859, | |
| "num_input_tokens_seen": 4472944, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 0.7486572399430012, | |
| "grad_norm": 8.241453170776367, | |
| "learning_rate": 7.4154077988010466e-06, | |
| "loss": 3.1194, | |
| "num_input_tokens_seen": 4475896, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.7492053052723885, | |
| "grad_norm": 7.177932262420654, | |
| "learning_rate": 7.3848370822332005e-06, | |
| "loss": 2.9095, | |
| "num_input_tokens_seen": 4478424, | |
| "step": 6835 | |
| }, | |
| { | |
| "epoch": 0.7497533706017757, | |
| "grad_norm": 6.683755397796631, | |
| "learning_rate": 7.354318587360029e-06, | |
| "loss": 2.8105, | |
| "num_input_tokens_seen": 4481120, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.7503014359311629, | |
| "grad_norm": 7.998584747314453, | |
| "learning_rate": 7.323852404656279e-06, | |
| "loss": 2.5817, | |
| "num_input_tokens_seen": 4484912, | |
| "step": 6845 | |
| }, | |
| { | |
| "epoch": 0.7508495012605503, | |
| "grad_norm": 5.244688034057617, | |
| "learning_rate": 7.293438624441637e-06, | |
| "loss": 3.1018, | |
| "num_input_tokens_seen": 4488416, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.7513975665899375, | |
| "grad_norm": 7.417481422424316, | |
| "learning_rate": 7.263077336880406e-06, | |
| "loss": 3.2385, | |
| "num_input_tokens_seen": 4491392, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 0.7519456319193247, | |
| "grad_norm": 5.952940464019775, | |
| "learning_rate": 7.232768631981285e-06, | |
| "loss": 2.5967, | |
| "num_input_tokens_seen": 4494608, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.7524936972487121, | |
| "grad_norm": 7.974299907684326, | |
| "learning_rate": 7.202512599597097e-06, | |
| "loss": 3.3131, | |
| "num_input_tokens_seen": 4497952, | |
| "step": 6865 | |
| }, | |
| { | |
| "epoch": 0.7530417625780993, | |
| "grad_norm": 10.40588092803955, | |
| "learning_rate": 7.172309329424495e-06, | |
| "loss": 2.8735, | |
| "num_input_tokens_seen": 4500792, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.7535898279074865, | |
| "grad_norm": 7.208824634552002, | |
| "learning_rate": 7.142158911003724e-06, | |
| "loss": 3.3135, | |
| "num_input_tokens_seen": 4504032, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 0.7541378932368739, | |
| "grad_norm": 7.409761428833008, | |
| "learning_rate": 7.112061433718339e-06, | |
| "loss": 2.955, | |
| "num_input_tokens_seen": 4506784, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.7546859585662611, | |
| "grad_norm": 6.84408712387085, | |
| "learning_rate": 7.082016986794951e-06, | |
| "loss": 3.3193, | |
| "num_input_tokens_seen": 4510016, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 0.7552340238956483, | |
| "grad_norm": 5.721726417541504, | |
| "learning_rate": 7.052025659302952e-06, | |
| "loss": 3.1054, | |
| "num_input_tokens_seen": 4512496, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.7557820892250356, | |
| "grad_norm": 7.73302698135376, | |
| "learning_rate": 7.022087540154274e-06, | |
| "loss": 3.0514, | |
| "num_input_tokens_seen": 4515040, | |
| "step": 6895 | |
| }, | |
| { | |
| "epoch": 0.7563301545544229, | |
| "grad_norm": 8.347733497619629, | |
| "learning_rate": 6.992202718103086e-06, | |
| "loss": 2.9805, | |
| "num_input_tokens_seen": 4517944, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.7568782198838101, | |
| "grad_norm": 7.3970255851745605, | |
| "learning_rate": 6.962371281745561e-06, | |
| "loss": 3.3263, | |
| "num_input_tokens_seen": 4520568, | |
| "step": 6905 | |
| }, | |
| { | |
| "epoch": 0.7574262852131974, | |
| "grad_norm": 7.3923797607421875, | |
| "learning_rate": 6.932593319519618e-06, | |
| "loss": 3.2219, | |
| "num_input_tokens_seen": 4524592, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.7579743505425847, | |
| "grad_norm": 7.414371490478516, | |
| "learning_rate": 6.902868919704627e-06, | |
| "loss": 2.5203, | |
| "num_input_tokens_seen": 4528528, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 0.7585224158719719, | |
| "grad_norm": 7.776823043823242, | |
| "learning_rate": 6.873198170421175e-06, | |
| "loss": 3.1746, | |
| "num_input_tokens_seen": 4532008, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.7590704812013592, | |
| "grad_norm": 7.0230889320373535, | |
| "learning_rate": 6.84358115963081e-06, | |
| "loss": 3.0865, | |
| "num_input_tokens_seen": 4536232, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 0.7596185465307465, | |
| "grad_norm": 4.996485233306885, | |
| "learning_rate": 6.814017975135753e-06, | |
| "loss": 3.2363, | |
| "num_input_tokens_seen": 4539680, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.7601666118601337, | |
| "grad_norm": 9.683207511901855, | |
| "learning_rate": 6.784508704578646e-06, | |
| "loss": 3.2016, | |
| "num_input_tokens_seen": 4542848, | |
| "step": 6935 | |
| }, | |
| { | |
| "epoch": 0.760714677189521, | |
| "grad_norm": 5.796095848083496, | |
| "learning_rate": 6.755053435442324e-06, | |
| "loss": 2.9563, | |
| "num_input_tokens_seen": 4547104, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.7612627425189082, | |
| "grad_norm": 7.686697959899902, | |
| "learning_rate": 6.725652255049508e-06, | |
| "loss": 2.7968, | |
| "num_input_tokens_seen": 4550392, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 0.7618108078482955, | |
| "grad_norm": 7.243149280548096, | |
| "learning_rate": 6.696305250562562e-06, | |
| "loss": 2.9016, | |
| "num_input_tokens_seen": 4553760, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.7623588731776828, | |
| "grad_norm": 5.771494388580322, | |
| "learning_rate": 6.667012508983278e-06, | |
| "loss": 3.1646, | |
| "num_input_tokens_seen": 4558080, | |
| "step": 6955 | |
| }, | |
| { | |
| "epoch": 0.76290693850707, | |
| "grad_norm": 7.9829816818237305, | |
| "learning_rate": 6.63777411715254e-06, | |
| "loss": 2.946, | |
| "num_input_tokens_seen": 4560904, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.7634550038364573, | |
| "grad_norm": 6.072175979614258, | |
| "learning_rate": 6.608590161750131e-06, | |
| "loss": 3.2183, | |
| "num_input_tokens_seen": 4563864, | |
| "step": 6965 | |
| }, | |
| { | |
| "epoch": 0.7640030691658446, | |
| "grad_norm": 6.895592212677002, | |
| "learning_rate": 6.579460729294429e-06, | |
| "loss": 3.2887, | |
| "num_input_tokens_seen": 4566800, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.7645511344952318, | |
| "grad_norm": 7.528575897216797, | |
| "learning_rate": 6.550385906142212e-06, | |
| "loss": 3.0147, | |
| "num_input_tokens_seen": 4569680, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 0.765099199824619, | |
| "grad_norm": 5.899028301239014, | |
| "learning_rate": 6.521365778488331e-06, | |
| "loss": 2.9008, | |
| "num_input_tokens_seen": 4573704, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.7656472651540064, | |
| "grad_norm": 7.313390254974365, | |
| "learning_rate": 6.492400432365503e-06, | |
| "loss": 3.1414, | |
| "num_input_tokens_seen": 4576368, | |
| "step": 6985 | |
| }, | |
| { | |
| "epoch": 0.7661953304833936, | |
| "grad_norm": 7.083227634429932, | |
| "learning_rate": 6.463489953644031e-06, | |
| "loss": 2.7539, | |
| "num_input_tokens_seen": 4578936, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.7667433958127808, | |
| "grad_norm": 7.272182941436768, | |
| "learning_rate": 6.434634428031558e-06, | |
| "loss": 3.1749, | |
| "num_input_tokens_seen": 4582096, | |
| "step": 6995 | |
| }, | |
| { | |
| "epoch": 0.7672914611421682, | |
| "grad_norm": 9.697888374328613, | |
| "learning_rate": 6.405833941072834e-06, | |
| "loss": 3.1397, | |
| "num_input_tokens_seen": 4584400, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7678395264715554, | |
| "grad_norm": 7.066343307495117, | |
| "learning_rate": 6.377088578149418e-06, | |
| "loss": 2.8686, | |
| "num_input_tokens_seen": 4587688, | |
| "step": 7005 | |
| }, | |
| { | |
| "epoch": 0.7683875918009426, | |
| "grad_norm": 5.80040979385376, | |
| "learning_rate": 6.348398424479454e-06, | |
| "loss": 2.7322, | |
| "num_input_tokens_seen": 4591120, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.76893565713033, | |
| "grad_norm": 8.803409576416016, | |
| "learning_rate": 6.319763565117432e-06, | |
| "loss": 3.2123, | |
| "num_input_tokens_seen": 4594456, | |
| "step": 7015 | |
| }, | |
| { | |
| "epoch": 0.7694837224597172, | |
| "grad_norm": 6.382712364196777, | |
| "learning_rate": 6.291184084953894e-06, | |
| "loss": 3.3465, | |
| "num_input_tokens_seen": 4597120, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.7700317877891044, | |
| "grad_norm": 6.3958740234375, | |
| "learning_rate": 6.2626600687152064e-06, | |
| "loss": 2.9045, | |
| "num_input_tokens_seen": 4599416, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 0.7705798531184918, | |
| "grad_norm": 5.454673767089844, | |
| "learning_rate": 6.234191600963335e-06, | |
| "loss": 3.1258, | |
| "num_input_tokens_seen": 4602760, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.771127918447879, | |
| "grad_norm": 4.992536544799805, | |
| "learning_rate": 6.205778766095533e-06, | |
| "loss": 3.0881, | |
| "num_input_tokens_seen": 4605312, | |
| "step": 7035 | |
| }, | |
| { | |
| "epoch": 0.7716759837772662, | |
| "grad_norm": 7.264188766479492, | |
| "learning_rate": 6.1774216483441394e-06, | |
| "loss": 3.117, | |
| "num_input_tokens_seen": 4608784, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.7722240491066535, | |
| "grad_norm": 7.106401443481445, | |
| "learning_rate": 6.149120331776329e-06, | |
| "loss": 2.8674, | |
| "num_input_tokens_seen": 4612728, | |
| "step": 7045 | |
| }, | |
| { | |
| "epoch": 0.7727721144360408, | |
| "grad_norm": 8.04111385345459, | |
| "learning_rate": 6.120874900293827e-06, | |
| "loss": 3.0187, | |
| "num_input_tokens_seen": 4616096, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.773320179765428, | |
| "grad_norm": 7.114358901977539, | |
| "learning_rate": 6.092685437632683e-06, | |
| "loss": 2.9277, | |
| "num_input_tokens_seen": 4619312, | |
| "step": 7055 | |
| }, | |
| { | |
| "epoch": 0.7738682450948153, | |
| "grad_norm": 6.135927200317383, | |
| "learning_rate": 6.064552027363049e-06, | |
| "loss": 2.8, | |
| "num_input_tokens_seen": 4623080, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.7744163104242026, | |
| "grad_norm": 9.407398223876953, | |
| "learning_rate": 6.0364747528888734e-06, | |
| "loss": 2.8471, | |
| "num_input_tokens_seen": 4625720, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 0.7749643757535898, | |
| "grad_norm": 8.590024948120117, | |
| "learning_rate": 6.0084536974476995e-06, | |
| "loss": 3.1369, | |
| "num_input_tokens_seen": 4628368, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.7755124410829771, | |
| "grad_norm": 6.1918721199035645, | |
| "learning_rate": 5.980488944110408e-06, | |
| "loss": 2.9941, | |
| "num_input_tokens_seen": 4631128, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 0.7760605064123643, | |
| "grad_norm": 6.956912994384766, | |
| "learning_rate": 5.9525805757809524e-06, | |
| "loss": 3.3899, | |
| "num_input_tokens_seen": 4634672, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.7766085717417516, | |
| "grad_norm": 6.198210716247559, | |
| "learning_rate": 5.9247286751961366e-06, | |
| "loss": 3.165, | |
| "num_input_tokens_seen": 4638184, | |
| "step": 7085 | |
| }, | |
| { | |
| "epoch": 0.7771566370711389, | |
| "grad_norm": 6.877211570739746, | |
| "learning_rate": 5.896933324925372e-06, | |
| "loss": 3.1694, | |
| "num_input_tokens_seen": 4641976, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.7777047024005261, | |
| "grad_norm": 6.007309436798096, | |
| "learning_rate": 5.869194607370409e-06, | |
| "loss": 3.1036, | |
| "num_input_tokens_seen": 4645280, | |
| "step": 7095 | |
| }, | |
| { | |
| "epoch": 0.7782527677299134, | |
| "grad_norm": 7.9656572341918945, | |
| "learning_rate": 5.8415126047650955e-06, | |
| "loss": 3.2545, | |
| "num_input_tokens_seen": 4648904, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.7788008330593007, | |
| "grad_norm": 7.05634069442749, | |
| "learning_rate": 5.813887399175169e-06, | |
| "loss": 2.912, | |
| "num_input_tokens_seen": 4651232, | |
| "step": 7105 | |
| }, | |
| { | |
| "epoch": 0.7793488983886879, | |
| "grad_norm": 8.77833080291748, | |
| "learning_rate": 5.7863190724979695e-06, | |
| "loss": 3.0476, | |
| "num_input_tokens_seen": 4654288, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.7798969637180752, | |
| "grad_norm": 6.191843032836914, | |
| "learning_rate": 5.75880770646221e-06, | |
| "loss": 3.1158, | |
| "num_input_tokens_seen": 4657808, | |
| "step": 7115 | |
| }, | |
| { | |
| "epoch": 0.7804450290474625, | |
| "grad_norm": 5.634969234466553, | |
| "learning_rate": 5.73135338262776e-06, | |
| "loss": 2.8591, | |
| "num_input_tokens_seen": 4661440, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.7809930943768497, | |
| "grad_norm": 6.004340648651123, | |
| "learning_rate": 5.7039561823853615e-06, | |
| "loss": 2.8518, | |
| "num_input_tokens_seen": 4665104, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 0.7815411597062369, | |
| "grad_norm": 7.3791680335998535, | |
| "learning_rate": 5.676616186956413e-06, | |
| "loss": 3.1628, | |
| "num_input_tokens_seen": 4668432, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.7820892250356243, | |
| "grad_norm": 9.166860580444336, | |
| "learning_rate": 5.649333477392735e-06, | |
| "loss": 3.3455, | |
| "num_input_tokens_seen": 4671688, | |
| "step": 7135 | |
| }, | |
| { | |
| "epoch": 0.7826372903650115, | |
| "grad_norm": 6.651597023010254, | |
| "learning_rate": 5.622108134576312e-06, | |
| "loss": 3.4196, | |
| "num_input_tokens_seen": 4675408, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.7831853556943987, | |
| "grad_norm": 7.5387797355651855, | |
| "learning_rate": 5.594940239219049e-06, | |
| "loss": 3.2571, | |
| "num_input_tokens_seen": 4678440, | |
| "step": 7145 | |
| }, | |
| { | |
| "epoch": 0.7837334210237861, | |
| "grad_norm": 9.256987571716309, | |
| "learning_rate": 5.5678298718625674e-06, | |
| "loss": 3.1553, | |
| "num_input_tokens_seen": 4681320, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.7842814863531733, | |
| "grad_norm": 8.727250099182129, | |
| "learning_rate": 5.54077711287792e-06, | |
| "loss": 3.2874, | |
| "num_input_tokens_seen": 4685024, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 0.7848295516825605, | |
| "grad_norm": 8.900041580200195, | |
| "learning_rate": 5.513782042465385e-06, | |
| "loss": 2.8368, | |
| "num_input_tokens_seen": 4687568, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.7853776170119479, | |
| "grad_norm": 10.776511192321777, | |
| "learning_rate": 5.4868447406542125e-06, | |
| "loss": 2.9062, | |
| "num_input_tokens_seen": 4690632, | |
| "step": 7165 | |
| }, | |
| { | |
| "epoch": 0.7859256823413351, | |
| "grad_norm": 6.669962406158447, | |
| "learning_rate": 5.459965287302396e-06, | |
| "loss": 3.3375, | |
| "num_input_tokens_seen": 4694528, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.7864737476707223, | |
| "grad_norm": 8.748539924621582, | |
| "learning_rate": 5.4331437620964235e-06, | |
| "loss": 3.1538, | |
| "num_input_tokens_seen": 4697304, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 0.7870218130001096, | |
| "grad_norm": 6.20130729675293, | |
| "learning_rate": 5.406380244551077e-06, | |
| "loss": 3.3296, | |
| "num_input_tokens_seen": 4701400, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.7875698783294969, | |
| "grad_norm": 6.8918304443359375, | |
| "learning_rate": 5.379674814009133e-06, | |
| "loss": 2.9058, | |
| "num_input_tokens_seen": 4704688, | |
| "step": 7185 | |
| }, | |
| { | |
| "epoch": 0.7881179436588841, | |
| "grad_norm": 8.053811073303223, | |
| "learning_rate": 5.353027549641185e-06, | |
| "loss": 3.19, | |
| "num_input_tokens_seen": 4707832, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.7886660089882714, | |
| "grad_norm": 8.722176551818848, | |
| "learning_rate": 5.326438530445394e-06, | |
| "loss": 3.1039, | |
| "num_input_tokens_seen": 4711272, | |
| "step": 7195 | |
| }, | |
| { | |
| "epoch": 0.7892140743176587, | |
| "grad_norm": 8.22156810760498, | |
| "learning_rate": 5.299907835247228e-06, | |
| "loss": 2.9179, | |
| "num_input_tokens_seen": 4714584, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.7897621396470459, | |
| "grad_norm": 8.812997817993164, | |
| "learning_rate": 5.273435542699259e-06, | |
| "loss": 2.9421, | |
| "num_input_tokens_seen": 4717960, | |
| "step": 7205 | |
| }, | |
| { | |
| "epoch": 0.7903102049764332, | |
| "grad_norm": 7.295377731323242, | |
| "learning_rate": 5.247021731280927e-06, | |
| "loss": 3.1538, | |
| "num_input_tokens_seen": 4721208, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.7908582703058205, | |
| "grad_norm": 6.8964762687683105, | |
| "learning_rate": 5.220666479298283e-06, | |
| "loss": 2.9399, | |
| "num_input_tokens_seen": 4723760, | |
| "step": 7215 | |
| }, | |
| { | |
| "epoch": 0.7914063356352077, | |
| "grad_norm": 8.851302146911621, | |
| "learning_rate": 5.194369864883783e-06, | |
| "loss": 3.0368, | |
| "num_input_tokens_seen": 4727808, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.791954400964595, | |
| "grad_norm": 6.765636444091797, | |
| "learning_rate": 5.168131965996051e-06, | |
| "loss": 2.5498, | |
| "num_input_tokens_seen": 4730984, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 0.7925024662939822, | |
| "grad_norm": 6.0574750900268555, | |
| "learning_rate": 5.1419528604196385e-06, | |
| "loss": 2.9546, | |
| "num_input_tokens_seen": 4734472, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.7930505316233695, | |
| "grad_norm": 6.703484535217285, | |
| "learning_rate": 5.1158326257647855e-06, | |
| "loss": 3.0816, | |
| "num_input_tokens_seen": 4736976, | |
| "step": 7235 | |
| }, | |
| { | |
| "epoch": 0.7935985969527568, | |
| "grad_norm": 5.429347038269043, | |
| "learning_rate": 5.089771339467236e-06, | |
| "loss": 2.8567, | |
| "num_input_tokens_seen": 4740592, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.794146662282144, | |
| "grad_norm": 6.805422306060791, | |
| "learning_rate": 5.06376907878795e-06, | |
| "loss": 3.0524, | |
| "num_input_tokens_seen": 4744232, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 0.7946947276115313, | |
| "grad_norm": 7.566915512084961, | |
| "learning_rate": 5.0378259208129054e-06, | |
| "loss": 2.7767, | |
| "num_input_tokens_seen": 4748392, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.7952427929409186, | |
| "grad_norm": 8.171722412109375, | |
| "learning_rate": 5.011941942452872e-06, | |
| "loss": 2.9925, | |
| "num_input_tokens_seen": 4751496, | |
| "step": 7255 | |
| }, | |
| { | |
| "epoch": 0.7957908582703058, | |
| "grad_norm": 9.192333221435547, | |
| "learning_rate": 4.986117220443173e-06, | |
| "loss": 3.3195, | |
| "num_input_tokens_seen": 4754624, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.796338923599693, | |
| "grad_norm": 6.089689254760742, | |
| "learning_rate": 4.960351831343452e-06, | |
| "loss": 3.3298, | |
| "num_input_tokens_seen": 4758304, | |
| "step": 7265 | |
| }, | |
| { | |
| "epoch": 0.7968869889290804, | |
| "grad_norm": 7.405531883239746, | |
| "learning_rate": 4.9346458515374785e-06, | |
| "loss": 3.3122, | |
| "num_input_tokens_seen": 4760592, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.7974350542584676, | |
| "grad_norm": 7.917971611022949, | |
| "learning_rate": 4.908999357232874e-06, | |
| "loss": 3.0276, | |
| "num_input_tokens_seen": 4763392, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 0.7979831195878548, | |
| "grad_norm": 8.550086975097656, | |
| "learning_rate": 4.8834124244609145e-06, | |
| "loss": 3.2591, | |
| "num_input_tokens_seen": 4766544, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.7985311849172422, | |
| "grad_norm": 7.939424514770508, | |
| "learning_rate": 4.857885129076317e-06, | |
| "loss": 2.8357, | |
| "num_input_tokens_seen": 4769408, | |
| "step": 7285 | |
| }, | |
| { | |
| "epoch": 0.7990792502466294, | |
| "grad_norm": 6.404162406921387, | |
| "learning_rate": 4.8324175467569845e-06, | |
| "loss": 3.0799, | |
| "num_input_tokens_seen": 4773344, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.7996273155760166, | |
| "grad_norm": 7.251323699951172, | |
| "learning_rate": 4.807009753003791e-06, | |
| "loss": 3.1363, | |
| "num_input_tokens_seen": 4776640, | |
| "step": 7295 | |
| }, | |
| { | |
| "epoch": 0.800175380905404, | |
| "grad_norm": 8.667237281799316, | |
| "learning_rate": 4.781661823140366e-06, | |
| "loss": 3.2124, | |
| "num_input_tokens_seen": 4779376, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.8007234462347912, | |
| "grad_norm": 8.147212028503418, | |
| "learning_rate": 4.756373832312879e-06, | |
| "loss": 2.874, | |
| "num_input_tokens_seen": 4781952, | |
| "step": 7305 | |
| }, | |
| { | |
| "epoch": 0.8012715115641784, | |
| "grad_norm": 8.90487003326416, | |
| "learning_rate": 4.731145855489794e-06, | |
| "loss": 3.2025, | |
| "num_input_tokens_seen": 4784816, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.8018195768935658, | |
| "grad_norm": 7.192740440368652, | |
| "learning_rate": 4.70597796746165e-06, | |
| "loss": 2.9843, | |
| "num_input_tokens_seen": 4787592, | |
| "step": 7315 | |
| }, | |
| { | |
| "epoch": 0.802367642222953, | |
| "grad_norm": 6.346043586730957, | |
| "learning_rate": 4.6808702428408706e-06, | |
| "loss": 3.1331, | |
| "num_input_tokens_seen": 4790256, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.8029157075523402, | |
| "grad_norm": 8.076735496520996, | |
| "learning_rate": 4.655822756061503e-06, | |
| "loss": 3.1571, | |
| "num_input_tokens_seen": 4792768, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 0.8034637728817275, | |
| "grad_norm": 7.521450519561768, | |
| "learning_rate": 4.630835581379006e-06, | |
| "loss": 2.929, | |
| "num_input_tokens_seen": 4796152, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.8040118382111148, | |
| "grad_norm": 12.113771438598633, | |
| "learning_rate": 4.605908792870067e-06, | |
| "loss": 3.1268, | |
| "num_input_tokens_seen": 4798376, | |
| "step": 7335 | |
| }, | |
| { | |
| "epoch": 0.804559903540502, | |
| "grad_norm": 5.997092247009277, | |
| "learning_rate": 4.581042464432328e-06, | |
| "loss": 2.8665, | |
| "num_input_tokens_seen": 4802104, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.8051079688698893, | |
| "grad_norm": 6.922906875610352, | |
| "learning_rate": 4.556236669784197e-06, | |
| "loss": 3.3316, | |
| "num_input_tokens_seen": 4805648, | |
| "step": 7345 | |
| }, | |
| { | |
| "epoch": 0.8056560341992766, | |
| "grad_norm": 9.63893985748291, | |
| "learning_rate": 4.531491482464628e-06, | |
| "loss": 3.2614, | |
| "num_input_tokens_seen": 4810112, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.8062040995286638, | |
| "grad_norm": 8.894881248474121, | |
| "learning_rate": 4.5068069758329e-06, | |
| "loss": 3.2695, | |
| "num_input_tokens_seen": 4813192, | |
| "step": 7355 | |
| }, | |
| { | |
| "epoch": 0.8067521648580511, | |
| "grad_norm": 6.436181545257568, | |
| "learning_rate": 4.482183223068387e-06, | |
| "loss": 2.8622, | |
| "num_input_tokens_seen": 4815768, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.8073002301874384, | |
| "grad_norm": 7.975905895233154, | |
| "learning_rate": 4.457620297170381e-06, | |
| "loss": 3.3166, | |
| "num_input_tokens_seen": 4819144, | |
| "step": 7365 | |
| }, | |
| { | |
| "epoch": 0.8078482955168256, | |
| "grad_norm": 7.515452861785889, | |
| "learning_rate": 4.433118270957818e-06, | |
| "loss": 2.5207, | |
| "num_input_tokens_seen": 4822152, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.8083963608462129, | |
| "grad_norm": 6.722434997558594, | |
| "learning_rate": 4.408677217069096e-06, | |
| "loss": 3.1815, | |
| "num_input_tokens_seen": 4825920, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 0.8089444261756001, | |
| "grad_norm": 6.1937031745910645, | |
| "learning_rate": 4.3842972079618765e-06, | |
| "loss": 3.0536, | |
| "num_input_tokens_seen": 4829224, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.8094924915049874, | |
| "grad_norm": 7.4900898933410645, | |
| "learning_rate": 4.359978315912827e-06, | |
| "loss": 2.9555, | |
| "num_input_tokens_seen": 4832576, | |
| "step": 7385 | |
| }, | |
| { | |
| "epoch": 0.8100405568343747, | |
| "grad_norm": 7.267132759094238, | |
| "learning_rate": 4.33572061301743e-06, | |
| "loss": 3.376, | |
| "num_input_tokens_seen": 4834896, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.8105886221637619, | |
| "grad_norm": 6.553824424743652, | |
| "learning_rate": 4.311524171189782e-06, | |
| "loss": 3.1203, | |
| "num_input_tokens_seen": 4838536, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 0.8111366874931492, | |
| "grad_norm": 6.04332971572876, | |
| "learning_rate": 4.28738906216235e-06, | |
| "loss": 2.898, | |
| "num_input_tokens_seen": 4842312, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.8116847528225365, | |
| "grad_norm": 6.300970077514648, | |
| "learning_rate": 4.263315357485775e-06, | |
| "loss": 3.2478, | |
| "num_input_tokens_seen": 4845640, | |
| "step": 7405 | |
| }, | |
| { | |
| "epoch": 0.8122328181519237, | |
| "grad_norm": 8.834260940551758, | |
| "learning_rate": 4.2393031285286796e-06, | |
| "loss": 3.1214, | |
| "num_input_tokens_seen": 4848880, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.812780883481311, | |
| "grad_norm": 7.611583709716797, | |
| "learning_rate": 4.215352446477413e-06, | |
| "loss": 2.8593, | |
| "num_input_tokens_seen": 4852904, | |
| "step": 7415 | |
| }, | |
| { | |
| "epoch": 0.8133289488106983, | |
| "grad_norm": 5.708853244781494, | |
| "learning_rate": 4.191463382335867e-06, | |
| "loss": 3.1984, | |
| "num_input_tokens_seen": 4855720, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.8138770141400855, | |
| "grad_norm": 5.545560836791992, | |
| "learning_rate": 4.167636006925274e-06, | |
| "loss": 3.1826, | |
| "num_input_tokens_seen": 4859488, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 0.8144250794694727, | |
| "grad_norm": 9.735588073730469, | |
| "learning_rate": 4.143870390883978e-06, | |
| "loss": 2.8356, | |
| "num_input_tokens_seen": 4862808, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.8149731447988601, | |
| "grad_norm": 10.298928260803223, | |
| "learning_rate": 4.120166604667225e-06, | |
| "loss": 2.9738, | |
| "num_input_tokens_seen": 4866608, | |
| "step": 7435 | |
| }, | |
| { | |
| "epoch": 0.8155212101282473, | |
| "grad_norm": 8.623414039611816, | |
| "learning_rate": 4.096524718546974e-06, | |
| "loss": 3.0776, | |
| "num_input_tokens_seen": 4868832, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.8160692754576345, | |
| "grad_norm": 10.033533096313477, | |
| "learning_rate": 4.072944802611655e-06, | |
| "loss": 3.1786, | |
| "num_input_tokens_seen": 4872536, | |
| "step": 7445 | |
| }, | |
| { | |
| "epoch": 0.8166173407870219, | |
| "grad_norm": 8.511270523071289, | |
| "learning_rate": 4.0494269267660144e-06, | |
| "loss": 3.4183, | |
| "num_input_tokens_seen": 4876032, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.8171654061164091, | |
| "grad_norm": 6.882598876953125, | |
| "learning_rate": 4.025971160730846e-06, | |
| "loss": 3.0995, | |
| "num_input_tokens_seen": 4878536, | |
| "step": 7455 | |
| }, | |
| { | |
| "epoch": 0.8177134714457963, | |
| "grad_norm": 6.228262901306152, | |
| "learning_rate": 4.002577574042829e-06, | |
| "loss": 2.8603, | |
| "num_input_tokens_seen": 4880976, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.8182615367751837, | |
| "grad_norm": 9.165740013122559, | |
| "learning_rate": 3.9792462360542935e-06, | |
| "loss": 2.8565, | |
| "num_input_tokens_seen": 4884688, | |
| "step": 7465 | |
| }, | |
| { | |
| "epoch": 0.8188096021045709, | |
| "grad_norm": 7.1637701988220215, | |
| "learning_rate": 3.955977215933046e-06, | |
| "loss": 2.9947, | |
| "num_input_tokens_seen": 4888200, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.8193576674339581, | |
| "grad_norm": 7.321343421936035, | |
| "learning_rate": 3.932770582662135e-06, | |
| "loss": 3.1105, | |
| "num_input_tokens_seen": 4890856, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 0.8199057327633454, | |
| "grad_norm": 7.804381847381592, | |
| "learning_rate": 3.9096264050396485e-06, | |
| "loss": 2.9519, | |
| "num_input_tokens_seen": 4893712, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.8204537980927327, | |
| "grad_norm": 6.569583415985107, | |
| "learning_rate": 3.886544751678547e-06, | |
| "loss": 3.0457, | |
| "num_input_tokens_seen": 4897104, | |
| "step": 7485 | |
| }, | |
| { | |
| "epoch": 0.8210018634221199, | |
| "grad_norm": 10.908699035644531, | |
| "learning_rate": 3.863525691006406e-06, | |
| "loss": 3.5541, | |
| "num_input_tokens_seen": 4900616, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.8215499287515072, | |
| "grad_norm": 8.427760124206543, | |
| "learning_rate": 3.840569291265242e-06, | |
| "loss": 2.9541, | |
| "num_input_tokens_seen": 4902848, | |
| "step": 7495 | |
| }, | |
| { | |
| "epoch": 0.8220979940808945, | |
| "grad_norm": 10.59475040435791, | |
| "learning_rate": 3.817675620511329e-06, | |
| "loss": 2.932, | |
| "num_input_tokens_seen": 4905424, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.8226460594102817, | |
| "grad_norm": 8.56042194366455, | |
| "learning_rate": 3.794844746614956e-06, | |
| "loss": 3.3314, | |
| "num_input_tokens_seen": 4908016, | |
| "step": 7505 | |
| }, | |
| { | |
| "epoch": 0.823194124739669, | |
| "grad_norm": 8.957588195800781, | |
| "learning_rate": 3.772076737260241e-06, | |
| "loss": 3.4287, | |
| "num_input_tokens_seen": 4912944, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.8237421900690562, | |
| "grad_norm": 8.641453742980957, | |
| "learning_rate": 3.7493716599449557e-06, | |
| "loss": 2.7836, | |
| "num_input_tokens_seen": 4915344, | |
| "step": 7515 | |
| }, | |
| { | |
| "epoch": 0.8242902553984435, | |
| "grad_norm": 9.905373573303223, | |
| "learning_rate": 3.726729581980287e-06, | |
| "loss": 3.3792, | |
| "num_input_tokens_seen": 4918280, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.8248383207278308, | |
| "grad_norm": 6.359044075012207, | |
| "learning_rate": 3.7041505704906554e-06, | |
| "loss": 2.6283, | |
| "num_input_tokens_seen": 4923056, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 0.825386386057218, | |
| "grad_norm": 8.611063957214355, | |
| "learning_rate": 3.681634692413527e-06, | |
| "loss": 3.0805, | |
| "num_input_tokens_seen": 4925992, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.8259344513866053, | |
| "grad_norm": 6.022265911102295, | |
| "learning_rate": 3.659182014499199e-06, | |
| "loss": 2.9173, | |
| "num_input_tokens_seen": 4928312, | |
| "step": 7535 | |
| }, | |
| { | |
| "epoch": 0.8264825167159926, | |
| "grad_norm": 7.828344821929932, | |
| "learning_rate": 3.636792603310593e-06, | |
| "loss": 3.3786, | |
| "num_input_tokens_seen": 4931816, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.8270305820453798, | |
| "grad_norm": 9.197246551513672, | |
| "learning_rate": 3.6144665252230897e-06, | |
| "loss": 3.1869, | |
| "num_input_tokens_seen": 4934904, | |
| "step": 7545 | |
| }, | |
| { | |
| "epoch": 0.827578647374767, | |
| "grad_norm": 6.626698017120361, | |
| "learning_rate": 3.5922038464243e-06, | |
| "loss": 2.864, | |
| "num_input_tokens_seen": 4937320, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.8281267127041544, | |
| "grad_norm": 6.149302959442139, | |
| "learning_rate": 3.570004632913884e-06, | |
| "loss": 2.9841, | |
| "num_input_tokens_seen": 4940472, | |
| "step": 7555 | |
| }, | |
| { | |
| "epoch": 0.8286747780335416, | |
| "grad_norm": 5.897488117218018, | |
| "learning_rate": 3.5478689505033635e-06, | |
| "loss": 3.0083, | |
| "num_input_tokens_seen": 4943240, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.8292228433629288, | |
| "grad_norm": 5.379867076873779, | |
| "learning_rate": 3.5257968648159085e-06, | |
| "loss": 3.2044, | |
| "num_input_tokens_seen": 4947448, | |
| "step": 7565 | |
| }, | |
| { | |
| "epoch": 0.8297709086923162, | |
| "grad_norm": 8.127168655395508, | |
| "learning_rate": 3.503788441286143e-06, | |
| "loss": 3.0341, | |
| "num_input_tokens_seen": 4950720, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.8303189740217034, | |
| "grad_norm": 7.3780364990234375, | |
| "learning_rate": 3.4818437451599796e-06, | |
| "loss": 3.2321, | |
| "num_input_tokens_seen": 4954728, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 0.8308670393510906, | |
| "grad_norm": 6.4768757820129395, | |
| "learning_rate": 3.459962841494391e-06, | |
| "loss": 3.1017, | |
| "num_input_tokens_seen": 4957936, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.831415104680478, | |
| "grad_norm": 7.365682125091553, | |
| "learning_rate": 3.4381457951572245e-06, | |
| "loss": 2.8212, | |
| "num_input_tokens_seen": 4961240, | |
| "step": 7585 | |
| }, | |
| { | |
| "epoch": 0.8319631700098652, | |
| "grad_norm": 7.922868251800537, | |
| "learning_rate": 3.41639267082704e-06, | |
| "loss": 2.8681, | |
| "num_input_tokens_seen": 4964016, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.8325112353392524, | |
| "grad_norm": 4.56962251663208, | |
| "learning_rate": 3.3947035329928768e-06, | |
| "loss": 3.0944, | |
| "num_input_tokens_seen": 4966208, | |
| "step": 7595 | |
| }, | |
| { | |
| "epoch": 0.8330593006686398, | |
| "grad_norm": 8.027546882629395, | |
| "learning_rate": 3.3730784459540755e-06, | |
| "loss": 2.62, | |
| "num_input_tokens_seen": 4969656, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.833607365998027, | |
| "grad_norm": 9.634477615356445, | |
| "learning_rate": 3.3515174738201204e-06, | |
| "loss": 3.0848, | |
| "num_input_tokens_seen": 4972656, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 0.8341554313274142, | |
| "grad_norm": 6.137497901916504, | |
| "learning_rate": 3.3300206805103902e-06, | |
| "loss": 2.8019, | |
| "num_input_tokens_seen": 4976816, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.8347034966568014, | |
| "grad_norm": 6.958483695983887, | |
| "learning_rate": 3.3085881297540143e-06, | |
| "loss": 3.1585, | |
| "num_input_tokens_seen": 4979448, | |
| "step": 7615 | |
| }, | |
| { | |
| "epoch": 0.8352515619861888, | |
| "grad_norm": 6.135876178741455, | |
| "learning_rate": 3.2872198850896763e-06, | |
| "loss": 3.4485, | |
| "num_input_tokens_seen": 4982096, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.835799627315576, | |
| "grad_norm": 5.784817218780518, | |
| "learning_rate": 3.265916009865405e-06, | |
| "loss": 2.5781, | |
| "num_input_tokens_seen": 4987624, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 0.8363476926449632, | |
| "grad_norm": 7.2112603187561035, | |
| "learning_rate": 3.2446765672384083e-06, | |
| "loss": 3.1842, | |
| "num_input_tokens_seen": 4991016, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.8368957579743506, | |
| "grad_norm": 8.30711555480957, | |
| "learning_rate": 3.223501620174871e-06, | |
| "loss": 2.8567, | |
| "num_input_tokens_seen": 4994496, | |
| "step": 7635 | |
| }, | |
| { | |
| "epoch": 0.8374438233037378, | |
| "grad_norm": 5.6931915283203125, | |
| "learning_rate": 3.2023912314497835e-06, | |
| "loss": 3.109, | |
| "num_input_tokens_seen": 4997176, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.837991888633125, | |
| "grad_norm": 7.178470611572266, | |
| "learning_rate": 3.18134546364674e-06, | |
| "loss": 3.1472, | |
| "num_input_tokens_seen": 5001168, | |
| "step": 7645 | |
| }, | |
| { | |
| "epoch": 0.8385399539625124, | |
| "grad_norm": 6.247611045837402, | |
| "learning_rate": 3.160364379157771e-06, | |
| "loss": 3.0272, | |
| "num_input_tokens_seen": 5004928, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.8390880192918996, | |
| "grad_norm": 8.314835548400879, | |
| "learning_rate": 3.1394480401831376e-06, | |
| "loss": 3.1062, | |
| "num_input_tokens_seen": 5007976, | |
| "step": 7655 | |
| }, | |
| { | |
| "epoch": 0.8396360846212868, | |
| "grad_norm": 8.253650665283203, | |
| "learning_rate": 3.118596508731153e-06, | |
| "loss": 3.1373, | |
| "num_input_tokens_seen": 5010840, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.8401841499506741, | |
| "grad_norm": 8.37070083618164, | |
| "learning_rate": 3.0978098466180246e-06, | |
| "loss": 3.1474, | |
| "num_input_tokens_seen": 5013264, | |
| "step": 7665 | |
| }, | |
| { | |
| "epoch": 0.8407322152800614, | |
| "grad_norm": 7.3890700340271, | |
| "learning_rate": 3.0770881154676244e-06, | |
| "loss": 2.9336, | |
| "num_input_tokens_seen": 5016288, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.8412802806094486, | |
| "grad_norm": 9.55408000946045, | |
| "learning_rate": 3.056431376711341e-06, | |
| "loss": 3.1662, | |
| "num_input_tokens_seen": 5019184, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 0.8418283459388359, | |
| "grad_norm": 9.764185905456543, | |
| "learning_rate": 3.035839691587891e-06, | |
| "loss": 3.3416, | |
| "num_input_tokens_seen": 5022032, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.8423764112682232, | |
| "grad_norm": 6.572988510131836, | |
| "learning_rate": 3.015313121143132e-06, | |
| "loss": 3.44, | |
| "num_input_tokens_seen": 5025704, | |
| "step": 7685 | |
| }, | |
| { | |
| "epoch": 0.8429244765976104, | |
| "grad_norm": 6.35365629196167, | |
| "learning_rate": 2.994851726229872e-06, | |
| "loss": 2.8245, | |
| "num_input_tokens_seen": 5029360, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.8434725419269977, | |
| "grad_norm": 5.579585552215576, | |
| "learning_rate": 2.9744555675077195e-06, | |
| "loss": 2.9123, | |
| "num_input_tokens_seen": 5032232, | |
| "step": 7695 | |
| }, | |
| { | |
| "epoch": 0.844020607256385, | |
| "grad_norm": 9.263272285461426, | |
| "learning_rate": 2.9541247054428732e-06, | |
| "loss": 3.1231, | |
| "num_input_tokens_seen": 5034616, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.8445686725857722, | |
| "grad_norm": 6.095417022705078, | |
| "learning_rate": 2.933859200307948e-06, | |
| "loss": 2.822, | |
| "num_input_tokens_seen": 5037736, | |
| "step": 7705 | |
| }, | |
| { | |
| "epoch": 0.8451167379151595, | |
| "grad_norm": 7.388354778289795, | |
| "learning_rate": 2.913659112181824e-06, | |
| "loss": 2.8813, | |
| "num_input_tokens_seen": 5040224, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.8456648032445467, | |
| "grad_norm": 5.476953983306885, | |
| "learning_rate": 2.893524500949424e-06, | |
| "loss": 2.9058, | |
| "num_input_tokens_seen": 5042920, | |
| "step": 7715 | |
| }, | |
| { | |
| "epoch": 0.846212868573934, | |
| "grad_norm": 8.243193626403809, | |
| "learning_rate": 2.8734554263015717e-06, | |
| "loss": 3.0815, | |
| "num_input_tokens_seen": 5046384, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.8467609339033213, | |
| "grad_norm": 5.285266399383545, | |
| "learning_rate": 2.853451947734795e-06, | |
| "loss": 2.8613, | |
| "num_input_tokens_seen": 5050096, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 0.8473089992327085, | |
| "grad_norm": 7.07433557510376, | |
| "learning_rate": 2.833514124551162e-06, | |
| "loss": 3.2751, | |
| "num_input_tokens_seen": 5053016, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.8478570645620958, | |
| "grad_norm": 7.447408676147461, | |
| "learning_rate": 2.8136420158580923e-06, | |
| "loss": 3.199, | |
| "num_input_tokens_seen": 5055816, | |
| "step": 7735 | |
| }, | |
| { | |
| "epoch": 0.8484051298914831, | |
| "grad_norm": 6.6446757316589355, | |
| "learning_rate": 2.793835680568202e-06, | |
| "loss": 2.9382, | |
| "num_input_tokens_seen": 5059872, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.8489531952208703, | |
| "grad_norm": 6.634135723114014, | |
| "learning_rate": 2.774095177399108e-06, | |
| "loss": 2.7486, | |
| "num_input_tokens_seen": 5063104, | |
| "step": 7745 | |
| }, | |
| { | |
| "epoch": 0.8495012605502575, | |
| "grad_norm": 6.349103927612305, | |
| "learning_rate": 2.75442056487325e-06, | |
| "loss": 2.8114, | |
| "num_input_tokens_seen": 5067312, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.8500493258796449, | |
| "grad_norm": 9.979939460754395, | |
| "learning_rate": 2.7348119013177605e-06, | |
| "loss": 3.0652, | |
| "num_input_tokens_seen": 5070232, | |
| "step": 7755 | |
| }, | |
| { | |
| "epoch": 0.8505973912090321, | |
| "grad_norm": 9.005098342895508, | |
| "learning_rate": 2.7152692448642297e-06, | |
| "loss": 2.7476, | |
| "num_input_tokens_seen": 5073736, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.8511454565384193, | |
| "grad_norm": 7.502773761749268, | |
| "learning_rate": 2.695792653448573e-06, | |
| "loss": 2.6705, | |
| "num_input_tokens_seen": 5076032, | |
| "step": 7765 | |
| }, | |
| { | |
| "epoch": 0.8516935218678067, | |
| "grad_norm": 6.317687511444092, | |
| "learning_rate": 2.6763821848108634e-06, | |
| "loss": 2.7642, | |
| "num_input_tokens_seen": 5078736, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.8522415871971939, | |
| "grad_norm": 6.520786762237549, | |
| "learning_rate": 2.6570378964951322e-06, | |
| "loss": 2.9362, | |
| "num_input_tokens_seen": 5081560, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 0.8527896525265811, | |
| "grad_norm": 7.41638708114624, | |
| "learning_rate": 2.637759845849211e-06, | |
| "loss": 2.9981, | |
| "num_input_tokens_seen": 5084504, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.8533377178559685, | |
| "grad_norm": 7.572868824005127, | |
| "learning_rate": 2.6185480900245836e-06, | |
| "loss": 2.7595, | |
| "num_input_tokens_seen": 5088232, | |
| "step": 7785 | |
| }, | |
| { | |
| "epoch": 0.8538857831853557, | |
| "grad_norm": 6.104272842407227, | |
| "learning_rate": 2.5994026859761766e-06, | |
| "loss": 2.9084, | |
| "num_input_tokens_seen": 5090552, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.8544338485147429, | |
| "grad_norm": 8.887699127197266, | |
| "learning_rate": 2.5803236904622134e-06, | |
| "loss": 3.3633, | |
| "num_input_tokens_seen": 5093720, | |
| "step": 7795 | |
| }, | |
| { | |
| "epoch": 0.8549819138441302, | |
| "grad_norm": 7.048088550567627, | |
| "learning_rate": 2.5613111600440637e-06, | |
| "loss": 2.94, | |
| "num_input_tokens_seen": 5096984, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.8555299791735175, | |
| "grad_norm": 7.457699775695801, | |
| "learning_rate": 2.5423651510860292e-06, | |
| "loss": 2.9086, | |
| "num_input_tokens_seen": 5100088, | |
| "step": 7805 | |
| }, | |
| { | |
| "epoch": 0.8560780445029047, | |
| "grad_norm": 7.127599239349365, | |
| "learning_rate": 2.5234857197552197e-06, | |
| "loss": 3.2513, | |
| "num_input_tokens_seen": 5102776, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.856626109832292, | |
| "grad_norm": 6.716034412384033, | |
| "learning_rate": 2.5046729220213615e-06, | |
| "loss": 3.1929, | |
| "num_input_tokens_seen": 5106680, | |
| "step": 7815 | |
| }, | |
| { | |
| "epoch": 0.8571741751616793, | |
| "grad_norm": 8.033172607421875, | |
| "learning_rate": 2.4859268136566415e-06, | |
| "loss": 3.2828, | |
| "num_input_tokens_seen": 5110400, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.8577222404910665, | |
| "grad_norm": 7.232936859130859, | |
| "learning_rate": 2.4672474502355406e-06, | |
| "loss": 2.9178, | |
| "num_input_tokens_seen": 5113896, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 0.8582703058204538, | |
| "grad_norm": 7.433042526245117, | |
| "learning_rate": 2.4486348871346738e-06, | |
| "loss": 3.2398, | |
| "num_input_tokens_seen": 5116440, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.858818371149841, | |
| "grad_norm": 6.7432756423950195, | |
| "learning_rate": 2.4300891795326157e-06, | |
| "loss": 2.8448, | |
| "num_input_tokens_seen": 5119296, | |
| "step": 7835 | |
| }, | |
| { | |
| "epoch": 0.8593664364792283, | |
| "grad_norm": 6.955072402954102, | |
| "learning_rate": 2.4116103824097345e-06, | |
| "loss": 3.0554, | |
| "num_input_tokens_seen": 5122136, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.8599145018086156, | |
| "grad_norm": 7.900850296020508, | |
| "learning_rate": 2.3931985505480564e-06, | |
| "loss": 2.9951, | |
| "num_input_tokens_seen": 5125056, | |
| "step": 7845 | |
| }, | |
| { | |
| "epoch": 0.8604625671380028, | |
| "grad_norm": 5.292073726654053, | |
| "learning_rate": 2.374853738531063e-06, | |
| "loss": 3.1992, | |
| "num_input_tokens_seen": 5128688, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.8610106324673901, | |
| "grad_norm": 6.894753932952881, | |
| "learning_rate": 2.356576000743557e-06, | |
| "loss": 3.2569, | |
| "num_input_tokens_seen": 5132184, | |
| "step": 7855 | |
| }, | |
| { | |
| "epoch": 0.8615586977967774, | |
| "grad_norm": 6.101509094238281, | |
| "learning_rate": 2.3383653913714996e-06, | |
| "loss": 2.8422, | |
| "num_input_tokens_seen": 5136352, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.8621067631261646, | |
| "grad_norm": 6.467989444732666, | |
| "learning_rate": 2.3202219644018365e-06, | |
| "loss": 3.0615, | |
| "num_input_tokens_seen": 5139152, | |
| "step": 7865 | |
| }, | |
| { | |
| "epoch": 0.8626548284555519, | |
| "grad_norm": 6.982528209686279, | |
| "learning_rate": 2.3021457736223412e-06, | |
| "loss": 3.0371, | |
| "num_input_tokens_seen": 5142336, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.8632028937849392, | |
| "grad_norm": 5.719668388366699, | |
| "learning_rate": 2.2841368726214755e-06, | |
| "loss": 3.1793, | |
| "num_input_tokens_seen": 5145504, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 0.8637509591143264, | |
| "grad_norm": 6.815168380737305, | |
| "learning_rate": 2.2661953147882024e-06, | |
| "loss": 3.2501, | |
| "num_input_tokens_seen": 5148672, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.8642990244437136, | |
| "grad_norm": 6.836389541625977, | |
| "learning_rate": 2.2483211533118357e-06, | |
| "loss": 3.2825, | |
| "num_input_tokens_seen": 5152104, | |
| "step": 7885 | |
| }, | |
| { | |
| "epoch": 0.864847089773101, | |
| "grad_norm": 9.11992359161377, | |
| "learning_rate": 2.2305144411819052e-06, | |
| "loss": 3.1458, | |
| "num_input_tokens_seen": 5154840, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.8653951551024882, | |
| "grad_norm": 7.1421308517456055, | |
| "learning_rate": 2.212775231187966e-06, | |
| "loss": 3.2977, | |
| "num_input_tokens_seen": 5157496, | |
| "step": 7895 | |
| }, | |
| { | |
| "epoch": 0.8659432204318754, | |
| "grad_norm": 6.900385856628418, | |
| "learning_rate": 2.1951035759194605e-06, | |
| "loss": 2.9658, | |
| "num_input_tokens_seen": 5161824, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.8664912857612628, | |
| "grad_norm": 8.681853294372559, | |
| "learning_rate": 2.1774995277655556e-06, | |
| "loss": 2.9868, | |
| "num_input_tokens_seen": 5164840, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 0.86703935109065, | |
| "grad_norm": 6.421346187591553, | |
| "learning_rate": 2.1599631389150027e-06, | |
| "loss": 3.3, | |
| "num_input_tokens_seen": 5169320, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.8675874164200372, | |
| "grad_norm": 6.86265754699707, | |
| "learning_rate": 2.1424944613559537e-06, | |
| "loss": 3.1633, | |
| "num_input_tokens_seen": 5172784, | |
| "step": 7915 | |
| }, | |
| { | |
| "epoch": 0.8681354817494246, | |
| "grad_norm": 4.766587257385254, | |
| "learning_rate": 2.1250935468758446e-06, | |
| "loss": 3.2877, | |
| "num_input_tokens_seen": 5175600, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.8686835470788118, | |
| "grad_norm": 6.533714771270752, | |
| "learning_rate": 2.1077604470612106e-06, | |
| "loss": 2.9995, | |
| "num_input_tokens_seen": 5178624, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 0.869231612408199, | |
| "grad_norm": 7.438570022583008, | |
| "learning_rate": 2.0904952132975386e-06, | |
| "loss": 2.7973, | |
| "num_input_tokens_seen": 5181688, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.8697796777375864, | |
| "grad_norm": 7.600935459136963, | |
| "learning_rate": 2.0732978967691357e-06, | |
| "loss": 3.4927, | |
| "num_input_tokens_seen": 5184008, | |
| "step": 7935 | |
| }, | |
| { | |
| "epoch": 0.8703277430669736, | |
| "grad_norm": 10.930978775024414, | |
| "learning_rate": 2.0561685484589506e-06, | |
| "loss": 3.0121, | |
| "num_input_tokens_seen": 5187600, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.8708758083963608, | |
| "grad_norm": 8.671449661254883, | |
| "learning_rate": 2.0391072191484338e-06, | |
| "loss": 3.1692, | |
| "num_input_tokens_seen": 5190976, | |
| "step": 7945 | |
| }, | |
| { | |
| "epoch": 0.8714238737257481, | |
| "grad_norm": 9.432777404785156, | |
| "learning_rate": 2.0221139594174018e-06, | |
| "loss": 3.0802, | |
| "num_input_tokens_seen": 5193664, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.8719719390551354, | |
| "grad_norm": 8.096484184265137, | |
| "learning_rate": 2.0051888196438552e-06, | |
| "loss": 2.8438, | |
| "num_input_tokens_seen": 5196696, | |
| "step": 7955 | |
| }, | |
| { | |
| "epoch": 0.8725200043845226, | |
| "grad_norm": 8.458807945251465, | |
| "learning_rate": 1.988331850003855e-06, | |
| "loss": 3.4075, | |
| "num_input_tokens_seen": 5200640, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.8730680697139099, | |
| "grad_norm": 9.191377639770508, | |
| "learning_rate": 1.971543100471368e-06, | |
| "loss": 3.276, | |
| "num_input_tokens_seen": 5204240, | |
| "step": 7965 | |
| }, | |
| { | |
| "epoch": 0.8736161350432972, | |
| "grad_norm": 6.790607929229736, | |
| "learning_rate": 1.954822620818114e-06, | |
| "loss": 2.9706, | |
| "num_input_tokens_seen": 5208024, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.8741642003726844, | |
| "grad_norm": 7.511916637420654, | |
| "learning_rate": 1.938170460613417e-06, | |
| "loss": 2.8037, | |
| "num_input_tokens_seen": 5211272, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 0.8747122657020717, | |
| "grad_norm": 6.600817680358887, | |
| "learning_rate": 1.921586669224071e-06, | |
| "loss": 3.3576, | |
| "num_input_tokens_seen": 5215392, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.875260331031459, | |
| "grad_norm": 5.347980976104736, | |
| "learning_rate": 1.9050712958141758e-06, | |
| "loss": 3.3071, | |
| "num_input_tokens_seen": 5217928, | |
| "step": 7985 | |
| }, | |
| { | |
| "epoch": 0.8758083963608462, | |
| "grad_norm": 6.689899921417236, | |
| "learning_rate": 1.8886243893450061e-06, | |
| "loss": 3.2119, | |
| "num_input_tokens_seen": 5220984, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.8763564616902335, | |
| "grad_norm": 6.363076210021973, | |
| "learning_rate": 1.8722459985748563e-06, | |
| "loss": 2.9524, | |
| "num_input_tokens_seen": 5224504, | |
| "step": 7995 | |
| }, | |
| { | |
| "epoch": 0.8769045270196207, | |
| "grad_norm": 7.521759986877441, | |
| "learning_rate": 1.8559361720588974e-06, | |
| "loss": 3.1379, | |
| "num_input_tokens_seen": 5227336, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.877452592349008, | |
| "grad_norm": 8.488334655761719, | |
| "learning_rate": 1.8396949581490463e-06, | |
| "loss": 3.2758, | |
| "num_input_tokens_seen": 5229968, | |
| "step": 8005 | |
| }, | |
| { | |
| "epoch": 0.8780006576783953, | |
| "grad_norm": 7.164643287658691, | |
| "learning_rate": 1.8235224049938049e-06, | |
| "loss": 3.0142, | |
| "num_input_tokens_seen": 5233280, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.8785487230077825, | |
| "grad_norm": 8.150335311889648, | |
| "learning_rate": 1.8074185605381239e-06, | |
| "loss": 3.2278, | |
| "num_input_tokens_seen": 5236408, | |
| "step": 8015 | |
| }, | |
| { | |
| "epoch": 0.8790967883371698, | |
| "grad_norm": 9.74315357208252, | |
| "learning_rate": 1.791383472523256e-06, | |
| "loss": 3.3009, | |
| "num_input_tokens_seen": 5240040, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.8796448536665571, | |
| "grad_norm": 6.548309326171875, | |
| "learning_rate": 1.7754171884866362e-06, | |
| "loss": 3.0949, | |
| "num_input_tokens_seen": 5243480, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 0.8801929189959443, | |
| "grad_norm": 6.918182373046875, | |
| "learning_rate": 1.7595197557617044e-06, | |
| "loss": 3.1496, | |
| "num_input_tokens_seen": 5246664, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.8807409843253315, | |
| "grad_norm": 6.263129711151123, | |
| "learning_rate": 1.7436912214777945e-06, | |
| "loss": 2.9099, | |
| "num_input_tokens_seen": 5249392, | |
| "step": 8035 | |
| }, | |
| { | |
| "epoch": 0.8812890496547189, | |
| "grad_norm": 8.55476188659668, | |
| "learning_rate": 1.7279316325599898e-06, | |
| "loss": 2.8569, | |
| "num_input_tokens_seen": 5252584, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.8818371149841061, | |
| "grad_norm": 7.661272048950195, | |
| "learning_rate": 1.7122410357289703e-06, | |
| "loss": 2.9037, | |
| "num_input_tokens_seen": 5256184, | |
| "step": 8045 | |
| }, | |
| { | |
| "epoch": 0.8823851803134933, | |
| "grad_norm": 5.52952766418457, | |
| "learning_rate": 1.6966194775008798e-06, | |
| "loss": 3.0452, | |
| "num_input_tokens_seen": 5260048, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.8829332456428807, | |
| "grad_norm": 8.354534149169922, | |
| "learning_rate": 1.6810670041872062e-06, | |
| "loss": 3.005, | |
| "num_input_tokens_seen": 5264288, | |
| "step": 8055 | |
| }, | |
| { | |
| "epoch": 0.8834813109722679, | |
| "grad_norm": 7.364735126495361, | |
| "learning_rate": 1.6655836618946151e-06, | |
| "loss": 3.1181, | |
| "num_input_tokens_seen": 5268000, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.8840293763016551, | |
| "grad_norm": 7.844119071960449, | |
| "learning_rate": 1.650169496524831e-06, | |
| "loss": 2.9376, | |
| "num_input_tokens_seen": 5270984, | |
| "step": 8065 | |
| }, | |
| { | |
| "epoch": 0.8845774416310425, | |
| "grad_norm": 5.87100076675415, | |
| "learning_rate": 1.6348245537745028e-06, | |
| "loss": 3.1916, | |
| "num_input_tokens_seen": 5274448, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.8851255069604297, | |
| "grad_norm": 7.44371223449707, | |
| "learning_rate": 1.6195488791350548e-06, | |
| "loss": 2.9924, | |
| "num_input_tokens_seen": 5277432, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 0.8856735722898169, | |
| "grad_norm": 6.34487771987915, | |
| "learning_rate": 1.6043425178925652e-06, | |
| "loss": 3.0224, | |
| "num_input_tokens_seen": 5279944, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.8862216376192042, | |
| "grad_norm": 5.726871490478516, | |
| "learning_rate": 1.5892055151276258e-06, | |
| "loss": 2.7579, | |
| "num_input_tokens_seen": 5283720, | |
| "step": 8085 | |
| }, | |
| { | |
| "epoch": 0.8867697029485915, | |
| "grad_norm": 9.92805004119873, | |
| "learning_rate": 1.574137915715207e-06, | |
| "loss": 3.0515, | |
| "num_input_tokens_seen": 5286392, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.8873177682779787, | |
| "grad_norm": 9.383995056152344, | |
| "learning_rate": 1.559139764324527e-06, | |
| "loss": 3.3639, | |
| "num_input_tokens_seen": 5289440, | |
| "step": 8095 | |
| }, | |
| { | |
| "epoch": 0.887865833607366, | |
| "grad_norm": 6.371479034423828, | |
| "learning_rate": 1.5442111054189246e-06, | |
| "loss": 3.0694, | |
| "num_input_tokens_seen": 5293168, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.8884138989367533, | |
| "grad_norm": 7.600619316101074, | |
| "learning_rate": 1.5293519832557113e-06, | |
| "loss": 3.1645, | |
| "num_input_tokens_seen": 5296272, | |
| "step": 8105 | |
| }, | |
| { | |
| "epoch": 0.8889619642661405, | |
| "grad_norm": 10.624588966369629, | |
| "learning_rate": 1.5145624418860637e-06, | |
| "loss": 2.9331, | |
| "num_input_tokens_seen": 5299248, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.8895100295955278, | |
| "grad_norm": 6.536969184875488, | |
| "learning_rate": 1.4998425251548654e-06, | |
| "loss": 2.962, | |
| "num_input_tokens_seen": 5302376, | |
| "step": 8115 | |
| }, | |
| { | |
| "epoch": 0.890058094924915, | |
| "grad_norm": 5.556844234466553, | |
| "learning_rate": 1.4851922767006088e-06, | |
| "loss": 2.9318, | |
| "num_input_tokens_seen": 5305704, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.8906061602543023, | |
| "grad_norm": 7.522222995758057, | |
| "learning_rate": 1.4706117399552383e-06, | |
| "loss": 3.0438, | |
| "num_input_tokens_seen": 5308112, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 0.8911542255836896, | |
| "grad_norm": 9.176352500915527, | |
| "learning_rate": 1.4561009581440272e-06, | |
| "loss": 3.0732, | |
| "num_input_tokens_seen": 5310768, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.8917022909130768, | |
| "grad_norm": 6.739439010620117, | |
| "learning_rate": 1.441659974285467e-06, | |
| "loss": 3.0154, | |
| "num_input_tokens_seen": 5313544, | |
| "step": 8135 | |
| }, | |
| { | |
| "epoch": 0.8922503562424641, | |
| "grad_norm": 6.810214042663574, | |
| "learning_rate": 1.4272888311911176e-06, | |
| "loss": 3.0619, | |
| "num_input_tokens_seen": 5316352, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.8927984215718514, | |
| "grad_norm": 5.931697368621826, | |
| "learning_rate": 1.4129875714654905e-06, | |
| "loss": 3.3196, | |
| "num_input_tokens_seen": 5320160, | |
| "step": 8145 | |
| }, | |
| { | |
| "epoch": 0.8933464869012386, | |
| "grad_norm": 7.526365280151367, | |
| "learning_rate": 1.398756237505927e-06, | |
| "loss": 2.9404, | |
| "num_input_tokens_seen": 5323560, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.8938945522306259, | |
| "grad_norm": 6.762884616851807, | |
| "learning_rate": 1.3845948715024648e-06, | |
| "loss": 3.2493, | |
| "num_input_tokens_seen": 5326504, | |
| "step": 8155 | |
| }, | |
| { | |
| "epoch": 0.8944426175600132, | |
| "grad_norm": 4.969104290008545, | |
| "learning_rate": 1.37050351543771e-06, | |
| "loss": 3.3379, | |
| "num_input_tokens_seen": 5329424, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.8949906828894004, | |
| "grad_norm": 6.4593586921691895, | |
| "learning_rate": 1.3564822110867264e-06, | |
| "loss": 3.2228, | |
| "num_input_tokens_seen": 5332600, | |
| "step": 8165 | |
| }, | |
| { | |
| "epoch": 0.8955387482187877, | |
| "grad_norm": 7.721135139465332, | |
| "learning_rate": 1.3425310000169028e-06, | |
| "loss": 3.2133, | |
| "num_input_tokens_seen": 5335792, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.896086813548175, | |
| "grad_norm": 8.572230339050293, | |
| "learning_rate": 1.3286499235878214e-06, | |
| "loss": 3.1945, | |
| "num_input_tokens_seen": 5339616, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 0.8966348788775622, | |
| "grad_norm": 7.773857593536377, | |
| "learning_rate": 1.3148390229511532e-06, | |
| "loss": 2.9125, | |
| "num_input_tokens_seen": 5342320, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.8971829442069494, | |
| "grad_norm": 7.451086521148682, | |
| "learning_rate": 1.3010983390505244e-06, | |
| "loss": 3.1514, | |
| "num_input_tokens_seen": 5345336, | |
| "step": 8185 | |
| }, | |
| { | |
| "epoch": 0.8977310095363368, | |
| "grad_norm": 7.28810453414917, | |
| "learning_rate": 1.2874279126213973e-06, | |
| "loss": 3.1191, | |
| "num_input_tokens_seen": 5348880, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.898279074865724, | |
| "grad_norm": 4.2049078941345215, | |
| "learning_rate": 1.2738277841909479e-06, | |
| "loss": 2.9685, | |
| "num_input_tokens_seen": 5352936, | |
| "step": 8195 | |
| }, | |
| { | |
| "epoch": 0.8988271401951112, | |
| "grad_norm": 7.404577732086182, | |
| "learning_rate": 1.2602979940779524e-06, | |
| "loss": 3.107, | |
| "num_input_tokens_seen": 5355952, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.8993752055244986, | |
| "grad_norm": 11.230597496032715, | |
| "learning_rate": 1.2468385823926481e-06, | |
| "loss": 2.9561, | |
| "num_input_tokens_seen": 5359608, | |
| "step": 8205 | |
| }, | |
| { | |
| "epoch": 0.8999232708538858, | |
| "grad_norm": 8.928146362304688, | |
| "learning_rate": 1.233449589036656e-06, | |
| "loss": 3.172, | |
| "num_input_tokens_seen": 5363024, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.900471336183273, | |
| "grad_norm": 5.939243316650391, | |
| "learning_rate": 1.2201310537028138e-06, | |
| "loss": 3.0996, | |
| "num_input_tokens_seen": 5366928, | |
| "step": 8215 | |
| }, | |
| { | |
| "epoch": 0.9010194015126604, | |
| "grad_norm": 7.374519348144531, | |
| "learning_rate": 1.206883015875085e-06, | |
| "loss": 3.0966, | |
| "num_input_tokens_seen": 5369984, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.9015674668420476, | |
| "grad_norm": 8.059386253356934, | |
| "learning_rate": 1.1937055148284444e-06, | |
| "loss": 3.0717, | |
| "num_input_tokens_seen": 5372632, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 0.9021155321714348, | |
| "grad_norm": 8.80373764038086, | |
| "learning_rate": 1.1805985896287452e-06, | |
| "loss": 3.1543, | |
| "num_input_tokens_seen": 5375544, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.9026635975008221, | |
| "grad_norm": 6.8497443199157715, | |
| "learning_rate": 1.1675622791326169e-06, | |
| "loss": 2.9531, | |
| "num_input_tokens_seen": 5378856, | |
| "step": 8235 | |
| }, | |
| { | |
| "epoch": 0.9032116628302094, | |
| "grad_norm": 7.791383266448975, | |
| "learning_rate": 1.1545966219873444e-06, | |
| "loss": 2.9187, | |
| "num_input_tokens_seen": 5382752, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.9037597281595966, | |
| "grad_norm": 6.825507640838623, | |
| "learning_rate": 1.1417016566307586e-06, | |
| "loss": 2.8782, | |
| "num_input_tokens_seen": 5386080, | |
| "step": 8245 | |
| }, | |
| { | |
| "epoch": 0.9043077934889839, | |
| "grad_norm": 6.135127544403076, | |
| "learning_rate": 1.1288774212911052e-06, | |
| "loss": 2.8879, | |
| "num_input_tokens_seen": 5389680, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.9048558588183712, | |
| "grad_norm": 8.292460441589355, | |
| "learning_rate": 1.1161239539869668e-06, | |
| "loss": 2.9108, | |
| "num_input_tokens_seen": 5393112, | |
| "step": 8255 | |
| }, | |
| { | |
| "epoch": 0.9054039241477584, | |
| "grad_norm": 6.192307949066162, | |
| "learning_rate": 1.1034412925271075e-06, | |
| "loss": 2.72, | |
| "num_input_tokens_seen": 5397056, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.9059519894771457, | |
| "grad_norm": 6.773381233215332, | |
| "learning_rate": 1.0908294745103882e-06, | |
| "loss": 2.7747, | |
| "num_input_tokens_seen": 5400928, | |
| "step": 8265 | |
| }, | |
| { | |
| "epoch": 0.906500054806533, | |
| "grad_norm": 9.411810874938965, | |
| "learning_rate": 1.078288537325653e-06, | |
| "loss": 3.1762, | |
| "num_input_tokens_seen": 5403744, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.9070481201359202, | |
| "grad_norm": 5.909646511077881, | |
| "learning_rate": 1.0658185181516094e-06, | |
| "loss": 2.9356, | |
| "num_input_tokens_seen": 5406888, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 0.9075961854653075, | |
| "grad_norm": 8.18594741821289, | |
| "learning_rate": 1.0534194539567194e-06, | |
| "loss": 3.0487, | |
| "num_input_tokens_seen": 5409856, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.9081442507946947, | |
| "grad_norm": 10.775045394897461, | |
| "learning_rate": 1.0410913814990985e-06, | |
| "loss": 2.8025, | |
| "num_input_tokens_seen": 5412416, | |
| "step": 8285 | |
| }, | |
| { | |
| "epoch": 0.908692316124082, | |
| "grad_norm": 8.237727165222168, | |
| "learning_rate": 1.0288343373263954e-06, | |
| "loss": 3.0227, | |
| "num_input_tokens_seen": 5415176, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.9092403814534693, | |
| "grad_norm": 7.0511884689331055, | |
| "learning_rate": 1.016648357775693e-06, | |
| "loss": 2.8189, | |
| "num_input_tokens_seen": 5418552, | |
| "step": 8295 | |
| }, | |
| { | |
| "epoch": 0.9097884467828565, | |
| "grad_norm": 6.959300518035889, | |
| "learning_rate": 1.004533478973399e-06, | |
| "loss": 3.3864, | |
| "num_input_tokens_seen": 5421712, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.9103365121122438, | |
| "grad_norm": 7.333334922790527, | |
| "learning_rate": 9.924897368351282e-07, | |
| "loss": 3.1543, | |
| "num_input_tokens_seen": 5425312, | |
| "step": 8305 | |
| }, | |
| { | |
| "epoch": 0.9108845774416311, | |
| "grad_norm": 7.005816459655762, | |
| "learning_rate": 9.805171670656117e-07, | |
| "loss": 3.1113, | |
| "num_input_tokens_seen": 5428680, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.9114326427710183, | |
| "grad_norm": 5.512388229370117, | |
| "learning_rate": 9.686158051585874e-07, | |
| "loss": 3.0001, | |
| "num_input_tokens_seen": 5431848, | |
| "step": 8315 | |
| }, | |
| { | |
| "epoch": 0.9119807081004055, | |
| "grad_norm": 6.378774642944336, | |
| "learning_rate": 9.56785686396683e-07, | |
| "loss": 3.1063, | |
| "num_input_tokens_seen": 5434648, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.9125287734297929, | |
| "grad_norm": 6.719765663146973, | |
| "learning_rate": 9.450268458513156e-07, | |
| "loss": 2.7967, | |
| "num_input_tokens_seen": 5438728, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 0.9130768387591801, | |
| "grad_norm": 8.518233299255371, | |
| "learning_rate": 9.333393183826089e-07, | |
| "loss": 2.7597, | |
| "num_input_tokens_seen": 5442232, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.9136249040885673, | |
| "grad_norm": 7.718142986297607, | |
| "learning_rate": 9.217231386392577e-07, | |
| "loss": 3.5149, | |
| "num_input_tokens_seen": 5445320, | |
| "step": 8335 | |
| }, | |
| { | |
| "epoch": 0.9141729694179547, | |
| "grad_norm": 7.286013603210449, | |
| "learning_rate": 9.101783410584458e-07, | |
| "loss": 3.2542, | |
| "num_input_tokens_seen": 5448280, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.9147210347473419, | |
| "grad_norm": 6.524003028869629, | |
| "learning_rate": 8.987049598657398e-07, | |
| "loss": 3.0042, | |
| "num_input_tokens_seen": 5452360, | |
| "step": 8345 | |
| }, | |
| { | |
| "epoch": 0.9152691000767291, | |
| "grad_norm": 6.262417316436768, | |
| "learning_rate": 8.87303029074979e-07, | |
| "loss": 2.6819, | |
| "num_input_tokens_seen": 5455872, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.9158171654061165, | |
| "grad_norm": 6.51323127746582, | |
| "learning_rate": 8.75972582488191e-07, | |
| "loss": 3.1662, | |
| "num_input_tokens_seen": 5458616, | |
| "step": 8355 | |
| }, | |
| { | |
| "epoch": 0.9163652307355037, | |
| "grad_norm": 7.502628803253174, | |
| "learning_rate": 8.647136536954787e-07, | |
| "loss": 2.4922, | |
| "num_input_tokens_seen": 5461408, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.9169132960648909, | |
| "grad_norm": 6.768873691558838, | |
| "learning_rate": 8.535262760749202e-07, | |
| "loss": 2.7696, | |
| "num_input_tokens_seen": 5466664, | |
| "step": 8365 | |
| }, | |
| { | |
| "epoch": 0.9174613613942783, | |
| "grad_norm": 9.054154396057129, | |
| "learning_rate": 8.4241048279248e-07, | |
| "loss": 3.3125, | |
| "num_input_tokens_seen": 5469400, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.9180094267236655, | |
| "grad_norm": 7.729340076446533, | |
| "learning_rate": 8.313663068019007e-07, | |
| "loss": 3.383, | |
| "num_input_tokens_seen": 5472936, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 0.9185574920530527, | |
| "grad_norm": 8.844609260559082, | |
| "learning_rate": 8.203937808446083e-07, | |
| "loss": 2.7089, | |
| "num_input_tokens_seen": 5476176, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.91910555738244, | |
| "grad_norm": 7.043740272521973, | |
| "learning_rate": 8.094929374496185e-07, | |
| "loss": 3.2024, | |
| "num_input_tokens_seen": 5479576, | |
| "step": 8385 | |
| }, | |
| { | |
| "epoch": 0.9196536227118273, | |
| "grad_norm": 8.144498825073242, | |
| "learning_rate": 7.986638089334392e-07, | |
| "loss": 3.4681, | |
| "num_input_tokens_seen": 5483592, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.9202016880412145, | |
| "grad_norm": 7.295477867126465, | |
| "learning_rate": 7.879064273999731e-07, | |
| "loss": 3.3592, | |
| "num_input_tokens_seen": 5486736, | |
| "step": 8395 | |
| }, | |
| { | |
| "epoch": 0.9207497533706018, | |
| "grad_norm": 6.9401960372924805, | |
| "learning_rate": 7.772208247404128e-07, | |
| "loss": 2.8916, | |
| "num_input_tokens_seen": 5489720, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.9212978186999891, | |
| "grad_norm": 5.044391632080078, | |
| "learning_rate": 7.666070326331709e-07, | |
| "loss": 2.9984, | |
| "num_input_tokens_seen": 5494312, | |
| "step": 8405 | |
| }, | |
| { | |
| "epoch": 0.9218458840293763, | |
| "grad_norm": 7.426214218139648, | |
| "learning_rate": 7.560650825437637e-07, | |
| "loss": 2.6398, | |
| "num_input_tokens_seen": 5498536, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.9223939493587635, | |
| "grad_norm": 6.066382884979248, | |
| "learning_rate": 7.455950057247252e-07, | |
| "loss": 3.0293, | |
| "num_input_tokens_seen": 5501256, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 0.9229420146881508, | |
| "grad_norm": 6.4779181480407715, | |
| "learning_rate": 7.351968332155152e-07, | |
| "loss": 3.0215, | |
| "num_input_tokens_seen": 5504440, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.9234900800175381, | |
| "grad_norm": 5.473248481750488, | |
| "learning_rate": 7.248705958424307e-07, | |
| "loss": 2.9114, | |
| "num_input_tokens_seen": 5507752, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 0.9240381453469253, | |
| "grad_norm": 7.87445592880249, | |
| "learning_rate": 7.146163242185033e-07, | |
| "loss": 3.0642, | |
| "num_input_tokens_seen": 5511168, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.9245862106763126, | |
| "grad_norm": 7.2715959548950195, | |
| "learning_rate": 7.044340487434242e-07, | |
| "loss": 3.0391, | |
| "num_input_tokens_seen": 5513984, | |
| "step": 8435 | |
| }, | |
| { | |
| "epoch": 0.9251342760056999, | |
| "grad_norm": 7.839521408081055, | |
| "learning_rate": 6.943237996034386e-07, | |
| "loss": 3.2316, | |
| "num_input_tokens_seen": 5516632, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.9256823413350871, | |
| "grad_norm": 7.8146820068359375, | |
| "learning_rate": 6.842856067712677e-07, | |
| "loss": 3.0688, | |
| "num_input_tokens_seen": 5520488, | |
| "step": 8445 | |
| }, | |
| { | |
| "epoch": 0.9262304066644744, | |
| "grad_norm": 7.480862140655518, | |
| "learning_rate": 6.743195000060154e-07, | |
| "loss": 2.8072, | |
| "num_input_tokens_seen": 5524136, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.9267784719938617, | |
| "grad_norm": 6.187289237976074, | |
| "learning_rate": 6.644255088530782e-07, | |
| "loss": 3.1597, | |
| "num_input_tokens_seen": 5528256, | |
| "step": 8455 | |
| }, | |
| { | |
| "epoch": 0.9273265373232489, | |
| "grad_norm": 7.108201026916504, | |
| "learning_rate": 6.546036626440599e-07, | |
| "loss": 2.8195, | |
| "num_input_tokens_seen": 5531368, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.9278746026526362, | |
| "grad_norm": 9.429540634155273, | |
| "learning_rate": 6.448539904966827e-07, | |
| "loss": 3.1321, | |
| "num_input_tokens_seen": 5534144, | |
| "step": 8465 | |
| }, | |
| { | |
| "epoch": 0.9284226679820234, | |
| "grad_norm": 6.745710849761963, | |
| "learning_rate": 6.351765213147037e-07, | |
| "loss": 2.8217, | |
| "num_input_tokens_seen": 5536848, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.9289707333114107, | |
| "grad_norm": 6.650664806365967, | |
| "learning_rate": 6.255712837878347e-07, | |
| "loss": 3.1658, | |
| "num_input_tokens_seen": 5540136, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 0.929518798640798, | |
| "grad_norm": 7.63946008682251, | |
| "learning_rate": 6.160383063916419e-07, | |
| "loss": 3.1177, | |
| "num_input_tokens_seen": 5543192, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.9300668639701852, | |
| "grad_norm": 7.223082542419434, | |
| "learning_rate": 6.065776173874687e-07, | |
| "loss": 3.6049, | |
| "num_input_tokens_seen": 5547392, | |
| "step": 8485 | |
| }, | |
| { | |
| "epoch": 0.9306149292995725, | |
| "grad_norm": 7.673356533050537, | |
| "learning_rate": 5.971892448223576e-07, | |
| "loss": 2.8851, | |
| "num_input_tokens_seen": 5550056, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.9311629946289598, | |
| "grad_norm": 7.799294471740723, | |
| "learning_rate": 5.878732165289668e-07, | |
| "loss": 3.2135, | |
| "num_input_tokens_seen": 5552728, | |
| "step": 8495 | |
| }, | |
| { | |
| "epoch": 0.931711059958347, | |
| "grad_norm": 5.8991312980651855, | |
| "learning_rate": 5.786295601254765e-07, | |
| "loss": 3.5495, | |
| "num_input_tokens_seen": 5556008, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.9322591252877342, | |
| "grad_norm": 8.919817924499512, | |
| "learning_rate": 5.694583030155131e-07, | |
| "loss": 3.2696, | |
| "num_input_tokens_seen": 5558680, | |
| "step": 8505 | |
| }, | |
| { | |
| "epoch": 0.9328071906171216, | |
| "grad_norm": 6.0595293045043945, | |
| "learning_rate": 5.60359472388075e-07, | |
| "loss": 3.1983, | |
| "num_input_tokens_seen": 5561976, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.9333552559465088, | |
| "grad_norm": 7.8532185554504395, | |
| "learning_rate": 5.513330952174462e-07, | |
| "loss": 2.8831, | |
| "num_input_tokens_seen": 5565032, | |
| "step": 8515 | |
| }, | |
| { | |
| "epoch": 0.933903321275896, | |
| "grad_norm": 6.592312335968018, | |
| "learning_rate": 5.423791982631071e-07, | |
| "loss": 3.2783, | |
| "num_input_tokens_seen": 5567976, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.9344513866052834, | |
| "grad_norm": 5.455694198608398, | |
| "learning_rate": 5.334978080696773e-07, | |
| "loss": 2.3299, | |
| "num_input_tokens_seen": 5572544, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 0.9349994519346706, | |
| "grad_norm": 6.956151008605957, | |
| "learning_rate": 5.246889509668118e-07, | |
| "loss": 3.0221, | |
| "num_input_tokens_seen": 5575256, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.9355475172640578, | |
| "grad_norm": 7.278057098388672, | |
| "learning_rate": 5.159526530691378e-07, | |
| "loss": 3.2783, | |
| "num_input_tokens_seen": 5577928, | |
| "step": 8535 | |
| }, | |
| { | |
| "epoch": 0.9360955825934452, | |
| "grad_norm": 5.909106731414795, | |
| "learning_rate": 5.072889402761821e-07, | |
| "loss": 3.2452, | |
| "num_input_tokens_seen": 5580632, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.9366436479228324, | |
| "grad_norm": 6.952794075012207, | |
| "learning_rate": 4.986978382722773e-07, | |
| "loss": 3.0232, | |
| "num_input_tokens_seen": 5584824, | |
| "step": 8545 | |
| }, | |
| { | |
| "epoch": 0.9371917132522196, | |
| "grad_norm": 8.14654541015625, | |
| "learning_rate": 4.901793725264975e-07, | |
| "loss": 3.0803, | |
| "num_input_tokens_seen": 5589208, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.937739778581607, | |
| "grad_norm": 6.610713958740234, | |
| "learning_rate": 4.817335682925805e-07, | |
| "loss": 2.8802, | |
| "num_input_tokens_seen": 5592056, | |
| "step": 8555 | |
| }, | |
| { | |
| "epoch": 0.9382878439109942, | |
| "grad_norm": 10.567109107971191, | |
| "learning_rate": 4.73360450608859e-07, | |
| "loss": 3.3952, | |
| "num_input_tokens_seen": 5595120, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.9388359092403814, | |
| "grad_norm": 7.1954545974731445, | |
| "learning_rate": 4.6506004429817117e-07, | |
| "loss": 3.2835, | |
| "num_input_tokens_seen": 5598408, | |
| "step": 8565 | |
| }, | |
| { | |
| "epoch": 0.9393839745697687, | |
| "grad_norm": 7.200895309448242, | |
| "learning_rate": 4.568323739677971e-07, | |
| "loss": 3.2721, | |
| "num_input_tokens_seen": 5602328, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.939932039899156, | |
| "grad_norm": 7.637218952178955, | |
| "learning_rate": 4.486774640093894e-07, | |
| "loss": 3.0411, | |
| "num_input_tokens_seen": 5606096, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 0.9404801052285432, | |
| "grad_norm": 8.214374542236328, | |
| "learning_rate": 4.405953385988898e-07, | |
| "loss": 3.1399, | |
| "num_input_tokens_seen": 5608544, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.9410281705579305, | |
| "grad_norm": 7.163279056549072, | |
| "learning_rate": 4.325860216964711e-07, | |
| "loss": 2.7451, | |
| "num_input_tokens_seen": 5611872, | |
| "step": 8585 | |
| }, | |
| { | |
| "epoch": 0.9415762358873178, | |
| "grad_norm": 7.930347919464111, | |
| "learning_rate": 4.2464953704645647e-07, | |
| "loss": 2.9838, | |
| "num_input_tokens_seen": 5614440, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.942124301216705, | |
| "grad_norm": 4.849373817443848, | |
| "learning_rate": 4.167859081772446e-07, | |
| "loss": 2.9805, | |
| "num_input_tokens_seen": 5617856, | |
| "step": 8595 | |
| }, | |
| { | |
| "epoch": 0.9426723665460923, | |
| "grad_norm": 8.461563110351562, | |
| "learning_rate": 4.0899515840125966e-07, | |
| "loss": 3.2951, | |
| "num_input_tokens_seen": 5620824, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.9432204318754795, | |
| "grad_norm": 8.734384536743164, | |
| "learning_rate": 4.0127731081485987e-07, | |
| "loss": 3.3802, | |
| "num_input_tokens_seen": 5624696, | |
| "step": 8605 | |
| }, | |
| { | |
| "epoch": 0.9437684972048668, | |
| "grad_norm": 9.480766296386719, | |
| "learning_rate": 3.936323882982762e-07, | |
| "loss": 2.8742, | |
| "num_input_tokens_seen": 5628648, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.9443165625342541, | |
| "grad_norm": 8.393555641174316, | |
| "learning_rate": 3.8606041351555986e-07, | |
| "loss": 3.3445, | |
| "num_input_tokens_seen": 5631048, | |
| "step": 8615 | |
| }, | |
| { | |
| "epoch": 0.9448646278636413, | |
| "grad_norm": 5.754420757293701, | |
| "learning_rate": 3.785614089144879e-07, | |
| "loss": 3.2994, | |
| "num_input_tokens_seen": 5634840, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.9454126931930286, | |
| "grad_norm": 7.406842231750488, | |
| "learning_rate": 3.7113539672651853e-07, | |
| "loss": 3.2169, | |
| "num_input_tokens_seen": 5639056, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 0.9459607585224159, | |
| "grad_norm": 8.346644401550293, | |
| "learning_rate": 3.637823989667166e-07, | |
| "loss": 3.5016, | |
| "num_input_tokens_seen": 5642368, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.9465088238518031, | |
| "grad_norm": 6.256731033325195, | |
| "learning_rate": 3.565024374336895e-07, | |
| "loss": 2.9251, | |
| "num_input_tokens_seen": 5645288, | |
| "step": 8635 | |
| }, | |
| { | |
| "epoch": 0.9470568891811904, | |
| "grad_norm": 8.30922794342041, | |
| "learning_rate": 3.4929553370951496e-07, | |
| "loss": 2.897, | |
| "num_input_tokens_seen": 5648256, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.9476049545105777, | |
| "grad_norm": 5.839921951293945, | |
| "learning_rate": 3.421617091596996e-07, | |
| "loss": 3.0709, | |
| "num_input_tokens_seen": 5651456, | |
| "step": 8645 | |
| }, | |
| { | |
| "epoch": 0.9481530198399649, | |
| "grad_norm": 8.873268127441406, | |
| "learning_rate": 3.3510098493308715e-07, | |
| "loss": 2.8349, | |
| "num_input_tokens_seen": 5654936, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.9487010851693521, | |
| "grad_norm": 7.447127342224121, | |
| "learning_rate": 3.2811338196181706e-07, | |
| "loss": 3.1457, | |
| "num_input_tokens_seen": 5658344, | |
| "step": 8655 | |
| }, | |
| { | |
| "epoch": 0.9492491504987395, | |
| "grad_norm": 7.901216506958008, | |
| "learning_rate": 3.211989209612437e-07, | |
| "loss": 3.0331, | |
| "num_input_tokens_seen": 5661088, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.9497972158281267, | |
| "grad_norm": 6.363575458526611, | |
| "learning_rate": 3.1435762242990053e-07, | |
| "loss": 3.0904, | |
| "num_input_tokens_seen": 5664544, | |
| "step": 8665 | |
| }, | |
| { | |
| "epoch": 0.9503452811575139, | |
| "grad_norm": 8.245457649230957, | |
| "learning_rate": 3.0758950664940833e-07, | |
| "loss": 2.9634, | |
| "num_input_tokens_seen": 5667704, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.9508933464869013, | |
| "grad_norm": 6.969222068786621, | |
| "learning_rate": 3.008945936844504e-07, | |
| "loss": 2.9006, | |
| "num_input_tokens_seen": 5671088, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 0.9514414118162885, | |
| "grad_norm": 9.956710815429688, | |
| "learning_rate": 2.942729033826752e-07, | |
| "loss": 3.3092, | |
| "num_input_tokens_seen": 5673784, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.9519894771456757, | |
| "grad_norm": 6.730470657348633, | |
| "learning_rate": 2.877244553746633e-07, | |
| "loss": 2.8794, | |
| "num_input_tokens_seen": 5677024, | |
| "step": 8685 | |
| }, | |
| { | |
| "epoch": 0.9525375424750631, | |
| "grad_norm": 7.628656387329102, | |
| "learning_rate": 2.8124926907386885e-07, | |
| "loss": 2.9683, | |
| "num_input_tokens_seen": 5680552, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.9530856078044503, | |
| "grad_norm": 8.587575912475586, | |
| "learning_rate": 2.748473636765475e-07, | |
| "loss": 3.0311, | |
| "num_input_tokens_seen": 5684128, | |
| "step": 8695 | |
| }, | |
| { | |
| "epoch": 0.9536336731338375, | |
| "grad_norm": 8.781567573547363, | |
| "learning_rate": 2.6851875816170655e-07, | |
| "loss": 2.9722, | |
| "num_input_tokens_seen": 5687784, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.9541817384632248, | |
| "grad_norm": 6.88287353515625, | |
| "learning_rate": 2.622634712910521e-07, | |
| "loss": 3.3128, | |
| "num_input_tokens_seen": 5690464, | |
| "step": 8705 | |
| }, | |
| { | |
| "epoch": 0.9547298037926121, | |
| "grad_norm": 7.1090874671936035, | |
| "learning_rate": 2.560815216089335e-07, | |
| "loss": 3.0189, | |
| "num_input_tokens_seen": 5693312, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.9552778691219993, | |
| "grad_norm": 7.3000168800354, | |
| "learning_rate": 2.499729274422796e-07, | |
| "loss": 3.5534, | |
| "num_input_tokens_seen": 5697232, | |
| "step": 8715 | |
| }, | |
| { | |
| "epoch": 0.9558259344513866, | |
| "grad_norm": 8.97269344329834, | |
| "learning_rate": 2.439377069005544e-07, | |
| "loss": 3.5597, | |
| "num_input_tokens_seen": 5699808, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.9563739997807739, | |
| "grad_norm": 8.973227500915527, | |
| "learning_rate": 2.3797587787569852e-07, | |
| "loss": 3.0848, | |
| "num_input_tokens_seen": 5703784, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 0.9569220651101611, | |
| "grad_norm": 7.142612934112549, | |
| "learning_rate": 2.3208745804207398e-07, | |
| "loss": 2.8029, | |
| "num_input_tokens_seen": 5706344, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.9574701304395484, | |
| "grad_norm": 8.567402839660645, | |
| "learning_rate": 2.262724648564224e-07, | |
| "loss": 3.3482, | |
| "num_input_tokens_seen": 5710600, | |
| "step": 8735 | |
| }, | |
| { | |
| "epoch": 0.9580181957689357, | |
| "grad_norm": 11.277481079101562, | |
| "learning_rate": 2.2053091555779837e-07, | |
| "loss": 3.0415, | |
| "num_input_tokens_seen": 5714152, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.9585662610983229, | |
| "grad_norm": 7.343226432800293, | |
| "learning_rate": 2.1486282716752791e-07, | |
| "loss": 3.0087, | |
| "num_input_tokens_seen": 5716376, | |
| "step": 8745 | |
| }, | |
| { | |
| "epoch": 0.9591143264277102, | |
| "grad_norm": 6.354895114898682, | |
| "learning_rate": 2.0926821648915574e-07, | |
| "loss": 3.0672, | |
| "num_input_tokens_seen": 5719152, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.9596623917570974, | |
| "grad_norm": 7.212831497192383, | |
| "learning_rate": 2.0374710010839793e-07, | |
| "loss": 3.3, | |
| "num_input_tokens_seen": 5723064, | |
| "step": 8755 | |
| }, | |
| { | |
| "epoch": 0.9602104570864847, | |
| "grad_norm": 6.967692852020264, | |
| "learning_rate": 1.982994943930838e-07, | |
| "loss": 3.1401, | |
| "num_input_tokens_seen": 5725768, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.960758522415872, | |
| "grad_norm": 8.500665664672852, | |
| "learning_rate": 1.9292541549311983e-07, | |
| "loss": 3.2358, | |
| "num_input_tokens_seen": 5728104, | |
| "step": 8765 | |
| }, | |
| { | |
| "epoch": 0.9613065877452592, | |
| "grad_norm": 7.204361915588379, | |
| "learning_rate": 1.876248793404367e-07, | |
| "loss": 2.9241, | |
| "num_input_tokens_seen": 5730688, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.9618546530746465, | |
| "grad_norm": 7.031684398651123, | |
| "learning_rate": 1.8239790164893412e-07, | |
| "loss": 3.2293, | |
| "num_input_tokens_seen": 5733936, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 0.9624027184040338, | |
| "grad_norm": 8.101325035095215, | |
| "learning_rate": 1.7724449791444997e-07, | |
| "loss": 2.7716, | |
| "num_input_tokens_seen": 5737880, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.962950783733421, | |
| "grad_norm": 6.74721622467041, | |
| "learning_rate": 1.721646834146967e-07, | |
| "loss": 2.715, | |
| "num_input_tokens_seen": 5741936, | |
| "step": 8785 | |
| }, | |
| { | |
| "epoch": 0.9634988490628082, | |
| "grad_norm": 9.26173210144043, | |
| "learning_rate": 1.671584732092335e-07, | |
| "loss": 2.8224, | |
| "num_input_tokens_seen": 5746160, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.9640469143921956, | |
| "grad_norm": 5.797330856323242, | |
| "learning_rate": 1.6222588213940792e-07, | |
| "loss": 3.3261, | |
| "num_input_tokens_seen": 5750696, | |
| "step": 8795 | |
| }, | |
| { | |
| "epoch": 0.9645949797215828, | |
| "grad_norm": 9.205500602722168, | |
| "learning_rate": 1.5736692482831995e-07, | |
| "loss": 2.9268, | |
| "num_input_tokens_seen": 5753384, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.96514304505097, | |
| "grad_norm": 6.270941257476807, | |
| "learning_rate": 1.5258161568077188e-07, | |
| "loss": 2.8041, | |
| "num_input_tokens_seen": 5756640, | |
| "step": 8805 | |
| }, | |
| { | |
| "epoch": 0.9656911103803574, | |
| "grad_norm": 7.947140693664551, | |
| "learning_rate": 1.4786996888323524e-07, | |
| "loss": 3.1006, | |
| "num_input_tokens_seen": 5759848, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.9662391757097446, | |
| "grad_norm": 8.765256881713867, | |
| "learning_rate": 1.4323199840380053e-07, | |
| "loss": 3.2065, | |
| "num_input_tokens_seen": 5763416, | |
| "step": 8815 | |
| }, | |
| { | |
| "epoch": 0.9667872410391318, | |
| "grad_norm": 5.335040092468262, | |
| "learning_rate": 1.3866771799213307e-07, | |
| "loss": 2.9768, | |
| "num_input_tokens_seen": 5766160, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.9673353063685192, | |
| "grad_norm": 5.483620643615723, | |
| "learning_rate": 1.3417714117944513e-07, | |
| "loss": 2.8682, | |
| "num_input_tokens_seen": 5771024, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 0.9678833716979064, | |
| "grad_norm": 8.511704444885254, | |
| "learning_rate": 1.2976028127844597e-07, | |
| "loss": 3.1851, | |
| "num_input_tokens_seen": 5774632, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.9684314370272936, | |
| "grad_norm": 6.916325569152832, | |
| "learning_rate": 1.25417151383303e-07, | |
| "loss": 3.2018, | |
| "num_input_tokens_seen": 5778048, | |
| "step": 8835 | |
| }, | |
| { | |
| "epoch": 0.968979502356681, | |
| "grad_norm": 6.791527271270752, | |
| "learning_rate": 1.2114776436960294e-07, | |
| "loss": 3.1153, | |
| "num_input_tokens_seen": 5781288, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.9695275676860682, | |
| "grad_norm": 7.304278373718262, | |
| "learning_rate": 1.1695213289432406e-07, | |
| "loss": 2.7359, | |
| "num_input_tokens_seen": 5783776, | |
| "step": 8845 | |
| }, | |
| { | |
| "epoch": 0.9700756330154554, | |
| "grad_norm": 7.467769145965576, | |
| "learning_rate": 1.128302693957778e-07, | |
| "loss": 3.1941, | |
| "num_input_tokens_seen": 5786120, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.9706236983448427, | |
| "grad_norm": 8.969725608825684, | |
| "learning_rate": 1.0878218609359502e-07, | |
| "loss": 3.0654, | |
| "num_input_tokens_seen": 5789672, | |
| "step": 8855 | |
| }, | |
| { | |
| "epoch": 0.97117176367423, | |
| "grad_norm": 8.292722702026367, | |
| "learning_rate": 1.0480789498866772e-07, | |
| "loss": 2.9517, | |
| "num_input_tokens_seen": 5792480, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.9717198290036172, | |
| "grad_norm": 5.788974285125732, | |
| "learning_rate": 1.0090740786313502e-07, | |
| "loss": 2.9964, | |
| "num_input_tokens_seen": 5796848, | |
| "step": 8865 | |
| }, | |
| { | |
| "epoch": 0.9722678943330045, | |
| "grad_norm": 8.003725051879883, | |
| "learning_rate": 9.708073628033055e-08, | |
| "loss": 2.8592, | |
| "num_input_tokens_seen": 5801376, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.9728159596623918, | |
| "grad_norm": 6.711467742919922, | |
| "learning_rate": 9.332789158476018e-08, | |
| "loss": 2.9653, | |
| "num_input_tokens_seen": 5804480, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 0.973364024991779, | |
| "grad_norm": 5.3671417236328125, | |
| "learning_rate": 8.964888490205769e-08, | |
| "loss": 3.1577, | |
| "num_input_tokens_seen": 5807632, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.9739120903211663, | |
| "grad_norm": 6.408278942108154, | |
| "learning_rate": 8.604372713896247e-08, | |
| "loss": 2.7764, | |
| "num_input_tokens_seen": 5810096, | |
| "step": 8885 | |
| }, | |
| { | |
| "epoch": 0.9744601556505536, | |
| "grad_norm": 8.041277885437012, | |
| "learning_rate": 8.251242898328071e-08, | |
| "loss": 3.2175, | |
| "num_input_tokens_seen": 5813808, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.9750082209799408, | |
| "grad_norm": 6.138535499572754, | |
| "learning_rate": 7.905500090385487e-08, | |
| "loss": 2.9364, | |
| "num_input_tokens_seen": 5816552, | |
| "step": 8895 | |
| }, | |
| { | |
| "epoch": 0.9755562863093281, | |
| "grad_norm": 8.328486442565918, | |
| "learning_rate": 7.567145315053314e-08, | |
| "loss": 3.163, | |
| "num_input_tokens_seen": 5820568, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.9761043516387153, | |
| "grad_norm": 9.473198890686035, | |
| "learning_rate": 7.236179575414448e-08, | |
| "loss": 3.2253, | |
| "num_input_tokens_seen": 5823808, | |
| "step": 8905 | |
| }, | |
| { | |
| "epoch": 0.9766524169681026, | |
| "grad_norm": 5.804590225219727, | |
| "learning_rate": 6.912603852645138e-08, | |
| "loss": 3.0782, | |
| "num_input_tokens_seen": 5826744, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.9772004822974899, | |
| "grad_norm": 5.613870620727539, | |
| "learning_rate": 6.596419106014163e-08, | |
| "loss": 2.9843, | |
| "num_input_tokens_seen": 5831144, | |
| "step": 8915 | |
| }, | |
| { | |
| "epoch": 0.9777485476268771, | |
| "grad_norm": 8.519886016845703, | |
| "learning_rate": 6.28762627287921e-08, | |
| "loss": 3.0685, | |
| "num_input_tokens_seen": 5834792, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.9782966129562644, | |
| "grad_norm": 7.168541431427002, | |
| "learning_rate": 5.986226268683282e-08, | |
| "loss": 3.2515, | |
| "num_input_tokens_seen": 5838368, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 0.9788446782856517, | |
| "grad_norm": 10.949654579162598, | |
| "learning_rate": 5.692219986953573e-08, | |
| "loss": 2.9654, | |
| "num_input_tokens_seen": 5842120, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.9793927436150389, | |
| "grad_norm": 6.906786918640137, | |
| "learning_rate": 5.4056082992973155e-08, | |
| "loss": 3.0675, | |
| "num_input_tokens_seen": 5845248, | |
| "step": 8935 | |
| }, | |
| { | |
| "epoch": 0.9799408089444261, | |
| "grad_norm": 5.457529067993164, | |
| "learning_rate": 5.1263920553998315e-08, | |
| "loss": 2.9989, | |
| "num_input_tokens_seen": 5848536, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.9804888742738135, | |
| "grad_norm": 9.393891334533691, | |
| "learning_rate": 4.854572083022313e-08, | |
| "loss": 3.1355, | |
| "num_input_tokens_seen": 5851824, | |
| "step": 8945 | |
| }, | |
| { | |
| "epoch": 0.9810369396032007, | |
| "grad_norm": 8.42390251159668, | |
| "learning_rate": 4.5901491879984934e-08, | |
| "loss": 3.0677, | |
| "num_input_tokens_seen": 5855152, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.9815850049325879, | |
| "grad_norm": 7.749826908111572, | |
| "learning_rate": 4.3331241542340916e-08, | |
| "loss": 3.1391, | |
| "num_input_tokens_seen": 5858576, | |
| "step": 8955 | |
| }, | |
| { | |
| "epoch": 0.9821330702619753, | |
| "grad_norm": 8.214120864868164, | |
| "learning_rate": 4.083497743701259e-08, | |
| "loss": 2.8317, | |
| "num_input_tokens_seen": 5861528, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.9826811355913625, | |
| "grad_norm": 6.369811058044434, | |
| "learning_rate": 3.8412706964402465e-08, | |
| "loss": 2.9487, | |
| "num_input_tokens_seen": 5865128, | |
| "step": 8965 | |
| }, | |
| { | |
| "epoch": 0.9832292009207497, | |
| "grad_norm": 8.29269027709961, | |
| "learning_rate": 3.606443730554132e-08, | |
| "loss": 3.0666, | |
| "num_input_tokens_seen": 5867928, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.9837772662501371, | |
| "grad_norm": 7.444830417633057, | |
| "learning_rate": 3.379017542207707e-08, | |
| "loss": 3.0067, | |
| "num_input_tokens_seen": 5870968, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 0.9843253315795243, | |
| "grad_norm": 7.021453380584717, | |
| "learning_rate": 3.1589928056263704e-08, | |
| "loss": 3.1972, | |
| "num_input_tokens_seen": 5874496, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.9848733969089115, | |
| "grad_norm": 7.41176176071167, | |
| "learning_rate": 2.9463701730922388e-08, | |
| "loss": 2.826, | |
| "num_input_tokens_seen": 5878088, | |
| "step": 8985 | |
| }, | |
| { | |
| "epoch": 0.9854214622382989, | |
| "grad_norm": 9.515088081359863, | |
| "learning_rate": 2.7411502749441488e-08, | |
| "loss": 3.1693, | |
| "num_input_tokens_seen": 5881752, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.9859695275676861, | |
| "grad_norm": 8.658610343933105, | |
| "learning_rate": 2.5433337195743258e-08, | |
| "loss": 2.8453, | |
| "num_input_tokens_seen": 5884816, | |
| "step": 8995 | |
| }, | |
| { | |
| "epoch": 0.9865175928970733, | |
| "grad_norm": 7.5331830978393555, | |
| "learning_rate": 2.3529210934272738e-08, | |
| "loss": 2.8423, | |
| "num_input_tokens_seen": 5887864, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.9870656582264606, | |
| "grad_norm": 8.601006507873535, | |
| "learning_rate": 2.2059222016279636e-08, | |
| "loss": 3.5074, | |
| "num_input_tokens_seen": 5892776, | |
| "step": 9005 | |
| }, | |
| { | |
| "epoch": 0.9876137235558479, | |
| "grad_norm": 9.700572967529297, | |
| "learning_rate": 2.0288380558580732e-08, | |
| "loss": 2.9729, | |
| "num_input_tokens_seen": 5895976, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 0.9881617888852351, | |
| "grad_norm": 7.793155193328857, | |
| "learning_rate": 1.859159364578089e-08, | |
| "loss": 3.1164, | |
| "num_input_tokens_seen": 5897952, | |
| "step": 9015 | |
| }, | |
| { | |
| "epoch": 0.9887098542146224, | |
| "grad_norm": 6.612551212310791, | |
| "learning_rate": 1.696886630815908e-08, | |
| "loss": 2.9729, | |
| "num_input_tokens_seen": 5901264, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.9892579195440097, | |
| "grad_norm": 7.382999897003174, | |
| "learning_rate": 1.5420203356431018e-08, | |
| "loss": 3.2611, | |
| "num_input_tokens_seen": 5904096, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 0.9898059848733969, | |
| "grad_norm": 6.810866832733154, | |
| "learning_rate": 1.3945609381743607e-08, | |
| "loss": 2.8127, | |
| "num_input_tokens_seen": 5907072, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.9903540502027842, | |
| "grad_norm": 7.927409648895264, | |
| "learning_rate": 1.2545088755658296e-08, | |
| "loss": 3.2365, | |
| "num_input_tokens_seen": 5910056, | |
| "step": 9035 | |
| }, | |
| { | |
| "epoch": 0.9909021155321714, | |
| "grad_norm": 7.214841842651367, | |
| "learning_rate": 1.121864563014552e-08, | |
| "loss": 3.0081, | |
| "num_input_tokens_seen": 5913112, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.9914501808615587, | |
| "grad_norm": 8.652878761291504, | |
| "learning_rate": 9.966283937559716e-09, | |
| "loss": 3.0332, | |
| "num_input_tokens_seen": 5916360, | |
| "step": 9045 | |
| }, | |
| { | |
| "epoch": 0.991998246190946, | |
| "grad_norm": 8.960352897644043, | |
| "learning_rate": 8.78800739063379e-09, | |
| "loss": 2.6109, | |
| "num_input_tokens_seen": 5918704, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.9925463115203332, | |
| "grad_norm": 7.337709903717041, | |
| "learning_rate": 7.683819482479094e-09, | |
| "loss": 2.7987, | |
| "num_input_tokens_seen": 5921928, | |
| "step": 9055 | |
| }, | |
| { | |
| "epoch": 0.9930943768497205, | |
| "grad_norm": 7.972464561462402, | |
| "learning_rate": 6.653723486549357e-09, | |
| "loss": 3.1164, | |
| "num_input_tokens_seen": 5924176, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.9936424421791078, | |
| "grad_norm": 5.17326021194458, | |
| "learning_rate": 5.69772245666289e-09, | |
| "loss": 2.8857, | |
| "num_input_tokens_seen": 5927832, | |
| "step": 9065 | |
| }, | |
| { | |
| "epoch": 0.994190507508495, | |
| "grad_norm": 9.227761268615723, | |
| "learning_rate": 4.815819226960949e-09, | |
| "loss": 3.0089, | |
| "num_input_tokens_seen": 5931264, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 0.9947385728378823, | |
| "grad_norm": 8.926158905029297, | |
| "learning_rate": 4.008016411927162e-09, | |
| "loss": 3.3191, | |
| "num_input_tokens_seen": 5933904, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 0.9952866381672696, | |
| "grad_norm": 10.433160781860352, | |
| "learning_rate": 3.274316406362554e-09, | |
| "loss": 3.447, | |
| "num_input_tokens_seen": 5936464, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.9958347034966568, | |
| "grad_norm": 7.052779197692871, | |
| "learning_rate": 2.6147213853855436e-09, | |
| "loss": 3.0385, | |
| "num_input_tokens_seen": 5939544, | |
| "step": 9085 | |
| }, | |
| { | |
| "epoch": 0.996382768826044, | |
| "grad_norm": 5.819647789001465, | |
| "learning_rate": 2.0292333044236166e-09, | |
| "loss": 3.3745, | |
| "num_input_tokens_seen": 5943312, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.9969308341554314, | |
| "grad_norm": 7.4259748458862305, | |
| "learning_rate": 1.5178538992050018e-09, | |
| "loss": 2.8346, | |
| "num_input_tokens_seen": 5946248, | |
| "step": 9095 | |
| }, | |
| { | |
| "epoch": 0.9974788994848186, | |
| "grad_norm": 9.022146224975586, | |
| "learning_rate": 1.0805846857642188e-09, | |
| "loss": 2.969, | |
| "num_input_tokens_seen": 5949520, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.9980269648142058, | |
| "grad_norm": 7.631455898284912, | |
| "learning_rate": 7.174269604171002e-10, | |
| "loss": 3.0908, | |
| "num_input_tokens_seen": 5953392, | |
| "step": 9105 | |
| }, | |
| { | |
| "epoch": 0.9985750301435932, | |
| "grad_norm": 8.837788581848145, | |
| "learning_rate": 4.283817997829953e-10, | |
| "loss": 2.8613, | |
| "num_input_tokens_seen": 5957048, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 0.9991230954729804, | |
| "grad_norm": 6.420173645019531, | |
| "learning_rate": 2.1345006075979e-10, | |
| "loss": 2.8579, | |
| "num_input_tokens_seen": 5959744, | |
| "step": 9115 | |
| }, | |
| { | |
| "epoch": 0.9996711608023676, | |
| "grad_norm": 8.133180618286133, | |
| "learning_rate": 7.263238052668264e-11, | |
| "loss": 3.1424, | |
| "num_input_tokens_seen": 5962752, | |
| "step": 9120 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 9123, | |
| "num_input_tokens_seen": 5964208, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.722124677282202e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |