| { |
| "best_metric": 0.3424564003944397, |
| "best_model_checkpoint": "./results/chembl.trans_only_T5Chem_dmodel512_layers12_batch120_lr0.0005_train0.80/checkpoint-25000", |
| "epoch": 1.4214236979758925, |
| "eval_steps": 5000, |
| "global_step": 25000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002842847395951785, |
| "grad_norm": 0.4646081328392029, |
| "learning_rate": 0.0004999955580509438, |
| "loss": 0.6585, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00568569479190357, |
| "grad_norm": 0.19792014360427856, |
| "learning_rate": 0.0004999911161018877, |
| "loss": 0.53, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.008528542187855356, |
| "grad_norm": 0.16343972086906433, |
| "learning_rate": 0.0004999866741528315, |
| "loss": 0.5034, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01137138958380714, |
| "grad_norm": 0.20607765018939972, |
| "learning_rate": 0.0004999822322037754, |
| "loss": 0.496, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.014214236979758927, |
| "grad_norm": 0.14076846837997437, |
| "learning_rate": 0.0004999777902547192, |
| "loss": 0.4841, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.017057084375710713, |
| "grad_norm": 0.12710894644260406, |
| "learning_rate": 0.000499973348305663, |
| "loss": 0.4816, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.019899931771662497, |
| "grad_norm": 0.1586833894252777, |
| "learning_rate": 0.0004999689063566068, |
| "loss": 0.477, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.02274277916761428, |
| "grad_norm": 0.18906599283218384, |
| "learning_rate": 0.0004999644644075506, |
| "loss": 0.4743, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.02558562656356607, |
| "grad_norm": 0.14747406542301178, |
| "learning_rate": 0.0004999600224584944, |
| "loss": 0.465, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.028428473959517853, |
| "grad_norm": 0.15983326733112335, |
| "learning_rate": 0.0004999555805094383, |
| "loss": 0.4647, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03127132135546964, |
| "grad_norm": 0.14171165227890015, |
| "learning_rate": 0.0004999511385603821, |
| "loss": 0.4603, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.034114168751421425, |
| "grad_norm": 0.13967591524124146, |
| "learning_rate": 0.0004999466966113259, |
| "loss": 0.4574, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.036957016147373206, |
| "grad_norm": 0.2039215862751007, |
| "learning_rate": 0.0004999422546622698, |
| "loss": 0.4536, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.039799863543324994, |
| "grad_norm": 0.15800215303897858, |
| "learning_rate": 0.0004999378127132136, |
| "loss": 0.4544, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04264271093927678, |
| "grad_norm": 0.16743983328342438, |
| "learning_rate": 0.0004999333707641574, |
| "loss": 0.4528, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.04548555833522856, |
| "grad_norm": 0.16410502791404724, |
| "learning_rate": 0.0004999289288151012, |
| "loss": 0.4498, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.04832840573118035, |
| "grad_norm": 0.17722439765930176, |
| "learning_rate": 0.000499924486866045, |
| "loss": 0.4461, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.05117125312713214, |
| "grad_norm": 0.13122214376926422, |
| "learning_rate": 0.0004999200449169889, |
| "loss": 0.4422, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.05401410052308392, |
| "grad_norm": 0.1328468769788742, |
| "learning_rate": 0.0004999156029679327, |
| "loss": 0.4438, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.05685694791903571, |
| "grad_norm": 0.1839675009250641, |
| "learning_rate": 0.0004999111610188765, |
| "loss": 0.4429, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.059699795314987494, |
| "grad_norm": 0.14567476511001587, |
| "learning_rate": 0.0004999067190698203, |
| "loss": 0.4356, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.06254264271093928, |
| "grad_norm": 0.1527237445116043, |
| "learning_rate": 0.0004999022771207641, |
| "loss": 0.438, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.06538549010689106, |
| "grad_norm": 0.1287076622247696, |
| "learning_rate": 0.000499897835171708, |
| "loss": 0.4343, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.06822833750284285, |
| "grad_norm": 0.11131107807159424, |
| "learning_rate": 0.0004998933932226518, |
| "loss": 0.4367, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.07107118489879463, |
| "grad_norm": 0.13373662531375885, |
| "learning_rate": 0.0004998889512735956, |
| "loss": 0.4316, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.07391403229474641, |
| "grad_norm": 0.13251946866512299, |
| "learning_rate": 0.0004998845093245395, |
| "loss": 0.4347, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.07675687969069821, |
| "grad_norm": 0.14924846589565277, |
| "learning_rate": 0.0004998800673754833, |
| "loss": 0.4315, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.07959972708664999, |
| "grad_norm": 0.12222905457019806, |
| "learning_rate": 0.0004998756254264271, |
| "loss": 0.4311, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.08244257448260177, |
| "grad_norm": 0.10474367439746857, |
| "learning_rate": 0.0004998711834773709, |
| "loss": 0.4289, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.08528542187855356, |
| "grad_norm": 0.11413396149873734, |
| "learning_rate": 0.0004998667415283148, |
| "loss": 0.4287, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.08812826927450534, |
| "grad_norm": 0.1532815843820572, |
| "learning_rate": 0.0004998622995792587, |
| "loss": 0.4266, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.09097111667045713, |
| "grad_norm": 0.16395249962806702, |
| "learning_rate": 0.0004998578576302025, |
| "loss": 0.4254, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.09381396406640892, |
| "grad_norm": 0.13058966398239136, |
| "learning_rate": 0.0004998534156811463, |
| "loss": 0.4236, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.0966568114623607, |
| "grad_norm": 0.15570798516273499, |
| "learning_rate": 0.0004998489737320901, |
| "loss": 0.4267, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.09949965885831248, |
| "grad_norm": 0.1350376456975937, |
| "learning_rate": 0.0004998445317830339, |
| "loss": 0.421, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.10234250625426428, |
| "grad_norm": 0.13523083925247192, |
| "learning_rate": 0.0004998400898339777, |
| "loss": 0.4212, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.10518535365021606, |
| "grad_norm": 0.12544061243534088, |
| "learning_rate": 0.0004998356478849215, |
| "loss": 0.424, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.10802820104616784, |
| "grad_norm": 0.12443090230226517, |
| "learning_rate": 0.0004998312059358654, |
| "loss": 0.4217, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.11087104844211963, |
| "grad_norm": 0.11098171025514603, |
| "learning_rate": 0.0004998267639868092, |
| "loss": 0.4226, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.11371389583807141, |
| "grad_norm": 0.129473477602005, |
| "learning_rate": 0.000499822322037753, |
| "loss": 0.4237, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1165567432340232, |
| "grad_norm": 0.1107649877667427, |
| "learning_rate": 0.0004998178800886969, |
| "loss": 0.4241, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.11939959062997499, |
| "grad_norm": 0.12195456773042679, |
| "learning_rate": 0.0004998134381396407, |
| "loss": 0.4176, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.12224243802592677, |
| "grad_norm": 0.12097357213497162, |
| "learning_rate": 0.0004998089961905845, |
| "loss": 0.4156, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.12508528542187855, |
| "grad_norm": 0.11261444538831711, |
| "learning_rate": 0.0004998045542415283, |
| "loss": 0.4179, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.12792813281783033, |
| "grad_norm": 0.13488833606243134, |
| "learning_rate": 0.0004998001122924721, |
| "loss": 0.4175, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.1307709802137821, |
| "grad_norm": 0.11204372346401215, |
| "learning_rate": 0.000499795670343416, |
| "loss": 0.4169, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.13361382760973392, |
| "grad_norm": 0.10848614573478699, |
| "learning_rate": 0.0004997912283943598, |
| "loss": 0.4156, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.1364566750056857, |
| "grad_norm": 0.1451840102672577, |
| "learning_rate": 0.0004997867864453036, |
| "loss": 0.4154, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.13929952240163748, |
| "grad_norm": 0.14736251533031464, |
| "learning_rate": 0.0004997823444962474, |
| "loss": 0.4155, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.14214236979758926, |
| "grad_norm": 0.12735013663768768, |
| "learning_rate": 0.0004997779025471912, |
| "loss": 0.4089, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.14498521719354104, |
| "grad_norm": 0.212030827999115, |
| "learning_rate": 0.0004997734605981351, |
| "loss": 0.415, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.14782806458949282, |
| "grad_norm": 0.11738018691539764, |
| "learning_rate": 0.0004997690186490789, |
| "loss": 0.4102, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.15067091198544463, |
| "grad_norm": 0.1060812696814537, |
| "learning_rate": 0.0004997645767000228, |
| "loss": 0.4136, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.15351375938139641, |
| "grad_norm": 0.12909935414791107, |
| "learning_rate": 0.0004997601347509666, |
| "loss": 0.4102, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.1563566067773482, |
| "grad_norm": 0.15052352845668793, |
| "learning_rate": 0.0004997556928019104, |
| "loss": 0.4092, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.15919945417329998, |
| "grad_norm": 0.13438868522644043, |
| "learning_rate": 0.0004997512508528542, |
| "loss": 0.4102, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.16204230156925176, |
| "grad_norm": 0.11692175269126892, |
| "learning_rate": 0.000499746808903798, |
| "loss": 0.4103, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.16488514896520354, |
| "grad_norm": 0.11706849187612534, |
| "learning_rate": 0.0004997423669547419, |
| "loss": 0.4104, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.16772799636115535, |
| "grad_norm": 0.14066801965236664, |
| "learning_rate": 0.0004997379250056858, |
| "loss": 0.4104, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.17057084375710713, |
| "grad_norm": 0.14922069013118744, |
| "learning_rate": 0.0004997334830566296, |
| "loss": 0.4125, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.1734136911530589, |
| "grad_norm": 0.14433512091636658, |
| "learning_rate": 0.0004997290411075734, |
| "loss": 0.404, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.1762565385490107, |
| "grad_norm": 0.11257103830575943, |
| "learning_rate": 0.0004997245991585172, |
| "loss": 0.4089, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.17909938594496247, |
| "grad_norm": 0.13209494948387146, |
| "learning_rate": 0.000499720157209461, |
| "loss": 0.4081, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.18194223334091425, |
| "grad_norm": 0.13629132509231567, |
| "learning_rate": 0.0004997157152604048, |
| "loss": 0.4062, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.18478508073686606, |
| "grad_norm": 0.11546222865581512, |
| "learning_rate": 0.0004997112733113486, |
| "loss": 0.4037, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.18762792813281784, |
| "grad_norm": 0.12525054812431335, |
| "learning_rate": 0.0004997068313622925, |
| "loss": 0.4064, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.19047077552876962, |
| "grad_norm": 0.14944538474082947, |
| "learning_rate": 0.0004997023894132363, |
| "loss": 0.4085, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.1933136229247214, |
| "grad_norm": 0.15211829543113708, |
| "learning_rate": 0.0004996979474641801, |
| "loss": 0.4063, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.19615647032067318, |
| "grad_norm": 0.12800821661949158, |
| "learning_rate": 0.000499693505515124, |
| "loss": 0.4033, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.19899931771662496, |
| "grad_norm": 0.10901828110218048, |
| "learning_rate": 0.0004996890635660678, |
| "loss": 0.4041, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.20184216511257674, |
| "grad_norm": 0.13710814714431763, |
| "learning_rate": 0.0004996846216170116, |
| "loss": 0.4059, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.20468501250852855, |
| "grad_norm": 0.11688549071550369, |
| "learning_rate": 0.0004996801796679554, |
| "loss": 0.4019, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.20752785990448033, |
| "grad_norm": 0.10226283967494965, |
| "learning_rate": 0.0004996757377188992, |
| "loss": 0.4023, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.2103707073004321, |
| "grad_norm": 0.1010371595621109, |
| "learning_rate": 0.0004996712957698431, |
| "loss": 0.4031, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.2132135546963839, |
| "grad_norm": 0.12441287934780121, |
| "learning_rate": 0.0004996668538207869, |
| "loss": 0.4031, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.21605640209233568, |
| "grad_norm": 0.12629808485507965, |
| "learning_rate": 0.0004996624118717307, |
| "loss": 0.4046, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.21889924948828746, |
| "grad_norm": 0.1441943198442459, |
| "learning_rate": 0.0004996579699226745, |
| "loss": 0.4065, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.22174209688423926, |
| "grad_norm": 0.08717290312051773, |
| "learning_rate": 0.0004996535279736184, |
| "loss": 0.4043, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.22458494428019105, |
| "grad_norm": 0.12982551753520966, |
| "learning_rate": 0.0004996490860245623, |
| "loss": 0.3997, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.22742779167614283, |
| "grad_norm": 0.11208934336900711, |
| "learning_rate": 0.0004996446440755061, |
| "loss": 0.3966, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2302706390720946, |
| "grad_norm": 0.13264520466327667, |
| "learning_rate": 0.0004996402021264499, |
| "loss": 0.401, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.2331134864680464, |
| "grad_norm": 0.1360943764448166, |
| "learning_rate": 0.0004996357601773937, |
| "loss": 0.3999, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.23595633386399817, |
| "grad_norm": 0.1145951896905899, |
| "learning_rate": 0.0004996313182283375, |
| "loss": 0.3995, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.23879918125994998, |
| "grad_norm": 0.12475068867206573, |
| "learning_rate": 0.0004996268762792813, |
| "loss": 0.4017, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.24164202865590176, |
| "grad_norm": 0.09585878252983093, |
| "learning_rate": 0.0004996224343302251, |
| "loss": 0.4007, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.24448487605185354, |
| "grad_norm": 0.11792745441198349, |
| "learning_rate": 0.000499617992381169, |
| "loss": 0.3991, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.24732772344780532, |
| "grad_norm": 0.16740038990974426, |
| "learning_rate": 0.0004996135504321129, |
| "loss": 0.3984, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.2501705708437571, |
| "grad_norm": 0.09640038013458252, |
| "learning_rate": 0.0004996091084830567, |
| "loss": 0.3976, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.2530134182397089, |
| "grad_norm": 0.09332569688558578, |
| "learning_rate": 0.0004996046665340005, |
| "loss": 0.3998, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.25585626563566066, |
| "grad_norm": 0.11328030377626419, |
| "learning_rate": 0.0004996002245849443, |
| "loss": 0.3979, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.25869911303161247, |
| "grad_norm": 0.1477869302034378, |
| "learning_rate": 0.0004995957826358881, |
| "loss": 0.3973, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.2615419604275642, |
| "grad_norm": 0.09623896330595016, |
| "learning_rate": 0.0004995913406868319, |
| "loss": 0.3962, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.26438480782351603, |
| "grad_norm": 0.11626730114221573, |
| "learning_rate": 0.0004995868987377757, |
| "loss": 0.3958, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.26722765521946784, |
| "grad_norm": 0.10988286882638931, |
| "learning_rate": 0.0004995824567887196, |
| "loss": 0.3944, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.2700705026154196, |
| "grad_norm": 0.12107770144939423, |
| "learning_rate": 0.0004995780148396634, |
| "loss": 0.3931, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.2729133500113714, |
| "grad_norm": 0.11602004617452621, |
| "learning_rate": 0.0004995735728906072, |
| "loss": 0.3963, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.27575619740732316, |
| "grad_norm": 0.10920259356498718, |
| "learning_rate": 0.0004995691309415511, |
| "loss": 0.395, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.27859904480327496, |
| "grad_norm": 0.1066461130976677, |
| "learning_rate": 0.0004995646889924949, |
| "loss": 0.3938, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.2814418921992268, |
| "grad_norm": 0.1071213036775589, |
| "learning_rate": 0.0004995602470434387, |
| "loss": 0.3943, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.2842847395951785, |
| "grad_norm": 0.08286375552415848, |
| "learning_rate": 0.0004995558050943825, |
| "loss": 0.3948, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2842847395951785, |
| "eval_loss": 0.38135138154029846, |
| "eval_runtime": 583.2891, |
| "eval_samples_per_second": 226.149, |
| "eval_steps_per_second": 28.269, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.28712758699113033, |
| "grad_norm": 0.11819513142108917, |
| "learning_rate": 0.0004995513631453263, |
| "loss": 0.3969, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.2899704343870821, |
| "grad_norm": 0.10034069418907166, |
| "learning_rate": 0.0004995469211962702, |
| "loss": 0.3962, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.2928132817830339, |
| "grad_norm": 0.09750229120254517, |
| "learning_rate": 0.000499542479247214, |
| "loss": 0.3938, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.29565612917898565, |
| "grad_norm": 0.14667393267154694, |
| "learning_rate": 0.0004995380372981578, |
| "loss": 0.395, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.29849897657493746, |
| "grad_norm": 0.1200270876288414, |
| "learning_rate": 0.0004995335953491016, |
| "loss": 0.3939, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.30134182397088927, |
| "grad_norm": 0.12305665761232376, |
| "learning_rate": 0.0004995291534000456, |
| "loss": 0.3885, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.304184671366841, |
| "grad_norm": 0.09782176464796066, |
| "learning_rate": 0.0004995247114509894, |
| "loss": 0.3907, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.30702751876279283, |
| "grad_norm": 0.10045728832483292, |
| "learning_rate": 0.0004995202695019332, |
| "loss": 0.3921, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.3098703661587446, |
| "grad_norm": 0.15331445634365082, |
| "learning_rate": 0.000499515827552877, |
| "loss": 0.3953, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.3127132135546964, |
| "grad_norm": 0.10946525633335114, |
| "learning_rate": 0.0004995113856038208, |
| "loss": 0.3917, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.31555606095064814, |
| "grad_norm": 0.12001294642686844, |
| "learning_rate": 0.0004995069436547646, |
| "loss": 0.3915, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.31839890834659995, |
| "grad_norm": 0.11100505292415619, |
| "learning_rate": 0.0004995025017057084, |
| "loss": 0.3916, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.32124175574255176, |
| "grad_norm": 0.11148565262556076, |
| "learning_rate": 0.0004994980597566523, |
| "loss": 0.3947, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.3240846031385035, |
| "grad_norm": 0.10728944838047028, |
| "learning_rate": 0.0004994936178075961, |
| "loss": 0.3898, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.3269274505344553, |
| "grad_norm": 0.09397928416728973, |
| "learning_rate": 0.00049948917585854, |
| "loss": 0.3877, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.3297702979304071, |
| "grad_norm": 0.12607477605342865, |
| "learning_rate": 0.0004994847339094838, |
| "loss": 0.3919, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.3326131453263589, |
| "grad_norm": 0.11440327763557434, |
| "learning_rate": 0.0004994802919604276, |
| "loss": 0.3928, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.3354559927223107, |
| "grad_norm": 0.1275177150964737, |
| "learning_rate": 0.0004994758500113714, |
| "loss": 0.389, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.33829884011826244, |
| "grad_norm": 0.11246796697378159, |
| "learning_rate": 0.0004994714080623152, |
| "loss": 0.3862, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.34114168751421425, |
| "grad_norm": 0.11354568600654602, |
| "learning_rate": 0.000499466966113259, |
| "loss": 0.3908, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.343984534910166, |
| "grad_norm": 0.10693259537220001, |
| "learning_rate": 0.0004994625241642029, |
| "loss": 0.389, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.3468273823061178, |
| "grad_norm": 0.12362982332706451, |
| "learning_rate": 0.0004994580822151467, |
| "loss": 0.39, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.34967022970206957, |
| "grad_norm": 0.1343277543783188, |
| "learning_rate": 0.0004994536402660905, |
| "loss": 0.3934, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.3525130770980214, |
| "grad_norm": 0.15133637189865112, |
| "learning_rate": 0.0004994491983170344, |
| "loss": 0.382, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.3553559244939732, |
| "grad_norm": 0.11031973361968994, |
| "learning_rate": 0.0004994447563679782, |
| "loss": 0.3908, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.35819877188992494, |
| "grad_norm": 0.09201149642467499, |
| "learning_rate": 0.000499440314418922, |
| "loss": 0.3888, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.36104161928587675, |
| "grad_norm": 0.10396040976047516, |
| "learning_rate": 0.0004994358724698658, |
| "loss": 0.3861, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.3638844666818285, |
| "grad_norm": 0.11148199439048767, |
| "learning_rate": 0.0004994314305208096, |
| "loss": 0.3897, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.3667273140777803, |
| "grad_norm": 0.11479378491640091, |
| "learning_rate": 0.0004994269885717535, |
| "loss": 0.3883, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.3695701614737321, |
| "grad_norm": 0.09433585405349731, |
| "learning_rate": 0.0004994225466226973, |
| "loss": 0.3841, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.37241300886968387, |
| "grad_norm": 0.1304759830236435, |
| "learning_rate": 0.0004994181046736411, |
| "loss": 0.3876, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.3752558562656357, |
| "grad_norm": 0.11292573064565659, |
| "learning_rate": 0.0004994136627245849, |
| "loss": 0.3872, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.37809870366158743, |
| "grad_norm": 0.0928904116153717, |
| "learning_rate": 0.0004994092207755289, |
| "loss": 0.3875, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.38094155105753924, |
| "grad_norm": 0.10247938334941864, |
| "learning_rate": 0.0004994047788264727, |
| "loss": 0.3885, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.383784398453491, |
| "grad_norm": 0.11605235934257507, |
| "learning_rate": 0.0004994003368774165, |
| "loss": 0.3847, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.3866272458494428, |
| "grad_norm": 0.1496395617723465, |
| "learning_rate": 0.0004993958949283603, |
| "loss": 0.3861, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3894700932453946, |
| "grad_norm": 0.11461085826158524, |
| "learning_rate": 0.0004993914529793041, |
| "loss": 0.3862, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.39231294064134636, |
| "grad_norm": 0.11239515244960785, |
| "learning_rate": 0.0004993870110302479, |
| "loss": 0.3865, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.3951557880372982, |
| "grad_norm": 0.11601614952087402, |
| "learning_rate": 0.0004993825690811917, |
| "loss": 0.3837, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.3979986354332499, |
| "grad_norm": 0.13389568030834198, |
| "learning_rate": 0.0004993781271321355, |
| "loss": 0.3866, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.40084148282920173, |
| "grad_norm": 0.12181925028562546, |
| "learning_rate": 0.0004993736851830794, |
| "loss": 0.388, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.4036843302251535, |
| "grad_norm": 0.10346251726150513, |
| "learning_rate": 0.0004993692432340233, |
| "loss": 0.3879, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.4065271776211053, |
| "grad_norm": 0.11786284297704697, |
| "learning_rate": 0.0004993648012849671, |
| "loss": 0.3859, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.4093700250170571, |
| "grad_norm": 0.10377379506826401, |
| "learning_rate": 0.0004993603593359109, |
| "loss": 0.3838, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.41221287241300886, |
| "grad_norm": 0.12437117844820023, |
| "learning_rate": 0.0004993559173868547, |
| "loss": 0.385, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.41505571980896067, |
| "grad_norm": 0.09871742129325867, |
| "learning_rate": 0.0004993514754377985, |
| "loss": 0.387, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.4178985672049124, |
| "grad_norm": 0.09648123383522034, |
| "learning_rate": 0.0004993470334887423, |
| "loss": 0.3843, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.4207414146008642, |
| "grad_norm": 0.14795717597007751, |
| "learning_rate": 0.0004993425915396861, |
| "loss": 0.3833, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.42358426199681604, |
| "grad_norm": 0.10902711004018784, |
| "learning_rate": 0.00049933814959063, |
| "loss": 0.3823, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.4264271093927678, |
| "grad_norm": 0.09183204919099808, |
| "learning_rate": 0.0004993337076415738, |
| "loss": 0.3871, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.4292699567887196, |
| "grad_norm": 0.11348464339971542, |
| "learning_rate": 0.0004993292656925176, |
| "loss": 0.3828, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.43211280418467135, |
| "grad_norm": 0.08863567560911179, |
| "learning_rate": 0.0004993248237434615, |
| "loss": 0.3817, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.43495565158062316, |
| "grad_norm": 0.1327882558107376, |
| "learning_rate": 0.0004993203817944053, |
| "loss": 0.3838, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.4377984989765749, |
| "grad_norm": 0.11749105900526047, |
| "learning_rate": 0.0004993159398453491, |
| "loss": 0.3865, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.4406413463725267, |
| "grad_norm": 0.09009282290935516, |
| "learning_rate": 0.0004993114978962929, |
| "loss": 0.3823, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.44348419376847853, |
| "grad_norm": 0.106484554708004, |
| "learning_rate": 0.0004993070559472367, |
| "loss": 0.3797, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.4463270411644303, |
| "grad_norm": 0.12862497568130493, |
| "learning_rate": 0.0004993026139981806, |
| "loss": 0.3804, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.4491698885603821, |
| "grad_norm": 0.1015087440609932, |
| "learning_rate": 0.0004992981720491244, |
| "loss": 0.3806, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.45201273595633384, |
| "grad_norm": 0.10782980173826218, |
| "learning_rate": 0.0004992937301000682, |
| "loss": 0.3858, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.45485558335228565, |
| "grad_norm": 0.09852251410484314, |
| "learning_rate": 0.000499289288151012, |
| "loss": 0.3824, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.45769843074823746, |
| "grad_norm": 0.10274305194616318, |
| "learning_rate": 0.000499284846201956, |
| "loss": 0.381, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.4605412781441892, |
| "grad_norm": 0.10695527493953705, |
| "learning_rate": 0.0004992804042528998, |
| "loss": 0.3839, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.463384125540141, |
| "grad_norm": 0.09151016920804977, |
| "learning_rate": 0.0004992759623038436, |
| "loss": 0.3842, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.4662269729360928, |
| "grad_norm": 0.15730440616607666, |
| "learning_rate": 0.0004992715203547874, |
| "loss": 0.3798, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.4690698203320446, |
| "grad_norm": 0.10243304073810577, |
| "learning_rate": 0.0004992670784057312, |
| "loss": 0.38, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.47191266772799634, |
| "grad_norm": 0.1174805536866188, |
| "learning_rate": 0.000499262636456675, |
| "loss": 0.3817, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.47475551512394815, |
| "grad_norm": 0.10942196846008301, |
| "learning_rate": 0.0004992581945076188, |
| "loss": 0.3829, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.47759836251989995, |
| "grad_norm": 0.10109516978263855, |
| "learning_rate": 0.0004992537525585626, |
| "loss": 0.3803, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.4804412099158517, |
| "grad_norm": 0.09248687326908112, |
| "learning_rate": 0.0004992493106095065, |
| "loss": 0.3796, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.4832840573118035, |
| "grad_norm": 0.11525557935237885, |
| "learning_rate": 0.0004992448686604504, |
| "loss": 0.3823, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.48612690470775527, |
| "grad_norm": 0.10481023788452148, |
| "learning_rate": 0.0004992404267113942, |
| "loss": 0.3781, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.4889697521037071, |
| "grad_norm": 0.09493274986743927, |
| "learning_rate": 0.000499235984762338, |
| "loss": 0.38, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.49181259949965883, |
| "grad_norm": 0.08859369158744812, |
| "learning_rate": 0.0004992315428132818, |
| "loss": 0.3789, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.49465544689561064, |
| "grad_norm": 0.11814738810062408, |
| "learning_rate": 0.0004992271008642256, |
| "loss": 0.3809, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.49749829429156245, |
| "grad_norm": 0.1322488635778427, |
| "learning_rate": 0.0004992226589151694, |
| "loss": 0.3795, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.5003411416875142, |
| "grad_norm": 0.1304454803466797, |
| "learning_rate": 0.0004992182169661132, |
| "loss": 0.3794, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.503183989083466, |
| "grad_norm": 0.09153052419424057, |
| "learning_rate": 0.0004992137750170571, |
| "loss": 0.3769, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.5060268364794178, |
| "grad_norm": 0.10608222335577011, |
| "learning_rate": 0.0004992093330680009, |
| "loss": 0.3748, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.5088696838753696, |
| "grad_norm": 0.1054573506116867, |
| "learning_rate": 0.0004992048911189447, |
| "loss": 0.3817, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.5117125312713213, |
| "grad_norm": 0.1694241166114807, |
| "learning_rate": 0.0004992004491698886, |
| "loss": 0.3774, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5145553786672732, |
| "grad_norm": 0.13877439498901367, |
| "learning_rate": 0.0004991960072208324, |
| "loss": 0.3769, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.5173982260632249, |
| "grad_norm": 0.10788954794406891, |
| "learning_rate": 0.0004991915652717762, |
| "loss": 0.3819, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.5202410734591767, |
| "grad_norm": 0.12326483428478241, |
| "learning_rate": 0.00049918712332272, |
| "loss": 0.3812, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.5230839208551284, |
| "grad_norm": 0.07986534386873245, |
| "learning_rate": 0.0004991826813736638, |
| "loss": 0.3797, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.5259267682510803, |
| "grad_norm": 0.11102836579084396, |
| "learning_rate": 0.0004991782394246077, |
| "loss": 0.3797, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.5287696156470321, |
| "grad_norm": 0.10697384923696518, |
| "learning_rate": 0.0004991737974755515, |
| "loss": 0.3767, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.5316124630429838, |
| "grad_norm": 0.11679094284772873, |
| "learning_rate": 0.0004991693555264953, |
| "loss": 0.3782, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.5344553104389357, |
| "grad_norm": 0.10827042162418365, |
| "learning_rate": 0.0004991649135774391, |
| "loss": 0.376, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.5372981578348874, |
| "grad_norm": 0.0934649109840393, |
| "learning_rate": 0.0004991604716283831, |
| "loss": 0.3775, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.5401410052308392, |
| "grad_norm": 0.08999691903591156, |
| "learning_rate": 0.0004991560296793269, |
| "loss": 0.3783, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5429838526267909, |
| "grad_norm": 0.11615636944770813, |
| "learning_rate": 0.0004991515877302707, |
| "loss": 0.3779, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.5458267000227428, |
| "grad_norm": 0.4005848467350006, |
| "learning_rate": 0.0004991471457812145, |
| "loss": 0.3745, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.5486695474186946, |
| "grad_norm": 0.12695498764514923, |
| "learning_rate": 0.0004991427038321583, |
| "loss": 0.3916, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.5515123948146463, |
| "grad_norm": 0.10009093582630157, |
| "learning_rate": 0.0004991382618831021, |
| "loss": 0.3798, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.5543552422105982, |
| "grad_norm": 0.1480248123407364, |
| "learning_rate": 0.0004991338199340459, |
| "loss": 0.3752, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.5571980896065499, |
| "grad_norm": 0.10428158193826675, |
| "learning_rate": 0.0004991293779849897, |
| "loss": 0.3787, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.5600409370025017, |
| "grad_norm": 0.1932564377784729, |
| "learning_rate": 0.0004991249360359336, |
| "loss": 0.3812, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.5628837843984535, |
| "grad_norm": 0.088712178170681, |
| "learning_rate": 0.0004991204940868775, |
| "loss": 0.3783, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.5657266317944053, |
| "grad_norm": 0.10896250605583191, |
| "learning_rate": 0.0004991160521378213, |
| "loss": 0.3782, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.568569479190357, |
| "grad_norm": 0.1077795922756195, |
| "learning_rate": 0.0004991116101887651, |
| "loss": 0.3773, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.568569479190357, |
| "eval_loss": 0.36588892340660095, |
| "eval_runtime": 584.439, |
| "eval_samples_per_second": 225.704, |
| "eval_steps_per_second": 28.213, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5714123265863088, |
| "grad_norm": 0.09190759807825089, |
| "learning_rate": 0.0004991071682397089, |
| "loss": 0.3785, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.5742551739822607, |
| "grad_norm": 0.11082971841096878, |
| "learning_rate": 0.0004991027262906527, |
| "loss": 0.3775, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.5770980213782124, |
| "grad_norm": 0.10099225491285324, |
| "learning_rate": 0.0004990982843415965, |
| "loss": 0.3775, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.5799408687741642, |
| "grad_norm": 0.1112075224518776, |
| "learning_rate": 0.0004990938423925403, |
| "loss": 0.3764, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.582783716170116, |
| "grad_norm": 0.09865789860486984, |
| "learning_rate": 0.0004990894004434842, |
| "loss": 0.3802, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.5856265635660678, |
| "grad_norm": 0.10351789742708206, |
| "learning_rate": 0.000499084958494428, |
| "loss": 0.3795, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.5884694109620195, |
| "grad_norm": 0.10376661270856857, |
| "learning_rate": 0.0004990805165453719, |
| "loss": 0.3753, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.5913122583579713, |
| "grad_norm": 0.09663544595241547, |
| "learning_rate": 0.0004990760745963157, |
| "loss": 0.3737, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.5941551057539232, |
| "grad_norm": 0.09701116383075714, |
| "learning_rate": 0.0004990716326472595, |
| "loss": 0.3737, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.5969979531498749, |
| "grad_norm": 0.1021418496966362, |
| "learning_rate": 0.0004990671906982033, |
| "loss": 0.3728, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.5998408005458267, |
| "grad_norm": 0.12170026451349258, |
| "learning_rate": 0.0004990627487491472, |
| "loss": 0.3755, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.6026836479417785, |
| "grad_norm": 0.1142646074295044, |
| "learning_rate": 0.000499058306800091, |
| "loss": 0.378, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.6055264953377303, |
| "grad_norm": 0.09568295627832413, |
| "learning_rate": 0.0004990538648510348, |
| "loss": 0.3742, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.608369342733682, |
| "grad_norm": 0.11586258560419083, |
| "learning_rate": 0.0004990494229019786, |
| "loss": 0.3731, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.6112121901296338, |
| "grad_norm": 0.0984320268034935, |
| "learning_rate": 0.0004990449809529224, |
| "loss": 0.3767, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.6140550375255857, |
| "grad_norm": 0.10665503889322281, |
| "learning_rate": 0.0004990405390038662, |
| "loss": 0.3756, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.6168978849215374, |
| "grad_norm": 0.10374121367931366, |
| "learning_rate": 0.0004990360970548102, |
| "loss": 0.376, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.6197407323174892, |
| "grad_norm": 0.10508013516664505, |
| "learning_rate": 0.000499031655105754, |
| "loss": 0.3717, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.622583579713441, |
| "grad_norm": 0.10919308662414551, |
| "learning_rate": 0.0004990272131566978, |
| "loss": 0.3746, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.6254264271093928, |
| "grad_norm": 0.09990997612476349, |
| "learning_rate": 0.0004990227712076416, |
| "loss": 0.3738, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.6282692745053445, |
| "grad_norm": 0.10817346721887589, |
| "learning_rate": 0.0004990183292585854, |
| "loss": 0.3688, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.6311121219012963, |
| "grad_norm": 0.0922752320766449, |
| "learning_rate": 0.0004990138873095292, |
| "loss": 0.3751, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.6339549692972481, |
| "grad_norm": 0.09761642664670944, |
| "learning_rate": 0.000499009445360473, |
| "loss": 0.3724, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.6367978166931999, |
| "grad_norm": 0.09401151537895203, |
| "learning_rate": 0.0004990050034114168, |
| "loss": 0.3717, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.6396406640891517, |
| "grad_norm": 0.10084612667560577, |
| "learning_rate": 0.0004990005614623607, |
| "loss": 0.3711, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.6424835114851035, |
| "grad_norm": 0.11709214001893997, |
| "learning_rate": 0.0004989961195133046, |
| "loss": 0.3739, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.6453263588810553, |
| "grad_norm": 0.1035287156701088, |
| "learning_rate": 0.0004989916775642484, |
| "loss": 0.3684, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.648169206277007, |
| "grad_norm": 0.09968025982379913, |
| "learning_rate": 0.0004989872356151922, |
| "loss": 0.3749, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.6510120536729589, |
| "grad_norm": 0.1003747433423996, |
| "learning_rate": 0.000498982793666136, |
| "loss": 0.3721, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.6538549010689106, |
| "grad_norm": 0.09462849795818329, |
| "learning_rate": 0.0004989783517170798, |
| "loss": 0.3687, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.6566977484648624, |
| "grad_norm": 0.10089970380067825, |
| "learning_rate": 0.0004989739097680236, |
| "loss": 0.3724, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.6595405958608141, |
| "grad_norm": 0.11282893270254135, |
| "learning_rate": 0.0004989694678189675, |
| "loss": 0.3726, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.662383443256766, |
| "grad_norm": 0.12321527302265167, |
| "learning_rate": 0.0004989650258699113, |
| "loss": 0.3704, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.6652262906527178, |
| "grad_norm": 0.10225247591733932, |
| "learning_rate": 0.0004989605839208551, |
| "loss": 0.3717, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.6680691380486695, |
| "grad_norm": 0.09634348750114441, |
| "learning_rate": 0.000498956141971799, |
| "loss": 0.3731, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.6709119854446214, |
| "grad_norm": 0.09670031815767288, |
| "learning_rate": 0.0004989517000227428, |
| "loss": 0.3697, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.6737548328405731, |
| "grad_norm": 0.11874838918447495, |
| "learning_rate": 0.0004989472580736867, |
| "loss": 0.3717, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.6765976802365249, |
| "grad_norm": 0.1474309116601944, |
| "learning_rate": 0.0004989428161246305, |
| "loss": 0.3719, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.6794405276324766, |
| "grad_norm": 0.0953158363699913, |
| "learning_rate": 0.0004989383741755743, |
| "loss": 0.3698, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.6822833750284285, |
| "grad_norm": 0.07613354921340942, |
| "learning_rate": 0.0004989339322265181, |
| "loss": 0.372, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6851262224243803, |
| "grad_norm": 0.10099935531616211, |
| "learning_rate": 0.0004989294902774619, |
| "loss": 0.3715, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.687969069820332, |
| "grad_norm": 0.1142035722732544, |
| "learning_rate": 0.0004989250483284057, |
| "loss": 0.371, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.6908119172162839, |
| "grad_norm": 0.11868602782487869, |
| "learning_rate": 0.0004989206063793495, |
| "loss": 0.3718, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.6936547646122356, |
| "grad_norm": 0.11403531581163406, |
| "learning_rate": 0.0004989161644302935, |
| "loss": 0.37, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.6964976120081874, |
| "grad_norm": 0.10048527270555496, |
| "learning_rate": 0.0004989117224812373, |
| "loss": 0.3689, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.6993404594041391, |
| "grad_norm": 0.0894421935081482, |
| "learning_rate": 0.0004989072805321811, |
| "loss": 0.3675, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.702183306800091, |
| "grad_norm": 0.10465215891599655, |
| "learning_rate": 0.0004989028385831249, |
| "loss": 0.3707, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.7050261541960428, |
| "grad_norm": 0.10983111709356308, |
| "learning_rate": 0.0004988983966340687, |
| "loss": 0.3713, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.7078690015919945, |
| "grad_norm": 0.11033093929290771, |
| "learning_rate": 0.0004988939546850125, |
| "loss": 0.3723, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.7107118489879464, |
| "grad_norm": 0.11053282022476196, |
| "learning_rate": 0.0004988895127359563, |
| "loss": 0.3701, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.7135546963838981, |
| "grad_norm": 0.09599506109952927, |
| "learning_rate": 0.0004988850707869001, |
| "loss": 0.3696, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.7163975437798499, |
| "grad_norm": 0.10971349477767944, |
| "learning_rate": 0.000498880628837844, |
| "loss": 0.3692, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.7192403911758016, |
| "grad_norm": 0.09890740364789963, |
| "learning_rate": 0.0004988761868887879, |
| "loss": 0.3671, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.7220832385717535, |
| "grad_norm": 0.09667664021253586, |
| "learning_rate": 0.0004988717449397317, |
| "loss": 0.3708, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.7249260859677052, |
| "grad_norm": 0.08852635324001312, |
| "learning_rate": 0.0004988673029906755, |
| "loss": 0.3704, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.727768933363657, |
| "grad_norm": 0.08930686861276627, |
| "learning_rate": 0.0004988628610416193, |
| "loss": 0.3676, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.7306117807596089, |
| "grad_norm": 0.10307100415229797, |
| "learning_rate": 0.0004988584190925631, |
| "loss": 0.3702, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.7334546281555606, |
| "grad_norm": 0.09995568543672562, |
| "learning_rate": 0.0004988539771435069, |
| "loss": 0.367, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.7362974755515124, |
| "grad_norm": 0.09159702807664871, |
| "learning_rate": 0.0004988495351944507, |
| "loss": 0.3661, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.7391403229474642, |
| "grad_norm": 0.0963875874876976, |
| "learning_rate": 0.0004988450932453946, |
| "loss": 0.3667, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.741983170343416, |
| "grad_norm": 0.09584073722362518, |
| "learning_rate": 0.0004988406512963384, |
| "loss": 0.3697, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.7448260177393677, |
| "grad_norm": 0.09499222785234451, |
| "learning_rate": 0.0004988362093472822, |
| "loss": 0.3692, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.7476688651353195, |
| "grad_norm": 0.10076665878295898, |
| "learning_rate": 0.0004988317673982261, |
| "loss": 0.3672, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.7505117125312714, |
| "grad_norm": 0.1046692505478859, |
| "learning_rate": 0.00049882732544917, |
| "loss": 0.3669, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.7533545599272231, |
| "grad_norm": 0.11345544457435608, |
| "learning_rate": 0.0004988228835001138, |
| "loss": 0.3676, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.7561974073231749, |
| "grad_norm": 0.09689969569444656, |
| "learning_rate": 0.0004988184415510576, |
| "loss": 0.367, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.7590402547191267, |
| "grad_norm": 0.10489045083522797, |
| "learning_rate": 0.0004988139996020014, |
| "loss": 0.3681, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.7618831021150785, |
| "grad_norm": 0.09123999625444412, |
| "learning_rate": 0.0004988095576529452, |
| "loss": 0.3687, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.7647259495110302, |
| "grad_norm": 0.09405972063541412, |
| "learning_rate": 0.000498805115703889, |
| "loss": 0.3715, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.767568796906982, |
| "grad_norm": 0.08794659376144409, |
| "learning_rate": 0.0004988006737548328, |
| "loss": 0.3653, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.7704116443029339, |
| "grad_norm": 0.11547774076461792, |
| "learning_rate": 0.0004987962318057766, |
| "loss": 0.3677, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.7732544916988856, |
| "grad_norm": 0.0955629050731659, |
| "learning_rate": 0.0004987917898567206, |
| "loss": 0.369, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.7760973390948374, |
| "grad_norm": 0.10582061856985092, |
| "learning_rate": 0.0004987873479076644, |
| "loss": 0.3696, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.7789401864907892, |
| "grad_norm": 0.11626145988702774, |
| "learning_rate": 0.0004987829059586082, |
| "loss": 0.369, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.781783033886741, |
| "grad_norm": 0.08376283198595047, |
| "learning_rate": 0.000498778464009552, |
| "loss": 0.3686, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.7846258812826927, |
| "grad_norm": 0.09740207344293594, |
| "learning_rate": 0.0004987740220604958, |
| "loss": 0.3649, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.7874687286786445, |
| "grad_norm": 0.08660556375980377, |
| "learning_rate": 0.0004987695801114396, |
| "loss": 0.3654, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.7903115760745963, |
| "grad_norm": 0.09063564240932465, |
| "learning_rate": 0.0004987651381623834, |
| "loss": 0.3671, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.7931544234705481, |
| "grad_norm": 0.12205325812101364, |
| "learning_rate": 0.0004987606962133272, |
| "loss": 0.3667, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.7959972708664999, |
| "grad_norm": 0.1259283423423767, |
| "learning_rate": 0.0004987562542642711, |
| "loss": 0.3611, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.7988401182624517, |
| "grad_norm": 0.0938865914940834, |
| "learning_rate": 0.000498751812315215, |
| "loss": 0.3666, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.8016829656584035, |
| "grad_norm": 0.10185267776250839, |
| "learning_rate": 0.0004987473703661588, |
| "loss": 0.3648, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.8045258130543552, |
| "grad_norm": 0.11661996692419052, |
| "learning_rate": 0.0004987429284171026, |
| "loss": 0.3657, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.807368660450307, |
| "grad_norm": 0.0994558110833168, |
| "learning_rate": 0.0004987384864680464, |
| "loss": 0.3639, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.8102115078462588, |
| "grad_norm": 0.09819088876247406, |
| "learning_rate": 0.0004987340445189902, |
| "loss": 0.3642, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.8130543552422106, |
| "grad_norm": 0.09436295926570892, |
| "learning_rate": 0.000498729602569934, |
| "loss": 0.3677, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.8158972026381623, |
| "grad_norm": 0.12002996355295181, |
| "learning_rate": 0.0004987251606208778, |
| "loss": 0.366, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.8187400500341142, |
| "grad_norm": 0.08650200068950653, |
| "learning_rate": 0.0004987207186718217, |
| "loss": 0.3645, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.821582897430066, |
| "grad_norm": 0.1826743632555008, |
| "learning_rate": 0.0004987162767227655, |
| "loss": 0.3681, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.8244257448260177, |
| "grad_norm": 0.10164210200309753, |
| "learning_rate": 0.0004987118347737093, |
| "loss": 0.3706, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.8272685922219696, |
| "grad_norm": 0.09652096033096313, |
| "learning_rate": 0.0004987073928246532, |
| "loss": 0.364, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.8301114396179213, |
| "grad_norm": 0.1318165808916092, |
| "learning_rate": 0.0004987029508755971, |
| "loss": 0.3651, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.8329542870138731, |
| "grad_norm": 0.09845109283924103, |
| "learning_rate": 0.0004986985089265409, |
| "loss": 0.3636, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.8357971344098248, |
| "grad_norm": 0.12955370545387268, |
| "learning_rate": 0.0004986940669774847, |
| "loss": 0.3641, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.8386399818057767, |
| "grad_norm": 0.10223904252052307, |
| "learning_rate": 0.0004986896250284285, |
| "loss": 0.363, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.8414828292017285, |
| "grad_norm": 0.09701918810606003, |
| "learning_rate": 0.0004986851830793723, |
| "loss": 0.3627, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.8443256765976802, |
| "grad_norm": 0.0875435471534729, |
| "learning_rate": 0.0004986807411303161, |
| "loss": 0.3651, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.8471685239936321, |
| "grad_norm": 0.09006401896476746, |
| "learning_rate": 0.0004986762991812599, |
| "loss": 0.3632, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.8500113713895838, |
| "grad_norm": 0.09841691702604294, |
| "learning_rate": 0.0004986718572322037, |
| "loss": 0.3641, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.8528542187855356, |
| "grad_norm": 0.11014077812433243, |
| "learning_rate": 0.0004986674152831477, |
| "loss": 0.3654, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.8528542187855356, |
| "eval_loss": 0.3545059561729431, |
| "eval_runtime": 572.3313, |
| "eval_samples_per_second": 230.478, |
| "eval_steps_per_second": 28.81, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.8556970661814873, |
| "grad_norm": 0.11560770869255066, |
| "learning_rate": 0.0004986629733340915, |
| "loss": 0.3644, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.8585399135774392, |
| "grad_norm": 0.08875507861375809, |
| "learning_rate": 0.0004986585313850353, |
| "loss": 0.3637, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.861382760973391, |
| "grad_norm": 0.09458891302347183, |
| "learning_rate": 0.0004986540894359791, |
| "loss": 0.3638, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.8642256083693427, |
| "grad_norm": 0.10495459288358688, |
| "learning_rate": 0.0004986496474869229, |
| "loss": 0.3647, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.8670684557652946, |
| "grad_norm": 0.10532251745462418, |
| "learning_rate": 0.0004986452055378667, |
| "loss": 0.3664, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.8699113031612463, |
| "grad_norm": 0.09231790155172348, |
| "learning_rate": 0.0004986407635888105, |
| "loss": 0.3643, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.8727541505571981, |
| "grad_norm": 0.09325330704450607, |
| "learning_rate": 0.0004986363216397543, |
| "loss": 0.3659, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.8755969979531498, |
| "grad_norm": 0.11519546806812286, |
| "learning_rate": 0.0004986318796906982, |
| "loss": 0.3654, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.8784398453491017, |
| "grad_norm": 0.08858964592218399, |
| "learning_rate": 0.0004986274377416421, |
| "loss": 0.3647, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.8812826927450534, |
| "grad_norm": 0.11211369931697845, |
| "learning_rate": 0.0004986229957925859, |
| "loss": 0.3646, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8841255401410052, |
| "grad_norm": 0.09691707044839859, |
| "learning_rate": 0.0004986185538435297, |
| "loss": 0.3633, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.8869683875369571, |
| "grad_norm": 0.09831210970878601, |
| "learning_rate": 0.0004986141118944735, |
| "loss": 0.3644, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.8898112349329088, |
| "grad_norm": 0.10148192197084427, |
| "learning_rate": 0.0004986096699454173, |
| "loss": 0.3613, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.8926540823288606, |
| "grad_norm": 0.0974993035197258, |
| "learning_rate": 0.0004986052279963611, |
| "loss": 0.3648, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.8954969297248123, |
| "grad_norm": 0.09062571078538895, |
| "learning_rate": 0.0004986007860473049, |
| "loss": 0.3622, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.8983397771207642, |
| "grad_norm": 0.11505638062953949, |
| "learning_rate": 0.0004985963440982488, |
| "loss": 0.3647, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.9011826245167159, |
| "grad_norm": 0.10409519821405411, |
| "learning_rate": 0.0004985919021491926, |
| "loss": 0.3684, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.9040254719126677, |
| "grad_norm": 0.09695091098546982, |
| "learning_rate": 0.0004985874602001365, |
| "loss": 0.3627, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.9068683193086196, |
| "grad_norm": 0.10802540183067322, |
| "learning_rate": 0.0004985830182510803, |
| "loss": 0.363, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.9097111667045713, |
| "grad_norm": 0.08416867256164551, |
| "learning_rate": 0.0004985785763020242, |
| "loss": 0.3633, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.9125540141005231, |
| "grad_norm": 0.12350185215473175, |
| "learning_rate": 0.000498574134352968, |
| "loss": 0.3645, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.9153968614964749, |
| "grad_norm": 0.10537421703338623, |
| "learning_rate": 0.0004985696924039118, |
| "loss": 0.3635, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.9182397088924267, |
| "grad_norm": 0.09744290262460709, |
| "learning_rate": 0.0004985652504548556, |
| "loss": 0.3623, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.9210825562883784, |
| "grad_norm": 0.09753882884979248, |
| "learning_rate": 0.0004985608085057994, |
| "loss": 0.3617, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.9239254036843302, |
| "grad_norm": 0.08662727475166321, |
| "learning_rate": 0.0004985563665567432, |
| "loss": 0.3624, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.926768251080282, |
| "grad_norm": 0.09310087561607361, |
| "learning_rate": 0.000498551924607687, |
| "loss": 0.3631, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.9296110984762338, |
| "grad_norm": 0.10131137073040009, |
| "learning_rate": 0.0004985474826586308, |
| "loss": 0.3593, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.9324539458721856, |
| "grad_norm": 0.09597592055797577, |
| "learning_rate": 0.0004985430407095748, |
| "loss": 0.3628, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.9352967932681374, |
| "grad_norm": 0.09163256734609604, |
| "learning_rate": 0.0004985385987605186, |
| "loss": 0.3596, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.9381396406640892, |
| "grad_norm": 0.11283744126558304, |
| "learning_rate": 0.0004985341568114624, |
| "loss": 0.3629, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.9409824880600409, |
| "grad_norm": 0.1027892455458641, |
| "learning_rate": 0.0004985297148624062, |
| "loss": 0.3624, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.9438253354559927, |
| "grad_norm": 0.09853541105985641, |
| "learning_rate": 0.00049852527291335, |
| "loss": 0.3606, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.9466681828519445, |
| "grad_norm": 0.09280608594417572, |
| "learning_rate": 0.0004985208309642938, |
| "loss": 0.3626, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.9495110302478963, |
| "grad_norm": 0.09166574478149414, |
| "learning_rate": 0.0004985163890152376, |
| "loss": 0.3631, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.952353877643848, |
| "grad_norm": 0.10630334913730621, |
| "learning_rate": 0.0004985119470661814, |
| "loss": 0.3611, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.9551967250397999, |
| "grad_norm": 0.09024935215711594, |
| "learning_rate": 0.0004985075051171253, |
| "loss": 0.3627, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.9580395724357517, |
| "grad_norm": 0.08191289007663727, |
| "learning_rate": 0.0004985030631680692, |
| "loss": 0.3586, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.9608824198317034, |
| "grad_norm": 0.08423138409852982, |
| "learning_rate": 0.000498498621219013, |
| "loss": 0.3622, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.9637252672276552, |
| "grad_norm": 0.09373466670513153, |
| "learning_rate": 0.0004984941792699568, |
| "loss": 0.3611, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.966568114623607, |
| "grad_norm": 0.09812022745609283, |
| "learning_rate": 0.0004984897373209006, |
| "loss": 0.3627, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.9694109620195588, |
| "grad_norm": 0.09764017909765244, |
| "learning_rate": 0.0004984852953718444, |
| "loss": 0.3589, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.9722538094155105, |
| "grad_norm": 0.10176458954811096, |
| "learning_rate": 0.0004984808534227883, |
| "loss": 0.3594, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.9750966568114624, |
| "grad_norm": 0.10278456658124924, |
| "learning_rate": 0.0004984764114737321, |
| "loss": 0.3581, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.9779395042074142, |
| "grad_norm": 0.1218334510922432, |
| "learning_rate": 0.0004984719695246759, |
| "loss": 0.3606, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.9807823516033659, |
| "grad_norm": 0.09451757371425629, |
| "learning_rate": 0.0004984675275756197, |
| "loss": 0.3614, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.9836251989993177, |
| "grad_norm": 0.10088694840669632, |
| "learning_rate": 0.0004984630856265636, |
| "loss": 0.3604, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.9864680463952695, |
| "grad_norm": 0.10026043653488159, |
| "learning_rate": 0.0004984586436775075, |
| "loss": 0.3604, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.9893108937912213, |
| "grad_norm": 0.11412831395864487, |
| "learning_rate": 0.0004984542017284513, |
| "loss": 0.3608, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.992153741187173, |
| "grad_norm": 0.10250277817249298, |
| "learning_rate": 0.0004984497597793951, |
| "loss": 0.3597, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.9949965885831249, |
| "grad_norm": 0.10922574251890182, |
| "learning_rate": 0.0004984453178303389, |
| "loss": 0.3611, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.9978394359790766, |
| "grad_norm": 0.09554197639226913, |
| "learning_rate": 0.0004984408758812827, |
| "loss": 0.3596, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.0006822833750284, |
| "grad_norm": 0.11213108897209167, |
| "learning_rate": 0.0004984364339322265, |
| "loss": 0.3617, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.0035251307709803, |
| "grad_norm": 0.13735197484493256, |
| "learning_rate": 0.0004984319919831703, |
| "loss": 0.3528, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.006367978166932, |
| "grad_norm": 0.10477373749017715, |
| "learning_rate": 0.0004984275500341141, |
| "loss": 0.3561, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.0092108255628838, |
| "grad_norm": 0.10232077538967133, |
| "learning_rate": 0.0004984231080850581, |
| "loss": 0.3563, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.0120536729588356, |
| "grad_norm": 0.11949522793292999, |
| "learning_rate": 0.0004984186661360019, |
| "loss": 0.3534, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.0148965203547873, |
| "grad_norm": 0.10367763787508011, |
| "learning_rate": 0.0004984142241869457, |
| "loss": 0.3564, |
| "step": 17850 |
| }, |
| { |
| "epoch": 1.0177393677507391, |
| "grad_norm": 0.11785215884447098, |
| "learning_rate": 0.0004984097822378895, |
| "loss": 0.3584, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.020582215146691, |
| "grad_norm": 0.09687703847885132, |
| "learning_rate": 0.0004984053402888333, |
| "loss": 0.356, |
| "step": 17950 |
| }, |
| { |
| "epoch": 1.0234250625426426, |
| "grad_norm": 0.1126837283372879, |
| "learning_rate": 0.0004984008983397771, |
| "loss": 0.3545, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.0262679099385945, |
| "grad_norm": 0.0994856208562851, |
| "learning_rate": 0.0004983964563907209, |
| "loss": 0.3556, |
| "step": 18050 |
| }, |
| { |
| "epoch": 1.0291107573345464, |
| "grad_norm": 0.09857575595378876, |
| "learning_rate": 0.0004983920144416647, |
| "loss": 0.3596, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.031953604730498, |
| "grad_norm": 0.10661829262971878, |
| "learning_rate": 0.0004983875724926086, |
| "loss": 0.3557, |
| "step": 18150 |
| }, |
| { |
| "epoch": 1.0347964521264499, |
| "grad_norm": 0.08805033564567566, |
| "learning_rate": 0.0004983831305435525, |
| "loss": 0.3552, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.0376392995224015, |
| "grad_norm": 0.09851615130901337, |
| "learning_rate": 0.0004983786885944963, |
| "loss": 0.3595, |
| "step": 18250 |
| }, |
| { |
| "epoch": 1.0404821469183534, |
| "grad_norm": 0.10526396334171295, |
| "learning_rate": 0.0004983742466454401, |
| "loss": 0.3557, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.0433249943143053, |
| "grad_norm": 0.10095933824777603, |
| "learning_rate": 0.0004983698046963839, |
| "loss": 0.3545, |
| "step": 18350 |
| }, |
| { |
| "epoch": 1.046167841710257, |
| "grad_norm": 0.0906311646103859, |
| "learning_rate": 0.0004983653627473278, |
| "loss": 0.3539, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.0490106891062088, |
| "grad_norm": 0.10459648072719574, |
| "learning_rate": 0.0004983609207982716, |
| "loss": 0.3551, |
| "step": 18450 |
| }, |
| { |
| "epoch": 1.0518535365021606, |
| "grad_norm": 0.08786173164844513, |
| "learning_rate": 0.0004983564788492154, |
| "loss": 0.3534, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.0546963838981123, |
| "grad_norm": 0.10580555349588394, |
| "learning_rate": 0.0004983520369001592, |
| "loss": 0.3552, |
| "step": 18550 |
| }, |
| { |
| "epoch": 1.0575392312940641, |
| "grad_norm": 0.09845657646656036, |
| "learning_rate": 0.000498347594951103, |
| "loss": 0.3567, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.060382078690016, |
| "grad_norm": 0.09070323407649994, |
| "learning_rate": 0.0004983431530020468, |
| "loss": 0.3572, |
| "step": 18650 |
| }, |
| { |
| "epoch": 1.0632249260859676, |
| "grad_norm": 0.11504799127578735, |
| "learning_rate": 0.0004983387110529907, |
| "loss": 0.3546, |
| "step": 18700 |
| }, |
| { |
| "epoch": 1.0660677734819195, |
| "grad_norm": 0.0939740464091301, |
| "learning_rate": 0.0004983342691039346, |
| "loss": 0.3572, |
| "step": 18750 |
| }, |
| { |
| "epoch": 1.0689106208778714, |
| "grad_norm": 0.11082996428012848, |
| "learning_rate": 0.0004983298271548784, |
| "loss": 0.357, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.071753468273823, |
| "grad_norm": 0.09426229447126389, |
| "learning_rate": 0.0004983253852058222, |
| "loss": 0.3552, |
| "step": 18850 |
| }, |
| { |
| "epoch": 1.0745963156697749, |
| "grad_norm": 0.09430352598428726, |
| "learning_rate": 0.000498320943256766, |
| "loss": 0.3559, |
| "step": 18900 |
| }, |
| { |
| "epoch": 1.0774391630657267, |
| "grad_norm": 0.11377741396427155, |
| "learning_rate": 0.0004983165013077098, |
| "loss": 0.3517, |
| "step": 18950 |
| }, |
| { |
| "epoch": 1.0802820104616784, |
| "grad_norm": 0.0836120992898941, |
| "learning_rate": 0.0004983120593586536, |
| "loss": 0.3546, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.0831248578576302, |
| "grad_norm": 0.09483205527067184, |
| "learning_rate": 0.0004983076174095974, |
| "loss": 0.354, |
| "step": 19050 |
| }, |
| { |
| "epoch": 1.0859677052535819, |
| "grad_norm": 0.08619007468223572, |
| "learning_rate": 0.0004983031754605412, |
| "loss": 0.3526, |
| "step": 19100 |
| }, |
| { |
| "epoch": 1.0888105526495337, |
| "grad_norm": 0.09521963447332382, |
| "learning_rate": 0.0004982987335114852, |
| "loss": 0.3542, |
| "step": 19150 |
| }, |
| { |
| "epoch": 1.0916534000454856, |
| "grad_norm": 0.08765676617622375, |
| "learning_rate": 0.000498294291562429, |
| "loss": 0.3542, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.0944962474414373, |
| "grad_norm": 0.10146308690309525, |
| "learning_rate": 0.0004982898496133728, |
| "loss": 0.3559, |
| "step": 19250 |
| }, |
| { |
| "epoch": 1.0973390948373891, |
| "grad_norm": 0.09321428835391998, |
| "learning_rate": 0.0004982854076643166, |
| "loss": 0.3576, |
| "step": 19300 |
| }, |
| { |
| "epoch": 1.100181942233341, |
| "grad_norm": 0.11401006579399109, |
| "learning_rate": 0.0004982809657152604, |
| "loss": 0.3549, |
| "step": 19350 |
| }, |
| { |
| "epoch": 1.1030247896292926, |
| "grad_norm": 0.11573155969381332, |
| "learning_rate": 0.0004982765237662042, |
| "loss": 0.3569, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.1058676370252445, |
| "grad_norm": 0.08966556191444397, |
| "learning_rate": 0.000498272081817148, |
| "loss": 0.3603, |
| "step": 19450 |
| }, |
| { |
| "epoch": 1.1087104844211964, |
| "grad_norm": 0.12934494018554688, |
| "learning_rate": 0.0004982676398680918, |
| "loss": 0.3533, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.111553331817148, |
| "grad_norm": 0.13741086423397064, |
| "learning_rate": 0.0004982631979190357, |
| "loss": 0.3542, |
| "step": 19550 |
| }, |
| { |
| "epoch": 1.1143961792130999, |
| "grad_norm": 0.10928856581449509, |
| "learning_rate": 0.0004982587559699796, |
| "loss": 0.3559, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.1172390266090517, |
| "grad_norm": 0.09697018563747406, |
| "learning_rate": 0.0004982543140209234, |
| "loss": 0.3559, |
| "step": 19650 |
| }, |
| { |
| "epoch": 1.1200818740050034, |
| "grad_norm": 0.09221459925174713, |
| "learning_rate": 0.0004982498720718672, |
| "loss": 0.3554, |
| "step": 19700 |
| }, |
| { |
| "epoch": 1.1229247214009552, |
| "grad_norm": 0.091732919216156, |
| "learning_rate": 0.0004982454301228111, |
| "loss": 0.352, |
| "step": 19750 |
| }, |
| { |
| "epoch": 1.125767568796907, |
| "grad_norm": 0.10662009567022324, |
| "learning_rate": 0.0004982409881737549, |
| "loss": 0.3582, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.1286104161928587, |
| "grad_norm": 0.08810966461896896, |
| "learning_rate": 0.0004982365462246987, |
| "loss": 0.3534, |
| "step": 19850 |
| }, |
| { |
| "epoch": 1.1314532635888106, |
| "grad_norm": 0.10295290499925613, |
| "learning_rate": 0.0004982321042756425, |
| "loss": 0.3577, |
| "step": 19900 |
| }, |
| { |
| "epoch": 1.1342961109847622, |
| "grad_norm": 0.08759327977895737, |
| "learning_rate": 0.0004982276623265863, |
| "loss": 0.3529, |
| "step": 19950 |
| }, |
| { |
| "epoch": 1.137138958380714, |
| "grad_norm": 0.08622145652770996, |
| "learning_rate": 0.0004982232203775301, |
| "loss": 0.3541, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.137138958380714, |
| "eval_loss": 0.3468220829963684, |
| "eval_runtime": 575.5127, |
| "eval_samples_per_second": 229.204, |
| "eval_steps_per_second": 28.651, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.139981805776666, |
| "grad_norm": 0.09593155980110168, |
| "learning_rate": 0.000498218778428474, |
| "loss": 0.3533, |
| "step": 20050 |
| }, |
| { |
| "epoch": 1.1428246531726176, |
| "grad_norm": 0.1172272190451622, |
| "learning_rate": 0.0004982143364794178, |
| "loss": 0.3513, |
| "step": 20100 |
| }, |
| { |
| "epoch": 1.1456675005685695, |
| "grad_norm": 0.10402148216962814, |
| "learning_rate": 0.0004982098945303617, |
| "loss": 0.3587, |
| "step": 20150 |
| }, |
| { |
| "epoch": 1.1485103479645213, |
| "grad_norm": 0.14057371020317078, |
| "learning_rate": 0.0004982054525813055, |
| "loss": 0.3525, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.151353195360473, |
| "grad_norm": 0.11363080888986588, |
| "learning_rate": 0.0004982010106322493, |
| "loss": 0.3527, |
| "step": 20250 |
| }, |
| { |
| "epoch": 1.1541960427564248, |
| "grad_norm": 0.10476770997047424, |
| "learning_rate": 0.0004981965686831931, |
| "loss": 0.354, |
| "step": 20300 |
| }, |
| { |
| "epoch": 1.1570388901523767, |
| "grad_norm": 0.11272389441728592, |
| "learning_rate": 0.0004981921267341369, |
| "loss": 0.3562, |
| "step": 20350 |
| }, |
| { |
| "epoch": 1.1598817375483284, |
| "grad_norm": 0.0867941826581955, |
| "learning_rate": 0.0004981876847850807, |
| "loss": 0.3531, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.1627245849442802, |
| "grad_norm": 0.12125921249389648, |
| "learning_rate": 0.0004981832428360245, |
| "loss": 0.3523, |
| "step": 20450 |
| }, |
| { |
| "epoch": 1.1655674323402319, |
| "grad_norm": 0.09518919885158539, |
| "learning_rate": 0.0004981788008869683, |
| "loss": 0.3562, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.1684102797361837, |
| "grad_norm": 0.09472860395908356, |
| "learning_rate": 0.0004981743589379123, |
| "loss": 0.3555, |
| "step": 20550 |
| }, |
| { |
| "epoch": 1.1712531271321356, |
| "grad_norm": 0.09812992066144943, |
| "learning_rate": 0.0004981699169888561, |
| "loss": 0.3559, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.1740959745280874, |
| "grad_norm": 0.09982824325561523, |
| "learning_rate": 0.0004981654750397999, |
| "loss": 0.3511, |
| "step": 20650 |
| }, |
| { |
| "epoch": 1.176938821924039, |
| "grad_norm": 0.11572203040122986, |
| "learning_rate": 0.0004981610330907437, |
| "loss": 0.3524, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.179781669319991, |
| "grad_norm": 0.113502636551857, |
| "learning_rate": 0.0004981565911416875, |
| "loss": 0.3526, |
| "step": 20750 |
| }, |
| { |
| "epoch": 1.1826245167159426, |
| "grad_norm": 0.09475291520357132, |
| "learning_rate": 0.0004981521491926313, |
| "loss": 0.3536, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.1854673641118945, |
| "grad_norm": 0.09980995953083038, |
| "learning_rate": 0.0004981477072435751, |
| "loss": 0.3525, |
| "step": 20850 |
| }, |
| { |
| "epoch": 1.1883102115078463, |
| "grad_norm": 0.09389813244342804, |
| "learning_rate": 0.0004981432652945189, |
| "loss": 0.3536, |
| "step": 20900 |
| }, |
| { |
| "epoch": 1.191153058903798, |
| "grad_norm": 0.09969864040613174, |
| "learning_rate": 0.0004981388233454628, |
| "loss": 0.3536, |
| "step": 20950 |
| }, |
| { |
| "epoch": 1.1939959062997498, |
| "grad_norm": 0.08675362169742584, |
| "learning_rate": 0.0004981343813964067, |
| "loss": 0.3538, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.1968387536957017, |
| "grad_norm": 0.13347360491752625, |
| "learning_rate": 0.0004981299394473505, |
| "loss": 0.3536, |
| "step": 21050 |
| }, |
| { |
| "epoch": 1.1996816010916533, |
| "grad_norm": 0.1156553402543068, |
| "learning_rate": 0.0004981254974982943, |
| "loss": 0.3516, |
| "step": 21100 |
| }, |
| { |
| "epoch": 1.2025244484876052, |
| "grad_norm": 0.08983340859413147, |
| "learning_rate": 0.0004981210555492382, |
| "loss": 0.3537, |
| "step": 21150 |
| }, |
| { |
| "epoch": 1.205367295883557, |
| "grad_norm": 0.0790681466460228, |
| "learning_rate": 0.000498116613600182, |
| "loss": 0.3527, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.2082101432795087, |
| "grad_norm": 0.12216062098741531, |
| "learning_rate": 0.0004981121716511258, |
| "loss": 0.3535, |
| "step": 21250 |
| }, |
| { |
| "epoch": 1.2110529906754606, |
| "grad_norm": 0.08232084661722183, |
| "learning_rate": 0.0004981077297020696, |
| "loss": 0.3543, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.2138958380714122, |
| "grad_norm": 0.09808045625686646, |
| "learning_rate": 0.0004981032877530134, |
| "loss": 0.3526, |
| "step": 21350 |
| }, |
| { |
| "epoch": 1.216738685467364, |
| "grad_norm": 0.0974017009139061, |
| "learning_rate": 0.0004980988458039572, |
| "loss": 0.3518, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.219581532863316, |
| "grad_norm": 0.11234429478645325, |
| "learning_rate": 0.000498094403854901, |
| "loss": 0.3549, |
| "step": 21450 |
| }, |
| { |
| "epoch": 1.2224243802592678, |
| "grad_norm": 0.0913853719830513, |
| "learning_rate": 0.0004980899619058449, |
| "loss": 0.3572, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.2252672276552194, |
| "grad_norm": 0.11108215898275375, |
| "learning_rate": 0.0004980855199567888, |
| "loss": 0.3532, |
| "step": 21550 |
| }, |
| { |
| "epoch": 1.2281100750511713, |
| "grad_norm": 0.09476979076862335, |
| "learning_rate": 0.0004980810780077326, |
| "loss": 0.3557, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.230952922447123, |
| "grad_norm": 0.0904000923037529, |
| "learning_rate": 0.0004980766360586764, |
| "loss": 0.3512, |
| "step": 21650 |
| }, |
| { |
| "epoch": 1.2337957698430748, |
| "grad_norm": 0.09948475658893585, |
| "learning_rate": 0.0004980721941096202, |
| "loss": 0.3533, |
| "step": 21700 |
| }, |
| { |
| "epoch": 1.2366386172390267, |
| "grad_norm": 0.0987270325422287, |
| "learning_rate": 0.000498067752160564, |
| "loss": 0.354, |
| "step": 21750 |
| }, |
| { |
| "epoch": 1.2394814646349783, |
| "grad_norm": 0.10672775655984879, |
| "learning_rate": 0.0004980633102115078, |
| "loss": 0.3521, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.2423243120309302, |
| "grad_norm": 0.11016605794429779, |
| "learning_rate": 0.0004980588682624516, |
| "loss": 0.3519, |
| "step": 21850 |
| }, |
| { |
| "epoch": 1.245167159426882, |
| "grad_norm": 0.11307205259799957, |
| "learning_rate": 0.0004980544263133954, |
| "loss": 0.3545, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.2480100068228337, |
| "grad_norm": 0.09647481143474579, |
| "learning_rate": 0.0004980499843643394, |
| "loss": 0.3497, |
| "step": 21950 |
| }, |
| { |
| "epoch": 1.2508528542187856, |
| "grad_norm": 0.11781589686870575, |
| "learning_rate": 0.0004980455424152832, |
| "loss": 0.3527, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.2536957016147374, |
| "grad_norm": 0.09155919402837753, |
| "learning_rate": 0.000498041100466227, |
| "loss": 0.3541, |
| "step": 22050 |
| }, |
| { |
| "epoch": 1.256538549010689, |
| "grad_norm": 0.09770865738391876, |
| "learning_rate": 0.0004980366585171708, |
| "loss": 0.351, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.259381396406641, |
| "grad_norm": 0.09057383239269257, |
| "learning_rate": 0.0004980322165681146, |
| "loss": 0.3526, |
| "step": 22150 |
| }, |
| { |
| "epoch": 1.2622242438025926, |
| "grad_norm": 0.10171497613191605, |
| "learning_rate": 0.0004980277746190584, |
| "loss": 0.3516, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.2650670911985444, |
| "grad_norm": 0.12109605222940445, |
| "learning_rate": 0.0004980233326700022, |
| "loss": 0.3479, |
| "step": 22250 |
| }, |
| { |
| "epoch": 1.2679099385944963, |
| "grad_norm": 0.08494057506322861, |
| "learning_rate": 0.000498018890720946, |
| "loss": 0.3536, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.2707527859904482, |
| "grad_norm": 0.10715505480766296, |
| "learning_rate": 0.00049801444877189, |
| "loss": 0.354, |
| "step": 22350 |
| }, |
| { |
| "epoch": 1.2735956333863998, |
| "grad_norm": 0.09710002690553665, |
| "learning_rate": 0.0004980100068228338, |
| "loss": 0.3504, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.2764384807823517, |
| "grad_norm": 0.10588357597589493, |
| "learning_rate": 0.0004980055648737776, |
| "loss": 0.3552, |
| "step": 22450 |
| }, |
| { |
| "epoch": 1.2792813281783033, |
| "grad_norm": 0.09577326476573944, |
| "learning_rate": 0.0004980011229247214, |
| "loss": 0.3523, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.2821241755742552, |
| "grad_norm": 0.0960511788725853, |
| "learning_rate": 0.0004979966809756653, |
| "loss": 0.3496, |
| "step": 22550 |
| }, |
| { |
| "epoch": 1.284967022970207, |
| "grad_norm": 0.09112073481082916, |
| "learning_rate": 0.0004979922390266091, |
| "loss": 0.3516, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.2878098703661587, |
| "grad_norm": 0.11254040151834488, |
| "learning_rate": 0.0004979877970775529, |
| "loss": 0.3508, |
| "step": 22650 |
| }, |
| { |
| "epoch": 1.2906527177621105, |
| "grad_norm": 0.1305217444896698, |
| "learning_rate": 0.0004979833551284967, |
| "loss": 0.3484, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.2934955651580622, |
| "grad_norm": 0.11914518475532532, |
| "learning_rate": 0.0004979789131794405, |
| "loss": 0.3492, |
| "step": 22750 |
| }, |
| { |
| "epoch": 1.296338412554014, |
| "grad_norm": 0.10428331047296524, |
| "learning_rate": 0.0004979744712303843, |
| "loss": 0.3516, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.299181259949966, |
| "grad_norm": 0.10348132997751236, |
| "learning_rate": 0.0004979700292813282, |
| "loss": 0.3497, |
| "step": 22850 |
| }, |
| { |
| "epoch": 1.3020241073459178, |
| "grad_norm": 0.08855703473091125, |
| "learning_rate": 0.0004979655873322721, |
| "loss": 0.3516, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.3048669547418694, |
| "grad_norm": 0.11277921497821808, |
| "learning_rate": 0.0004979611453832159, |
| "loss": 0.3473, |
| "step": 22950 |
| }, |
| { |
| "epoch": 1.3077098021378213, |
| "grad_norm": 0.092954620718956, |
| "learning_rate": 0.0004979567034341597, |
| "loss": 0.3483, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.310552649533773, |
| "grad_norm": 0.11179167777299881, |
| "learning_rate": 0.0004979522614851035, |
| "loss": 0.3512, |
| "step": 23050 |
| }, |
| { |
| "epoch": 1.3133954969297248, |
| "grad_norm": 0.09125496447086334, |
| "learning_rate": 0.0004979478195360473, |
| "loss": 0.3508, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.3162383443256767, |
| "grad_norm": 0.0916043147444725, |
| "learning_rate": 0.0004979433775869911, |
| "loss": 0.3523, |
| "step": 23150 |
| }, |
| { |
| "epoch": 1.3190811917216285, |
| "grad_norm": 0.09685220569372177, |
| "learning_rate": 0.0004979389356379349, |
| "loss": 0.3546, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1.3219240391175802, |
| "grad_norm": 0.10811689496040344, |
| "learning_rate": 0.0004979344936888787, |
| "loss": 0.3498, |
| "step": 23250 |
| }, |
| { |
| "epoch": 1.324766886513532, |
| "grad_norm": 0.08741540461778641, |
| "learning_rate": 0.0004979300517398227, |
| "loss": 0.3493, |
| "step": 23300 |
| }, |
| { |
| "epoch": 1.3276097339094837, |
| "grad_norm": 0.08877366036176682, |
| "learning_rate": 0.0004979256097907665, |
| "loss": 0.3527, |
| "step": 23350 |
| }, |
| { |
| "epoch": 1.3304525813054355, |
| "grad_norm": 0.1235891655087471, |
| "learning_rate": 0.0004979211678417103, |
| "loss": 0.3525, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.3332954287013874, |
| "grad_norm": 0.0945284515619278, |
| "learning_rate": 0.0004979167258926541, |
| "loss": 0.354, |
| "step": 23450 |
| }, |
| { |
| "epoch": 1.336138276097339, |
| "grad_norm": 0.08477913588285446, |
| "learning_rate": 0.0004979122839435979, |
| "loss": 0.3515, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.338981123493291, |
| "grad_norm": 0.09075385332107544, |
| "learning_rate": 0.0004979078419945417, |
| "loss": 0.3515, |
| "step": 23550 |
| }, |
| { |
| "epoch": 1.3418239708892425, |
| "grad_norm": 0.08231678605079651, |
| "learning_rate": 0.0004979034000454855, |
| "loss": 0.3506, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1.3446668182851944, |
| "grad_norm": 0.08512122184038162, |
| "learning_rate": 0.0004978989580964293, |
| "loss": 0.3506, |
| "step": 23650 |
| }, |
| { |
| "epoch": 1.3475096656811463, |
| "grad_norm": 0.09195214509963989, |
| "learning_rate": 0.0004978945161473732, |
| "loss": 0.3493, |
| "step": 23700 |
| }, |
| { |
| "epoch": 1.3503525130770981, |
| "grad_norm": 0.09307179600000381, |
| "learning_rate": 0.000497890074198317, |
| "loss": 0.3466, |
| "step": 23750 |
| }, |
| { |
| "epoch": 1.3531953604730498, |
| "grad_norm": 0.08473914861679077, |
| "learning_rate": 0.0004978856322492609, |
| "loss": 0.3512, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1.3560382078690016, |
| "grad_norm": 0.09676692634820938, |
| "learning_rate": 0.0004978811903002047, |
| "loss": 0.3495, |
| "step": 23850 |
| }, |
| { |
| "epoch": 1.3588810552649533, |
| "grad_norm": 0.08961619436740875, |
| "learning_rate": 0.0004978767483511486, |
| "loss": 0.3493, |
| "step": 23900 |
| }, |
| { |
| "epoch": 1.3617239026609051, |
| "grad_norm": 0.10256984084844589, |
| "learning_rate": 0.0004978723064020924, |
| "loss": 0.3519, |
| "step": 23950 |
| }, |
| { |
| "epoch": 1.364566750056857, |
| "grad_norm": 0.09426955133676529, |
| "learning_rate": 0.0004978678644530362, |
| "loss": 0.3496, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.3674095974528087, |
| "grad_norm": 0.09407513588666916, |
| "learning_rate": 0.00049786342250398, |
| "loss": 0.35, |
| "step": 24050 |
| }, |
| { |
| "epoch": 1.3702524448487605, |
| "grad_norm": 0.11077912896871567, |
| "learning_rate": 0.0004978589805549238, |
| "loss": 0.3499, |
| "step": 24100 |
| }, |
| { |
| "epoch": 1.3730952922447124, |
| "grad_norm": 0.09664590656757355, |
| "learning_rate": 0.0004978545386058676, |
| "loss": 0.3509, |
| "step": 24150 |
| }, |
| { |
| "epoch": 1.375938139640664, |
| "grad_norm": 0.08676467835903168, |
| "learning_rate": 0.0004978500966568114, |
| "loss": 0.3498, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1.378780987036616, |
| "grad_norm": 0.07996726781129837, |
| "learning_rate": 0.0004978456547077553, |
| "loss": 0.3525, |
| "step": 24250 |
| }, |
| { |
| "epoch": 1.3816238344325678, |
| "grad_norm": 0.1071229949593544, |
| "learning_rate": 0.0004978412127586992, |
| "loss": 0.3529, |
| "step": 24300 |
| }, |
| { |
| "epoch": 1.3844666818285194, |
| "grad_norm": 0.09521368145942688, |
| "learning_rate": 0.000497836770809643, |
| "loss": 0.3501, |
| "step": 24350 |
| }, |
| { |
| "epoch": 1.3873095292244713, |
| "grad_norm": 0.10842622071504593, |
| "learning_rate": 0.0004978323288605868, |
| "loss": 0.3469, |
| "step": 24400 |
| }, |
| { |
| "epoch": 1.390152376620423, |
| "grad_norm": 0.08254586160182953, |
| "learning_rate": 0.0004978278869115306, |
| "loss": 0.3481, |
| "step": 24450 |
| }, |
| { |
| "epoch": 1.3929952240163748, |
| "grad_norm": 0.122976154088974, |
| "learning_rate": 0.0004978234449624744, |
| "loss": 0.3519, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.3958380714123266, |
| "grad_norm": 0.10202399641275406, |
| "learning_rate": 0.0004978190030134182, |
| "loss": 0.3481, |
| "step": 24550 |
| }, |
| { |
| "epoch": 1.3986809188082785, |
| "grad_norm": 0.09278547018766403, |
| "learning_rate": 0.000497814561064362, |
| "loss": 0.3477, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1.4015237662042301, |
| "grad_norm": 0.09204497933387756, |
| "learning_rate": 0.0004978101191153058, |
| "loss": 0.3487, |
| "step": 24650 |
| }, |
| { |
| "epoch": 1.404366613600182, |
| "grad_norm": 0.09469152987003326, |
| "learning_rate": 0.0004978056771662498, |
| "loss": 0.352, |
| "step": 24700 |
| }, |
| { |
| "epoch": 1.4072094609961336, |
| "grad_norm": 0.09524402767419815, |
| "learning_rate": 0.0004978012352171936, |
| "loss": 0.3502, |
| "step": 24750 |
| }, |
| { |
| "epoch": 1.4100523083920855, |
| "grad_norm": 0.09983129799365997, |
| "learning_rate": 0.0004977967932681374, |
| "loss": 0.3491, |
| "step": 24800 |
| }, |
| { |
| "epoch": 1.4128951557880374, |
| "grad_norm": 0.13477516174316406, |
| "learning_rate": 0.0004977923513190812, |
| "loss": 0.3505, |
| "step": 24850 |
| }, |
| { |
| "epoch": 1.415738003183989, |
| "grad_norm": 0.09205208718776703, |
| "learning_rate": 0.000497787909370025, |
| "loss": 0.3505, |
| "step": 24900 |
| }, |
| { |
| "epoch": 1.4185808505799409, |
| "grad_norm": 0.08766143023967743, |
| "learning_rate": 0.0004977834674209688, |
| "loss": 0.3508, |
| "step": 24950 |
| }, |
| { |
| "epoch": 1.4214236979758925, |
| "grad_norm": 0.11106764525175095, |
| "learning_rate": 0.0004977790254719127, |
| "loss": 0.3501, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.4214236979758925, |
| "eval_loss": 0.3424564003944397, |
| "eval_runtime": 577.5935, |
| "eval_samples_per_second": 228.379, |
| "eval_steps_per_second": 28.548, |
| "step": 25000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 5628160, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 320, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.134362268821094e+17, |
| "train_batch_size": 120, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|