| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.01, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1e-05, |
| "grad_norm": 0.37634041905403137, |
| "learning_rate": 5e-06, |
| "loss": 0.169, |
| "loss/crossentropy": 2.8720462918281555, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16897856071591377, |
| "loss/reg": 4.4040703773498535, |
| "step": 1 |
| }, |
| { |
| "epoch": 2e-05, |
| "grad_norm": 0.35649582743644714, |
| "learning_rate": 1e-05, |
| "loss": 0.1696, |
| "loss/crossentropy": 2.715533673763275, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1695844642817974, |
| "loss/reg": 4.399058818817139, |
| "step": 2 |
| }, |
| { |
| "epoch": 3e-05, |
| "grad_norm": 0.3591013252735138, |
| "learning_rate": 1.5e-05, |
| "loss": 0.1782, |
| "loss/crossentropy": 2.6291310787200928, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1782267540693283, |
| "loss/reg": 4.394084930419922, |
| "step": 3 |
| }, |
| { |
| "epoch": 4e-05, |
| "grad_norm": 0.36401960253715515, |
| "learning_rate": 2e-05, |
| "loss": 0.1843, |
| "loss/crossentropy": 2.7142109274864197, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1843317598104477, |
| "loss/reg": 4.389005661010742, |
| "step": 4 |
| }, |
| { |
| "epoch": 5e-05, |
| "grad_norm": 0.3119131922721863, |
| "learning_rate": 2.5e-05, |
| "loss": 0.1625, |
| "loss/crossentropy": 2.7586326003074646, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1625315584242344, |
| "loss/reg": 4.3841166496276855, |
| "step": 5 |
| }, |
| { |
| "epoch": 6e-05, |
| "grad_norm": 0.3388400673866272, |
| "learning_rate": 3e-05, |
| "loss": 0.1844, |
| "loss/crossentropy": 2.8104345202445984, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1844346523284912, |
| "loss/reg": 4.3792877197265625, |
| "step": 6 |
| }, |
| { |
| "epoch": 7e-05, |
| "grad_norm": 0.4783320426940918, |
| "learning_rate": 3.5e-05, |
| "loss": 0.1843, |
| "loss/crossentropy": 2.8321655988693237, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18431555479764938, |
| "loss/reg": 4.37478494644165, |
| "step": 7 |
| }, |
| { |
| "epoch": 8e-05, |
| "grad_norm": 0.29636114835739136, |
| "learning_rate": 4e-05, |
| "loss": 0.1589, |
| "loss/crossentropy": 2.6809526681900024, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15894119441509247, |
| "loss/reg": 4.370139122009277, |
| "step": 8 |
| }, |
| { |
| "epoch": 9e-05, |
| "grad_norm": 0.30071625113487244, |
| "learning_rate": 4.5e-05, |
| "loss": 0.1657, |
| "loss/crossentropy": 2.6759764552116394, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16574353352189064, |
| "loss/reg": 4.365106105804443, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0001, |
| "grad_norm": 0.28883349895477295, |
| "learning_rate": 5e-05, |
| "loss": 0.1572, |
| "loss/crossentropy": 2.808637499809265, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15719739720225334, |
| "loss/reg": 4.360220909118652, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00011, |
| "grad_norm": 0.28243017196655273, |
| "learning_rate": 5e-05, |
| "loss": 0.1426, |
| "loss/crossentropy": 2.72423392534256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14257685840129852, |
| "loss/reg": 4.355813503265381, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00012, |
| "grad_norm": 0.31152331829071045, |
| "learning_rate": 5e-05, |
| "loss": 0.147, |
| "loss/crossentropy": 2.710044264793396, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14701137319207191, |
| "loss/reg": 4.351265907287598, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.00013, |
| "grad_norm": 0.2739678919315338, |
| "learning_rate": 5e-05, |
| "loss": 0.1499, |
| "loss/crossentropy": 2.7644649744033813, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.149860430508852, |
| "loss/reg": 4.346287727355957, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00014, |
| "grad_norm": 0.2712353467941284, |
| "learning_rate": 5e-05, |
| "loss": 0.1454, |
| "loss/crossentropy": 2.7370432019233704, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14539287611842155, |
| "loss/reg": 4.340969085693359, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00015, |
| "grad_norm": 0.2667863667011261, |
| "learning_rate": 5e-05, |
| "loss": 0.1403, |
| "loss/crossentropy": 2.5638718008995056, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14029696956276894, |
| "loss/reg": 4.336019515991211, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.00016, |
| "grad_norm": 0.30467212200164795, |
| "grad_norm_var": 0.0029449483710212204, |
| "learning_rate": 5e-05, |
| "loss": 0.1361, |
| "loss/crossentropy": 2.797445595264435, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.13607431203126907, |
| "loss/reg": 4.330692291259766, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00017, |
| "grad_norm": 0.2617621421813965, |
| "grad_norm_var": 0.0029635281595075556, |
| "learning_rate": 5e-05, |
| "loss": 0.1443, |
| "loss/crossentropy": 2.7542406916618347, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14427556470036507, |
| "loss/reg": 4.325323581695557, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00018, |
| "grad_norm": 0.28648674488067627, |
| "grad_norm_var": 0.0028982593896559215, |
| "learning_rate": 5e-05, |
| "loss": 0.1396, |
| "loss/crossentropy": 2.674492835998535, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.13961521908640862, |
| "loss/reg": 4.31995153427124, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.00019, |
| "grad_norm": 0.26269060373306274, |
| "grad_norm_var": 0.002877724259904054, |
| "learning_rate": 5e-05, |
| "loss": 0.141, |
| "loss/crossentropy": 2.8323662281036377, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14103225618600845, |
| "loss/reg": 4.315446853637695, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0002, |
| "grad_norm": 0.2718074321746826, |
| "grad_norm_var": 0.0026993307095730186, |
| "learning_rate": 5e-05, |
| "loss": 0.1314, |
| "loss/crossentropy": 2.63212913274765, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1313977725803852, |
| "loss/reg": 4.310704708099365, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00021, |
| "grad_norm": 0.2430431842803955, |
| "grad_norm_var": 0.0028911751903802204, |
| "learning_rate": 5e-05, |
| "loss": 0.1324, |
| "loss/crossentropy": 2.664808928966522, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1324238833039999, |
| "loss/reg": 4.305792808532715, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00022, |
| "grad_norm": 0.24898661673069, |
| "grad_norm_var": 0.00288514612507397, |
| "learning_rate": 5e-05, |
| "loss": 0.1242, |
| "loss/crossentropy": 2.7142711877822876, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12423932552337646, |
| "loss/reg": 4.300712585449219, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00023, |
| "grad_norm": 0.3123313784599304, |
| "grad_norm_var": 0.0004523056580034851, |
| "learning_rate": 5e-05, |
| "loss": 0.1321, |
| "loss/crossentropy": 2.7829225063323975, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.13212688639760017, |
| "loss/reg": 4.295501232147217, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.00024, |
| "grad_norm": 0.25187963247299194, |
| "grad_norm_var": 0.00048027979198491945, |
| "learning_rate": 5e-05, |
| "loss": 0.1248, |
| "loss/crossentropy": 2.692659854888916, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12482420355081558, |
| "loss/reg": 4.2908830642700195, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.00025, |
| "grad_norm": 0.2151177078485489, |
| "grad_norm_var": 0.0006726495064564575, |
| "learning_rate": 5e-05, |
| "loss": 0.1232, |
| "loss/crossentropy": 2.738182246685028, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1231868714094162, |
| "loss/reg": 4.285846710205078, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00026, |
| "grad_norm": 0.23308518528938293, |
| "grad_norm_var": 0.0007424884519799501, |
| "learning_rate": 5e-05, |
| "loss": 0.1174, |
| "loss/crossentropy": 2.555102586746216, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11737299524247646, |
| "loss/reg": 4.281113147735596, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.00027, |
| "grad_norm": 0.24523235857486725, |
| "grad_norm_var": 0.0007604384721796281, |
| "learning_rate": 5e-05, |
| "loss": 0.1201, |
| "loss/crossentropy": 2.6816893815994263, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12014555744826794, |
| "loss/reg": 4.2765069007873535, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.00028, |
| "grad_norm": 0.25897473096847534, |
| "grad_norm_var": 0.0006160828367585275, |
| "learning_rate": 5e-05, |
| "loss": 0.1227, |
| "loss/crossentropy": 2.7505548000335693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12271320074796677, |
| "loss/reg": 4.27158260345459, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.00029, |
| "grad_norm": 0.23087331652641296, |
| "grad_norm_var": 0.0006691547004593392, |
| "learning_rate": 5e-05, |
| "loss": 0.1181, |
| "loss/crossentropy": 2.8483291268348694, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11810225620865822, |
| "loss/reg": 4.267061233520508, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0003, |
| "grad_norm": 1.2210192680358887, |
| "grad_norm_var": 0.05843327221954173, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.8535077571868896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17234252952039242, |
| "loss/reg": 4.262645244598389, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00031, |
| "grad_norm": 0.2712586224079132, |
| "grad_norm_var": 0.058402986662709634, |
| "learning_rate": 5e-05, |
| "loss": 0.1156, |
| "loss/crossentropy": 2.6525614261627197, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11560441367328167, |
| "loss/reg": 4.258092403411865, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.00032, |
| "grad_norm": 0.5226843953132629, |
| "grad_norm_var": 0.06092943089461011, |
| "learning_rate": 5e-05, |
| "loss": 0.1537, |
| "loss/crossentropy": 2.6228127479553223, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15369537472724915, |
| "loss/reg": 4.253781318664551, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.00033, |
| "grad_norm": 0.35246461629867554, |
| "grad_norm_var": 0.06057510886832484, |
| "learning_rate": 5e-05, |
| "loss": 0.1216, |
| "loss/crossentropy": 2.6986429691314697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12163393199443817, |
| "loss/reg": 4.249208450317383, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00034, |
| "grad_norm": 0.2868311405181885, |
| "grad_norm_var": 0.060572693607631393, |
| "learning_rate": 5e-05, |
| "loss": 0.1215, |
| "loss/crossentropy": 2.7423174381256104, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12151895463466644, |
| "loss/reg": 4.244677543640137, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.00035, |
| "grad_norm": 0.2556142210960388, |
| "grad_norm_var": 0.06064807497415105, |
| "learning_rate": 5e-05, |
| "loss": 0.1137, |
| "loss/crossentropy": 2.7171207070350647, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1137176975607872, |
| "loss/reg": 4.2399797439575195, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.00036, |
| "grad_norm": 0.2783287763595581, |
| "grad_norm_var": 0.060592460146055585, |
| "learning_rate": 5e-05, |
| "loss": 0.1138, |
| "loss/crossentropy": 2.7394094467163086, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11381806619465351, |
| "loss/reg": 4.235424041748047, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.00037, |
| "grad_norm": 0.3065175712108612, |
| "grad_norm_var": 0.06003019540430902, |
| "learning_rate": 5e-05, |
| "loss": 0.1235, |
| "loss/crossentropy": 2.755502223968506, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12348765879869461, |
| "loss/reg": 4.2310051918029785, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.00038, |
| "grad_norm": 0.26492562890052795, |
| "grad_norm_var": 0.059845851287469956, |
| "learning_rate": 5e-05, |
| "loss": 0.1119, |
| "loss/crossentropy": 2.8106552362442017, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11191634088754654, |
| "loss/reg": 4.226707935333252, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.00039, |
| "grad_norm": 0.24673967063426971, |
| "grad_norm_var": 0.06039341868271975, |
| "learning_rate": 5e-05, |
| "loss": 0.1161, |
| "loss/crossentropy": 2.7490118741989136, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11609707958996296, |
| "loss/reg": 4.222842216491699, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0004, |
| "grad_norm": 0.2973298132419586, |
| "grad_norm_var": 0.05998792869591778, |
| "learning_rate": 5e-05, |
| "loss": 0.1124, |
| "loss/crossentropy": 2.7798808813095093, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11244922317564487, |
| "loss/reg": 4.218531131744385, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00041, |
| "grad_norm": 0.7517657279968262, |
| "grad_norm_var": 0.06884148715130983, |
| "learning_rate": 5e-05, |
| "loss": 0.1545, |
| "loss/crossentropy": 2.749855697154999, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15445118583738804, |
| "loss/reg": 4.214253902435303, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.00042, |
| "grad_norm": 0.2417730987071991, |
| "grad_norm_var": 0.06868010027414732, |
| "learning_rate": 5e-05, |
| "loss": 0.1099, |
| "loss/crossentropy": 2.751042366027832, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1099155992269516, |
| "loss/reg": 4.2101359367370605, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.00043, |
| "grad_norm": 0.2631951570510864, |
| "grad_norm_var": 0.06838462807177058, |
| "learning_rate": 5e-05, |
| "loss": 0.1165, |
| "loss/crossentropy": 2.7250843048095703, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11648696288466454, |
| "loss/reg": 4.206397533416748, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.00044, |
| "grad_norm": 0.2518296241760254, |
| "grad_norm_var": 0.06850134865244813, |
| "learning_rate": 5e-05, |
| "loss": 0.1111, |
| "loss/crossentropy": 2.7153283953666687, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11108221486210823, |
| "loss/reg": 4.201878547668457, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.00045, |
| "grad_norm": 0.24082158505916595, |
| "grad_norm_var": 0.06831278207672915, |
| "learning_rate": 5e-05, |
| "loss": 0.1177, |
| "loss/crossentropy": 2.6632660627365112, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11769118346273899, |
| "loss/reg": 4.19778299331665, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.00046, |
| "grad_norm": 0.260890394449234, |
| "grad_norm_var": 0.018048092726357542, |
| "learning_rate": 5e-05, |
| "loss": 0.1227, |
| "loss/crossentropy": 2.7315176129341125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12269957736134529, |
| "loss/reg": 4.193592071533203, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.00047, |
| "grad_norm": 0.25268790125846863, |
| "grad_norm_var": 0.018186152495949234, |
| "learning_rate": 5e-05, |
| "loss": 0.1178, |
| "loss/crossentropy": 2.774504065513611, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.11776839196681976, |
| "loss/reg": 4.189169406890869, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.00048, |
| "grad_norm": 0.2759403884410858, |
| "grad_norm_var": 0.015229396543742831, |
| "learning_rate": 5e-05, |
| "loss": 0.1289, |
| "loss/crossentropy": 2.8515073657035828, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.12885254248976707, |
| "loss/reg": 4.185054779052734, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.00049, |
| "grad_norm": 0.24765782058238983, |
| "grad_norm_var": 0.015206926335741973, |
| "learning_rate": 5e-05, |
| "loss": 0.1256, |
| "loss/crossentropy": 2.7131593823432922, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1256290916353464, |
| "loss/reg": 4.1810526847839355, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0005, |
| "grad_norm": 0.3096969425678253, |
| "grad_norm_var": 0.015214156358291781, |
| "learning_rate": 5e-05, |
| "loss": 0.1401, |
| "loss/crossentropy": 2.7528311014175415, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14005928859114647, |
| "loss/reg": 4.176880359649658, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00051, |
| "grad_norm": 0.33225017786026, |
| "grad_norm_var": 0.015162352298149247, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.73341304063797, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1618291698396206, |
| "loss/reg": 4.173260688781738, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.00052, |
| "grad_norm": 0.33166685700416565, |
| "grad_norm_var": 0.015176107188209845, |
| "learning_rate": 5e-05, |
| "loss": 0.1704, |
| "loss/crossentropy": 2.824883460998535, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1703827939927578, |
| "loss/reg": 4.168625354766846, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.00053, |
| "grad_norm": 0.4255874752998352, |
| "grad_norm_var": 0.01609058098027729, |
| "learning_rate": 5e-05, |
| "loss": 0.1856, |
| "loss/crossentropy": 2.8565452694892883, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18561138212680817, |
| "loss/reg": 4.164296627044678, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.00054, |
| "grad_norm": 0.33207008242607117, |
| "grad_norm_var": 0.015949373509081675, |
| "learning_rate": 5e-05, |
| "loss": 0.1762, |
| "loss/crossentropy": 2.7211243510246277, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1762254200875759, |
| "loss/reg": 4.16010856628418, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.00055, |
| "grad_norm": 0.3105420470237732, |
| "grad_norm_var": 0.01561146008609899, |
| "learning_rate": 5e-05, |
| "loss": 0.172, |
| "loss/crossentropy": 2.7821205854415894, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17203472182154655, |
| "loss/reg": 4.155950546264648, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.00056, |
| "grad_norm": 0.3342844247817993, |
| "grad_norm_var": 0.015583353488029018, |
| "learning_rate": 5e-05, |
| "loss": 0.1675, |
| "loss/crossentropy": 2.783965766429901, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1675088219344616, |
| "loss/reg": 4.151437759399414, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.00057, |
| "grad_norm": 0.3392151892185211, |
| "grad_norm_var": 0.0026173613848745727, |
| "learning_rate": 5e-05, |
| "loss": 0.1675, |
| "loss/crossentropy": 2.782883048057556, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16754426062107086, |
| "loss/reg": 4.1469950675964355, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.00058, |
| "grad_norm": 0.46169230341911316, |
| "grad_norm_var": 0.004024211017059094, |
| "learning_rate": 5e-05, |
| "loss": 0.1828, |
| "loss/crossentropy": 2.6869139075279236, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18278859555721283, |
| "loss/reg": 4.142712116241455, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.00059, |
| "grad_norm": 0.35874953866004944, |
| "grad_norm_var": 0.00399056950783742, |
| "learning_rate": 5e-05, |
| "loss": 0.179, |
| "loss/crossentropy": 2.683705747127533, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17896704375743866, |
| "loss/reg": 4.138728141784668, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0006, |
| "grad_norm": 0.3390788435935974, |
| "grad_norm_var": 0.0037128700604173097, |
| "learning_rate": 5e-05, |
| "loss": 0.1824, |
| "loss/crossentropy": 2.6724974513053894, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18236950412392616, |
| "loss/reg": 4.1345534324646, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00061, |
| "grad_norm": 0.3341596722602844, |
| "grad_norm_var": 0.003246451116369023, |
| "learning_rate": 5e-05, |
| "loss": 0.1694, |
| "loss/crossentropy": 2.956072986125946, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16935936734080315, |
| "loss/reg": 4.130521774291992, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.00062, |
| "grad_norm": 0.33658263087272644, |
| "grad_norm_var": 0.0029283974011622186, |
| "learning_rate": 5e-05, |
| "loss": 0.1668, |
| "loss/crossentropy": 2.8409587144851685, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16678539663553238, |
| "loss/reg": 4.126163005828857, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.00063, |
| "grad_norm": 0.33723217248916626, |
| "grad_norm_var": 0.0024741312804299983, |
| "learning_rate": 5e-05, |
| "loss": 0.1856, |
| "loss/crossentropy": 2.7388935685157776, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18555545806884766, |
| "loss/reg": 4.121931552886963, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.00064, |
| "grad_norm": 0.34580445289611816, |
| "grad_norm_var": 0.0022020224702210757, |
| "learning_rate": 5e-05, |
| "loss": 0.1658, |
| "loss/crossentropy": 2.6729788780212402, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16578427329659462, |
| "loss/reg": 4.117753982543945, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.00065, |
| "grad_norm": 0.33867374062538147, |
| "grad_norm_var": 0.0015716415803633144, |
| "learning_rate": 5e-05, |
| "loss": 0.1643, |
| "loss/crossentropy": 2.8432253003120422, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16425132378935814, |
| "loss/reg": 4.113894939422607, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.00066, |
| "grad_norm": 0.42098623514175415, |
| "grad_norm_var": 0.001778022217079652, |
| "learning_rate": 5e-05, |
| "loss": 0.2155, |
| "loss/crossentropy": 2.6712504625320435, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21550852805376053, |
| "loss/reg": 4.10945463180542, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.00067, |
| "grad_norm": 0.35403043031692505, |
| "grad_norm_var": 0.0017418631675115888, |
| "learning_rate": 5e-05, |
| "loss": 0.1798, |
| "loss/crossentropy": 2.7415149211883545, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1797672137618065, |
| "loss/reg": 4.1049418449401855, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.00068, |
| "grad_norm": 0.34834232926368713, |
| "grad_norm_var": 0.0017045350753313, |
| "learning_rate": 5e-05, |
| "loss": 0.1783, |
| "loss/crossentropy": 2.6858341097831726, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17833665013313293, |
| "loss/reg": 4.100775718688965, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.00069, |
| "grad_norm": 0.3541049063205719, |
| "grad_norm_var": 0.0013731843169029498, |
| "learning_rate": 5e-05, |
| "loss": 0.1744, |
| "loss/crossentropy": 2.8710713982582092, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1744227409362793, |
| "loss/reg": 4.096506595611572, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0007, |
| "grad_norm": 0.3736323118209839, |
| "grad_norm_var": 0.0013660110363047928, |
| "learning_rate": 5e-05, |
| "loss": 0.1994, |
| "loss/crossentropy": 2.858128011226654, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19940509647130966, |
| "loss/reg": 4.091678142547607, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00071, |
| "grad_norm": 0.33025625348091125, |
| "grad_norm_var": 0.001272272953577754, |
| "learning_rate": 5e-05, |
| "loss": 0.1646, |
| "loss/crossentropy": 2.692229390144348, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16458340734243393, |
| "loss/reg": 4.087361812591553, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.00072, |
| "grad_norm": 0.6907688975334167, |
| "grad_norm_var": 0.00815051878013667, |
| "learning_rate": 5e-05, |
| "loss": 0.1757, |
| "loss/crossentropy": 2.886055052280426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1757429726421833, |
| "loss/reg": 4.08318567276001, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.00073, |
| "grad_norm": 0.3311053514480591, |
| "grad_norm_var": 0.008197602515626375, |
| "learning_rate": 5e-05, |
| "loss": 0.1682, |
| "loss/crossentropy": 2.704796850681305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1681583784520626, |
| "loss/reg": 4.079033374786377, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.00074, |
| "grad_norm": 0.3336332142353058, |
| "grad_norm_var": 0.0078012237613196535, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.6181225776672363, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16892167925834656, |
| "loss/reg": 4.074740409851074, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.00075, |
| "grad_norm": 0.33766406774520874, |
| "grad_norm_var": 0.007861895340318493, |
| "learning_rate": 5e-05, |
| "loss": 0.1712, |
| "loss/crossentropy": 2.756729245185852, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17122048512101173, |
| "loss/reg": 4.070303916931152, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.00076, |
| "grad_norm": 0.34048837423324585, |
| "grad_norm_var": 0.007856372064757134, |
| "learning_rate": 5e-05, |
| "loss": 0.1763, |
| "loss/crossentropy": 2.62674218416214, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17628077790141106, |
| "loss/reg": 4.065893650054932, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.00077, |
| "grad_norm": 0.3368911147117615, |
| "grad_norm_var": 0.007844070912018693, |
| "learning_rate": 5e-05, |
| "loss": 0.1789, |
| "loss/crossentropy": 2.838981509208679, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17892110347747803, |
| "loss/reg": 4.061193943023682, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.00078, |
| "grad_norm": 0.2983826696872711, |
| "grad_norm_var": 0.008102358070792626, |
| "learning_rate": 5e-05, |
| "loss": 0.151, |
| "loss/crossentropy": 2.8157095909118652, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15098581835627556, |
| "loss/reg": 4.05631685256958, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.00079, |
| "grad_norm": 0.34036847949028015, |
| "grad_norm_var": 0.008090524798600873, |
| "learning_rate": 5e-05, |
| "loss": 0.1772, |
| "loss/crossentropy": 2.742383122444153, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17718595638871193, |
| "loss/reg": 4.051788330078125, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0008, |
| "grad_norm": 0.3196929097175598, |
| "grad_norm_var": 0.008207612908988405, |
| "learning_rate": 5e-05, |
| "loss": 0.1574, |
| "loss/crossentropy": 2.64748877286911, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15740340948104858, |
| "loss/reg": 4.046438694000244, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00081, |
| "grad_norm": 0.3145473897457123, |
| "grad_norm_var": 0.008330494258097032, |
| "learning_rate": 5e-05, |
| "loss": 0.1591, |
| "loss/crossentropy": 2.7640033960342407, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15912048518657684, |
| "loss/reg": 4.041863441467285, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.00082, |
| "grad_norm": 0.37658828496932983, |
| "grad_norm_var": 0.008116681055328008, |
| "learning_rate": 5e-05, |
| "loss": 0.1783, |
| "loss/crossentropy": 2.8226330876350403, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17833809927105904, |
| "loss/reg": 4.0372796058654785, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.00083, |
| "grad_norm": 0.36421865224838257, |
| "grad_norm_var": 0.00811331907494814, |
| "learning_rate": 5e-05, |
| "loss": 0.1636, |
| "loss/crossentropy": 2.762717604637146, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16359057649970055, |
| "loss/reg": 4.032177925109863, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.00084, |
| "grad_norm": 0.3138120174407959, |
| "grad_norm_var": 0.00825034262581384, |
| "learning_rate": 5e-05, |
| "loss": 0.1606, |
| "loss/crossentropy": 2.625426709651947, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16061001271009445, |
| "loss/reg": 4.027446269989014, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.00085, |
| "grad_norm": 0.34441590309143066, |
| "grad_norm_var": 0.00826351514204321, |
| "learning_rate": 5e-05, |
| "loss": 0.1667, |
| "loss/crossentropy": 2.8294222950935364, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16673466563224792, |
| "loss/reg": 4.022748947143555, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.00086, |
| "grad_norm": 0.316683828830719, |
| "grad_norm_var": 0.00835627592765974, |
| "learning_rate": 5e-05, |
| "loss": 0.1564, |
| "loss/crossentropy": 2.8250383734703064, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1564498096704483, |
| "loss/reg": 4.017378330230713, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.00087, |
| "grad_norm": 0.3178180456161499, |
| "grad_norm_var": 0.008407967451986308, |
| "learning_rate": 5e-05, |
| "loss": 0.1589, |
| "loss/crossentropy": 2.831330358982086, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15890633687376976, |
| "loss/reg": 4.012408256530762, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.00088, |
| "grad_norm": 0.33865824341773987, |
| "grad_norm_var": 0.00038455914158520567, |
| "learning_rate": 5e-05, |
| "loss": 0.1665, |
| "loss/crossentropy": 2.8202422857284546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16647625714540482, |
| "loss/reg": 4.00655460357666, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.00089, |
| "grad_norm": 0.33375900983810425, |
| "grad_norm_var": 0.00038439593085719167, |
| "learning_rate": 5e-05, |
| "loss": 0.1655, |
| "loss/crossentropy": 2.748092472553253, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1655096672475338, |
| "loss/reg": 4.000852584838867, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0009, |
| "grad_norm": 0.41060250997543335, |
| "grad_norm_var": 0.000761403690223957, |
| "learning_rate": 5e-05, |
| "loss": 0.1679, |
| "loss/crossentropy": 2.8519994616508484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1679377369582653, |
| "loss/reg": 3.9966533184051514, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.00091, |
| "grad_norm": 0.3349744379520416, |
| "grad_norm_var": 0.0007618998964447029, |
| "learning_rate": 5e-05, |
| "loss": 0.1663, |
| "loss/crossentropy": 2.8302014470100403, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16629018262028694, |
| "loss/reg": 3.9916272163391113, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.00092, |
| "grad_norm": 0.40859073400497437, |
| "grad_norm_var": 0.0010778266384652254, |
| "learning_rate": 5e-05, |
| "loss": 0.1631, |
| "loss/crossentropy": 2.831357002258301, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16314184293150902, |
| "loss/reg": 3.9862587451934814, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.00093, |
| "grad_norm": 0.3679395616054535, |
| "grad_norm_var": 0.0011174436691973562, |
| "learning_rate": 5e-05, |
| "loss": 0.1749, |
| "loss/crossentropy": 2.653463125228882, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17491210997104645, |
| "loss/reg": 3.9809703826904297, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.00094, |
| "grad_norm": 0.33192068338394165, |
| "grad_norm_var": 0.000984578674839117, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.9128816723823547, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16890091821551323, |
| "loss/reg": 3.9768238067626953, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.00095, |
| "grad_norm": 0.33981162309646606, |
| "grad_norm_var": 0.000985009641976816, |
| "learning_rate": 5e-05, |
| "loss": 0.1651, |
| "loss/crossentropy": 2.8998738527297974, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1651129573583603, |
| "loss/reg": 3.9723405838012695, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.00096, |
| "grad_norm": 0.31845277547836304, |
| "grad_norm_var": 0.0009894353533322537, |
| "learning_rate": 5e-05, |
| "loss": 0.1566, |
| "loss/crossentropy": 2.738618314266205, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15662826597690582, |
| "loss/reg": 3.9680373668670654, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.00097, |
| "grad_norm": 0.3521839678287506, |
| "grad_norm_var": 0.0009211371554959176, |
| "learning_rate": 5e-05, |
| "loss": 0.1571, |
| "loss/crossentropy": 2.896687388420105, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15710216015577316, |
| "loss/reg": 3.964097499847412, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.00098, |
| "grad_norm": 0.41529935598373413, |
| "grad_norm_var": 0.0011615701056859014, |
| "learning_rate": 5e-05, |
| "loss": 0.1761, |
| "loss/crossentropy": 2.6711183190345764, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.176058791577816, |
| "loss/reg": 3.959585428237915, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.00099, |
| "grad_norm": 0.3406970202922821, |
| "grad_norm_var": 0.0011533483453351997, |
| "learning_rate": 5e-05, |
| "loss": 0.1755, |
| "loss/crossentropy": 2.762200713157654, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17553818225860596, |
| "loss/reg": 3.9551267623901367, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 0.3295409083366394, |
| "grad_norm_var": 0.0010948026927074712, |
| "learning_rate": 5e-05, |
| "loss": 0.1791, |
| "loss/crossentropy": 2.666721522808075, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17914289608597755, |
| "loss/reg": 3.9509167671203613, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00101, |
| "grad_norm": 0.3429720401763916, |
| "grad_norm_var": 0.001096024238407974, |
| "learning_rate": 5e-05, |
| "loss": 0.1793, |
| "loss/crossentropy": 2.82060843706131, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1792576014995575, |
| "loss/reg": 3.9469358921051025, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.00102, |
| "grad_norm": 0.3215195834636688, |
| "grad_norm_var": 0.0010760084324249537, |
| "learning_rate": 5e-05, |
| "loss": 0.1632, |
| "loss/crossentropy": 2.808405876159668, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16316882148385048, |
| "loss/reg": 3.943436622619629, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.00103, |
| "grad_norm": 0.33158427476882935, |
| "grad_norm_var": 0.0010282390377130302, |
| "learning_rate": 5e-05, |
| "loss": 0.1783, |
| "loss/crossentropy": 2.8497248888015747, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1783306896686554, |
| "loss/reg": 3.9394803047180176, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.00104, |
| "grad_norm": 0.3384368121623993, |
| "grad_norm_var": 0.001028611107856688, |
| "learning_rate": 5e-05, |
| "loss": 0.1773, |
| "loss/crossentropy": 2.8479551672935486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17731818184256554, |
| "loss/reg": 3.935678243637085, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.00105, |
| "grad_norm": 0.3275454342365265, |
| "grad_norm_var": 0.0010454262321925218, |
| "learning_rate": 5e-05, |
| "loss": 0.172, |
| "loss/crossentropy": 2.7240310311317444, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17204875499010086, |
| "loss/reg": 3.932224750518799, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.00106, |
| "grad_norm": 0.3352244198322296, |
| "grad_norm_var": 0.0007990449288615142, |
| "learning_rate": 5e-05, |
| "loss": 0.1687, |
| "loss/crossentropy": 2.657980978488922, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16869833320379257, |
| "loss/reg": 3.92889142036438, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.00107, |
| "grad_norm": 0.3195781409740448, |
| "grad_norm_var": 0.00083658300653268, |
| "learning_rate": 5e-05, |
| "loss": 0.1642, |
| "loss/crossentropy": 2.7351735830307007, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16421591117978096, |
| "loss/reg": 3.9260904788970947, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.00108, |
| "grad_norm": 0.3216703534126282, |
| "grad_norm_var": 0.0005727423089818255, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.835266649723053, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1611352562904358, |
| "loss/reg": 3.923356533050537, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.00109, |
| "grad_norm": 0.3534785807132721, |
| "grad_norm_var": 0.0005312635552543169, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.8821677565574646, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1689467802643776, |
| "loss/reg": 3.9208316802978516, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0011, |
| "grad_norm": 0.33851271867752075, |
| "grad_norm_var": 0.0005279815580263729, |
| "learning_rate": 5e-05, |
| "loss": 0.171, |
| "loss/crossentropy": 2.7201637029647827, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17095838487148285, |
| "loss/reg": 3.918743133544922, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00111, |
| "grad_norm": 0.32998839020729065, |
| "grad_norm_var": 0.0005331548233647158, |
| "learning_rate": 5e-05, |
| "loss": 0.166, |
| "loss/crossentropy": 2.6836928725242615, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16604754701256752, |
| "loss/reg": 3.914886951446533, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.00112, |
| "grad_norm": 0.420744925737381, |
| "grad_norm_var": 0.0009131281860373264, |
| "learning_rate": 5e-05, |
| "loss": 0.1738, |
| "loss/crossentropy": 2.568650722503662, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1737859919667244, |
| "loss/reg": 3.9106812477111816, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.00113, |
| "grad_norm": 0.3349835276603699, |
| "grad_norm_var": 0.000914996833659265, |
| "learning_rate": 5e-05, |
| "loss": 0.1522, |
| "loss/crossentropy": 2.7411792278289795, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15223057195544243, |
| "loss/reg": 3.9069032669067383, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.00114, |
| "grad_norm": 0.34276068210601807, |
| "grad_norm_var": 0.0005529241807124034, |
| "learning_rate": 5e-05, |
| "loss": 0.1567, |
| "loss/crossentropy": 2.80877947807312, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1567244492471218, |
| "loss/reg": 3.90332293510437, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.00115, |
| "grad_norm": 0.35375383496284485, |
| "grad_norm_var": 0.0005659636539689298, |
| "learning_rate": 5e-05, |
| "loss": 0.1657, |
| "loss/crossentropy": 2.698065936565399, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16574329882860184, |
| "loss/reg": 3.8998756408691406, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.00116, |
| "grad_norm": 0.33278602361679077, |
| "grad_norm_var": 0.0005620343134485931, |
| "learning_rate": 5e-05, |
| "loss": 0.1739, |
| "loss/crossentropy": 2.7814364433288574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17385346069931984, |
| "loss/reg": 3.8964290618896484, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.00117, |
| "grad_norm": 0.35139891505241394, |
| "grad_norm_var": 0.0005694228893132684, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.7721198201179504, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1701316274702549, |
| "loss/reg": 3.8925936222076416, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.00118, |
| "grad_norm": 0.3708522915840149, |
| "grad_norm_var": 0.0005942298534055627, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.8753750920295715, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17226434499025345, |
| "loss/reg": 3.888739824295044, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.00119, |
| "grad_norm": 0.32619452476501465, |
| "grad_norm_var": 0.0006049363247454272, |
| "learning_rate": 5e-05, |
| "loss": 0.1559, |
| "loss/crossentropy": 2.792622923851013, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15585486218333244, |
| "loss/reg": 3.8849120140075684, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0012, |
| "grad_norm": 0.3160404562950134, |
| "grad_norm_var": 0.0006517621123632485, |
| "learning_rate": 5e-05, |
| "loss": 0.1657, |
| "loss/crossentropy": 2.833389937877655, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16574294120073318, |
| "loss/reg": 3.8814921379089355, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00121, |
| "grad_norm": 2.6332755088806152, |
| "grad_norm_var": 0.328414929277446, |
| "learning_rate": 5e-05, |
| "loss": 0.2807, |
| "loss/crossentropy": 2.960978329181671, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.280683059245348, |
| "loss/reg": 3.8778162002563477, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.00122, |
| "grad_norm": 0.39280807971954346, |
| "grad_norm_var": 0.32746202761424736, |
| "learning_rate": 5e-05, |
| "loss": 0.1791, |
| "loss/crossentropy": 2.8656354546546936, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17905254289507866, |
| "loss/reg": 3.8742706775665283, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.00123, |
| "grad_norm": 0.36644095182418823, |
| "grad_norm_var": 0.3265348837601918, |
| "learning_rate": 5e-05, |
| "loss": 0.1765, |
| "loss/crossentropy": 2.776346266269684, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1764557734131813, |
| "loss/reg": 3.8701822757720947, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.00124, |
| "grad_norm": 0.39717525243759155, |
| "grad_norm_var": 0.3251678188828664, |
| "learning_rate": 5e-05, |
| "loss": 0.1796, |
| "loss/crossentropy": 2.9204375743865967, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1796155981719494, |
| "loss/reg": 3.866316556930542, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.00125, |
| "grad_norm": 0.366623193025589, |
| "grad_norm_var": 0.3249260727271075, |
| "learning_rate": 5e-05, |
| "loss": 0.1654, |
| "loss/crossentropy": 2.42034849524498, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.165392205119133, |
| "loss/reg": 3.8625807762145996, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.00126, |
| "grad_norm": 0.3638598918914795, |
| "grad_norm_var": 0.32442588175429127, |
| "learning_rate": 5e-05, |
| "loss": 0.1601, |
| "loss/crossentropy": 2.936553716659546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16014225035905838, |
| "loss/reg": 3.8585283756256104, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.00127, |
| "grad_norm": 0.3437521159648895, |
| "grad_norm_var": 0.3241257586372512, |
| "learning_rate": 5e-05, |
| "loss": 0.1603, |
| "loss/crossentropy": 2.8428520560264587, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16030794754624367, |
| "loss/reg": 3.854602813720703, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 0.3604683578014374, |
| "grad_norm_var": 0.3249965569466151, |
| "learning_rate": 5e-05, |
| "loss": 0.1688, |
| "loss/crossentropy": 2.717309355735779, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1687549129128456, |
| "loss/reg": 3.85067081451416, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.00129, |
| "grad_norm": 0.3499651849269867, |
| "grad_norm_var": 0.32468680185211135, |
| "learning_rate": 5e-05, |
| "loss": 0.1748, |
| "loss/crossentropy": 2.819560468196869, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17475899681448936, |
| "loss/reg": 3.8467037677764893, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0013, |
| "grad_norm": 0.3231496512889862, |
| "grad_norm_var": 0.32511678466571453, |
| "learning_rate": 5e-05, |
| "loss": 0.1695, |
| "loss/crossentropy": 2.5843223929405212, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16951489821076393, |
| "loss/reg": 3.843282699584961, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00131, |
| "grad_norm": 0.3588982820510864, |
| "grad_norm_var": 0.325020330590364, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.725651264190674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16896183416247368, |
| "loss/reg": 3.839895725250244, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.00132, |
| "grad_norm": 0.37743306159973145, |
| "grad_norm_var": 0.32416673149153025, |
| "learning_rate": 5e-05, |
| "loss": 0.1833, |
| "loss/crossentropy": 3.0410608053207397, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1833292953670025, |
| "loss/reg": 3.8355963230133057, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.00133, |
| "grad_norm": 0.32988330721855164, |
| "grad_norm_var": 0.32462166470000664, |
| "learning_rate": 5e-05, |
| "loss": 0.1654, |
| "loss/crossentropy": 2.7005507349967957, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1653790920972824, |
| "loss/reg": 3.831345558166504, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.00134, |
| "grad_norm": 0.35988613963127136, |
| "grad_norm_var": 0.32481589623167567, |
| "learning_rate": 5e-05, |
| "loss": 0.1792, |
| "loss/crossentropy": 2.7048683762550354, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17917973920702934, |
| "loss/reg": 3.8267781734466553, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.00135, |
| "grad_norm": 0.32649827003479004, |
| "grad_norm_var": 0.324808949416691, |
| "learning_rate": 5e-05, |
| "loss": 0.1642, |
| "loss/crossentropy": 2.791461765766144, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16420895606279373, |
| "loss/reg": 3.8223133087158203, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.00136, |
| "grad_norm": 0.6779212355613708, |
| "grad_norm_var": 0.32421967313153754, |
| "learning_rate": 5e-05, |
| "loss": 0.2361, |
| "loss/crossentropy": 3.063343107700348, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2360655590891838, |
| "loss/reg": 3.818582057952881, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.00137, |
| "grad_norm": 0.4217770993709564, |
| "grad_norm_var": 0.0069040846383882, |
| "learning_rate": 5e-05, |
| "loss": 0.1936, |
| "loss/crossentropy": 2.8291149735450745, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19361505657434464, |
| "loss/reg": 3.814713716506958, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.00138, |
| "grad_norm": 0.3183574378490448, |
| "grad_norm_var": 0.0071460434004817905, |
| "learning_rate": 5e-05, |
| "loss": 0.1596, |
| "loss/crossentropy": 2.733646512031555, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15959006920456886, |
| "loss/reg": 3.8112361431121826, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.00139, |
| "grad_norm": 0.35119444131851196, |
| "grad_norm_var": 0.007183318962822194, |
| "learning_rate": 5e-05, |
| "loss": 0.1706, |
| "loss/crossentropy": 2.777931809425354, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17056189104914665, |
| "loss/reg": 3.807130813598633, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0014, |
| "grad_norm": 0.3381962478160858, |
| "grad_norm_var": 0.007239536480815012, |
| "learning_rate": 5e-05, |
| "loss": 0.1651, |
| "loss/crossentropy": 2.865752935409546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16511252894997597, |
| "loss/reg": 3.8030734062194824, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00141, |
| "grad_norm": 0.35082533955574036, |
| "grad_norm_var": 0.007268548808216302, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.734546184539795, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16080284118652344, |
| "loss/reg": 3.7996251583099365, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.00142, |
| "grad_norm": 0.4269000291824341, |
| "grad_norm_var": 0.007448472313405929, |
| "learning_rate": 5e-05, |
| "loss": 0.1806, |
| "loss/crossentropy": 2.9227113127708435, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1805506870150566, |
| "loss/reg": 3.7955057621002197, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.00143, |
| "grad_norm": 0.3532395660877228, |
| "grad_norm_var": 0.0074133753520221855, |
| "learning_rate": 5e-05, |
| "loss": 0.1588, |
| "loss/crossentropy": 2.9407125115394592, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15880529955029488, |
| "loss/reg": 3.791508197784424, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.00144, |
| "grad_norm": 0.3449239134788513, |
| "grad_norm_var": 0.007461781173789813, |
| "learning_rate": 5e-05, |
| "loss": 0.1652, |
| "loss/crossentropy": 2.8305121660232544, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1652398444712162, |
| "loss/reg": 3.7871744632720947, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.00145, |
| "grad_norm": 0.3272966742515564, |
| "grad_norm_var": 0.007571273873210712, |
| "learning_rate": 5e-05, |
| "loss": 0.1714, |
| "loss/crossentropy": 2.876939594745636, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17143940553069115, |
| "loss/reg": 3.7832887172698975, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.00146, |
| "grad_norm": 0.31960922479629517, |
| "grad_norm_var": 0.007596131782178968, |
| "learning_rate": 5e-05, |
| "loss": 0.1558, |
| "loss/crossentropy": 2.7597694993019104, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15579523891210556, |
| "loss/reg": 3.77976393699646, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.00147, |
| "grad_norm": 0.3329758048057556, |
| "grad_norm_var": 0.007690076208493398, |
| "learning_rate": 5e-05, |
| "loss": 0.1602, |
| "loss/crossentropy": 2.823091506958008, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16016652062535286, |
| "loss/reg": 3.776364326477051, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.00148, |
| "grad_norm": 0.3245135545730591, |
| "grad_norm_var": 0.007828939248271782, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.622242748737335, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16081608831882477, |
| "loss/reg": 3.7724997997283936, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.00149, |
| "grad_norm": 0.3239537179470062, |
| "grad_norm_var": 0.007862062788276463, |
| "learning_rate": 5e-05, |
| "loss": 0.1559, |
| "loss/crossentropy": 2.826173484325409, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15591008588671684, |
| "loss/reg": 3.7680001258850098, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 0.3199516534805298, |
| "grad_norm_var": 0.00800828926831548, |
| "learning_rate": 5e-05, |
| "loss": 0.1705, |
| "loss/crossentropy": 2.73406845331192, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17048393934965134, |
| "loss/reg": 3.7640268802642822, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.00151, |
| "grad_norm": 0.3810157775878906, |
| "grad_norm_var": 0.00790594146931481, |
| "learning_rate": 5e-05, |
| "loss": 0.1772, |
| "loss/crossentropy": 2.746786952018738, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17715823650360107, |
| "loss/reg": 3.760627508163452, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.00152, |
| "grad_norm": 0.33840498328208923, |
| "grad_norm_var": 0.0011503711202599262, |
| "learning_rate": 5e-05, |
| "loss": 0.168, |
| "loss/crossentropy": 2.7671576738357544, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1679898537695408, |
| "loss/reg": 3.7571685314178467, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.00153, |
| "grad_norm": 0.35103219747543335, |
| "grad_norm_var": 0.0007702874374444798, |
| "learning_rate": 5e-05, |
| "loss": 0.1683, |
| "loss/crossentropy": 2.8394588828086853, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1682782731950283, |
| "loss/reg": 3.7533957958221436, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.00154, |
| "grad_norm": 0.34948527812957764, |
| "grad_norm_var": 0.000724837481797543, |
| "learning_rate": 5e-05, |
| "loss": 0.1551, |
| "loss/crossentropy": 2.637475073337555, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1551469974219799, |
| "loss/reg": 3.7496984004974365, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.00155, |
| "grad_norm": 0.32411250472068787, |
| "grad_norm_var": 0.000751360146424022, |
| "learning_rate": 5e-05, |
| "loss": 0.1655, |
| "loss/crossentropy": 2.65782767534256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16551653295755386, |
| "loss/reg": 3.7462174892425537, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.00156, |
| "grad_norm": 0.3659244775772095, |
| "grad_norm_var": 0.0007773935392291246, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.8054139614105225, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16182733327150345, |
| "loss/reg": 3.7422730922698975, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.00157, |
| "grad_norm": 0.3639696538448334, |
| "grad_norm_var": 0.0007968496539047743, |
| "learning_rate": 5e-05, |
| "loss": 0.172, |
| "loss/crossentropy": 2.643721103668213, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17196981981396675, |
| "loss/reg": 3.7390189170837402, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.00158, |
| "grad_norm": 0.372111439704895, |
| "grad_norm_var": 0.0003986384080602812, |
| "learning_rate": 5e-05, |
| "loss": 0.1752, |
| "loss/crossentropy": 2.6860750317573547, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17522458359599113, |
| "loss/reg": 3.7351748943328857, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.00159, |
| "grad_norm": 0.3412966728210449, |
| "grad_norm_var": 0.0003916975034196302, |
| "learning_rate": 5e-05, |
| "loss": 0.1732, |
| "loss/crossentropy": 2.7506829500198364, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17320549115538597, |
| "loss/reg": 3.731645345687866, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0016, |
| "grad_norm": 0.31508323550224304, |
| "grad_norm_var": 0.0004378510847698321, |
| "learning_rate": 5e-05, |
| "loss": 0.1676, |
| "loss/crossentropy": 2.672293782234192, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16758090257644653, |
| "loss/reg": 3.727598190307617, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.00161, |
| "grad_norm": 0.39773106575012207, |
| "grad_norm_var": 0.0006223116385708161, |
| "learning_rate": 5e-05, |
| "loss": 0.1867, |
| "loss/crossentropy": 2.975751519203186, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18666821345686913, |
| "loss/reg": 3.7237842082977295, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.00162, |
| "grad_norm": 0.3057797849178314, |
| "grad_norm_var": 0.0006812186499233134, |
| "learning_rate": 5e-05, |
| "loss": 0.1511, |
| "loss/crossentropy": 2.768982172012329, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15112394466996193, |
| "loss/reg": 3.7201473712921143, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.00163, |
| "grad_norm": 0.39109617471694946, |
| "grad_norm_var": 0.0008052929738533592, |
| "learning_rate": 5e-05, |
| "loss": 0.1692, |
| "loss/crossentropy": 2.7556854486465454, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1692204400897026, |
| "loss/reg": 3.715847969055176, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.00164, |
| "grad_norm": 0.3230038285255432, |
| "grad_norm_var": 0.0008101312463145642, |
| "learning_rate": 5e-05, |
| "loss": 0.158, |
| "loss/crossentropy": 2.663906216621399, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1579984687268734, |
| "loss/reg": 3.712200403213501, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.00165, |
| "grad_norm": 0.32820436358451843, |
| "grad_norm_var": 0.0007977755717131292, |
| "learning_rate": 5e-05, |
| "loss": 0.1535, |
| "loss/crossentropy": 2.7556238174438477, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15348907560110092, |
| "loss/reg": 3.7093729972839355, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.00166, |
| "grad_norm": 0.37247714400291443, |
| "grad_norm_var": 0.0007736858685811421, |
| "learning_rate": 5e-05, |
| "loss": 0.168, |
| "loss/crossentropy": 2.623964309692383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16797634214162827, |
| "loss/reg": 3.7055835723876953, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.00167, |
| "grad_norm": 0.31921809911727905, |
| "grad_norm_var": 0.0007674848471050747, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.6233983039855957, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16180693730711937, |
| "loss/reg": 3.7018704414367676, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.00168, |
| "grad_norm": 0.41518375277519226, |
| "grad_norm_var": 0.0010434978692974088, |
| "learning_rate": 5e-05, |
| "loss": 0.1842, |
| "loss/crossentropy": 2.794585347175598, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18423354998230934, |
| "loss/reg": 3.6984987258911133, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.00169, |
| "grad_norm": 0.3530808985233307, |
| "grad_norm_var": 0.0010434324942960296, |
| "learning_rate": 5e-05, |
| "loss": 0.1818, |
| "loss/crossentropy": 2.725895941257477, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18175217881798744, |
| "loss/reg": 3.6949737071990967, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0017, |
| "grad_norm": 0.35729339718818665, |
| "grad_norm_var": 0.001044250197534243, |
| "learning_rate": 5e-05, |
| "loss": 0.1758, |
| "loss/crossentropy": 2.8144423365592957, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1758369542658329, |
| "loss/reg": 3.6909079551696777, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.00171, |
| "grad_norm": 0.3258056044578552, |
| "grad_norm_var": 0.0010379424391956011, |
| "learning_rate": 5e-05, |
| "loss": 0.1615, |
| "loss/crossentropy": 2.6860609650611877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1615053378045559, |
| "loss/reg": 3.6872336864471436, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.00172, |
| "grad_norm": 0.3320024907588959, |
| "grad_norm_var": 0.0010511954351829684, |
| "learning_rate": 5e-05, |
| "loss": 0.1669, |
| "loss/crossentropy": 2.7618680596351624, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16686224937438965, |
| "loss/reg": 3.684033155441284, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.00173, |
| "grad_norm": 0.32370057702064514, |
| "grad_norm_var": 0.001082015111668518, |
| "learning_rate": 5e-05, |
| "loss": 0.1568, |
| "loss/crossentropy": 2.8911356329917908, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15675026923418045, |
| "loss/reg": 3.6797101497650146, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.00174, |
| "grad_norm": 0.3590388298034668, |
| "grad_norm_var": 0.0010512214971074684, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.894763946533203, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16077794507145882, |
| "loss/reg": 3.676694393157959, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.00175, |
| "grad_norm": 0.362693190574646, |
| "grad_norm_var": 0.0010621381304175893, |
| "learning_rate": 5e-05, |
| "loss": 0.181, |
| "loss/crossentropy": 2.9355967044830322, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18095535412430763, |
| "loss/reg": 3.6728715896606445, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.00176, |
| "grad_norm": 0.3421201705932617, |
| "grad_norm_var": 0.0009861454944628978, |
| "learning_rate": 5e-05, |
| "loss": 0.1752, |
| "loss/crossentropy": 2.771928548812866, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17522436380386353, |
| "loss/reg": 3.6698157787323, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.00177, |
| "grad_norm": 0.3921768069267273, |
| "grad_norm_var": 0.0009531156716223066, |
| "learning_rate": 5e-05, |
| "loss": 0.1682, |
| "loss/crossentropy": 2.9020140171051025, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16816864535212517, |
| "loss/reg": 3.6669130325317383, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.00178, |
| "grad_norm": 0.414460688829422, |
| "grad_norm_var": 0.0010479472090343092, |
| "learning_rate": 5e-05, |
| "loss": 0.1652, |
| "loss/crossentropy": 2.871070384979248, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1651761755347252, |
| "loss/reg": 3.6637353897094727, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.00179, |
| "grad_norm": 0.37821123003959656, |
| "grad_norm_var": 0.0009996989224075473, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.8318552374839783, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16177014261484146, |
| "loss/reg": 3.6601526737213135, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0018, |
| "grad_norm": 0.33756861090660095, |
| "grad_norm_var": 0.0009485554235717804, |
| "learning_rate": 5e-05, |
| "loss": 0.164, |
| "loss/crossentropy": 2.7179840803146362, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16402991488575935, |
| "loss/reg": 3.655977725982666, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.00181, |
| "grad_norm": 0.3508152663707733, |
| "grad_norm_var": 0.0008934631549546879, |
| "learning_rate": 5e-05, |
| "loss": 0.1824, |
| "loss/crossentropy": 2.655538856983185, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18240001425147057, |
| "loss/reg": 3.6522390842437744, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.00182, |
| "grad_norm": 0.4800889194011688, |
| "grad_norm_var": 0.0018179163356779901, |
| "learning_rate": 5e-05, |
| "loss": 0.1773, |
| "loss/crossentropy": 2.9170504808425903, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17730093747377396, |
| "loss/reg": 3.648420810699463, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.00183, |
| "grad_norm": 0.32715606689453125, |
| "grad_norm_var": 0.0017731703957083382, |
| "learning_rate": 5e-05, |
| "loss": 0.1599, |
| "loss/crossentropy": 2.6978230476379395, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15988203510642052, |
| "loss/reg": 3.644439458847046, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.00184, |
| "grad_norm": 0.3219493329524994, |
| "grad_norm_var": 0.0017014689354580615, |
| "learning_rate": 5e-05, |
| "loss": 0.1588, |
| "loss/crossentropy": 2.772395610809326, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.158803328871727, |
| "loss/reg": 3.6410605907440186, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.00185, |
| "grad_norm": 0.3204100728034973, |
| "grad_norm_var": 0.0017978203455529696, |
| "learning_rate": 5e-05, |
| "loss": 0.1595, |
| "loss/crossentropy": 2.7290788292884827, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15948805212974548, |
| "loss/reg": 3.637272596359253, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.00186, |
| "grad_norm": 0.34646865725517273, |
| "grad_norm_var": 0.0018059373173852718, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.68435937166214, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17226089164614677, |
| "loss/reg": 3.6334545612335205, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.00187, |
| "grad_norm": 0.35515356063842773, |
| "grad_norm_var": 0.001737051018651666, |
| "learning_rate": 5e-05, |
| "loss": 0.1656, |
| "loss/crossentropy": 2.8159299492836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16557194665074348, |
| "loss/reg": 3.629215717315674, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.00188, |
| "grad_norm": 0.31605055928230286, |
| "grad_norm_var": 0.0018103786054489293, |
| "learning_rate": 5e-05, |
| "loss": 0.1587, |
| "loss/crossentropy": 2.737620174884796, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15867746248841286, |
| "loss/reg": 3.6252663135528564, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.00189, |
| "grad_norm": 0.3383916914463043, |
| "grad_norm_var": 0.0017566740185558556, |
| "learning_rate": 5e-05, |
| "loss": 0.1621, |
| "loss/crossentropy": 2.7829577326774597, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.162098228931427, |
| "loss/reg": 3.621067523956299, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.0019, |
| "grad_norm": 0.4556836783885956, |
| "grad_norm_var": 0.0023419423247556044, |
| "learning_rate": 5e-05, |
| "loss": 0.1687, |
| "loss/crossentropy": 2.9624626636505127, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1686898171901703, |
| "loss/reg": 3.6163265705108643, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.00191, |
| "grad_norm": 0.3975931406021118, |
| "grad_norm_var": 0.0024075083289669527, |
| "learning_rate": 5e-05, |
| "loss": 0.155, |
| "loss/crossentropy": 2.731001079082489, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15501929074525833, |
| "loss/reg": 3.6121585369110107, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 0.37328633666038513, |
| "grad_norm_var": 0.002364231645687964, |
| "learning_rate": 5e-05, |
| "loss": 0.1683, |
| "loss/crossentropy": 2.754942238330841, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16829831898212433, |
| "loss/reg": 3.6073873043060303, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.00193, |
| "grad_norm": 0.3342723250389099, |
| "grad_norm_var": 0.0023955576435807737, |
| "learning_rate": 5e-05, |
| "loss": 0.1663, |
| "loss/crossentropy": 2.7424720525741577, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16633369401097298, |
| "loss/reg": 3.6036906242370605, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.00194, |
| "grad_norm": 0.38286155462265015, |
| "grad_norm_var": 0.002251566346172081, |
| "learning_rate": 5e-05, |
| "loss": 0.1652, |
| "loss/crossentropy": 2.9778133630752563, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16522743180394173, |
| "loss/reg": 3.600316286087036, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.00195, |
| "grad_norm": 0.36051952838897705, |
| "grad_norm_var": 0.0022364206403587715, |
| "learning_rate": 5e-05, |
| "loss": 0.1772, |
| "loss/crossentropy": 2.6842609643936157, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1771794743835926, |
| "loss/reg": 3.596491813659668, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.00196, |
| "grad_norm": 0.3526027202606201, |
| "grad_norm_var": 0.0022007878333510996, |
| "learning_rate": 5e-05, |
| "loss": 0.1561, |
| "loss/crossentropy": 2.7837477922439575, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15605639293789864, |
| "loss/reg": 3.5926032066345215, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.00197, |
| "grad_norm": 0.35895583033561707, |
| "grad_norm_var": 0.002191344445433652, |
| "learning_rate": 5e-05, |
| "loss": 0.1801, |
| "loss/crossentropy": 2.85478812456131, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1800978109240532, |
| "loss/reg": 3.589280843734741, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.00198, |
| "grad_norm": 0.3372839689254761, |
| "grad_norm_var": 0.0012524713862786308, |
| "learning_rate": 5e-05, |
| "loss": 0.1571, |
| "loss/crossentropy": 2.805725872516632, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15710647776722908, |
| "loss/reg": 3.5851662158966064, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.00199, |
| "grad_norm": 0.33652499318122864, |
| "grad_norm_var": 0.0012232813247675149, |
| "learning_rate": 5e-05, |
| "loss": 0.1652, |
| "loss/crossentropy": 2.657254457473755, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1651643067598343, |
| "loss/reg": 3.581798553466797, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 0.36757001280784607, |
| "grad_norm_var": 0.001149275638629573, |
| "learning_rate": 5e-05, |
| "loss": 0.1756, |
| "loss/crossentropy": 2.7496553659439087, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17555997148156166, |
| "loss/reg": 3.577878475189209, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.00201, |
| "grad_norm": 0.4317435324192047, |
| "grad_norm_var": 0.0013607474972908151, |
| "learning_rate": 5e-05, |
| "loss": 0.1643, |
| "loss/crossentropy": 3.168861448764801, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.164311021566391, |
| "loss/reg": 3.5741024017333984, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.00202, |
| "grad_norm": 0.3569833040237427, |
| "grad_norm_var": 0.0013412425012825579, |
| "learning_rate": 5e-05, |
| "loss": 0.1778, |
| "loss/crossentropy": 2.7941558957099915, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17778108268976212, |
| "loss/reg": 3.5706787109375, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.00203, |
| "grad_norm": 0.31648150086402893, |
| "grad_norm_var": 0.0014904716039333447, |
| "learning_rate": 5e-05, |
| "loss": 0.156, |
| "loss/crossentropy": 2.872058689594269, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1559964008629322, |
| "loss/reg": 3.5671305656433105, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.00204, |
| "grad_norm": 0.32686129212379456, |
| "grad_norm_var": 0.0014293085106024154, |
| "learning_rate": 5e-05, |
| "loss": 0.1593, |
| "loss/crossentropy": 2.7316592931747437, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15925980731844902, |
| "loss/reg": 3.5632758140563965, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.00205, |
| "grad_norm": 0.3191937506198883, |
| "grad_norm_var": 0.001518472211395964, |
| "learning_rate": 5e-05, |
| "loss": 0.1527, |
| "loss/crossentropy": 2.7802085876464844, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15268265083432198, |
| "loss/reg": 3.559633493423462, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.00206, |
| "grad_norm": 0.34924882650375366, |
| "grad_norm_var": 0.0009115629505157467, |
| "learning_rate": 5e-05, |
| "loss": 0.1773, |
| "loss/crossentropy": 2.792604923248291, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17729893326759338, |
| "loss/reg": 3.555882453918457, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.00207, |
| "grad_norm": 0.38204553723335266, |
| "grad_norm_var": 0.0008412229229646054, |
| "learning_rate": 5e-05, |
| "loss": 0.1735, |
| "loss/crossentropy": 2.729912281036377, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17347190529108047, |
| "loss/reg": 3.551867723464966, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.00208, |
| "grad_norm": 0.316631555557251, |
| "grad_norm_var": 0.0009067368526577339, |
| "learning_rate": 5e-05, |
| "loss": 0.1521, |
| "loss/crossentropy": 2.6910020112991333, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15211007744073868, |
| "loss/reg": 3.547140598297119, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.00209, |
| "grad_norm": 0.3024788200855255, |
| "grad_norm_var": 0.0010444754089082963, |
| "learning_rate": 5e-05, |
| "loss": 0.1534, |
| "loss/crossentropy": 2.6174367666244507, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15340904891490936, |
| "loss/reg": 3.5430798530578613, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.0021, |
| "grad_norm": 0.31879743933677673, |
| "grad_norm_var": 0.0010192142441715734, |
| "learning_rate": 5e-05, |
| "loss": 0.1644, |
| "loss/crossentropy": 2.6434658765792847, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.164449330419302, |
| "loss/reg": 3.539293050765991, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.00211, |
| "grad_norm": 0.37038934230804443, |
| "grad_norm_var": 0.0010445807718520773, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.7187950015068054, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16179471090435982, |
| "loss/reg": 3.5359723567962646, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.00212, |
| "grad_norm": 0.3256055414676666, |
| "grad_norm_var": 0.0010681195543044476, |
| "learning_rate": 5e-05, |
| "loss": 0.1634, |
| "loss/crossentropy": 2.6802476048469543, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16339639574289322, |
| "loss/reg": 3.5320651531219482, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.00213, |
| "grad_norm": 0.363210529088974, |
| "grad_norm_var": 0.0010772816324646883, |
| "learning_rate": 5e-05, |
| "loss": 0.1682, |
| "loss/crossentropy": 2.925456941127777, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16816257312893867, |
| "loss/reg": 3.527592420578003, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.00214, |
| "grad_norm": 0.3341169059276581, |
| "grad_norm_var": 0.0010811945233913268, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.8775156140327454, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1689641959965229, |
| "loss/reg": 3.5241305828094482, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.00215, |
| "grad_norm": 0.7971848249435425, |
| "grad_norm_var": 0.013831743286372744, |
| "learning_rate": 5e-05, |
| "loss": 0.1898, |
| "loss/crossentropy": 2.769020676612854, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18977811932563782, |
| "loss/reg": 3.5197558403015137, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.00216, |
| "grad_norm": 0.3044687807559967, |
| "grad_norm_var": 0.014131832632900828, |
| "learning_rate": 5e-05, |
| "loss": 0.1467, |
| "loss/crossentropy": 2.792181670665741, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1466773971915245, |
| "loss/reg": 3.516072988510132, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.00217, |
| "grad_norm": 0.3434732258319855, |
| "grad_norm_var": 0.013888774653188173, |
| "learning_rate": 5e-05, |
| "loss": 0.1698, |
| "loss/crossentropy": 2.577077627182007, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16977669671177864, |
| "loss/reg": 3.512517213821411, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.00218, |
| "grad_norm": 0.37019920349121094, |
| "grad_norm_var": 0.013886977393692842, |
| "learning_rate": 5e-05, |
| "loss": 0.1943, |
| "loss/crossentropy": 2.722847878932953, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19428952783346176, |
| "loss/reg": 3.5091969966888428, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.00219, |
| "grad_norm": 0.31637635827064514, |
| "grad_norm_var": 0.013887658605223226, |
| "learning_rate": 5e-05, |
| "loss": 0.1547, |
| "loss/crossentropy": 2.787532150745392, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1546883024275303, |
| "loss/reg": 3.5059781074523926, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0022, |
| "grad_norm": 0.368344783782959, |
| "grad_norm_var": 0.013784165910995568, |
| "learning_rate": 5e-05, |
| "loss": 0.1773, |
| "loss/crossentropy": 2.7095659971237183, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1773015893995762, |
| "loss/reg": 3.502683162689209, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.00221, |
| "grad_norm": 0.3447912037372589, |
| "grad_norm_var": 0.013659872247631084, |
| "learning_rate": 5e-05, |
| "loss": 0.1688, |
| "loss/crossentropy": 2.7072474360466003, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1687602400779724, |
| "loss/reg": 3.4986109733581543, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.00222, |
| "grad_norm": 0.3812227249145508, |
| "grad_norm_var": 0.013638668912457892, |
| "learning_rate": 5e-05, |
| "loss": 0.1811, |
| "loss/crossentropy": 2.8128660917282104, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18113631010055542, |
| "loss/reg": 3.4947147369384766, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.00223, |
| "grad_norm": 0.339374303817749, |
| "grad_norm_var": 0.013690814024359154, |
| "learning_rate": 5e-05, |
| "loss": 0.1716, |
| "loss/crossentropy": 2.885101020336151, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17156245186924934, |
| "loss/reg": 3.4905753135681152, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.00224, |
| "grad_norm": 0.3169143497943878, |
| "grad_norm_var": 0.013688861707923295, |
| "learning_rate": 5e-05, |
| "loss": 0.1589, |
| "loss/crossentropy": 2.6434147357940674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15887855738401413, |
| "loss/reg": 3.486919641494751, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.00225, |
| "grad_norm": 0.4436502456665039, |
| "grad_norm_var": 0.013690624557478688, |
| "learning_rate": 5e-05, |
| "loss": 0.2037, |
| "loss/crossentropy": 2.9042821526527405, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20374128222465515, |
| "loss/reg": 3.483499765396118, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.00226, |
| "grad_norm": 0.44937804341316223, |
| "grad_norm_var": 0.01373632101878638, |
| "learning_rate": 5e-05, |
| "loss": 0.1588, |
| "loss/crossentropy": 2.79194039106369, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1587841510772705, |
| "loss/reg": 3.480142593383789, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.00227, |
| "grad_norm": 0.3453376889228821, |
| "grad_norm_var": 0.013826164241530992, |
| "learning_rate": 5e-05, |
| "loss": 0.1659, |
| "loss/crossentropy": 2.7488330006599426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16589100658893585, |
| "loss/reg": 3.476062297821045, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.00228, |
| "grad_norm": 0.3845584988594055, |
| "grad_norm_var": 0.013584549048495138, |
| "learning_rate": 5e-05, |
| "loss": 0.1842, |
| "loss/crossentropy": 2.6935607194900513, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18416164070367813, |
| "loss/reg": 3.4726946353912354, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.00229, |
| "grad_norm": 0.3347846567630768, |
| "grad_norm_var": 0.013727727146734722, |
| "learning_rate": 5e-05, |
| "loss": 0.1767, |
| "loss/crossentropy": 2.6182947754859924, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1767422929406166, |
| "loss/reg": 3.469238519668579, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0023, |
| "grad_norm": 0.35126739740371704, |
| "grad_norm_var": 0.01362772883112919, |
| "learning_rate": 5e-05, |
| "loss": 0.1694, |
| "loss/crossentropy": 2.8005401492118835, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1694028675556183, |
| "loss/reg": 3.4662599563598633, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.00231, |
| "grad_norm": 0.37644773721694946, |
| "grad_norm_var": 0.0016784352808341082, |
| "learning_rate": 5e-05, |
| "loss": 0.1677, |
| "loss/crossentropy": 2.7537949085235596, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16772692278027534, |
| "loss/reg": 3.4623701572418213, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.00232, |
| "grad_norm": 0.33086928725242615, |
| "grad_norm_var": 0.0015241936410912834, |
| "learning_rate": 5e-05, |
| "loss": 0.1624, |
| "loss/crossentropy": 2.7844293117523193, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1624348722398281, |
| "loss/reg": 3.459073066711426, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.00233, |
| "grad_norm": 0.3152429461479187, |
| "grad_norm_var": 0.0016449122438399724, |
| "learning_rate": 5e-05, |
| "loss": 0.1607, |
| "loss/crossentropy": 2.5863555669784546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16065017879009247, |
| "loss/reg": 3.456038475036621, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00234, |
| "grad_norm": 0.34679386019706726, |
| "grad_norm_var": 0.001649030072333372, |
| "learning_rate": 5e-05, |
| "loss": 0.1656, |
| "loss/crossentropy": 2.9068891406059265, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16555847227573395, |
| "loss/reg": 3.452618360519409, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.00235, |
| "grad_norm": 0.36684513092041016, |
| "grad_norm_var": 0.001520832425550959, |
| "learning_rate": 5e-05, |
| "loss": 0.1878, |
| "loss/crossentropy": 2.6781840920448303, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18775511160492897, |
| "loss/reg": 3.4493637084960938, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.00236, |
| "grad_norm": 0.39043235778808594, |
| "grad_norm_var": 0.0015693055369300879, |
| "learning_rate": 5e-05, |
| "loss": 0.1559, |
| "loss/crossentropy": 2.9237093925476074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15592358261346817, |
| "loss/reg": 3.446392059326172, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.00237, |
| "grad_norm": 0.3486286997795105, |
| "grad_norm_var": 0.0015605921838873513, |
| "learning_rate": 5e-05, |
| "loss": 0.1524, |
| "loss/crossentropy": 2.8276549577713013, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1523873247206211, |
| "loss/reg": 3.443490505218506, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.00238, |
| "grad_norm": 0.4030380845069885, |
| "grad_norm_var": 0.0016408419596595262, |
| "learning_rate": 5e-05, |
| "loss": 0.1839, |
| "loss/crossentropy": 2.7374503016471863, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1839219257235527, |
| "loss/reg": 3.440230131149292, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.00239, |
| "grad_norm": 0.3677695095539093, |
| "grad_norm_var": 0.0015933721835237928, |
| "learning_rate": 5e-05, |
| "loss": 0.1725, |
| "loss/crossentropy": 2.637487053871155, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1725292131304741, |
| "loss/reg": 3.4369444847106934, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.0024, |
| "grad_norm": 0.3092736303806305, |
| "grad_norm_var": 0.001648043714460871, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.785566747188568, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16075557842850685, |
| "loss/reg": 3.4329705238342285, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.00241, |
| "grad_norm": 0.3242727518081665, |
| "grad_norm_var": 0.001311046071157899, |
| "learning_rate": 5e-05, |
| "loss": 0.1641, |
| "loss/crossentropy": 2.7823829650878906, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16410458087921143, |
| "loss/reg": 3.429222345352173, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.00242, |
| "grad_norm": 0.3544396758079529, |
| "grad_norm_var": 0.0007310749754719385, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.7899482250213623, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1741911694407463, |
| "loss/reg": 3.4251747131347656, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.00243, |
| "grad_norm": 0.3156209886074066, |
| "grad_norm_var": 0.0008171231835736463, |
| "learning_rate": 5e-05, |
| "loss": 0.159, |
| "loss/crossentropy": 2.7414376735687256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15898872911930084, |
| "loss/reg": 3.421576976776123, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.00244, |
| "grad_norm": 0.3353999853134155, |
| "grad_norm_var": 0.000749955482525048, |
| "learning_rate": 5e-05, |
| "loss": 0.1669, |
| "loss/crossentropy": 2.707472503185272, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16693224385380745, |
| "loss/reg": 3.417820930480957, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.00245, |
| "grad_norm": 0.32766133546829224, |
| "grad_norm_var": 0.0007658640613261528, |
| "learning_rate": 5e-05, |
| "loss": 0.1761, |
| "loss/crossentropy": 2.6950490474700928, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17608999833464622, |
| "loss/reg": 3.414095640182495, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.00246, |
| "grad_norm": 0.31360548734664917, |
| "grad_norm_var": 0.0008368534177580581, |
| "learning_rate": 5e-05, |
| "loss": 0.1578, |
| "loss/crossentropy": 2.6977627873420715, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15783175826072693, |
| "loss/reg": 3.409533739089966, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.00247, |
| "grad_norm": 0.35324403643608093, |
| "grad_norm_var": 0.0007744365123312817, |
| "learning_rate": 5e-05, |
| "loss": 0.1688, |
| "loss/crossentropy": 2.8509859442710876, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16875524446368217, |
| "loss/reg": 3.4052798748016357, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.00248, |
| "grad_norm": 0.41796907782554626, |
| "grad_norm_var": 0.0010967197155327421, |
| "learning_rate": 5e-05, |
| "loss": 0.18, |
| "loss/crossentropy": 2.701251804828644, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1800428181886673, |
| "loss/reg": 3.4006803035736084, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.00249, |
| "grad_norm": 0.33844876289367676, |
| "grad_norm_var": 0.0010247223552569313, |
| "learning_rate": 5e-05, |
| "loss": 0.1737, |
| "loss/crossentropy": 2.7646324038505554, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17367269843816757, |
| "loss/reg": 3.3968873023986816, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 0.31011876463890076, |
| "grad_norm_var": 0.0011285754764581786, |
| "learning_rate": 5e-05, |
| "loss": 0.1591, |
| "loss/crossentropy": 2.7303661704063416, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15912269055843353, |
| "loss/reg": 3.3925790786743164, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.00251, |
| "grad_norm": 0.4837491512298584, |
| "grad_norm_var": 0.0022679356659945546, |
| "learning_rate": 5e-05, |
| "loss": 0.1845, |
| "loss/crossentropy": 2.718783438205719, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18454211205244064, |
| "loss/reg": 3.389193296432495, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.00252, |
| "grad_norm": 0.30302709341049194, |
| "grad_norm_var": 0.002342444325573334, |
| "learning_rate": 5e-05, |
| "loss": 0.1527, |
| "loss/crossentropy": 2.7513213753700256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15272299572825432, |
| "loss/reg": 3.384976863861084, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.00253, |
| "grad_norm": 0.3376671075820923, |
| "grad_norm_var": 0.002352530797232196, |
| "learning_rate": 5e-05, |
| "loss": 0.1717, |
| "loss/crossentropy": 2.7082377672195435, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1717442087829113, |
| "loss/reg": 3.381958246231079, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.00254, |
| "grad_norm": 0.3470434546470642, |
| "grad_norm_var": 0.00215032290339258, |
| "learning_rate": 5e-05, |
| "loss": 0.1751, |
| "loss/crossentropy": 2.7747623324394226, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17506984621286392, |
| "loss/reg": 3.3780975341796875, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.00255, |
| "grad_norm": 0.35893791913986206, |
| "grad_norm_var": 0.002129806794166807, |
| "learning_rate": 5e-05, |
| "loss": 0.176, |
| "loss/crossentropy": 2.6670790910720825, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17602670192718506, |
| "loss/reg": 3.374431848526001, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 0.33274415135383606, |
| "grad_norm_var": 0.002050384071076557, |
| "learning_rate": 5e-05, |
| "loss": 0.1683, |
| "loss/crossentropy": 2.930284321308136, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16833152994513512, |
| "loss/reg": 3.3709969520568848, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.00257, |
| "grad_norm": 0.3107251822948456, |
| "grad_norm_var": 0.0021031284267367073, |
| "learning_rate": 5e-05, |
| "loss": 0.154, |
| "loss/crossentropy": 2.7738651037216187, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15401111543178558, |
| "loss/reg": 3.36681866645813, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.00258, |
| "grad_norm": 0.3238702118396759, |
| "grad_norm_var": 0.00212825610345269, |
| "learning_rate": 5e-05, |
| "loss": 0.1485, |
| "loss/crossentropy": 2.7926384806632996, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14850713685154915, |
| "loss/reg": 3.363298177719116, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.00259, |
| "grad_norm": 0.3937188982963562, |
| "grad_norm_var": 0.0022101531057158843, |
| "learning_rate": 5e-05, |
| "loss": 0.1796, |
| "loss/crossentropy": 2.732594311237335, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17957409471273422, |
| "loss/reg": 3.3592464923858643, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0026, |
| "grad_norm": 0.35869738459587097, |
| "grad_norm_var": 0.002201067050087302, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.6412158608436584, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1607726439833641, |
| "loss/reg": 3.355531692504883, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.00261, |
| "grad_norm": 0.342753529548645, |
| "grad_norm_var": 0.002168938888458849, |
| "learning_rate": 5e-05, |
| "loss": 0.1679, |
| "loss/crossentropy": 2.8253002762794495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16785955801606178, |
| "loss/reg": 3.3510327339172363, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.00262, |
| "grad_norm": 0.3396557867527008, |
| "grad_norm_var": 0.0020792270475482005, |
| "learning_rate": 5e-05, |
| "loss": 0.1719, |
| "loss/crossentropy": 2.5446697473526, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1719457022845745, |
| "loss/reg": 3.3462460041046143, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.00263, |
| "grad_norm": 0.326615571975708, |
| "grad_norm_var": 0.002123647634079288, |
| "learning_rate": 5e-05, |
| "loss": 0.1662, |
| "loss/crossentropy": 2.7185133695602417, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16621045768260956, |
| "loss/reg": 3.342698097229004, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.00264, |
| "grad_norm": 0.372024804353714, |
| "grad_norm_var": 0.0018490612448516057, |
| "learning_rate": 5e-05, |
| "loss": 0.1785, |
| "loss/crossentropy": 2.90339195728302, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17848360165953636, |
| "loss/reg": 3.3390297889709473, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.00265, |
| "grad_norm": 0.336412638425827, |
| "grad_norm_var": 0.0018521135396843155, |
| "learning_rate": 5e-05, |
| "loss": 0.1685, |
| "loss/crossentropy": 2.6998194456100464, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1684669330716133, |
| "loss/reg": 3.3360044956207275, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.00266, |
| "grad_norm": 0.3179170787334442, |
| "grad_norm_var": 0.0018158920564407192, |
| "learning_rate": 5e-05, |
| "loss": 0.164, |
| "loss/crossentropy": 2.713620126247406, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16402245312929153, |
| "loss/reg": 3.33297061920166, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.00267, |
| "grad_norm": 0.32180216908454895, |
| "grad_norm_var": 0.0005475447645484169, |
| "learning_rate": 5e-05, |
| "loss": 0.1561, |
| "loss/crossentropy": 2.8285736441612244, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15614933148026466, |
| "loss/reg": 3.330070972442627, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.00268, |
| "grad_norm": 0.34155359864234924, |
| "grad_norm_var": 0.00045564919377512797, |
| "learning_rate": 5e-05, |
| "loss": 0.1666, |
| "loss/crossentropy": 2.698326587677002, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16663997247815132, |
| "loss/reg": 3.326782464981079, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.00269, |
| "grad_norm": 0.3281239867210388, |
| "grad_norm_var": 0.0004660702159405468, |
| "learning_rate": 5e-05, |
| "loss": 0.1547, |
| "loss/crossentropy": 2.7132135033607483, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15473050251603127, |
| "loss/reg": 3.3232901096343994, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.0027, |
| "grad_norm": 0.3694444000720978, |
| "grad_norm_var": 0.0005161187813034911, |
| "learning_rate": 5e-05, |
| "loss": 0.1658, |
| "loss/crossentropy": 2.943029820919037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16579603031277657, |
| "loss/reg": 3.319425344467163, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.00271, |
| "grad_norm": 0.3521305024623871, |
| "grad_norm_var": 0.0005038113254072218, |
| "learning_rate": 5e-05, |
| "loss": 0.1762, |
| "loss/crossentropy": 2.813421130180359, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17624986171722412, |
| "loss/reg": 3.31587553024292, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.00272, |
| "grad_norm": 0.3419167995452881, |
| "grad_norm_var": 0.0004980410714001496, |
| "learning_rate": 5e-05, |
| "loss": 0.1579, |
| "loss/crossentropy": 2.6725985407829285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1579086296260357, |
| "loss/reg": 3.313774347305298, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.00273, |
| "grad_norm": 0.45973771810531616, |
| "grad_norm_var": 0.001257799356739812, |
| "learning_rate": 5e-05, |
| "loss": 0.1806, |
| "loss/crossentropy": 2.7593576908111572, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1806396320462227, |
| "loss/reg": 3.311671257019043, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.00274, |
| "grad_norm": 0.327812522649765, |
| "grad_norm_var": 0.00124416933097297, |
| "learning_rate": 5e-05, |
| "loss": 0.1544, |
| "loss/crossentropy": 2.7368595004081726, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15438436716794968, |
| "loss/reg": 3.308312177658081, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.00275, |
| "grad_norm": 0.43593037128448486, |
| "grad_norm_var": 0.001590926391083336, |
| "learning_rate": 5e-05, |
| "loss": 0.1721, |
| "loss/crossentropy": 2.8178694248199463, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17214355245232582, |
| "loss/reg": 3.30526065826416, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.00276, |
| "grad_norm": 0.361247181892395, |
| "grad_norm_var": 0.0015927484925991053, |
| "learning_rate": 5e-05, |
| "loss": 0.1735, |
| "loss/crossentropy": 2.734869599342346, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17345493659377098, |
| "loss/reg": 3.3015174865722656, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.00277, |
| "grad_norm": 0.3708873689174652, |
| "grad_norm_var": 0.0015974331537993436, |
| "learning_rate": 5e-05, |
| "loss": 0.1628, |
| "loss/crossentropy": 2.67022705078125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16277828440070152, |
| "loss/reg": 3.2979342937469482, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.00278, |
| "grad_norm": 0.3481086790561676, |
| "grad_norm_var": 0.0015829700282981919, |
| "learning_rate": 5e-05, |
| "loss": 0.1577, |
| "loss/crossentropy": 2.700168251991272, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15773406997323036, |
| "loss/reg": 3.294177532196045, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.00279, |
| "grad_norm": 0.5134589076042175, |
| "grad_norm_var": 0.003008442642246208, |
| "learning_rate": 5e-05, |
| "loss": 0.1766, |
| "loss/crossentropy": 2.9033528566360474, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17655130848288536, |
| "loss/reg": 3.2909915447235107, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.0028, |
| "grad_norm": 0.38205716013908386, |
| "grad_norm_var": 0.0030192383608610503, |
| "learning_rate": 5e-05, |
| "loss": 0.1934, |
| "loss/crossentropy": 2.7160211205482483, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19341961666941643, |
| "loss/reg": 3.2878634929656982, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.00281, |
| "grad_norm": 0.3628558814525604, |
| "grad_norm_var": 0.002947045102075485, |
| "learning_rate": 5e-05, |
| "loss": 0.1709, |
| "loss/crossentropy": 2.6912715435028076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17094064503908157, |
| "loss/reg": 3.284353256225586, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.00282, |
| "grad_norm": 0.32757696509361267, |
| "grad_norm_var": 0.002884588952342071, |
| "learning_rate": 5e-05, |
| "loss": 0.1702, |
| "loss/crossentropy": 2.7818912267684937, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17016061395406723, |
| "loss/reg": 3.281097412109375, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.00283, |
| "grad_norm": 0.5035110712051392, |
| "grad_norm_var": 0.003743174506031214, |
| "learning_rate": 5e-05, |
| "loss": 0.1891, |
| "loss/crossentropy": 2.8356027603149414, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1890563629567623, |
| "loss/reg": 3.277557611465454, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.00284, |
| "grad_norm": 0.4021988809108734, |
| "grad_norm_var": 0.003638735284583853, |
| "learning_rate": 5e-05, |
| "loss": 0.1574, |
| "loss/crossentropy": 2.768595337867737, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15735788643360138, |
| "loss/reg": 3.274083375930786, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.00285, |
| "grad_norm": 0.3557356297969818, |
| "grad_norm_var": 0.0034707811870306284, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.7620763182640076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16183337569236755, |
| "loss/reg": 3.2709619998931885, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.00286, |
| "grad_norm": 0.35383257269859314, |
| "grad_norm_var": 0.0035254991255895857, |
| "learning_rate": 5e-05, |
| "loss": 0.164, |
| "loss/crossentropy": 2.7777530550956726, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16404272243380547, |
| "loss/reg": 3.2678396701812744, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.00287, |
| "grad_norm": 0.3291935324668884, |
| "grad_norm_var": 0.0036663583934404683, |
| "learning_rate": 5e-05, |
| "loss": 0.1545, |
| "loss/crossentropy": 2.7828534841537476, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15450828149914742, |
| "loss/reg": 3.2639055252075195, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.00288, |
| "grad_norm": 0.3174595534801483, |
| "grad_norm_var": 0.003847509504795695, |
| "learning_rate": 5e-05, |
| "loss": 0.163, |
| "loss/crossentropy": 2.7973127365112305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16296324506402016, |
| "loss/reg": 3.2608911991119385, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.00289, |
| "grad_norm": 0.3723791539669037, |
| "grad_norm_var": 0.0034478366033269445, |
| "learning_rate": 5e-05, |
| "loss": 0.1757, |
| "loss/crossentropy": 2.630415976047516, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17573364078998566, |
| "loss/reg": 3.257523775100708, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.0029, |
| "grad_norm": 0.38034215569496155, |
| "grad_norm_var": 0.003261674725795945, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.704579532146454, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16892588511109352, |
| "loss/reg": 3.254065752029419, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.00291, |
| "grad_norm": 1.2464065551757812, |
| "grad_norm_var": 0.05011180607704591, |
| "learning_rate": 5e-05, |
| "loss": 0.1921, |
| "loss/crossentropy": 2.8595897555351257, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19211571291089058, |
| "loss/reg": 3.2510294914245605, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.00292, |
| "grad_norm": 0.3307066559791565, |
| "grad_norm_var": 0.05046209325623486, |
| "learning_rate": 5e-05, |
| "loss": 0.1681, |
| "loss/crossentropy": 2.8195151686668396, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16810721158981323, |
| "loss/reg": 3.2478625774383545, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.00293, |
| "grad_norm": 0.33664193749427795, |
| "grad_norm_var": 0.05081007066127065, |
| "learning_rate": 5e-05, |
| "loss": 0.1602, |
| "loss/crossentropy": 2.912789523601532, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16015203669667244, |
| "loss/reg": 3.2451751232147217, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.00294, |
| "grad_norm": 0.42365437746047974, |
| "grad_norm_var": 0.05035293502217161, |
| "learning_rate": 5e-05, |
| "loss": 0.1833, |
| "loss/crossentropy": 2.7459517121315002, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18333038315176964, |
| "loss/reg": 3.242023229598999, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.00295, |
| "grad_norm": 0.40840578079223633, |
| "grad_norm_var": 0.049924464393714924, |
| "learning_rate": 5e-05, |
| "loss": 0.176, |
| "loss/crossentropy": 2.778249144554138, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17595936357975006, |
| "loss/reg": 3.2388389110565186, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.00296, |
| "grad_norm": 0.3591618835926056, |
| "grad_norm_var": 0.05009460642018338, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.7121748328208923, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17217102646827698, |
| "loss/reg": 3.2364017963409424, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.00297, |
| "grad_norm": 0.31615006923675537, |
| "grad_norm_var": 0.05062186135785553, |
| "learning_rate": 5e-05, |
| "loss": 0.1488, |
| "loss/crossentropy": 2.7933038473129272, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14879318699240685, |
| "loss/reg": 3.2344272136688232, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.00298, |
| "grad_norm": 0.3586377799510956, |
| "grad_norm_var": 0.050288172636787816, |
| "learning_rate": 5e-05, |
| "loss": 0.1492, |
| "loss/crossentropy": 2.869333803653717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14924299344420433, |
| "loss/reg": 3.232086181640625, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.00299, |
| "grad_norm": 0.37798771262168884, |
| "grad_norm_var": 0.04995309393064352, |
| "learning_rate": 5e-05, |
| "loss": 0.1652, |
| "loss/crossentropy": 2.913083255290985, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1651969812810421, |
| "loss/reg": 3.2296016216278076, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 0.5914519429206848, |
| "grad_norm_var": 0.05182304954391634, |
| "learning_rate": 5e-05, |
| "loss": 0.1893, |
| "loss/crossentropy": 2.8007007241249084, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18929021432995796, |
| "loss/reg": 3.2266576290130615, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.00301, |
| "grad_norm": 0.3292617201805115, |
| "grad_norm_var": 0.05212417516215169, |
| "learning_rate": 5e-05, |
| "loss": 0.16, |
| "loss/crossentropy": 2.7326099276542664, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15998771041631699, |
| "loss/reg": 3.2236239910125732, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.00302, |
| "grad_norm": 0.3807355761528015, |
| "grad_norm_var": 0.05190702763823275, |
| "learning_rate": 5e-05, |
| "loss": 0.1831, |
| "loss/crossentropy": 2.7199636101722717, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1831417679786682, |
| "loss/reg": 3.220425844192505, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.00303, |
| "grad_norm": 0.4008902907371521, |
| "grad_norm_var": 0.05127743798183474, |
| "learning_rate": 5e-05, |
| "loss": 0.1777, |
| "loss/crossentropy": 2.7570589184761047, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.177694384008646, |
| "loss/reg": 3.2171859741210938, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.00304, |
| "grad_norm": 0.35962697863578796, |
| "grad_norm_var": 0.05073816419262332, |
| "learning_rate": 5e-05, |
| "loss": 0.1574, |
| "loss/crossentropy": 2.809377670288086, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15741629898548126, |
| "loss/reg": 3.2137656211853027, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.00305, |
| "grad_norm": 0.404453843832016, |
| "grad_norm_var": 0.05053133217663517, |
| "learning_rate": 5e-05, |
| "loss": 0.196, |
| "loss/crossentropy": 2.6374824047088623, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19603004679083824, |
| "loss/reg": 3.2106897830963135, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.00306, |
| "grad_norm": 0.3411775231361389, |
| "grad_norm_var": 0.05092714807133293, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.7098072171211243, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17005891352891922, |
| "loss/reg": 3.2072975635528564, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.00307, |
| "grad_norm": 0.48913073539733887, |
| "grad_norm_var": 0.004874772049479148, |
| "learning_rate": 5e-05, |
| "loss": 0.2396, |
| "loss/crossentropy": 2.8529589772224426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23958228901028633, |
| "loss/reg": 3.2032482624053955, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.00308, |
| "grad_norm": 0.3359135389328003, |
| "grad_norm_var": 0.004836687315538634, |
| "learning_rate": 5e-05, |
| "loss": 0.154, |
| "loss/crossentropy": 2.825865149497986, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15401727706193924, |
| "loss/reg": 3.2001187801361084, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.00309, |
| "grad_norm": 0.3673790693283081, |
| "grad_norm_var": 0.004683902714770716, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.757752239704132, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1700810343027115, |
| "loss/reg": 3.197371244430542, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.0031, |
| "grad_norm": 0.3675878643989563, |
| "grad_norm_var": 0.004630661781797581, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.9055893421173096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17424792051315308, |
| "loss/reg": 3.193922758102417, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.00311, |
| "grad_norm": 0.3216918110847473, |
| "grad_norm_var": 0.004850203191397428, |
| "learning_rate": 5e-05, |
| "loss": 0.1529, |
| "loss/crossentropy": 2.7421942353248596, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15289029106497765, |
| "loss/reg": 3.1913256645202637, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.00312, |
| "grad_norm": 0.30283358693122864, |
| "grad_norm_var": 0.005214980747276743, |
| "learning_rate": 5e-05, |
| "loss": 0.1501, |
| "loss/crossentropy": 2.68456107378006, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15012749284505844, |
| "loss/reg": 3.188302755355835, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.00313, |
| "grad_norm": 0.3840731978416443, |
| "grad_norm_var": 0.004944937932171186, |
| "learning_rate": 5e-05, |
| "loss": 0.1565, |
| "loss/crossentropy": 2.7386457920074463, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15645165741443634, |
| "loss/reg": 3.1856327056884766, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.00314, |
| "grad_norm": 0.3471456468105316, |
| "grad_norm_var": 0.004989069609234416, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.8941837549209595, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16613885760307312, |
| "loss/reg": 3.182285785675049, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.00315, |
| "grad_norm": 0.40083590149879456, |
| "grad_norm_var": 0.005011503442318803, |
| "learning_rate": 5e-05, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.830922782421112, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18162691593170166, |
| "loss/reg": 3.1795294284820557, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.00316, |
| "grad_norm": 0.40112313628196716, |
| "grad_norm_var": 0.001979603921073251, |
| "learning_rate": 5e-05, |
| "loss": 0.163, |
| "loss/crossentropy": 2.5529216527938843, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1629548817873001, |
| "loss/reg": 3.1763863563537598, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.00317, |
| "grad_norm": 0.3936459720134735, |
| "grad_norm_var": 0.001881530067811854, |
| "learning_rate": 5e-05, |
| "loss": 0.1851, |
| "loss/crossentropy": 2.780943751335144, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18506472185254097, |
| "loss/reg": 3.173980474472046, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.00318, |
| "grad_norm": 0.3727342486381531, |
| "grad_norm_var": 0.0018792953911145364, |
| "learning_rate": 5e-05, |
| "loss": 0.1827, |
| "loss/crossentropy": 2.76874041557312, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1826922371983528, |
| "loss/reg": 3.1704628467559814, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.00319, |
| "grad_norm": 0.3470066785812378, |
| "grad_norm_var": 0.0018703712702832478, |
| "learning_rate": 5e-05, |
| "loss": 0.1598, |
| "loss/crossentropy": 2.740228831768036, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1597808077931404, |
| "loss/reg": 3.1674654483795166, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 0.3653993010520935, |
| "grad_norm_var": 0.0018636832816178708, |
| "learning_rate": 5e-05, |
| "loss": 0.1549, |
| "loss/crossentropy": 2.883521616458893, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15490083023905754, |
| "loss/reg": 3.1641883850097656, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.00321, |
| "grad_norm": 0.3510638475418091, |
| "grad_norm_var": 0.0018064205203171017, |
| "learning_rate": 5e-05, |
| "loss": 0.1577, |
| "loss/crossentropy": 2.9007150530815125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15773681923747063, |
| "loss/reg": 3.1610915660858154, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.00322, |
| "grad_norm": 0.5068875551223755, |
| "grad_norm_var": 0.0029290004167608335, |
| "learning_rate": 5e-05, |
| "loss": 0.2013, |
| "loss/crossentropy": 2.716179847717285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2013130635023117, |
| "loss/reg": 3.1571173667907715, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.00323, |
| "grad_norm": 0.40178200602531433, |
| "grad_norm_var": 0.0021162756618779235, |
| "learning_rate": 5e-05, |
| "loss": 0.1809, |
| "loss/crossentropy": 2.9381837844848633, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1809130534529686, |
| "loss/reg": 3.1540093421936035, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.00324, |
| "grad_norm": 0.35252845287323, |
| "grad_norm_var": 0.0020514948206895294, |
| "learning_rate": 5e-05, |
| "loss": 0.1617, |
| "loss/crossentropy": 2.743869721889496, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16171807795763016, |
| "loss/reg": 3.1503779888153076, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.00325, |
| "grad_norm": 0.36802011728286743, |
| "grad_norm_var": 0.0020509560983741053, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.934039294719696, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17217914387583733, |
| "loss/reg": 3.1467440128326416, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.00326, |
| "grad_norm": 0.4267319142818451, |
| "grad_norm_var": 0.0022188398751517274, |
| "learning_rate": 5e-05, |
| "loss": 0.1924, |
| "loss/crossentropy": 2.802468180656433, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19241869449615479, |
| "loss/reg": 3.1427693367004395, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.00327, |
| "grad_norm": 0.34044548869132996, |
| "grad_norm_var": 0.0021007258044081806, |
| "learning_rate": 5e-05, |
| "loss": 0.1522, |
| "loss/crossentropy": 2.8443135619163513, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1522187888622284, |
| "loss/reg": 3.1388471126556396, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.00328, |
| "grad_norm": 0.4276120066642761, |
| "grad_norm_var": 0.001808451579785623, |
| "learning_rate": 5e-05, |
| "loss": 0.1708, |
| "loss/crossentropy": 2.8915366530418396, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17084889113903046, |
| "loss/reg": 3.134800434112549, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.00329, |
| "grad_norm": 0.3486219346523285, |
| "grad_norm_var": 0.0018993689379833108, |
| "learning_rate": 5e-05, |
| "loss": 0.1687, |
| "loss/crossentropy": 2.575106978416443, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16874929517507553, |
| "loss/reg": 3.1303272247314453, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.0033, |
| "grad_norm": 0.365105539560318, |
| "grad_norm_var": 0.0018301403367672127, |
| "learning_rate": 5e-05, |
| "loss": 0.1842, |
| "loss/crossentropy": 2.6813217401504517, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18416454643011093, |
| "loss/reg": 3.1269419193267822, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.00331, |
| "grad_norm": 0.5757820010185242, |
| "grad_norm_var": 0.004098500311938921, |
| "learning_rate": 5e-05, |
| "loss": 0.1935, |
| "loss/crossentropy": 2.9679067730903625, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19354857876896858, |
| "loss/reg": 3.1228113174438477, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.00332, |
| "grad_norm": 0.405617356300354, |
| "grad_norm_var": 0.004102514647771457, |
| "learning_rate": 5e-05, |
| "loss": 0.1716, |
| "loss/crossentropy": 2.843691408634186, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1716487891972065, |
| "loss/reg": 3.120131015777588, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.00333, |
| "grad_norm": 0.3825243413448334, |
| "grad_norm_var": 0.004114939464605513, |
| "learning_rate": 5e-05, |
| "loss": 0.1692, |
| "loss/crossentropy": 2.722069561481476, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16915880143642426, |
| "loss/reg": 3.117812395095825, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.00334, |
| "grad_norm": 0.38414397835731506, |
| "grad_norm_var": 0.004087504594686679, |
| "learning_rate": 5e-05, |
| "loss": 0.1678, |
| "loss/crossentropy": 2.727014124393463, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1677936352789402, |
| "loss/reg": 3.115847587585449, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.00335, |
| "grad_norm": 0.4531976580619812, |
| "grad_norm_var": 0.0040868556651997325, |
| "learning_rate": 5e-05, |
| "loss": 0.1726, |
| "loss/crossentropy": 2.5817691683769226, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17255331575870514, |
| "loss/reg": 3.1140594482421875, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.00336, |
| "grad_norm": 0.33963721990585327, |
| "grad_norm_var": 0.004259094561609115, |
| "learning_rate": 5e-05, |
| "loss": 0.1605, |
| "loss/crossentropy": 2.7009602189064026, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16050074249505997, |
| "loss/reg": 3.1128299236297607, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.00337, |
| "grad_norm": 0.36085280776023865, |
| "grad_norm_var": 0.00419878945557195, |
| "learning_rate": 5e-05, |
| "loss": 0.1641, |
| "loss/crossentropy": 2.7012510299682617, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16410352289676666, |
| "loss/reg": 3.109898805618286, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.00338, |
| "grad_norm": 0.3331363797187805, |
| "grad_norm_var": 0.003666565441549352, |
| "learning_rate": 5e-05, |
| "loss": 0.1619, |
| "loss/crossentropy": 2.802642047405243, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1618719846010208, |
| "loss/reg": 3.1067681312561035, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.00339, |
| "grad_norm": 0.41531723737716675, |
| "grad_norm_var": 0.003696375336840474, |
| "learning_rate": 5e-05, |
| "loss": 0.1877, |
| "loss/crossentropy": 2.622368335723877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18770882859826088, |
| "loss/reg": 3.103231191635132, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.0034, |
| "grad_norm": 0.3483443260192871, |
| "grad_norm_var": 0.0037197436901762657, |
| "learning_rate": 5e-05, |
| "loss": 0.1603, |
| "loss/crossentropy": 2.7523834109306335, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16034872457385063, |
| "loss/reg": 3.1008808612823486, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.00341, |
| "grad_norm": 0.46117284893989563, |
| "grad_norm_var": 0.003961845355148208, |
| "learning_rate": 5e-05, |
| "loss": 0.1801, |
| "loss/crossentropy": 2.820544958114624, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18007208034396172, |
| "loss/reg": 3.098954439163208, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.00342, |
| "grad_norm": 0.38020917773246765, |
| "grad_norm_var": 0.003918987421685794, |
| "learning_rate": 5e-05, |
| "loss": 0.1711, |
| "loss/crossentropy": 2.864526093006134, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1710633859038353, |
| "loss/reg": 3.0957016944885254, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.00343, |
| "grad_norm": 0.3978392779827118, |
| "grad_norm_var": 0.0037065638898653073, |
| "learning_rate": 5e-05, |
| "loss": 0.1782, |
| "loss/crossentropy": 2.859494388103485, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1782137230038643, |
| "loss/reg": 3.09333872795105, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.00344, |
| "grad_norm": 0.36975786089897156, |
| "grad_norm_var": 0.0036926924317912187, |
| "learning_rate": 5e-05, |
| "loss": 0.1658, |
| "loss/crossentropy": 2.7827839255332947, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1657763496041298, |
| "loss/reg": 3.090590715408325, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.00345, |
| "grad_norm": 0.333897203207016, |
| "grad_norm_var": 0.0037974520830182084, |
| "learning_rate": 5e-05, |
| "loss": 0.1612, |
| "loss/crossentropy": 2.7804853320121765, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1611798331141472, |
| "loss/reg": 3.0877137184143066, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.00346, |
| "grad_norm": 0.43794891238212585, |
| "grad_norm_var": 0.0038469119530984567, |
| "learning_rate": 5e-05, |
| "loss": 0.1678, |
| "loss/crossentropy": 2.7229984402656555, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1677897423505783, |
| "loss/reg": 3.085155725479126, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.00347, |
| "grad_norm": 0.33257824182510376, |
| "grad_norm_var": 0.0018017603976316725, |
| "learning_rate": 5e-05, |
| "loss": 0.1745, |
| "loss/crossentropy": 2.7936434745788574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17448442056775093, |
| "loss/reg": 3.0829925537109375, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.00348, |
| "grad_norm": 0.393646240234375, |
| "grad_norm_var": 0.001775431972661142, |
| "learning_rate": 5e-05, |
| "loss": 0.1647, |
| "loss/crossentropy": 2.8590177297592163, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16474304348230362, |
| "loss/reg": 3.080383062362671, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.00349, |
| "grad_norm": 0.34549105167388916, |
| "grad_norm_var": 0.0018623256252658482, |
| "learning_rate": 5e-05, |
| "loss": 0.1678, |
| "loss/crossentropy": 2.7182729840278625, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16776488721370697, |
| "loss/reg": 3.0792622566223145, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 0.9833559393882751, |
| "grad_norm_var": 0.024598539346201563, |
| "learning_rate": 5e-05, |
| "loss": 0.1911, |
| "loss/crossentropy": 2.720784068107605, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19106518849730492, |
| "loss/reg": 3.0780370235443115, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.00351, |
| "grad_norm": 0.3550430238246918, |
| "grad_norm_var": 0.02473872020472854, |
| "learning_rate": 5e-05, |
| "loss": 0.1683, |
| "loss/crossentropy": 2.7388776540756226, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16827991232275963, |
| "loss/reg": 3.07700777053833, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.00352, |
| "grad_norm": 0.32236865162849426, |
| "grad_norm_var": 0.024923428623413246, |
| "learning_rate": 5e-05, |
| "loss": 0.1555, |
| "loss/crossentropy": 2.787019371986389, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1554781049489975, |
| "loss/reg": 3.075169324874878, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.00353, |
| "grad_norm": 0.34089383482933044, |
| "grad_norm_var": 0.025080939274787682, |
| "learning_rate": 5e-05, |
| "loss": 0.1625, |
| "loss/crossentropy": 2.763973832130432, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1625489443540573, |
| "loss/reg": 3.0732734203338623, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.00354, |
| "grad_norm": 0.35467466711997986, |
| "grad_norm_var": 0.02489081345717287, |
| "learning_rate": 5e-05, |
| "loss": 0.161, |
| "loss/crossentropy": 2.6696255207061768, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1610397771000862, |
| "loss/reg": 3.071441411972046, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.00355, |
| "grad_norm": 0.3465348184108734, |
| "grad_norm_var": 0.02514492485323772, |
| "learning_rate": 5e-05, |
| "loss": 0.1692, |
| "loss/crossentropy": 2.8481903076171875, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16923030093312263, |
| "loss/reg": 3.068796157836914, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.00356, |
| "grad_norm": 0.3337138891220093, |
| "grad_norm_var": 0.025271718941650815, |
| "learning_rate": 5e-05, |
| "loss": 0.1675, |
| "loss/crossentropy": 2.734935760498047, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1674855425953865, |
| "loss/reg": 3.066350221633911, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.00357, |
| "grad_norm": 0.3486330509185791, |
| "grad_norm_var": 0.02522896182872459, |
| "learning_rate": 5e-05, |
| "loss": 0.1751, |
| "loss/crossentropy": 2.8282878398895264, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17508375644683838, |
| "loss/reg": 3.0633251667022705, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.00358, |
| "grad_norm": 0.3714129626750946, |
| "grad_norm_var": 0.025255292610223575, |
| "learning_rate": 5e-05, |
| "loss": 0.1798, |
| "loss/crossentropy": 2.723433256149292, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17982058972120285, |
| "loss/reg": 3.060797929763794, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.00359, |
| "grad_norm": 0.38819992542266846, |
| "grad_norm_var": 0.025261289598676597, |
| "learning_rate": 5e-05, |
| "loss": 0.179, |
| "loss/crossentropy": 2.5971017479896545, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17904112860560417, |
| "loss/reg": 3.057884693145752, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.0036, |
| "grad_norm": 0.3948271870613098, |
| "grad_norm_var": 0.02520822524046924, |
| "learning_rate": 5e-05, |
| "loss": 0.1826, |
| "loss/crossentropy": 2.631825864315033, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1826026625931263, |
| "loss/reg": 3.0550246238708496, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.00361, |
| "grad_norm": 0.46747469902038574, |
| "grad_norm_var": 0.025164775676019657, |
| "learning_rate": 5e-05, |
| "loss": 0.1849, |
| "loss/crossentropy": 2.628718376159668, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1848563477396965, |
| "loss/reg": 3.052072525024414, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.00362, |
| "grad_norm": 0.9672635197639465, |
| "grad_norm_var": 0.044838716189940266, |
| "learning_rate": 5e-05, |
| "loss": 0.2236, |
| "loss/crossentropy": 2.7798518538475037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22359847277402878, |
| "loss/reg": 3.048891305923462, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.00363, |
| "grad_norm": 0.3722783029079437, |
| "grad_norm_var": 0.04436658011174813, |
| "learning_rate": 5e-05, |
| "loss": 0.175, |
| "loss/crossentropy": 2.7808294892311096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17498808726668358, |
| "loss/reg": 3.0457394123077393, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.00364, |
| "grad_norm": 0.3547132611274719, |
| "grad_norm_var": 0.04471680473078544, |
| "learning_rate": 5e-05, |
| "loss": 0.1602, |
| "loss/crossentropy": 2.7656018137931824, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16020696610212326, |
| "loss/reg": 3.043097734451294, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.00365, |
| "grad_norm": 0.4774816632270813, |
| "grad_norm_var": 0.04413484451680517, |
| "learning_rate": 5e-05, |
| "loss": 0.1831, |
| "loss/crossentropy": 2.9051772356033325, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18311960250139236, |
| "loss/reg": 3.0403990745544434, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.00366, |
| "grad_norm": 0.41332709789276123, |
| "grad_norm_var": 0.0238056716485936, |
| "learning_rate": 5e-05, |
| "loss": 0.1705, |
| "loss/crossentropy": 2.8529672026634216, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17049206793308258, |
| "loss/reg": 3.037370204925537, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.00367, |
| "grad_norm": 0.39109355211257935, |
| "grad_norm_var": 0.023608062717162412, |
| "learning_rate": 5e-05, |
| "loss": 0.1798, |
| "loss/crossentropy": 2.796768307685852, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17983367666602135, |
| "loss/reg": 3.0343563556671143, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.00368, |
| "grad_norm": 0.36531057953834534, |
| "grad_norm_var": 0.023191193861390014, |
| "learning_rate": 5e-05, |
| "loss": 0.1715, |
| "loss/crossentropy": 2.8276050686836243, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1715383380651474, |
| "loss/reg": 3.031611919403076, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.00369, |
| "grad_norm": 0.33283501863479614, |
| "grad_norm_var": 0.023278092934366657, |
| "learning_rate": 5e-05, |
| "loss": 0.1499, |
| "loss/crossentropy": 2.6641258597373962, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.14990831911563873, |
| "loss/reg": 3.0287797451019287, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.0037, |
| "grad_norm": 0.4542810618877411, |
| "grad_norm_var": 0.023063995994232415, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.7453941702842712, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1721569411456585, |
| "loss/reg": 3.0258800983428955, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.00371, |
| "grad_norm": 0.3705763816833496, |
| "grad_norm_var": 0.022852728398777448, |
| "learning_rate": 5e-05, |
| "loss": 0.1849, |
| "loss/crossentropy": 2.635721504688263, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18491211906075478, |
| "loss/reg": 3.0235960483551025, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.00372, |
| "grad_norm": 0.4085729420185089, |
| "grad_norm_var": 0.022289690361487074, |
| "learning_rate": 5e-05, |
| "loss": 0.1871, |
| "loss/crossentropy": 2.732766628265381, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18710973486304283, |
| "loss/reg": 3.0203185081481934, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.00373, |
| "grad_norm": 0.3334612250328064, |
| "grad_norm_var": 0.022468457594482887, |
| "learning_rate": 5e-05, |
| "loss": 0.1662, |
| "loss/crossentropy": 2.783429443836212, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1662071831524372, |
| "loss/reg": 3.0171730518341064, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.00374, |
| "grad_norm": 0.35536903142929077, |
| "grad_norm_var": 0.022607616164545874, |
| "learning_rate": 5e-05, |
| "loss": 0.1654, |
| "loss/crossentropy": 2.818749785423279, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16541225090622902, |
| "loss/reg": 3.0140058994293213, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.00375, |
| "grad_norm": 0.348376989364624, |
| "grad_norm_var": 0.022917750109528078, |
| "learning_rate": 5e-05, |
| "loss": 0.1628, |
| "loss/crossentropy": 2.9099320769309998, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1627991460263729, |
| "loss/reg": 3.010875701904297, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.00376, |
| "grad_norm": 0.3394787311553955, |
| "grad_norm_var": 0.023335225496050292, |
| "learning_rate": 5e-05, |
| "loss": 0.1605, |
| "loss/crossentropy": 2.811407744884491, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16051743179559708, |
| "loss/reg": 3.0073623657226562, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.00377, |
| "grad_norm": 0.42454567551612854, |
| "grad_norm_var": 0.02319007765550817, |
| "learning_rate": 5e-05, |
| "loss": 0.1645, |
| "loss/crossentropy": 2.773725748062134, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16450630128383636, |
| "loss/reg": 3.004185676574707, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.00378, |
| "grad_norm": 0.3412385582923889, |
| "grad_norm_var": 0.001946629707866813, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.785709500312805, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17420916631817818, |
| "loss/reg": 3.0007028579711914, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.00379, |
| "grad_norm": 0.3544103503227234, |
| "grad_norm_var": 0.001985417588834304, |
| "learning_rate": 5e-05, |
| "loss": 0.1716, |
| "loss/crossentropy": 2.670408546924591, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17155754566192627, |
| "loss/reg": 2.9972825050354004, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.0038, |
| "grad_norm": 0.36286091804504395, |
| "grad_norm_var": 0.001963109812450625, |
| "learning_rate": 5e-05, |
| "loss": 0.1786, |
| "loss/crossentropy": 2.822770357131958, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17864727228879929, |
| "loss/reg": 2.993536949157715, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.00381, |
| "grad_norm": 0.5003440976142883, |
| "grad_norm_var": 0.002294225514870618, |
| "learning_rate": 5e-05, |
| "loss": 0.1896, |
| "loss/crossentropy": 2.790800392627716, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18961890786886215, |
| "loss/reg": 2.990283489227295, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.00382, |
| "grad_norm": 0.3698294758796692, |
| "grad_norm_var": 0.0022250210916228584, |
| "learning_rate": 5e-05, |
| "loss": 0.1648, |
| "loss/crossentropy": 2.8308547139167786, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16477400809526443, |
| "loss/reg": 2.986691474914551, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.00383, |
| "grad_norm": 0.36506953835487366, |
| "grad_norm_var": 0.0022229105132923347, |
| "learning_rate": 5e-05, |
| "loss": 0.1682, |
| "loss/crossentropy": 2.744426727294922, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16819821670651436, |
| "loss/reg": 2.983008861541748, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 0.3243113160133362, |
| "grad_norm_var": 0.002390011819316588, |
| "learning_rate": 5e-05, |
| "loss": 0.1596, |
| "loss/crossentropy": 2.89188152551651, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1595698669552803, |
| "loss/reg": 2.979191541671753, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.00385, |
| "grad_norm": 0.36836785078048706, |
| "grad_norm_var": 0.002273433106164295, |
| "learning_rate": 5e-05, |
| "loss": 0.1747, |
| "loss/crossentropy": 2.902570128440857, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1746898777782917, |
| "loss/reg": 2.975698947906494, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.00386, |
| "grad_norm": 0.3365325927734375, |
| "grad_norm_var": 0.001915978849994604, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.766001045703888, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16081618145108223, |
| "loss/reg": 2.972667694091797, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.00387, |
| "grad_norm": 0.35417604446411133, |
| "grad_norm_var": 0.0019292530227877358, |
| "learning_rate": 5e-05, |
| "loss": 0.1605, |
| "loss/crossentropy": 2.7814798951148987, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16046970710158348, |
| "loss/reg": 2.969967842102051, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.00388, |
| "grad_norm": 1.9537514448165894, |
| "grad_norm_var": 0.15952536292831518, |
| "learning_rate": 5e-05, |
| "loss": 0.1926, |
| "loss/crossentropy": 2.782427728176117, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1925731934607029, |
| "loss/reg": 2.9671437740325928, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.00389, |
| "grad_norm": 0.3620118498802185, |
| "grad_norm_var": 0.15907744774636304, |
| "learning_rate": 5e-05, |
| "loss": 0.1714, |
| "loss/crossentropy": 2.716952919960022, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17136194929480553, |
| "loss/reg": 2.9639716148376465, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.0039, |
| "grad_norm": 0.3765539526939392, |
| "grad_norm_var": 0.1587921781193889, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.8395472168922424, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16893662884831429, |
| "loss/reg": 2.9617481231689453, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.00391, |
| "grad_norm": 0.39779096841812134, |
| "grad_norm_var": 0.15815917569478716, |
| "learning_rate": 5e-05, |
| "loss": 0.1677, |
| "loss/crossentropy": 2.813215434551239, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1676802597939968, |
| "loss/reg": 2.958872079849243, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.00392, |
| "grad_norm": 2.267273187637329, |
| "grad_norm_var": 0.35670344578828533, |
| "learning_rate": 5e-05, |
| "loss": 0.1898, |
| "loss/crossentropy": 2.710484206676483, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18981827050447464, |
| "loss/reg": 2.9562907218933105, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.00393, |
| "grad_norm": 0.4732659161090851, |
| "grad_norm_var": 0.35576926148027355, |
| "learning_rate": 5e-05, |
| "loss": 0.1789, |
| "loss/crossentropy": 2.7463297247886658, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17890166491270065, |
| "loss/reg": 2.953505277633667, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.00394, |
| "grad_norm": 0.46487849950790405, |
| "grad_norm_var": 0.3525539310677323, |
| "learning_rate": 5e-05, |
| "loss": 0.1737, |
| "loss/crossentropy": 2.781617820262909, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17365656793117523, |
| "loss/reg": 2.951185464859009, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.00395, |
| "grad_norm": 0.36613309383392334, |
| "grad_norm_var": 0.352175585204308, |
| "learning_rate": 5e-05, |
| "loss": 0.1749, |
| "loss/crossentropy": 2.9521047472953796, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1749158501625061, |
| "loss/reg": 2.949521780014038, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.00396, |
| "grad_norm": 0.33889761567115784, |
| "grad_norm_var": 0.35297777688562104, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.709399461746216, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.161102045327425, |
| "loss/reg": 2.94758677482605, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.00397, |
| "grad_norm": 0.37460586428642273, |
| "grad_norm_var": 0.3556567542520952, |
| "learning_rate": 5e-05, |
| "loss": 0.1709, |
| "loss/crossentropy": 2.8318939208984375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1709057316184044, |
| "loss/reg": 2.9449052810668945, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.00398, |
| "grad_norm": 0.36912715435028076, |
| "grad_norm_var": 0.3556777153015602, |
| "learning_rate": 5e-05, |
| "loss": 0.1699, |
| "loss/crossentropy": 2.7699413895606995, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16991987824440002, |
| "loss/reg": 2.9416375160217285, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.00399, |
| "grad_norm": 0.4335935711860657, |
| "grad_norm_var": 0.35388598085202433, |
| "learning_rate": 5e-05, |
| "loss": 0.1621, |
| "loss/crossentropy": 2.6929262280464172, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16208457946777344, |
| "loss/reg": 2.9385571479797363, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 0.36466526985168457, |
| "grad_norm_var": 0.35251743192284957, |
| "learning_rate": 5e-05, |
| "loss": 0.1603, |
| "loss/crossentropy": 2.9028329849243164, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16026458516716957, |
| "loss/reg": 2.935270071029663, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.00401, |
| "grad_norm": 0.31859657168388367, |
| "grad_norm_var": 0.3542100800677372, |
| "learning_rate": 5e-05, |
| "loss": 0.155, |
| "loss/crossentropy": 2.7707905769348145, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15500668808817863, |
| "loss/reg": 2.9319961071014404, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.00402, |
| "grad_norm": 0.39714375138282776, |
| "grad_norm_var": 0.35233479687143326, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.5947351455688477, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17224042490124702, |
| "loss/reg": 2.929413318634033, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.00403, |
| "grad_norm": 2.267681121826172, |
| "grad_norm_var": 0.518261838886723, |
| "learning_rate": 5e-05, |
| "loss": 0.2313, |
| "loss/crossentropy": 2.703626811504364, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.23128759488463402, |
| "loss/reg": 2.925968647003174, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.00404, |
| "grad_norm": 0.38179340958595276, |
| "grad_norm_var": 0.414193396025083, |
| "learning_rate": 5e-05, |
| "loss": 0.1852, |
| "loss/crossentropy": 2.820598065853119, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18524771928787231, |
| "loss/reg": 2.9223124980926514, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.00405, |
| "grad_norm": 0.35225149989128113, |
| "grad_norm_var": 0.4145378570625937, |
| "learning_rate": 5e-05, |
| "loss": 0.1672, |
| "loss/crossentropy": 2.761472165584564, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1672290712594986, |
| "loss/reg": 2.9187073707580566, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.00406, |
| "grad_norm": 0.35987603664398193, |
| "grad_norm_var": 0.4150999685132215, |
| "learning_rate": 5e-05, |
| "loss": 0.1731, |
| "loss/crossentropy": 2.7906153202056885, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17313192784786224, |
| "loss/reg": 2.915867805480957, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.00407, |
| "grad_norm": 0.36714085936546326, |
| "grad_norm_var": 0.416068714723823, |
| "learning_rate": 5e-05, |
| "loss": 0.1729, |
| "loss/crossentropy": 2.7815486192703247, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17289156094193459, |
| "loss/reg": 2.9124350547790527, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.00408, |
| "grad_norm": 0.43249061703681946, |
| "grad_norm_var": 0.22313248530400895, |
| "learning_rate": 5e-05, |
| "loss": 0.1886, |
| "loss/crossentropy": 2.7781424522399902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18864024803042412, |
| "loss/reg": 2.9094510078430176, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.00409, |
| "grad_norm": 0.34418484568595886, |
| "grad_norm_var": 0.22470081409508588, |
| "learning_rate": 5e-05, |
| "loss": 0.1713, |
| "loss/crossentropy": 2.7818892002105713, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17131192237138748, |
| "loss/reg": 2.9064505100250244, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.0041, |
| "grad_norm": 0.5792982578277588, |
| "grad_norm_var": 0.2250470715469535, |
| "learning_rate": 5e-05, |
| "loss": 0.1925, |
| "loss/crossentropy": 2.8059155344963074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1925133354961872, |
| "loss/reg": 2.9032788276672363, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.00411, |
| "grad_norm": 0.35917767882347107, |
| "grad_norm_var": 0.22517699381034958, |
| "learning_rate": 5e-05, |
| "loss": 0.1597, |
| "loss/crossentropy": 2.9948400259017944, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1597156822681427, |
| "loss/reg": 2.900125741958618, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.00412, |
| "grad_norm": 0.32936394214630127, |
| "grad_norm_var": 0.2253906803631866, |
| "learning_rate": 5e-05, |
| "loss": 0.16, |
| "loss/crossentropy": 2.8464353680610657, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15996254980564117, |
| "loss/reg": 2.897120952606201, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.00413, |
| "grad_norm": 0.3636591136455536, |
| "grad_norm_var": 0.22558401797348096, |
| "learning_rate": 5e-05, |
| "loss": 0.1896, |
| "loss/crossentropy": 2.5940242409706116, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18961919099092484, |
| "loss/reg": 2.8938040733337402, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.00414, |
| "grad_norm": 0.3614409565925598, |
| "grad_norm_var": 0.2257231161008428, |
| "learning_rate": 5e-05, |
| "loss": 0.1669, |
| "loss/crossentropy": 2.9938586950302124, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16688520461320877, |
| "loss/reg": 2.891237497329712, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.00415, |
| "grad_norm": 0.33793795108795166, |
| "grad_norm_var": 0.2271517945765009, |
| "learning_rate": 5e-05, |
| "loss": 0.1662, |
| "loss/crossentropy": 2.7566803693771362, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16617370769381523, |
| "loss/reg": 2.888444185256958, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.00416, |
| "grad_norm": 0.33697640895843506, |
| "grad_norm_var": 0.22768012665927795, |
| "learning_rate": 5e-05, |
| "loss": 0.1609, |
| "loss/crossentropy": 2.7538956999778748, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1609276346862316, |
| "loss/reg": 2.8854165077209473, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.00417, |
| "grad_norm": 0.33163169026374817, |
| "grad_norm_var": 0.22738752034767185, |
| "learning_rate": 5e-05, |
| "loss": 0.1686, |
| "loss/crossentropy": 2.716135025024414, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16858552396297455, |
| "loss/reg": 2.883502244949341, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.00418, |
| "grad_norm": 0.3197973072528839, |
| "grad_norm_var": 0.22875903165210631, |
| "learning_rate": 5e-05, |
| "loss": 0.1622, |
| "loss/crossentropy": 2.8385114669799805, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16216163337230682, |
| "loss/reg": 2.8822600841522217, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.00419, |
| "grad_norm": 0.3929068446159363, |
| "grad_norm_var": 0.0038269076397950408, |
| "learning_rate": 5e-05, |
| "loss": 0.1747, |
| "loss/crossentropy": 2.7871418595314026, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17471741139888763, |
| "loss/reg": 2.8793692588806152, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.0042, |
| "grad_norm": 0.3870161473751068, |
| "grad_norm_var": 0.0038355224442556198, |
| "learning_rate": 5e-05, |
| "loss": 0.1704, |
| "loss/crossentropy": 2.7993595600128174, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1703585907816887, |
| "loss/reg": 2.8776159286499023, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.00421, |
| "grad_norm": 0.35682201385498047, |
| "grad_norm_var": 0.0038246732894099337, |
| "learning_rate": 5e-05, |
| "loss": 0.1886, |
| "loss/crossentropy": 2.655856966972351, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18859218060970306, |
| "loss/reg": 2.87612247467041, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.00422, |
| "grad_norm": 0.33115604519844055, |
| "grad_norm_var": 0.003924500155300174, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.8695462942123413, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1611364483833313, |
| "loss/reg": 2.873897075653076, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.00423, |
| "grad_norm": 0.4912989139556885, |
| "grad_norm_var": 0.004829238325957341, |
| "learning_rate": 5e-05, |
| "loss": 0.1784, |
| "loss/crossentropy": 2.7167177200317383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17839327454566956, |
| "loss/reg": 2.8718714714050293, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.00424, |
| "grad_norm": 0.3349898159503937, |
| "grad_norm_var": 0.004720821391959795, |
| "learning_rate": 5e-05, |
| "loss": 0.1615, |
| "loss/crossentropy": 2.7473002076148987, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1615452691912651, |
| "loss/reg": 2.8705270290374756, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.00425, |
| "grad_norm": 0.3930635154247284, |
| "grad_norm_var": 0.004686561363231038, |
| "learning_rate": 5e-05, |
| "loss": 0.1775, |
| "loss/crossentropy": 2.785146713256836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17748162522912025, |
| "loss/reg": 2.868584394454956, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.00426, |
| "grad_norm": 0.3448260426521301, |
| "grad_norm_var": 0.0017484410160554464, |
| "learning_rate": 5e-05, |
| "loss": 0.1758, |
| "loss/crossentropy": 2.726165235042572, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17576807364821434, |
| "loss/reg": 2.8673741817474365, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.00427, |
| "grad_norm": 0.3846610188484192, |
| "grad_norm_var": 0.0017836724819081718, |
| "learning_rate": 5e-05, |
| "loss": 0.1762, |
| "loss/crossentropy": 2.7086002230644226, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1762431263923645, |
| "loss/reg": 2.8651485443115234, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.00428, |
| "grad_norm": 0.3494791090488434, |
| "grad_norm_var": 0.0017205006490997802, |
| "learning_rate": 5e-05, |
| "loss": 0.1818, |
| "loss/crossentropy": 2.7305288314819336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18181117624044418, |
| "loss/reg": 2.8629541397094727, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.00429, |
| "grad_norm": 0.3337409794330597, |
| "grad_norm_var": 0.0017762239427149495, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.840167284011841, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1660769209265709, |
| "loss/reg": 2.8615217208862305, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.0043, |
| "grad_norm": 0.4685284495353699, |
| "grad_norm_var": 0.0024887722894077887, |
| "learning_rate": 5e-05, |
| "loss": 0.1741, |
| "loss/crossentropy": 2.7593295574188232, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1740834154188633, |
| "loss/reg": 2.860365629196167, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.00431, |
| "grad_norm": 0.35651838779449463, |
| "grad_norm_var": 0.002434815976952542, |
| "learning_rate": 5e-05, |
| "loss": 0.1673, |
| "loss/crossentropy": 2.777701735496521, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16725125908851624, |
| "loss/reg": 2.858222007751465, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.00432, |
| "grad_norm": 0.34670454263687134, |
| "grad_norm_var": 0.0023984303943363817, |
| "learning_rate": 5e-05, |
| "loss": 0.165, |
| "loss/crossentropy": 2.749099850654602, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16504037007689476, |
| "loss/reg": 2.855973958969116, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.00433, |
| "grad_norm": 0.3284713923931122, |
| "grad_norm_var": 0.0024153046998328874, |
| "learning_rate": 5e-05, |
| "loss": 0.1521, |
| "loss/crossentropy": 2.7869237661361694, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15208067372441292, |
| "loss/reg": 2.853942632675171, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.00434, |
| "grad_norm": 0.48883649706840515, |
| "grad_norm_var": 0.0030697262784900037, |
| "learning_rate": 5e-05, |
| "loss": 0.1759, |
| "loss/crossentropy": 2.9403671622276306, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17589127644896507, |
| "loss/reg": 2.851728916168213, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.00435, |
| "grad_norm": 0.36951273679733276, |
| "grad_norm_var": 0.0030654307324534447, |
| "learning_rate": 5e-05, |
| "loss": 0.185, |
| "loss/crossentropy": 2.7797312140464783, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1850355602800846, |
| "loss/reg": 2.8488640785217285, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.00436, |
| "grad_norm": 0.4184967577457428, |
| "grad_norm_var": 0.0031605906698184564, |
| "learning_rate": 5e-05, |
| "loss": 0.1855, |
| "loss/crossentropy": 2.812410533428192, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18553681299090385, |
| "loss/reg": 2.8465514183044434, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.00437, |
| "grad_norm": 0.4329560101032257, |
| "grad_norm_var": 0.0032767273553133062, |
| "learning_rate": 5e-05, |
| "loss": 0.1784, |
| "loss/crossentropy": 2.840768814086914, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17843929678201675, |
| "loss/reg": 2.844315767288208, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.00438, |
| "grad_norm": 0.6038658022880554, |
| "grad_norm_var": 0.005936964872234999, |
| "learning_rate": 5e-05, |
| "loss": 0.1949, |
| "loss/crossentropy": 2.7183879017829895, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19491342082619667, |
| "loss/reg": 2.842548131942749, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.00439, |
| "grad_norm": 0.4069391191005707, |
| "grad_norm_var": 0.005387125873613382, |
| "learning_rate": 5e-05, |
| "loss": 0.1875, |
| "loss/crossentropy": 2.7780433297157288, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18749799579381943, |
| "loss/reg": 2.840106964111328, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.0044, |
| "grad_norm": 0.35941290855407715, |
| "grad_norm_var": 0.005220523762257064, |
| "learning_rate": 5e-05, |
| "loss": 0.1639, |
| "loss/crossentropy": 2.7595601081848145, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1638675332069397, |
| "loss/reg": 2.8378958702087402, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.00441, |
| "grad_norm": 0.3669149875640869, |
| "grad_norm_var": 0.005284393934492052, |
| "learning_rate": 5e-05, |
| "loss": 0.1714, |
| "loss/crossentropy": 2.673116147518158, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1713937260210514, |
| "loss/reg": 2.8351917266845703, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.00442, |
| "grad_norm": 0.3643859922885895, |
| "grad_norm_var": 0.0051709546313713755, |
| "learning_rate": 5e-05, |
| "loss": 0.1795, |
| "loss/crossentropy": 2.8026832342147827, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17951133847236633, |
| "loss/reg": 2.8323209285736084, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.00443, |
| "grad_norm": 0.34250232577323914, |
| "grad_norm_var": 0.005361033629373261, |
| "learning_rate": 5e-05, |
| "loss": 0.1729, |
| "loss/crossentropy": 2.7829501032829285, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17286691814661026, |
| "loss/reg": 2.8302509784698486, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.00444, |
| "grad_norm": 0.3323063552379608, |
| "grad_norm_var": 0.005486165176397177, |
| "learning_rate": 5e-05, |
| "loss": 0.1685, |
| "loss/crossentropy": 2.7025471329689026, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1684984639286995, |
| "loss/reg": 2.827455997467041, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.00445, |
| "grad_norm": 0.35889074206352234, |
| "grad_norm_var": 0.005320257567319386, |
| "learning_rate": 5e-05, |
| "loss": 0.1872, |
| "loss/crossentropy": 2.6426368355751038, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18716050684452057, |
| "loss/reg": 2.8253841400146484, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.00446, |
| "grad_norm": 0.39696604013442993, |
| "grad_norm_var": 0.004953801905317123, |
| "learning_rate": 5e-05, |
| "loss": 0.1844, |
| "loss/crossentropy": 2.6918662786483765, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18442435935139656, |
| "loss/reg": 2.8228838443756104, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.00447, |
| "grad_norm": 0.3320043981075287, |
| "grad_norm_var": 0.005107676487313561, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.7309769988059998, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16887113079428673, |
| "loss/reg": 2.8211417198181152, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 0.3350951373577118, |
| "grad_norm_var": 0.0051840048892141, |
| "learning_rate": 5e-05, |
| "loss": 0.1736, |
| "loss/crossentropy": 2.7696202397346497, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17358100041747093, |
| "loss/reg": 2.818211793899536, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.00449, |
| "grad_norm": 0.35995370149612427, |
| "grad_norm_var": 0.004988316730949372, |
| "learning_rate": 5e-05, |
| "loss": 0.185, |
| "loss/crossentropy": 2.7628800868988037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1849852055311203, |
| "loss/reg": 2.8163435459136963, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 0.3433259427547455, |
| "grad_norm_var": 0.004429295151525636, |
| "learning_rate": 5e-05, |
| "loss": 0.1776, |
| "loss/crossentropy": 2.92121821641922, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1775917150080204, |
| "loss/reg": 2.8140530586242676, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.00451, |
| "grad_norm": 0.3525676131248474, |
| "grad_norm_var": 0.004477082320186199, |
| "learning_rate": 5e-05, |
| "loss": 0.1765, |
| "loss/crossentropy": 2.8223352432250977, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1765166036784649, |
| "loss/reg": 2.8114964962005615, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.00452, |
| "grad_norm": 0.3349536955356598, |
| "grad_norm_var": 0.004502986709870172, |
| "learning_rate": 5e-05, |
| "loss": 0.1617, |
| "loss/crossentropy": 2.58266818523407, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16168920323252678, |
| "loss/reg": 2.8082778453826904, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.00453, |
| "grad_norm": 0.3272739350795746, |
| "grad_norm_var": 0.004404667304666754, |
| "learning_rate": 5e-05, |
| "loss": 0.1606, |
| "loss/crossentropy": 2.791221022605896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16057174652814865, |
| "loss/reg": 2.8055834770202637, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.00454, |
| "grad_norm": 0.35802412033081055, |
| "grad_norm_var": 0.0005107777789474354, |
| "learning_rate": 5e-05, |
| "loss": 0.1758, |
| "loss/crossentropy": 2.856186628341675, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17580854520201683, |
| "loss/reg": 2.8030307292938232, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.00455, |
| "grad_norm": 0.34605538845062256, |
| "grad_norm_var": 0.0003165176266941239, |
| "learning_rate": 5e-05, |
| "loss": 0.1651, |
| "loss/crossentropy": 2.734806716442108, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16513444855809212, |
| "loss/reg": 2.7998156547546387, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.00456, |
| "grad_norm": 0.35396888852119446, |
| "grad_norm_var": 0.0003120198180476634, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.8904529213905334, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17014532163739204, |
| "loss/reg": 2.796231508255005, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.00457, |
| "grad_norm": 0.3613145649433136, |
| "grad_norm_var": 0.00030159148728288546, |
| "learning_rate": 5e-05, |
| "loss": 0.1835, |
| "loss/crossentropy": 2.813112735748291, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1834680140018463, |
| "loss/reg": 2.7929091453552246, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.00458, |
| "grad_norm": 0.36372610926628113, |
| "grad_norm_var": 0.00030035069871777733, |
| "learning_rate": 5e-05, |
| "loss": 0.167, |
| "loss/crossentropy": 2.795239508152008, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1669648103415966, |
| "loss/reg": 2.789707899093628, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.00459, |
| "grad_norm": 0.3581913113594055, |
| "grad_norm_var": 0.00030019062479412403, |
| "learning_rate": 5e-05, |
| "loss": 0.165, |
| "loss/crossentropy": 2.797567903995514, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16498373076319695, |
| "loss/reg": 2.786154270172119, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0046, |
| "grad_norm": 0.3571149408817291, |
| "grad_norm_var": 0.00027710791712463786, |
| "learning_rate": 5e-05, |
| "loss": 0.161, |
| "loss/crossentropy": 2.743883192539215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16101711615920067, |
| "loss/reg": 2.7824792861938477, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.00461, |
| "grad_norm": 0.356715589761734, |
| "grad_norm_var": 0.0002755397827386948, |
| "learning_rate": 5e-05, |
| "loss": 0.1732, |
| "loss/crossentropy": 2.724743604660034, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1731928214430809, |
| "loss/reg": 2.778890371322632, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.00462, |
| "grad_norm": 0.3243059813976288, |
| "grad_norm_var": 0.00017305590364662023, |
| "learning_rate": 5e-05, |
| "loss": 0.1592, |
| "loss/crossentropy": 2.7731017470359802, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15923070535063744, |
| "loss/reg": 2.775613784790039, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.00463, |
| "grad_norm": 0.3843972980976105, |
| "grad_norm_var": 0.00023436686166613171, |
| "learning_rate": 5e-05, |
| "loss": 0.1776, |
| "loss/crossentropy": 2.6426811814308167, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17761223763227463, |
| "loss/reg": 2.772561550140381, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.00464, |
| "grad_norm": 0.3468632102012634, |
| "grad_norm_var": 0.00021796986892265539, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.882816791534424, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17419364303350449, |
| "loss/reg": 2.769129991531372, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.00465, |
| "grad_norm": 0.3967267870903015, |
| "grad_norm_var": 0.0003424789629975648, |
| "learning_rate": 5e-05, |
| "loss": 0.1785, |
| "loss/crossentropy": 2.9460648894309998, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1784828118979931, |
| "loss/reg": 2.767021656036377, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.00466, |
| "grad_norm": 0.36927327513694763, |
| "grad_norm_var": 0.0003472996962895862, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.7243736386299133, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17225344851613045, |
| "loss/reg": 2.7637596130371094, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.00467, |
| "grad_norm": 0.6855224967002869, |
| "grad_norm_var": 0.007136168552583877, |
| "learning_rate": 5e-05, |
| "loss": 0.1725, |
| "loss/crossentropy": 2.9400131702423096, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17254997044801712, |
| "loss/reg": 2.7604963779449463, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.00468, |
| "grad_norm": 0.41388872265815735, |
| "grad_norm_var": 0.007088047286249225, |
| "learning_rate": 5e-05, |
| "loss": 0.1608, |
| "loss/crossentropy": 2.7375746369361877, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16082635894417763, |
| "loss/reg": 2.7572667598724365, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.00469, |
| "grad_norm": 0.3997427225112915, |
| "grad_norm_var": 0.006892705403346755, |
| "learning_rate": 5e-05, |
| "loss": 0.1704, |
| "loss/crossentropy": 2.8761582374572754, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17043552175164223, |
| "loss/reg": 2.754149913787842, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.0047, |
| "grad_norm": 0.33424052596092224, |
| "grad_norm_var": 0.007016741295476203, |
| "learning_rate": 5e-05, |
| "loss": 0.1652, |
| "loss/crossentropy": 2.8255309462547302, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16519855335354805, |
| "loss/reg": 2.751276731491089, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.00471, |
| "grad_norm": 0.41415977478027344, |
| "grad_norm_var": 0.006957502567752493, |
| "learning_rate": 5e-05, |
| "loss": 0.1832, |
| "loss/crossentropy": 2.676911950111389, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18318749964237213, |
| "loss/reg": 2.7486090660095215, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.00472, |
| "grad_norm": 0.3688299357891083, |
| "grad_norm_var": 0.006902369057221236, |
| "learning_rate": 5e-05, |
| "loss": 0.162, |
| "loss/crossentropy": 2.6736281514167786, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16196409612894058, |
| "loss/reg": 2.745225667953491, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.00473, |
| "grad_norm": 0.42543119192123413, |
| "grad_norm_var": 0.006916738926360373, |
| "learning_rate": 5e-05, |
| "loss": 0.1685, |
| "loss/crossentropy": 2.7871673703193665, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16850638762116432, |
| "loss/reg": 2.7421023845672607, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.00474, |
| "grad_norm": 0.3542870581150055, |
| "grad_norm_var": 0.006960025235757868, |
| "learning_rate": 5e-05, |
| "loss": 0.1695, |
| "loss/crossentropy": 2.727401077747345, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1695132479071617, |
| "loss/reg": 2.738083839416504, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.00475, |
| "grad_norm": 0.42508408427238464, |
| "grad_norm_var": 0.006928287935252916, |
| "learning_rate": 5e-05, |
| "loss": 0.1967, |
| "loss/crossentropy": 2.756391167640686, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19667796790599823, |
| "loss/reg": 2.734565496444702, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.00476, |
| "grad_norm": 0.3327108919620514, |
| "grad_norm_var": 0.007096223362361916, |
| "learning_rate": 5e-05, |
| "loss": 0.1698, |
| "loss/crossentropy": 2.662286937236786, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16976173967123032, |
| "loss/reg": 2.730898141860962, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.00477, |
| "grad_norm": 0.3538263142108917, |
| "grad_norm_var": 0.007111786918880665, |
| "learning_rate": 5e-05, |
| "loss": 0.1728, |
| "loss/crossentropy": 2.6600981950759888, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17283405736088753, |
| "loss/reg": 2.7276227474212646, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.00478, |
| "grad_norm": 0.6719810962677002, |
| "grad_norm_var": 0.011362604241119423, |
| "learning_rate": 5e-05, |
| "loss": 0.1956, |
| "loss/crossentropy": 2.9192944169044495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1955549158155918, |
| "loss/reg": 2.7246785163879395, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.00479, |
| "grad_norm": 0.40175002813339233, |
| "grad_norm_var": 0.011305273259017534, |
| "learning_rate": 5e-05, |
| "loss": 0.1707, |
| "loss/crossentropy": 2.8099315762519836, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1706707924604416, |
| "loss/reg": 2.721214771270752, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 0.5700014233589172, |
| "grad_norm_var": 0.012288996380569357, |
| "learning_rate": 5e-05, |
| "loss": 0.175, |
| "loss/crossentropy": 2.764845371246338, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17503220587968826, |
| "loss/reg": 2.71852970123291, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.00481, |
| "grad_norm": 0.3602856993675232, |
| "grad_norm_var": 0.012545036289332723, |
| "learning_rate": 5e-05, |
| "loss": 0.1675, |
| "loss/crossentropy": 2.817295730113983, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16745564714074135, |
| "loss/reg": 2.7155778408050537, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.00482, |
| "grad_norm": 0.37470605969429016, |
| "grad_norm_var": 0.012502846327791594, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.7710434794425964, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.172159094363451, |
| "loss/reg": 2.7133727073669434, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.00483, |
| "grad_norm": 0.319488525390625, |
| "grad_norm_var": 0.008425663660975724, |
| "learning_rate": 5e-05, |
| "loss": 0.1524, |
| "loss/crossentropy": 2.783412456512451, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15241163223981857, |
| "loss/reg": 2.710848331451416, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.00484, |
| "grad_norm": 0.3474343419075012, |
| "grad_norm_var": 0.008645296689366684, |
| "learning_rate": 5e-05, |
| "loss": 0.1582, |
| "loss/crossentropy": 2.8712441325187683, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1582440249621868, |
| "loss/reg": 2.708759307861328, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.00485, |
| "grad_norm": 0.3881974518299103, |
| "grad_norm_var": 0.008659215056144554, |
| "learning_rate": 5e-05, |
| "loss": 0.168, |
| "loss/crossentropy": 2.7801290154457092, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16804108768701553, |
| "loss/reg": 2.707090139389038, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.00486, |
| "grad_norm": 0.3865320086479187, |
| "grad_norm_var": 0.008353144350497502, |
| "learning_rate": 5e-05, |
| "loss": 0.1622, |
| "loss/crossentropy": 2.762259840965271, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1622123382985592, |
| "loss/reg": 2.704341173171997, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.00487, |
| "grad_norm": 0.3601287007331848, |
| "grad_norm_var": 0.008476237288047564, |
| "learning_rate": 5e-05, |
| "loss": 0.1717, |
| "loss/crossentropy": 2.7640222311019897, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1716899275779724, |
| "loss/reg": 2.702449321746826, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.00488, |
| "grad_norm": 0.3476349711418152, |
| "grad_norm_var": 0.008599584577097493, |
| "learning_rate": 5e-05, |
| "loss": 0.1771, |
| "loss/crossentropy": 2.8057321906089783, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1770944595336914, |
| "loss/reg": 2.6996164321899414, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.00489, |
| "grad_norm": 0.35880380868911743, |
| "grad_norm_var": 0.008661929013581293, |
| "learning_rate": 5e-05, |
| "loss": 0.1759, |
| "loss/crossentropy": 2.7546836137771606, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17589908093214035, |
| "loss/reg": 2.69681978225708, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.0049, |
| "grad_norm": 0.3216891586780548, |
| "grad_norm_var": 0.008914221483014847, |
| "learning_rate": 5e-05, |
| "loss": 0.1628, |
| "loss/crossentropy": 2.755846858024597, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16277150437235832, |
| "loss/reg": 2.6948180198669434, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.00491, |
| "grad_norm": 0.3739294409751892, |
| "grad_norm_var": 0.008872687766587003, |
| "learning_rate": 5e-05, |
| "loss": 0.1737, |
| "loss/crossentropy": 2.758453607559204, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.173715490847826, |
| "loss/reg": 2.692713737487793, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.00492, |
| "grad_norm": 0.3633546531200409, |
| "grad_norm_var": 0.008689872848313067, |
| "learning_rate": 5e-05, |
| "loss": 0.1755, |
| "loss/crossentropy": 2.756626844406128, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17549088224768639, |
| "loss/reg": 2.6911702156066895, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.00493, |
| "grad_norm": 0.4165309965610504, |
| "grad_norm_var": 0.00860196217059566, |
| "learning_rate": 5e-05, |
| "loss": 0.1882, |
| "loss/crossentropy": 2.69485205411911, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18822569772601128, |
| "loss/reg": 2.6890034675598145, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.00494, |
| "grad_norm": 0.34585461020469666, |
| "grad_norm_var": 0.0033206140596304815, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.73829984664917, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17416464537382126, |
| "loss/reg": 2.687713623046875, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.00495, |
| "grad_norm": 0.3443280756473541, |
| "grad_norm_var": 0.003339269529387142, |
| "learning_rate": 5e-05, |
| "loss": 0.1613, |
| "loss/crossentropy": 2.745963931083679, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.161319550126791, |
| "loss/reg": 2.685638189315796, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.00496, |
| "grad_norm": 0.3689098656177521, |
| "grad_norm_var": 0.000602855553897171, |
| "learning_rate": 5e-05, |
| "loss": 0.1735, |
| "loss/crossentropy": 2.6744813919067383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17346932739019394, |
| "loss/reg": 2.6839358806610107, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.00497, |
| "grad_norm": 0.36457374691963196, |
| "grad_norm_var": 0.0006035317496342747, |
| "learning_rate": 5e-05, |
| "loss": 0.1735, |
| "loss/crossentropy": 2.799642562866211, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17352834343910217, |
| "loss/reg": 2.683485507965088, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.00498, |
| "grad_norm": 0.35222312808036804, |
| "grad_norm_var": 0.0005951796117876367, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.739416480064392, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1742345169186592, |
| "loss/reg": 2.6830482482910156, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.00499, |
| "grad_norm": 0.3427422344684601, |
| "grad_norm_var": 0.0005034448418147264, |
| "learning_rate": 5e-05, |
| "loss": 0.1673, |
| "loss/crossentropy": 2.772252082824707, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16728588938713074, |
| "loss/reg": 2.682189464569092, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 0.39299577474594116, |
| "grad_norm_var": 0.0005481683329227494, |
| "learning_rate": 5e-05, |
| "loss": 0.1926, |
| "loss/crossentropy": 2.7702752351760864, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1926349699497223, |
| "loss/reg": 2.6808254718780518, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.00501, |
| "grad_norm": 0.3431949019432068, |
| "grad_norm_var": 0.0005312130675710792, |
| "learning_rate": 5e-05, |
| "loss": 0.1634, |
| "loss/crossentropy": 2.7881234288215637, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1633942425251007, |
| "loss/reg": 2.6798629760742188, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.00502, |
| "grad_norm": 0.36641839146614075, |
| "grad_norm_var": 0.0004892704880637311, |
| "learning_rate": 5e-05, |
| "loss": 0.1787, |
| "loss/crossentropy": 2.848407030105591, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17872987315058708, |
| "loss/reg": 2.677311658859253, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.00503, |
| "grad_norm": 0.3278079330921173, |
| "grad_norm_var": 0.000554897538649816, |
| "learning_rate": 5e-05, |
| "loss": 0.1587, |
| "loss/crossentropy": 2.75662362575531, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15868044644594193, |
| "loss/reg": 2.675185203552246, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.00504, |
| "grad_norm": 0.3251039683818817, |
| "grad_norm_var": 0.0006183250665441046, |
| "learning_rate": 5e-05, |
| "loss": 0.1551, |
| "loss/crossentropy": 2.731416165828705, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15506618097424507, |
| "loss/reg": 2.673948287963867, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.00505, |
| "grad_norm": 0.35344070196151733, |
| "grad_norm_var": 0.0006186746986458047, |
| "learning_rate": 5e-05, |
| "loss": 0.167, |
| "loss/crossentropy": 2.740668296813965, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16695522889494896, |
| "loss/reg": 2.6712498664855957, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.00506, |
| "grad_norm": 0.36658284068107605, |
| "grad_norm_var": 0.0005366058949143918, |
| "learning_rate": 5e-05, |
| "loss": 0.1668, |
| "loss/crossentropy": 2.802608013153076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16682763025164604, |
| "loss/reg": 2.669286012649536, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.00507, |
| "grad_norm": 0.4423954486846924, |
| "grad_norm_var": 0.000963591213409624, |
| "learning_rate": 5e-05, |
| "loss": 0.1963, |
| "loss/crossentropy": 2.811932861804962, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19632378965616226, |
| "loss/reg": 2.666898012161255, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.00508, |
| "grad_norm": 0.3770610988140106, |
| "grad_norm_var": 0.000975015024308116, |
| "learning_rate": 5e-05, |
| "loss": 0.1753, |
| "loss/crossentropy": 2.7279282808303833, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17527905479073524, |
| "loss/reg": 2.664764881134033, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.00509, |
| "grad_norm": 0.35589146614074707, |
| "grad_norm_var": 0.0007832244440521922, |
| "learning_rate": 5e-05, |
| "loss": 0.168, |
| "loss/crossentropy": 2.8977367281913757, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16798892244696617, |
| "loss/reg": 2.6622438430786133, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.0051, |
| "grad_norm": 0.3419097661972046, |
| "grad_norm_var": 0.0007919503322765919, |
| "learning_rate": 5e-05, |
| "loss": 0.1729, |
| "loss/crossentropy": 2.6906025409698486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.172856405377388, |
| "loss/reg": 2.6595211029052734, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.00511, |
| "grad_norm": 0.3972381353378296, |
| "grad_norm_var": 0.0008538971282195384, |
| "learning_rate": 5e-05, |
| "loss": 0.1825, |
| "loss/crossentropy": 2.762513279914856, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18245646730065346, |
| "loss/reg": 2.6572320461273193, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 0.3489353358745575, |
| "grad_norm_var": 0.0008648399289393501, |
| "learning_rate": 5e-05, |
| "loss": 0.179, |
| "loss/crossentropy": 2.7472071647644043, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1789936050772667, |
| "loss/reg": 2.6544342041015625, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.00513, |
| "grad_norm": 0.3673308491706848, |
| "grad_norm_var": 0.0008661114894439326, |
| "learning_rate": 5e-05, |
| "loss": 0.1745, |
| "loss/crossentropy": 2.69700163602829, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1745261810719967, |
| "loss/reg": 2.6520497798919678, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.00514, |
| "grad_norm": 0.33870744705200195, |
| "grad_norm_var": 0.0008961917113334199, |
| "learning_rate": 5e-05, |
| "loss": 0.1649, |
| "loss/crossentropy": 2.762860357761383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16494135558605194, |
| "loss/reg": 2.6500847339630127, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.00515, |
| "grad_norm": 0.40411266684532166, |
| "grad_norm_var": 0.0009761766654230563, |
| "learning_rate": 5e-05, |
| "loss": 0.1628, |
| "loss/crossentropy": 3.01085501909256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16284478455781937, |
| "loss/reg": 2.648311138153076, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.00516, |
| "grad_norm": 0.37194308638572693, |
| "grad_norm_var": 0.0009268939874421604, |
| "learning_rate": 5e-05, |
| "loss": 0.182, |
| "loss/crossentropy": 2.721080005168915, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18202906847000122, |
| "loss/reg": 2.6469640731811523, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.00517, |
| "grad_norm": 0.3380615711212158, |
| "grad_norm_var": 0.0009429551352979477, |
| "learning_rate": 5e-05, |
| "loss": 0.1639, |
| "loss/crossentropy": 2.6788495779037476, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16385124996304512, |
| "loss/reg": 2.6445441246032715, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.00518, |
| "grad_norm": 0.37696361541748047, |
| "grad_norm_var": 0.0009533986625055632, |
| "learning_rate": 5e-05, |
| "loss": 0.1587, |
| "loss/crossentropy": 2.6845511198043823, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1586880125105381, |
| "loss/reg": 2.6424736976623535, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.00519, |
| "grad_norm": 0.32983675599098206, |
| "grad_norm_var": 0.0009437052369864992, |
| "learning_rate": 5e-05, |
| "loss": 0.1585, |
| "loss/crossentropy": 2.5984672904014587, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15849433466792107, |
| "loss/reg": 2.639796257019043, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.0052, |
| "grad_norm": 0.3439983129501343, |
| "grad_norm_var": 0.000866215802107521, |
| "learning_rate": 5e-05, |
| "loss": 0.1578, |
| "loss/crossentropy": 2.7057528495788574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15775253251194954, |
| "loss/reg": 2.636976480484009, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.00521, |
| "grad_norm": 0.4739494323730469, |
| "grad_norm_var": 0.0015736599047053415, |
| "learning_rate": 5e-05, |
| "loss": 0.178, |
| "loss/crossentropy": 2.6239394545555115, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17801255360245705, |
| "loss/reg": 2.6342852115631104, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.00522, |
| "grad_norm": 0.5270029306411743, |
| "grad_norm_var": 0.003035565907296726, |
| "learning_rate": 5e-05, |
| "loss": 0.1801, |
| "loss/crossentropy": 2.890467643737793, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18007512018084526, |
| "loss/reg": 2.631289005279541, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.00523, |
| "grad_norm": 0.42719003558158875, |
| "grad_norm_var": 0.002930528350278516, |
| "learning_rate": 5e-05, |
| "loss": 0.1781, |
| "loss/crossentropy": 2.9749565720558167, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17812742665410042, |
| "loss/reg": 2.6284420490264893, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.00524, |
| "grad_norm": 0.37133005261421204, |
| "grad_norm_var": 0.0029367435634455913, |
| "learning_rate": 5e-05, |
| "loss": 0.1597, |
| "loss/crossentropy": 2.692670702934265, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1597190946340561, |
| "loss/reg": 2.6251702308654785, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.00525, |
| "grad_norm": 0.3646347224712372, |
| "grad_norm_var": 0.0029109098946428253, |
| "learning_rate": 5e-05, |
| "loss": 0.1676, |
| "loss/crossentropy": 2.8696910738945007, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16764900088310242, |
| "loss/reg": 2.621973991394043, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.00526, |
| "grad_norm": 0.3347557485103607, |
| "grad_norm_var": 0.002953013887398237, |
| "learning_rate": 5e-05, |
| "loss": 0.1659, |
| "loss/crossentropy": 2.844240427017212, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16587505862116814, |
| "loss/reg": 2.6180617809295654, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.00527, |
| "grad_norm": 0.3301764726638794, |
| "grad_norm_var": 0.003100070614909223, |
| "learning_rate": 5e-05, |
| "loss": 0.1554, |
| "loss/crossentropy": 2.822225272655487, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15541274286806583, |
| "loss/reg": 2.6156363487243652, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.00528, |
| "grad_norm": 0.3668423593044281, |
| "grad_norm_var": 0.003050578439524883, |
| "learning_rate": 5e-05, |
| "loss": 0.1725, |
| "loss/crossentropy": 2.873881459236145, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17249644920229912, |
| "loss/reg": 2.6129000186920166, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.00529, |
| "grad_norm": 0.33062636852264404, |
| "grad_norm_var": 0.0031927551041592986, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.7202290296554565, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16901781037449837, |
| "loss/reg": 2.6098320484161377, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.0053, |
| "grad_norm": 0.33170488476753235, |
| "grad_norm_var": 0.003231463613689256, |
| "learning_rate": 5e-05, |
| "loss": 0.1708, |
| "loss/crossentropy": 2.7543463706970215, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17077547311782837, |
| "loss/reg": 2.606674909591675, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.00531, |
| "grad_norm": 0.3436318337917328, |
| "grad_norm_var": 0.0032369737172315838, |
| "learning_rate": 5e-05, |
| "loss": 0.1822, |
| "loss/crossentropy": 2.6231788992881775, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1821577101945877, |
| "loss/reg": 2.603997230529785, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.00532, |
| "grad_norm": 0.33105242252349854, |
| "grad_norm_var": 0.0033454153420392264, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.819184124469757, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16607840731739998, |
| "loss/reg": 2.6012203693389893, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.00533, |
| "grad_norm": 0.3485148847103119, |
| "grad_norm_var": 0.0033075767398377588, |
| "learning_rate": 5e-05, |
| "loss": 0.1676, |
| "loss/crossentropy": 2.8594303727149963, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1676221825182438, |
| "loss/reg": 2.5986573696136475, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.00534, |
| "grad_norm": 0.3541623651981354, |
| "grad_norm_var": 0.003321219936842216, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.7382256984710693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17420669272542, |
| "loss/reg": 2.5956368446350098, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.00535, |
| "grad_norm": 0.362183541059494, |
| "grad_norm_var": 0.003216249066768325, |
| "learning_rate": 5e-05, |
| "loss": 0.1732, |
| "loss/crossentropy": 2.820302128791809, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1731831431388855, |
| "loss/reg": 2.591860294342041, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.00536, |
| "grad_norm": 0.340348482131958, |
| "grad_norm_var": 0.0032303969391706505, |
| "learning_rate": 5e-05, |
| "loss": 0.1677, |
| "loss/crossentropy": 2.978896915912628, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16773569583892822, |
| "loss/reg": 2.5879762172698975, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.00537, |
| "grad_norm": 0.359326034784317, |
| "grad_norm_var": 0.002480178301492671, |
| "learning_rate": 5e-05, |
| "loss": 0.1767, |
| "loss/crossentropy": 2.7645240426063538, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17668773606419563, |
| "loss/reg": 2.5847809314727783, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.00538, |
| "grad_norm": 0.3420480489730835, |
| "grad_norm_var": 0.0005976425682412671, |
| "learning_rate": 5e-05, |
| "loss": 0.1758, |
| "loss/crossentropy": 2.7278724908828735, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17576001212000847, |
| "loss/reg": 2.581143379211426, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.00539, |
| "grad_norm": 0.33362701535224915, |
| "grad_norm_var": 0.00021185911019383125, |
| "learning_rate": 5e-05, |
| "loss": 0.1708, |
| "loss/crossentropy": 2.6828721165657043, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1707863062620163, |
| "loss/reg": 2.5769548416137695, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.0054, |
| "grad_norm": 0.6795082092285156, |
| "grad_norm_var": 0.007165518560383773, |
| "learning_rate": 5e-05, |
| "loss": 0.2001, |
| "loss/crossentropy": 2.7977577447891235, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20013980567455292, |
| "loss/reg": 2.5742313861846924, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.00541, |
| "grad_norm": 0.35366278886795044, |
| "grad_norm_var": 0.007174778628811747, |
| "learning_rate": 5e-05, |
| "loss": 0.1703, |
| "loss/crossentropy": 2.859143853187561, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17025134339928627, |
| "loss/reg": 2.57037353515625, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.00542, |
| "grad_norm": 0.3655552566051483, |
| "grad_norm_var": 0.007109308326582827, |
| "learning_rate": 5e-05, |
| "loss": 0.1775, |
| "loss/crossentropy": 2.8502614498138428, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1775321438908577, |
| "loss/reg": 2.566716432571411, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.00543, |
| "grad_norm": 0.3574732542037964, |
| "grad_norm_var": 0.007021635262450318, |
| "learning_rate": 5e-05, |
| "loss": 0.1735, |
| "loss/crossentropy": 2.892129361629486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17345865443348885, |
| "loss/reg": 2.563842296600342, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.00544, |
| "grad_norm": 0.36598220467567444, |
| "grad_norm_var": 0.007021902205424502, |
| "learning_rate": 5e-05, |
| "loss": 0.1721, |
| "loss/crossentropy": 2.7138225436210632, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17211398482322693, |
| "loss/reg": 2.5608513355255127, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.00545, |
| "grad_norm": 0.35922348499298096, |
| "grad_norm_var": 0.0069277921155704155, |
| "learning_rate": 5e-05, |
| "loss": 0.1695, |
| "loss/crossentropy": 2.8138818740844727, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16949571669101715, |
| "loss/reg": 2.557931423187256, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.00546, |
| "grad_norm": 0.3538724184036255, |
| "grad_norm_var": 0.006843838113958241, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.7698569893836975, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17218982055783272, |
| "loss/reg": 2.5551669597625732, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.00547, |
| "grad_norm": 0.38070008158683777, |
| "grad_norm_var": 0.006790073386325786, |
| "learning_rate": 5e-05, |
| "loss": 0.1843, |
| "loss/crossentropy": 2.631078600883484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1842627413570881, |
| "loss/reg": 2.5517876148223877, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.00548, |
| "grad_norm": 0.35319533944129944, |
| "grad_norm_var": 0.006693321782661003, |
| "learning_rate": 5e-05, |
| "loss": 0.16, |
| "loss/crossentropy": 2.850399076938629, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15995023399591446, |
| "loss/reg": 2.548754930496216, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.00549, |
| "grad_norm": 0.4596186578273773, |
| "grad_norm_var": 0.0070637908733671, |
| "learning_rate": 5e-05, |
| "loss": 0.164, |
| "loss/crossentropy": 2.868459641933441, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16395244374871254, |
| "loss/reg": 2.5462560653686523, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 0.3474785387516022, |
| "grad_norm_var": 0.007091863949161529, |
| "learning_rate": 5e-05, |
| "loss": 0.1641, |
| "loss/crossentropy": 2.6114882230758667, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1641043722629547, |
| "loss/reg": 2.5431270599365234, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.00551, |
| "grad_norm": 0.3570033013820648, |
| "grad_norm_var": 0.007107306178779664, |
| "learning_rate": 5e-05, |
| "loss": 0.1579, |
| "loss/crossentropy": 2.8220438957214355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15791887789964676, |
| "loss/reg": 2.539910078048706, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.00552, |
| "grad_norm": 0.32915255427360535, |
| "grad_norm_var": 0.0071770024029156184, |
| "learning_rate": 5e-05, |
| "loss": 0.1612, |
| "loss/crossentropy": 2.878856658935547, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16122740507125854, |
| "loss/reg": 2.5368034839630127, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.00553, |
| "grad_norm": 0.3565903604030609, |
| "grad_norm_var": 0.007185408405122592, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.705552637577057, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16892266646027565, |
| "loss/reg": 2.5333409309387207, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.00554, |
| "grad_norm": 0.31767770648002625, |
| "grad_norm_var": 0.0073488319211029345, |
| "learning_rate": 5e-05, |
| "loss": 0.1579, |
| "loss/crossentropy": 2.685009717941284, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15788856148719788, |
| "loss/reg": 2.5296521186828613, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.00555, |
| "grad_norm": 0.35047340393066406, |
| "grad_norm_var": 0.0072637659398345844, |
| "learning_rate": 5e-05, |
| "loss": 0.1746, |
| "loss/crossentropy": 2.5745012760162354, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17458590865135193, |
| "loss/reg": 2.5270395278930664, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.00556, |
| "grad_norm": 0.3832140266895294, |
| "grad_norm_var": 0.0009360149891549837, |
| "learning_rate": 5e-05, |
| "loss": 0.1676, |
| "loss/crossentropy": 2.8551809787750244, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16756092011928558, |
| "loss/reg": 2.523982048034668, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.00557, |
| "grad_norm": 0.4020329713821411, |
| "grad_norm_var": 0.0010289291164416311, |
| "learning_rate": 5e-05, |
| "loss": 0.1755, |
| "loss/crossentropy": 2.787672698497772, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17547398060560226, |
| "loss/reg": 2.520615816116333, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.00558, |
| "grad_norm": 0.38412049412727356, |
| "grad_norm_var": 0.0010519623608851428, |
| "learning_rate": 5e-05, |
| "loss": 0.1815, |
| "loss/crossentropy": 2.846573293209076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1815263032913208, |
| "loss/reg": 2.518004894256592, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.00559, |
| "grad_norm": 0.3456071615219116, |
| "grad_norm_var": 0.0010744320361522322, |
| "learning_rate": 5e-05, |
| "loss": 0.1746, |
| "loss/crossentropy": 2.922893524169922, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17459525167942047, |
| "loss/reg": 2.5153868198394775, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.0056, |
| "grad_norm": 0.36563733220100403, |
| "grad_norm_var": 0.0010744113839659304, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.7154372334480286, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16903281211853027, |
| "loss/reg": 2.5126099586486816, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.00561, |
| "grad_norm": 0.3387238085269928, |
| "grad_norm_var": 0.00111742135319511, |
| "learning_rate": 5e-05, |
| "loss": 0.1666, |
| "loss/crossentropy": 2.5947054624557495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16662058234214783, |
| "loss/reg": 2.509439706802368, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.00562, |
| "grad_norm": 0.45574790239334106, |
| "grad_norm_var": 0.0016275854789366514, |
| "learning_rate": 5e-05, |
| "loss": 0.1814, |
| "loss/crossentropy": 2.961915969848633, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1813669353723526, |
| "loss/reg": 2.5074241161346436, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.00563, |
| "grad_norm": 0.39113175868988037, |
| "grad_norm_var": 0.0016486631382784092, |
| "learning_rate": 5e-05, |
| "loss": 0.1643, |
| "loss/crossentropy": 2.7337673902511597, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.164301548153162, |
| "loss/reg": 2.5046684741973877, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.00564, |
| "grad_norm": 0.36300358176231384, |
| "grad_norm_var": 0.0016312765518430934, |
| "learning_rate": 5e-05, |
| "loss": 0.1602, |
| "loss/crossentropy": 2.713749051094055, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16021040827035904, |
| "loss/reg": 2.5020864009857178, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.00565, |
| "grad_norm": 0.3250221312046051, |
| "grad_norm_var": 0.001185749693630452, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.739534556865692, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.166114691644907, |
| "loss/reg": 2.500089645385742, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.00566, |
| "grad_norm": 0.3059675395488739, |
| "grad_norm_var": 0.0013809527139825861, |
| "learning_rate": 5e-05, |
| "loss": 0.1528, |
| "loss/crossentropy": 2.7676697373390198, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15275665000081062, |
| "loss/reg": 2.497802257537842, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.00567, |
| "grad_norm": 0.41637444496154785, |
| "grad_norm_var": 0.0015720438674995396, |
| "learning_rate": 5e-05, |
| "loss": 0.1899, |
| "loss/crossentropy": 2.7852693796157837, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18987080082297325, |
| "loss/reg": 2.4960269927978516, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.00568, |
| "grad_norm": 0.48216167092323303, |
| "grad_norm_var": 0.002316091582714641, |
| "learning_rate": 5e-05, |
| "loss": 0.179, |
| "loss/crossentropy": 2.919625759124756, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17901213094592094, |
| "loss/reg": 2.4943020343780518, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.00569, |
| "grad_norm": 0.34773337841033936, |
| "grad_norm_var": 0.0023415161321106623, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.829575002193451, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1688704527914524, |
| "loss/reg": 2.4922549724578857, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.0057, |
| "grad_norm": 0.42466020584106445, |
| "grad_norm_var": 0.0022617987789910494, |
| "learning_rate": 5e-05, |
| "loss": 0.2065, |
| "loss/crossentropy": 2.847673773765564, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20647098124027252, |
| "loss/reg": 2.4896240234375, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.00571, |
| "grad_norm": 0.39025840163230896, |
| "grad_norm_var": 0.0022035635328787567, |
| "learning_rate": 5e-05, |
| "loss": 0.181, |
| "loss/crossentropy": 2.9154597520828247, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1810290329158306, |
| "loss/reg": 2.487513542175293, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.00572, |
| "grad_norm": 0.3611275851726532, |
| "grad_norm_var": 0.002232206094215346, |
| "learning_rate": 5e-05, |
| "loss": 0.1687, |
| "loss/crossentropy": 2.813008964061737, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16871189698576927, |
| "loss/reg": 2.4849367141723633, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.00573, |
| "grad_norm": 0.37163245677948, |
| "grad_norm_var": 0.002205551603401897, |
| "learning_rate": 5e-05, |
| "loss": 0.1736, |
| "loss/crossentropy": 2.829798102378845, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.173641849309206, |
| "loss/reg": 2.481811046600342, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.00574, |
| "grad_norm": 0.37662971019744873, |
| "grad_norm_var": 0.002204250880404842, |
| "learning_rate": 5e-05, |
| "loss": 0.1641, |
| "loss/crossentropy": 2.786403477191925, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16413037478923798, |
| "loss/reg": 2.479344606399536, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.00575, |
| "grad_norm": 0.4090428948402405, |
| "grad_norm_var": 0.002174681113915019, |
| "learning_rate": 5e-05, |
| "loss": 0.1684, |
| "loss/crossentropy": 2.685749888420105, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16840650886297226, |
| "loss/reg": 2.476419448852539, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 0.35688483715057373, |
| "grad_norm_var": 0.0021995018187083346, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.809792697429657, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1611488163471222, |
| "loss/reg": 2.4737355709075928, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.00577, |
| "grad_norm": 0.38194504380226135, |
| "grad_norm_var": 0.002065385566742454, |
| "learning_rate": 5e-05, |
| "loss": 0.1615, |
| "loss/crossentropy": 2.850769340991974, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1615295149385929, |
| "loss/reg": 2.4713802337646484, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.00578, |
| "grad_norm": 0.3567502200603485, |
| "grad_norm_var": 0.001743510873329986, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.7103776335716248, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16894375160336494, |
| "loss/reg": 2.4685709476470947, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.00579, |
| "grad_norm": 0.3396901786327362, |
| "grad_norm_var": 0.001824115359163836, |
| "learning_rate": 5e-05, |
| "loss": 0.166, |
| "loss/crossentropy": 2.7079854607582092, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1659584417939186, |
| "loss/reg": 2.465658664703369, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0058, |
| "grad_norm": 0.358395516872406, |
| "grad_norm_var": 0.0018331543648902808, |
| "learning_rate": 5e-05, |
| "loss": 0.1813, |
| "loss/crossentropy": 2.8853692412376404, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18133477121591568, |
| "loss/reg": 2.4633235931396484, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.00581, |
| "grad_norm": 0.3434228301048279, |
| "grad_norm_var": 0.0017310432323107805, |
| "learning_rate": 5e-05, |
| "loss": 0.1739, |
| "loss/crossentropy": 2.666011691093445, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1739240102469921, |
| "loss/reg": 2.460447072982788, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.00582, |
| "grad_norm": 0.3482820689678192, |
| "grad_norm_var": 0.0014454775261250163, |
| "learning_rate": 5e-05, |
| "loss": 0.1846, |
| "loss/crossentropy": 2.6244596242904663, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1845875158905983, |
| "loss/reg": 2.457307815551758, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.00583, |
| "grad_norm": 0.36450985074043274, |
| "grad_norm_var": 0.0013555723186838029, |
| "learning_rate": 5e-05, |
| "loss": 0.1741, |
| "loss/crossentropy": 2.6909091472625732, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.174148079007864, |
| "loss/reg": 2.4545810222625732, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.00584, |
| "grad_norm": 0.34841907024383545, |
| "grad_norm_var": 0.0005772011049318792, |
| "learning_rate": 5e-05, |
| "loss": 0.1621, |
| "loss/crossentropy": 2.7992460131645203, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.162076648324728, |
| "loss/reg": 2.4516048431396484, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.00585, |
| "grad_norm": 0.36560627818107605, |
| "grad_norm_var": 0.0005501529366056079, |
| "learning_rate": 5e-05, |
| "loss": 0.1612, |
| "loss/crossentropy": 2.7556354999542236, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16119826585054398, |
| "loss/reg": 2.4482715129852295, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.00586, |
| "grad_norm": 0.37393423914909363, |
| "grad_norm_var": 0.00033166715444868193, |
| "learning_rate": 5e-05, |
| "loss": 0.1779, |
| "loss/crossentropy": 2.6222774982452393, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1779084950685501, |
| "loss/reg": 2.4452362060546875, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.00587, |
| "grad_norm": 0.3511587679386139, |
| "grad_norm_var": 0.0002976648126369145, |
| "learning_rate": 5e-05, |
| "loss": 0.1761, |
| "loss/crossentropy": 2.7342361211776733, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17614838480949402, |
| "loss/reg": 2.441678524017334, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.00588, |
| "grad_norm": 0.33847615122795105, |
| "grad_norm_var": 0.0003352805276915209, |
| "learning_rate": 5e-05, |
| "loss": 0.173, |
| "loss/crossentropy": 2.7935328483581543, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17295999452471733, |
| "loss/reg": 2.437959671020508, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.00589, |
| "grad_norm": 0.351034015417099, |
| "grad_norm_var": 0.00033410454836428903, |
| "learning_rate": 5e-05, |
| "loss": 0.1775, |
| "loss/crossentropy": 2.7590489387512207, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1775294505059719, |
| "loss/reg": 2.4346938133239746, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.0059, |
| "grad_norm": 0.37800535559654236, |
| "grad_norm_var": 0.0003372250971240794, |
| "learning_rate": 5e-05, |
| "loss": 0.1645, |
| "loss/crossentropy": 2.75826096534729, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16445999220013618, |
| "loss/reg": 2.4319543838500977, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.00591, |
| "grad_norm": 0.3323316276073456, |
| "grad_norm_var": 0.0002069473145354402, |
| "learning_rate": 5e-05, |
| "loss": 0.1644, |
| "loss/crossentropy": 2.963920295238495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.164412472397089, |
| "loss/reg": 2.4295387268066406, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.00592, |
| "grad_norm": 0.8281128406524658, |
| "grad_norm_var": 0.014169124282143371, |
| "learning_rate": 5e-05, |
| "loss": 0.2256, |
| "loss/crossentropy": 2.9319988489151, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22560855001211166, |
| "loss/reg": 2.427125930786133, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.00593, |
| "grad_norm": 0.37988972663879395, |
| "grad_norm_var": 0.014170226758262046, |
| "learning_rate": 5e-05, |
| "loss": 0.1795, |
| "loss/crossentropy": 2.92197585105896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1794501654803753, |
| "loss/reg": 2.4247610569000244, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.00594, |
| "grad_norm": 0.37449878454208374, |
| "grad_norm_var": 0.014123355612102569, |
| "learning_rate": 5e-05, |
| "loss": 0.1756, |
| "loss/crossentropy": 2.734030842781067, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17557094618678093, |
| "loss/reg": 2.4224398136138916, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.00595, |
| "grad_norm": 0.3890518248081207, |
| "grad_norm_var": 0.013970946553029018, |
| "learning_rate": 5e-05, |
| "loss": 0.1721, |
| "loss/crossentropy": 2.6910988688468933, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17206770926713943, |
| "loss/reg": 2.4206151962280273, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.00596, |
| "grad_norm": 0.45764538645744324, |
| "grad_norm_var": 0.014180672563351104, |
| "learning_rate": 5e-05, |
| "loss": 0.1886, |
| "loss/crossentropy": 2.706140458583832, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1886041909456253, |
| "loss/reg": 2.418341636657715, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.00597, |
| "grad_norm": 0.3294787108898163, |
| "grad_norm_var": 0.014289226884282809, |
| "learning_rate": 5e-05, |
| "loss": 0.1693, |
| "loss/crossentropy": 2.772903263568878, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16925981268286705, |
| "loss/reg": 2.415613889694214, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.00598, |
| "grad_norm": 0.3425086438655853, |
| "grad_norm_var": 0.014326812953815705, |
| "learning_rate": 5e-05, |
| "loss": 0.17, |
| "loss/crossentropy": 2.7024609446525574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16995511576533318, |
| "loss/reg": 2.4122676849365234, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.00599, |
| "grad_norm": 0.37222734093666077, |
| "grad_norm_var": 0.014300147579082, |
| "learning_rate": 5e-05, |
| "loss": 0.1789, |
| "loss/crossentropy": 2.8698896765708923, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17886632308363914, |
| "loss/reg": 2.4099037647247314, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 0.39135316014289856, |
| "grad_norm_var": 0.014151428197242365, |
| "learning_rate": 5e-05, |
| "loss": 0.1747, |
| "loss/crossentropy": 2.700629711151123, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17468373104929924, |
| "loss/reg": 2.40794038772583, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.00601, |
| "grad_norm": 0.3728218376636505, |
| "grad_norm_var": 0.014124279912823712, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.794102430343628, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16897983103990555, |
| "loss/reg": 2.405545711517334, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.00602, |
| "grad_norm": 0.37317147850990295, |
| "grad_norm_var": 0.014126729018321404, |
| "learning_rate": 5e-05, |
| "loss": 0.1747, |
| "loss/crossentropy": 2.6252577900886536, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17474820092320442, |
| "loss/reg": 2.402970790863037, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.00603, |
| "grad_norm": 0.35492607951164246, |
| "grad_norm_var": 0.01410428304541661, |
| "learning_rate": 5e-05, |
| "loss": 0.1809, |
| "loss/crossentropy": 2.6527358889579773, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18093448877334595, |
| "loss/reg": 2.4007880687713623, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.00604, |
| "grad_norm": 0.408010870218277, |
| "grad_norm_var": 0.013856041692491945, |
| "learning_rate": 5e-05, |
| "loss": 0.2018, |
| "loss/crossentropy": 2.874286651611328, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20177744701504707, |
| "loss/reg": 2.3988449573516846, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.00605, |
| "grad_norm": 0.3291812837123871, |
| "grad_norm_var": 0.014034946168994126, |
| "learning_rate": 5e-05, |
| "loss": 0.1614, |
| "loss/crossentropy": 2.7926167249679565, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16140995919704437, |
| "loss/reg": 2.3966500759124756, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.00606, |
| "grad_norm": 0.34659212827682495, |
| "grad_norm_var": 0.014192203001449558, |
| "learning_rate": 5e-05, |
| "loss": 0.1709, |
| "loss/crossentropy": 2.8195464611053467, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1709096021950245, |
| "loss/reg": 2.394033670425415, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.00607, |
| "grad_norm": 0.32253992557525635, |
| "grad_norm_var": 0.014285055545239086, |
| "learning_rate": 5e-05, |
| "loss": 0.1649, |
| "loss/crossentropy": 2.7236337065696716, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16493552178144455, |
| "loss/reg": 2.3918449878692627, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.00608, |
| "grad_norm": 0.350931316614151, |
| "grad_norm_var": 0.0011668026056699994, |
| "learning_rate": 5e-05, |
| "loss": 0.1718, |
| "loss/crossentropy": 2.800759196281433, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17175282910466194, |
| "loss/reg": 2.38920521736145, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.00609, |
| "grad_norm": 0.40333986282348633, |
| "grad_norm_var": 0.001237012928824995, |
| "learning_rate": 5e-05, |
| "loss": 0.2046, |
| "loss/crossentropy": 2.7574119567871094, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2046247273683548, |
| "loss/reg": 2.3865227699279785, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.0061, |
| "grad_norm": 0.3773089349269867, |
| "grad_norm_var": 0.0012392324335123346, |
| "learning_rate": 5e-05, |
| "loss": 0.1641, |
| "loss/crossentropy": 2.6313101649284363, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16410250216722488, |
| "loss/reg": 2.3836612701416016, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.00611, |
| "grad_norm": 0.438357949256897, |
| "grad_norm_var": 0.0015159779907225465, |
| "learning_rate": 5e-05, |
| "loss": 0.2098, |
| "loss/crossentropy": 2.780138611793518, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20979087427258492, |
| "loss/reg": 2.380405902862549, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.00612, |
| "grad_norm": 0.34121251106262207, |
| "grad_norm_var": 0.0010515226822608785, |
| "learning_rate": 5e-05, |
| "loss": 0.161, |
| "loss/crossentropy": 2.669090151786804, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1609921157360077, |
| "loss/reg": 2.3776426315307617, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.00613, |
| "grad_norm": 0.36169829964637756, |
| "grad_norm_var": 0.0009600577824135296, |
| "learning_rate": 5e-05, |
| "loss": 0.1734, |
| "loss/crossentropy": 2.7925440073013306, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17342102900147438, |
| "loss/reg": 2.3744184970855713, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.00614, |
| "grad_norm": 0.522160530090332, |
| "grad_norm_var": 0.002369345725690275, |
| "learning_rate": 5e-05, |
| "loss": 0.1663, |
| "loss/crossentropy": 2.698939800262451, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16627153754234314, |
| "loss/reg": 2.370917320251465, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.00615, |
| "grad_norm": 0.4562234580516815, |
| "grad_norm_var": 0.002733171284208069, |
| "learning_rate": 5e-05, |
| "loss": 0.1686, |
| "loss/crossentropy": 2.8971627950668335, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16856613755226135, |
| "loss/reg": 2.368067979812622, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.00616, |
| "grad_norm": 0.5767056345939636, |
| "grad_norm_var": 0.0050531115809510415, |
| "learning_rate": 5e-05, |
| "loss": 0.171, |
| "loss/crossentropy": 2.822002112865448, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17102698609232903, |
| "loss/reg": 2.3653640747070312, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.00617, |
| "grad_norm": 0.3703908324241638, |
| "grad_norm_var": 0.005060977204500819, |
| "learning_rate": 5e-05, |
| "loss": 0.1799, |
| "loss/crossentropy": 2.761395037174225, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17990661412477493, |
| "loss/reg": 2.362797975540161, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.00618, |
| "grad_norm": 0.44375622272491455, |
| "grad_norm_var": 0.005159430065949637, |
| "learning_rate": 5e-05, |
| "loss": 0.1662, |
| "loss/crossentropy": 2.8019450306892395, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1661831997334957, |
| "loss/reg": 2.3597350120544434, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.00619, |
| "grad_norm": 0.41226035356521606, |
| "grad_norm_var": 0.005018716701479123, |
| "learning_rate": 5e-05, |
| "loss": 0.1737, |
| "loss/crossentropy": 2.837542712688446, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1737065464258194, |
| "loss/reg": 2.356935977935791, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.0062, |
| "grad_norm": 0.36850520968437195, |
| "grad_norm_var": 0.005094037089036248, |
| "learning_rate": 5e-05, |
| "loss": 0.1691, |
| "loss/crossentropy": 2.872538685798645, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16909406706690788, |
| "loss/reg": 2.354841709136963, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.00621, |
| "grad_norm": 0.3547448217868805, |
| "grad_norm_var": 0.004888988248098869, |
| "learning_rate": 5e-05, |
| "loss": 0.1777, |
| "loss/crossentropy": 2.727312922477722, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17773358151316643, |
| "loss/reg": 2.3518083095550537, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.00622, |
| "grad_norm": 0.3340252637863159, |
| "grad_norm_var": 0.0049932414292845895, |
| "learning_rate": 5e-05, |
| "loss": 0.1673, |
| "loss/crossentropy": 2.7399535179138184, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16725115478038788, |
| "loss/reg": 2.349299907684326, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.00623, |
| "grad_norm": 0.328477680683136, |
| "grad_norm_var": 0.004932429457390519, |
| "learning_rate": 5e-05, |
| "loss": 0.1658, |
| "loss/crossentropy": 2.7973376512527466, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1658070906996727, |
| "loss/reg": 2.346407175064087, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.00624, |
| "grad_norm": 0.3988572061061859, |
| "grad_norm_var": 0.004746415643168555, |
| "learning_rate": 5e-05, |
| "loss": 0.1808, |
| "loss/crossentropy": 2.886197090148926, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18076446652412415, |
| "loss/reg": 2.343637228012085, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.00625, |
| "grad_norm": 0.3653312921524048, |
| "grad_norm_var": 0.0048476613679717525, |
| "learning_rate": 5e-05, |
| "loss": 0.1752, |
| "loss/crossentropy": 2.6095593571662903, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17522242665290833, |
| "loss/reg": 2.34155011177063, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.00626, |
| "grad_norm": 0.3519672751426697, |
| "grad_norm_var": 0.004975031863489395, |
| "learning_rate": 5e-05, |
| "loss": 0.1685, |
| "loss/crossentropy": 2.724495232105255, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16851425543427467, |
| "loss/reg": 2.339081048965454, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.00627, |
| "grad_norm": 0.3507337272167206, |
| "grad_norm_var": 0.005024779798457324, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.775688886642456, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1661130003631115, |
| "loss/reg": 2.3362221717834473, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.00628, |
| "grad_norm": 0.35331088304519653, |
| "grad_norm_var": 0.004945443478871292, |
| "learning_rate": 5e-05, |
| "loss": 0.1803, |
| "loss/crossentropy": 2.6876689195632935, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18032584339380264, |
| "loss/reg": 2.3339033126831055, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.00629, |
| "grad_norm": 0.3569658696651459, |
| "grad_norm_var": 0.004969005818722216, |
| "learning_rate": 5e-05, |
| "loss": 0.1646, |
| "loss/crossentropy": 2.87895804643631, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16462786123156548, |
| "loss/reg": 2.3317511081695557, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.0063, |
| "grad_norm": 0.37102508544921875, |
| "grad_norm_var": 0.0038649155689368443, |
| "learning_rate": 5e-05, |
| "loss": 0.1807, |
| "loss/crossentropy": 2.8995742201805115, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1807471290230751, |
| "loss/reg": 2.3286077976226807, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.00631, |
| "grad_norm": 0.37091144919395447, |
| "grad_norm_var": 0.0035332975201383715, |
| "learning_rate": 5e-05, |
| "loss": 0.1679, |
| "loss/crossentropy": 2.755174398422241, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16785116121172905, |
| "loss/reg": 2.3263347148895264, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.00632, |
| "grad_norm": 0.3764369487762451, |
| "grad_norm_var": 0.000834165955391919, |
| "learning_rate": 5e-05, |
| "loss": 0.1597, |
| "loss/crossentropy": 2.7826634645462036, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1596829891204834, |
| "loss/reg": 2.3239433765411377, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.00633, |
| "grad_norm": 0.34151408076286316, |
| "grad_norm_var": 0.0008818179956038841, |
| "learning_rate": 5e-05, |
| "loss": 0.1628, |
| "loss/crossentropy": 2.805456221103668, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16277796775102615, |
| "loss/reg": 2.3215837478637695, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.00634, |
| "grad_norm": 0.7558053731918335, |
| "grad_norm_var": 0.010143553337954326, |
| "learning_rate": 5e-05, |
| "loss": 0.1858, |
| "loss/crossentropy": 2.7678999304771423, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18575545772910118, |
| "loss/reg": 2.3192989826202393, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.00635, |
| "grad_norm": 0.3809748589992523, |
| "grad_norm_var": 0.010099062255010161, |
| "learning_rate": 5e-05, |
| "loss": 0.1792, |
| "loss/crossentropy": 2.86500483751297, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1791505441069603, |
| "loss/reg": 2.317030906677246, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.00636, |
| "grad_norm": 0.40578746795654297, |
| "grad_norm_var": 0.010104068412990375, |
| "learning_rate": 5e-05, |
| "loss": 0.1882, |
| "loss/crossentropy": 2.8707818388938904, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.188164584338665, |
| "loss/reg": 2.3146989345550537, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.00637, |
| "grad_norm": 0.415227472782135, |
| "grad_norm_var": 0.010070131470069877, |
| "learning_rate": 5e-05, |
| "loss": 0.1748, |
| "loss/crossentropy": 2.831197440624237, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17476488277316093, |
| "loss/reg": 2.311936616897583, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.00638, |
| "grad_norm": 0.4119730293750763, |
| "grad_norm_var": 0.00985685373482662, |
| "learning_rate": 5e-05, |
| "loss": 0.1699, |
| "loss/crossentropy": 2.6559138894081116, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16989587992429733, |
| "loss/reg": 2.3090522289276123, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.00639, |
| "grad_norm": 0.3662709593772888, |
| "grad_norm_var": 0.009606093056996168, |
| "learning_rate": 5e-05, |
| "loss": 0.1775, |
| "loss/crossentropy": 2.767539858818054, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17754964902997017, |
| "loss/reg": 2.3056743144989014, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 0.38491374254226685, |
| "grad_norm_var": 0.009617242443139995, |
| "learning_rate": 5e-05, |
| "loss": 0.1827, |
| "loss/crossentropy": 2.6669586896896362, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18266603723168373, |
| "loss/reg": 2.303258180618286, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.00641, |
| "grad_norm": 0.4197373390197754, |
| "grad_norm_var": 0.009569272862985524, |
| "learning_rate": 5e-05, |
| "loss": 0.1778, |
| "loss/crossentropy": 2.7964502573013306, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17782465368509293, |
| "loss/reg": 2.300361156463623, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.00642, |
| "grad_norm": 0.4097757339477539, |
| "grad_norm_var": 0.00940137989136159, |
| "learning_rate": 5e-05, |
| "loss": 0.1856, |
| "loss/crossentropy": 2.735614001750946, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1855894774198532, |
| "loss/reg": 2.2972419261932373, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.00643, |
| "grad_norm": 0.35904356837272644, |
| "grad_norm_var": 0.00934616788177974, |
| "learning_rate": 5e-05, |
| "loss": 0.1833, |
| "loss/crossentropy": 2.774403393268585, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1832551322877407, |
| "loss/reg": 2.293954610824585, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.00644, |
| "grad_norm": 0.34157049655914307, |
| "grad_norm_var": 0.009435664127140328, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.8616234064102173, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16181085631251335, |
| "loss/reg": 2.2906179428100586, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.00645, |
| "grad_norm": 0.4255986213684082, |
| "grad_norm_var": 0.009297406924193945, |
| "learning_rate": 5e-05, |
| "loss": 0.1853, |
| "loss/crossentropy": 2.654071033000946, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18530349805951118, |
| "loss/reg": 2.287349224090576, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.00646, |
| "grad_norm": 0.3393001854419708, |
| "grad_norm_var": 0.009518979339113423, |
| "learning_rate": 5e-05, |
| "loss": 0.1665, |
| "loss/crossentropy": 2.73319810628891, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16647282242774963, |
| "loss/reg": 2.2834725379943848, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.00647, |
| "grad_norm": 0.34969252347946167, |
| "grad_norm_var": 0.00964795505733251, |
| "learning_rate": 5e-05, |
| "loss": 0.1833, |
| "loss/crossentropy": 2.7993005514144897, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1833389550447464, |
| "loss/reg": 2.2799694538116455, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.00648, |
| "grad_norm": 0.35388484597206116, |
| "grad_norm_var": 0.009766310746661707, |
| "learning_rate": 5e-05, |
| "loss": 0.1749, |
| "loss/crossentropy": 2.766145169734955, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17488964274525642, |
| "loss/reg": 2.277585029602051, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.00649, |
| "grad_norm": 0.5462765097618103, |
| "grad_norm_var": 0.010685818975949597, |
| "learning_rate": 5e-05, |
| "loss": 0.1882, |
| "loss/crossentropy": 2.7475533485412598, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18824508786201477, |
| "loss/reg": 2.2750473022460938, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 0.3537692725658417, |
| "grad_norm_var": 0.002605622083243005, |
| "learning_rate": 5e-05, |
| "loss": 0.1761, |
| "loss/crossentropy": 2.756273865699768, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17612234875559807, |
| "loss/reg": 2.2722957134246826, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.00651, |
| "grad_norm": 0.3770252466201782, |
| "grad_norm_var": 0.0026121330513858157, |
| "learning_rate": 5e-05, |
| "loss": 0.1897, |
| "loss/crossentropy": 2.7889973521232605, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1897362545132637, |
| "loss/reg": 2.2701008319854736, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.00652, |
| "grad_norm": 0.4475138187408447, |
| "grad_norm_var": 0.0028018836674080227, |
| "learning_rate": 5e-05, |
| "loss": 0.1951, |
| "loss/crossentropy": 2.531024992465973, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1951226033270359, |
| "loss/reg": 2.267695665359497, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.00653, |
| "grad_norm": 0.3947466313838959, |
| "grad_norm_var": 0.002769718525090366, |
| "learning_rate": 5e-05, |
| "loss": 0.1958, |
| "loss/crossentropy": 2.886034905910492, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19575949385762215, |
| "loss/reg": 2.2648732662200928, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.00654, |
| "grad_norm": 0.3775857090950012, |
| "grad_norm_var": 0.0027546537142078996, |
| "learning_rate": 5e-05, |
| "loss": 0.1746, |
| "loss/crossentropy": 2.7190786600112915, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17461128905415535, |
| "loss/reg": 2.2620925903320312, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.00655, |
| "grad_norm": 0.34534481167793274, |
| "grad_norm_var": 0.002849399631435645, |
| "learning_rate": 5e-05, |
| "loss": 0.1792, |
| "loss/crossentropy": 2.8847506046295166, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1791832633316517, |
| "loss/reg": 2.2593743801116943, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.00656, |
| "grad_norm": 0.3607633411884308, |
| "grad_norm_var": 0.0028993682580486144, |
| "learning_rate": 5e-05, |
| "loss": 0.1792, |
| "loss/crossentropy": 2.815674066543579, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.179163109511137, |
| "loss/reg": 2.2569730281829834, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.00657, |
| "grad_norm": 0.38781270384788513, |
| "grad_norm_var": 0.002826278400635814, |
| "learning_rate": 5e-05, |
| "loss": 0.1618, |
| "loss/crossentropy": 2.6106160283088684, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1617795117199421, |
| "loss/reg": 2.254523277282715, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.00658, |
| "grad_norm": 0.40386784076690674, |
| "grad_norm_var": 0.0028094212847462165, |
| "learning_rate": 5e-05, |
| "loss": 0.1747, |
| "loss/crossentropy": 2.8471227884292603, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17474086582660675, |
| "loss/reg": 2.252164125442505, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.00659, |
| "grad_norm": 0.36319243907928467, |
| "grad_norm_var": 0.002796007207749466, |
| "learning_rate": 5e-05, |
| "loss": 0.163, |
| "loss/crossentropy": 2.7625906467437744, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16296877712011337, |
| "loss/reg": 2.249460458755493, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.0066, |
| "grad_norm": 0.3657222092151642, |
| "grad_norm_var": 0.00269101182172804, |
| "learning_rate": 5e-05, |
| "loss": 0.1777, |
| "loss/crossentropy": 2.781547486782074, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1777149885892868, |
| "loss/reg": 2.246467113494873, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.00661, |
| "grad_norm": 0.38363057374954224, |
| "grad_norm_var": 0.0025851401210759276, |
| "learning_rate": 5e-05, |
| "loss": 0.1844, |
| "loss/crossentropy": 2.82689893245697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1844283789396286, |
| "loss/reg": 2.2436530590057373, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.00662, |
| "grad_norm": 0.4096749424934387, |
| "grad_norm_var": 0.0024716520181473594, |
| "learning_rate": 5e-05, |
| "loss": 0.1745, |
| "loss/crossentropy": 2.8063756823539734, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17451731115579605, |
| "loss/reg": 2.241178035736084, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.00663, |
| "grad_norm": 0.42931249737739563, |
| "grad_norm_var": 0.0024528927297352344, |
| "learning_rate": 5e-05, |
| "loss": 0.186, |
| "loss/crossentropy": 2.8724401593208313, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1859952136874199, |
| "loss/reg": 2.2383124828338623, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.00664, |
| "grad_norm": 0.3530314862728119, |
| "grad_norm_var": 0.0024574750299312478, |
| "learning_rate": 5e-05, |
| "loss": 0.1696, |
| "loss/crossentropy": 2.9292226433753967, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16955319419503212, |
| "loss/reg": 2.2356791496276855, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.00665, |
| "grad_norm": 0.4304611384868622, |
| "grad_norm_var": 0.0009397736187397402, |
| "learning_rate": 5e-05, |
| "loss": 0.1902, |
| "loss/crossentropy": 2.7114855647087097, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19023016840219498, |
| "loss/reg": 2.2330329418182373, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.00666, |
| "grad_norm": 0.32996541261672974, |
| "grad_norm_var": 0.0010789617804694747, |
| "learning_rate": 5e-05, |
| "loss": 0.158, |
| "loss/crossentropy": 2.8920618891716003, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1580132134258747, |
| "loss/reg": 2.2296440601348877, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.00667, |
| "grad_norm": 0.3874596953392029, |
| "grad_norm_var": 0.0010747020479673205, |
| "learning_rate": 5e-05, |
| "loss": 0.1819, |
| "loss/crossentropy": 2.7297377586364746, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18189727514982224, |
| "loss/reg": 2.226966619491577, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.00668, |
| "grad_norm": 0.36097773909568787, |
| "grad_norm_var": 0.000828712243429038, |
| "learning_rate": 5e-05, |
| "loss": 0.1726, |
| "loss/crossentropy": 2.6433697938919067, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17256683483719826, |
| "loss/reg": 2.2244138717651367, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.00669, |
| "grad_norm": 0.3509676158428192, |
| "grad_norm_var": 0.0008637156407869958, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.8315157890319824, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17234884947538376, |
| "loss/reg": 2.220712661743164, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.0067, |
| "grad_norm": 0.3578469157218933, |
| "grad_norm_var": 0.0008878035089742793, |
| "learning_rate": 5e-05, |
| "loss": 0.1657, |
| "loss/crossentropy": 2.788190722465515, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16572094336152077, |
| "loss/reg": 2.217878818511963, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.00671, |
| "grad_norm": 0.4930081069469452, |
| "grad_norm_var": 0.0016420680378558003, |
| "learning_rate": 5e-05, |
| "loss": 0.1818, |
| "loss/crossentropy": 3.013857901096344, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18175816163420677, |
| "loss/reg": 2.2148284912109375, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.00672, |
| "grad_norm": 0.36925604939460754, |
| "grad_norm_var": 0.0016185866984450236, |
| "learning_rate": 5e-05, |
| "loss": 0.1642, |
| "loss/crossentropy": 2.8940696716308594, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1642276532948017, |
| "loss/reg": 2.2119295597076416, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.00673, |
| "grad_norm": 0.4327005445957184, |
| "grad_norm_var": 0.0017552981165500747, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.87309467792511, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17423933744430542, |
| "loss/reg": 2.209021806716919, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.00674, |
| "grad_norm": 0.738524854183197, |
| "grad_norm_var": 0.009426579051544037, |
| "learning_rate": 5e-05, |
| "loss": 0.1868, |
| "loss/crossentropy": 2.8040258288383484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1867678351700306, |
| "loss/reg": 2.2066619396209717, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.00675, |
| "grad_norm": 0.4364205002784729, |
| "grad_norm_var": 0.009307313279513674, |
| "learning_rate": 5e-05, |
| "loss": 0.1796, |
| "loss/crossentropy": 2.718536138534546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17956989258527756, |
| "loss/reg": 2.203990936279297, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.00676, |
| "grad_norm": 0.41067376732826233, |
| "grad_norm_var": 0.009142390414932911, |
| "learning_rate": 5e-05, |
| "loss": 0.1642, |
| "loss/crossentropy": 2.72940456867218, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16421709582209587, |
| "loss/reg": 2.2013046741485596, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.00677, |
| "grad_norm": 0.4327182173728943, |
| "grad_norm_var": 0.009073804614162174, |
| "learning_rate": 5e-05, |
| "loss": 0.1905, |
| "loss/crossentropy": 2.7371246814727783, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19054419547319412, |
| "loss/reg": 2.1991653442382812, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.00678, |
| "grad_norm": 0.3779783844947815, |
| "grad_norm_var": 0.009181024716334075, |
| "learning_rate": 5e-05, |
| "loss": 0.1589, |
| "loss/crossentropy": 2.7500953674316406, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15891055390238762, |
| "loss/reg": 2.197261333465576, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.00679, |
| "grad_norm": 0.3585035502910614, |
| "grad_norm_var": 0.009389539404841711, |
| "learning_rate": 5e-05, |
| "loss": 0.1836, |
| "loss/crossentropy": 2.869826376438141, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18359991908073425, |
| "loss/reg": 2.195239782333374, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.0068, |
| "grad_norm": 0.3534944951534271, |
| "grad_norm_var": 0.009385802469305704, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.8476794362068176, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1689487136900425, |
| "loss/reg": 2.1929402351379395, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.00681, |
| "grad_norm": 0.3718988001346588, |
| "grad_norm_var": 0.009470130435250168, |
| "learning_rate": 5e-05, |
| "loss": 0.1704, |
| "loss/crossentropy": 2.7930009365081787, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17038631066679955, |
| "loss/reg": 2.19075608253479, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.00682, |
| "grad_norm": 0.4854961037635803, |
| "grad_norm_var": 0.009319177707927173, |
| "learning_rate": 5e-05, |
| "loss": 0.1705, |
| "loss/crossentropy": 2.8028470277786255, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17054682224988937, |
| "loss/reg": 2.1886627674102783, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.00683, |
| "grad_norm": 0.3880312144756317, |
| "grad_norm_var": 0.00931672834921676, |
| "learning_rate": 5e-05, |
| "loss": 0.1766, |
| "loss/crossentropy": 2.963544547557831, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17662956938147545, |
| "loss/reg": 2.1860859394073486, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.00684, |
| "grad_norm": 0.3488878309726715, |
| "grad_norm_var": 0.009420855437860176, |
| "learning_rate": 5e-05, |
| "loss": 0.1667, |
| "loss/crossentropy": 2.959736704826355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16670886427164078, |
| "loss/reg": 2.183668375015259, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.00685, |
| "grad_norm": 0.8154363632202148, |
| "grad_norm_var": 0.018681551405985854, |
| "learning_rate": 5e-05, |
| "loss": 0.2213, |
| "loss/crossentropy": 2.911233067512512, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2212524674832821, |
| "loss/reg": 2.1813154220581055, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.00686, |
| "grad_norm": 0.4155946969985962, |
| "grad_norm_var": 0.018194440840509217, |
| "learning_rate": 5e-05, |
| "loss": 0.1964, |
| "loss/crossentropy": 2.7186298966407776, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19640850275754929, |
| "loss/reg": 2.1795501708984375, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.00687, |
| "grad_norm": 0.38160914182662964, |
| "grad_norm_var": 0.01835781299917098, |
| "learning_rate": 5e-05, |
| "loss": 0.1708, |
| "loss/crossentropy": 2.7301290035247803, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17081937566399574, |
| "loss/reg": 2.177623748779297, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.00688, |
| "grad_norm": 0.41628003120422363, |
| "grad_norm_var": 0.01802219976069038, |
| "learning_rate": 5e-05, |
| "loss": 0.1771, |
| "loss/crossentropy": 2.7876546382904053, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17706667259335518, |
| "loss/reg": 2.175504446029663, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.00689, |
| "grad_norm": 0.4177417755126953, |
| "grad_norm_var": 0.018066232212721724, |
| "learning_rate": 5e-05, |
| "loss": 0.1786, |
| "loss/crossentropy": 2.763257145881653, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1785966381430626, |
| "loss/reg": 2.173213481903076, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.0069, |
| "grad_norm": 0.3603265583515167, |
| "grad_norm_var": 0.012296751904473697, |
| "learning_rate": 5e-05, |
| "loss": 0.1678, |
| "loss/crossentropy": 2.774847447872162, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16783085092902184, |
| "loss/reg": 2.1712427139282227, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.00691, |
| "grad_norm": 0.4307333827018738, |
| "grad_norm_var": 0.01228874334383105, |
| "learning_rate": 5e-05, |
| "loss": 0.2034, |
| "loss/crossentropy": 2.6488924622535706, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20339511707425117, |
| "loss/reg": 2.170015573501587, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.00692, |
| "grad_norm": 0.3678703010082245, |
| "grad_norm_var": 0.012472673417673882, |
| "learning_rate": 5e-05, |
| "loss": 0.1793, |
| "loss/crossentropy": 2.8285900950431824, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.179282795637846, |
| "loss/reg": 2.1680784225463867, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.00693, |
| "grad_norm": 0.3516632914543152, |
| "grad_norm_var": 0.012747599104723136, |
| "learning_rate": 5e-05, |
| "loss": 0.1638, |
| "loss/crossentropy": 2.72187340259552, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16377655416727066, |
| "loss/reg": 2.166708469390869, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.00694, |
| "grad_norm": 0.37773895263671875, |
| "grad_norm_var": 0.012748787659448176, |
| "learning_rate": 5e-05, |
| "loss": 0.2, |
| "loss/crossentropy": 2.5079989433288574, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19995050877332687, |
| "loss/reg": 2.1644845008850098, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.00695, |
| "grad_norm": 0.33557403087615967, |
| "grad_norm_var": 0.012954622340141124, |
| "learning_rate": 5e-05, |
| "loss": 0.173, |
| "loss/crossentropy": 2.733457326889038, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1730196811258793, |
| "loss/reg": 2.162649631500244, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.00696, |
| "grad_norm": 0.3414340615272522, |
| "grad_norm_var": 0.01306044443406886, |
| "learning_rate": 5e-05, |
| "loss": 0.1699, |
| "loss/crossentropy": 2.770694136619568, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1698729656636715, |
| "loss/reg": 2.1605873107910156, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.00697, |
| "grad_norm": 0.39742037653923035, |
| "grad_norm_var": 0.012961649579914787, |
| "learning_rate": 5e-05, |
| "loss": 0.1753, |
| "loss/crossentropy": 2.747798502445221, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17528066039085388, |
| "loss/reg": 2.158661127090454, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.00698, |
| "grad_norm": 0.4672209620475769, |
| "grad_norm_var": 0.012809503544980934, |
| "learning_rate": 5e-05, |
| "loss": 0.1961, |
| "loss/crossentropy": 2.764335811138153, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1961456499993801, |
| "loss/reg": 2.157139539718628, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.00699, |
| "grad_norm": 0.40900057554244995, |
| "grad_norm_var": 0.012766202979620484, |
| "learning_rate": 5e-05, |
| "loss": 0.1826, |
| "loss/crossentropy": 2.9526583552360535, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1826096773147583, |
| "loss/reg": 2.1556172370910645, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 0.45763787627220154, |
| "grad_norm_var": 0.01255169197725956, |
| "learning_rate": 5e-05, |
| "loss": 0.1845, |
| "loss/crossentropy": 2.9059385657310486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18454358726739883, |
| "loss/reg": 2.1542842388153076, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.00701, |
| "grad_norm": 0.568651020526886, |
| "grad_norm_var": 0.0033942912710514268, |
| "learning_rate": 5e-05, |
| "loss": 0.1801, |
| "loss/crossentropy": 2.771495759487152, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18007055297493935, |
| "loss/reg": 2.1522164344787598, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.00702, |
| "grad_norm": 0.3590672016143799, |
| "grad_norm_var": 0.00352192003862181, |
| "learning_rate": 5e-05, |
| "loss": 0.1651, |
| "loss/crossentropy": 2.750881016254425, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1650897115468979, |
| "loss/reg": 2.1509296894073486, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.00703, |
| "grad_norm": 0.36948493123054504, |
| "grad_norm_var": 0.0035648755964216056, |
| "learning_rate": 5e-05, |
| "loss": 0.1785, |
| "loss/crossentropy": 2.8027891516685486, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17848948016762733, |
| "loss/reg": 2.149231195449829, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 0.3613908588886261, |
| "grad_norm_var": 0.0036467673242235915, |
| "learning_rate": 5e-05, |
| "loss": 0.1682, |
| "loss/crossentropy": 2.763719141483307, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16823140904307365, |
| "loss/reg": 2.1478073596954346, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.00705, |
| "grad_norm": 0.38240060210227966, |
| "grad_norm_var": 0.003633263034560896, |
| "learning_rate": 5e-05, |
| "loss": 0.178, |
| "loss/crossentropy": 2.705716133117676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1780022643506527, |
| "loss/reg": 2.1462342739105225, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.00706, |
| "grad_norm": 0.3587467074394226, |
| "grad_norm_var": 0.0036409547879681387, |
| "learning_rate": 5e-05, |
| "loss": 0.1658, |
| "loss/crossentropy": 2.7772558331489563, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16575098782777786, |
| "loss/reg": 2.144062042236328, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.00707, |
| "grad_norm": 0.36025822162628174, |
| "grad_norm_var": 0.0036250184261099458, |
| "learning_rate": 5e-05, |
| "loss": 0.1724, |
| "loss/crossentropy": 2.634014904499054, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17235567048192024, |
| "loss/reg": 2.1429154872894287, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.00708, |
| "grad_norm": 0.35575759410858154, |
| "grad_norm_var": 0.0036725083584184842, |
| "learning_rate": 5e-05, |
| "loss": 0.1794, |
| "loss/crossentropy": 2.6474004983901978, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17943605780601501, |
| "loss/reg": 2.1421751976013184, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.00709, |
| "grad_norm": 0.3865105211734772, |
| "grad_norm_var": 0.003566375202589933, |
| "learning_rate": 5e-05, |
| "loss": 0.1817, |
| "loss/crossentropy": 2.9082140922546387, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18172414600849152, |
| "loss/reg": 2.1403071880340576, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.0071, |
| "grad_norm": 0.368362694978714, |
| "grad_norm_var": 0.003590971719305887, |
| "learning_rate": 5e-05, |
| "loss": 0.1711, |
| "loss/crossentropy": 2.6766469478607178, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17112310975790024, |
| "loss/reg": 2.138167142868042, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.00711, |
| "grad_norm": 0.34797176718711853, |
| "grad_norm_var": 0.003506589552138199, |
| "learning_rate": 5e-05, |
| "loss": 0.163, |
| "loss/crossentropy": 2.8545928597450256, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16299721226096153, |
| "loss/reg": 2.1356112957000732, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.00712, |
| "grad_norm": 0.3511999547481537, |
| "grad_norm_var": 0.0034451354888741254, |
| "learning_rate": 5e-05, |
| "loss": 0.1706, |
| "loss/crossentropy": 2.7516467571258545, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1705768182873726, |
| "loss/reg": 2.13396954536438, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.00713, |
| "grad_norm": 0.4692562520503998, |
| "grad_norm_var": 0.0038021677070381584, |
| "learning_rate": 5e-05, |
| "loss": 0.1772, |
| "loss/crossentropy": 2.7622230648994446, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1771526113152504, |
| "loss/reg": 2.1316707134246826, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.00714, |
| "grad_norm": 0.3500974774360657, |
| "grad_norm_var": 0.003583350276630167, |
| "learning_rate": 5e-05, |
| "loss": 0.1651, |
| "loss/crossentropy": 2.7385149598121643, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16512250155210495, |
| "loss/reg": 2.1301488876342773, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.00715, |
| "grad_norm": 0.33279696106910706, |
| "grad_norm_var": 0.0037632620297312364, |
| "learning_rate": 5e-05, |
| "loss": 0.1639, |
| "loss/crossentropy": 2.6760587096214294, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16387901455163956, |
| "loss/reg": 2.128563165664673, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.00716, |
| "grad_norm": 0.36436334252357483, |
| "grad_norm_var": 0.003418879723208453, |
| "learning_rate": 5e-05, |
| "loss": 0.1675, |
| "loss/crossentropy": 2.7055559158325195, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16751762479543686, |
| "loss/reg": 2.1272294521331787, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.00717, |
| "grad_norm": 0.35308849811553955, |
| "grad_norm_var": 0.0009122804473129371, |
| "learning_rate": 5e-05, |
| "loss": 0.1685, |
| "loss/crossentropy": 2.827264368534088, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16845671087503433, |
| "loss/reg": 2.125559091567993, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.00718, |
| "grad_norm": 0.36609259247779846, |
| "grad_norm_var": 0.0009080073745675876, |
| "learning_rate": 5e-05, |
| "loss": 0.1878, |
| "loss/crossentropy": 2.7995529770851135, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18778567016124725, |
| "loss/reg": 2.12422513961792, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.00719, |
| "grad_norm": 0.3564467430114746, |
| "grad_norm_var": 0.0009149400496893722, |
| "learning_rate": 5e-05, |
| "loss": 0.1665, |
| "loss/crossentropy": 2.848701000213623, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16652807220816612, |
| "loss/reg": 2.1227705478668213, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.0072, |
| "grad_norm": 0.3523035943508148, |
| "grad_norm_var": 0.0009263477116920882, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.7714666724205017, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16896242648363113, |
| "loss/reg": 2.1214191913604736, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.00721, |
| "grad_norm": 0.39885270595550537, |
| "grad_norm_var": 0.0009792887842439849, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.835131287574768, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1700747236609459, |
| "loss/reg": 2.1200404167175293, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.00722, |
| "grad_norm": 0.40293964743614197, |
| "grad_norm_var": 0.0010526817455953927, |
| "learning_rate": 5e-05, |
| "loss": 0.1819, |
| "loss/crossentropy": 2.744925618171692, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1818903423845768, |
| "loss/reg": 2.1181347370147705, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.00723, |
| "grad_norm": 0.5598530769348145, |
| "grad_norm_var": 0.0032894654306610577, |
| "learning_rate": 5e-05, |
| "loss": 0.1662, |
| "loss/crossentropy": 2.983691990375519, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16615596786141396, |
| "loss/reg": 2.1172993183135986, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.00724, |
| "grad_norm": 0.39669546484947205, |
| "grad_norm_var": 0.003249640426166478, |
| "learning_rate": 5e-05, |
| "loss": 0.1735, |
| "loss/crossentropy": 2.709549069404602, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17345992848277092, |
| "loss/reg": 2.1152801513671875, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.00725, |
| "grad_norm": 0.35726040601730347, |
| "grad_norm_var": 0.0032964500726321067, |
| "learning_rate": 5e-05, |
| "loss": 0.1624, |
| "loss/crossentropy": 2.7148231267929077, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16241873800754547, |
| "loss/reg": 2.1130924224853516, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.00726, |
| "grad_norm": 0.3927571177482605, |
| "grad_norm_var": 0.0032861190572127997, |
| "learning_rate": 5e-05, |
| "loss": 0.1795, |
| "loss/crossentropy": 2.7939482927322388, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1794990859925747, |
| "loss/reg": 2.111480712890625, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.00727, |
| "grad_norm": 0.3941044807434082, |
| "grad_norm_var": 0.0031944564404073005, |
| "learning_rate": 5e-05, |
| "loss": 0.1712, |
| "loss/crossentropy": 2.834249794483185, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1711888276040554, |
| "loss/reg": 2.109204053878784, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.00728, |
| "grad_norm": 0.4828793704509735, |
| "grad_norm_var": 0.0036429198556795937, |
| "learning_rate": 5e-05, |
| "loss": 0.2082, |
| "loss/crossentropy": 2.899094045162201, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20823358744382858, |
| "loss/reg": 2.1061551570892334, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.00729, |
| "grad_norm": 0.3574215769767761, |
| "grad_norm_var": 0.003326472236741973, |
| "learning_rate": 5e-05, |
| "loss": 0.1596, |
| "loss/crossentropy": 2.7636680603027344, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15961402654647827, |
| "loss/reg": 2.104001760482788, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.0073, |
| "grad_norm": 0.40163764357566833, |
| "grad_norm_var": 0.003227754706050412, |
| "learning_rate": 5e-05, |
| "loss": 0.1797, |
| "loss/crossentropy": 2.8588566184043884, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17974677309393883, |
| "loss/reg": 2.102442741394043, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.00731, |
| "grad_norm": 0.37189754843711853, |
| "grad_norm_var": 0.003015475193035148, |
| "learning_rate": 5e-05, |
| "loss": 0.1673, |
| "loss/crossentropy": 2.826458215713501, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16728204488754272, |
| "loss/reg": 2.1002049446105957, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.00732, |
| "grad_norm": 0.3587784171104431, |
| "grad_norm_var": 0.003039707591927121, |
| "learning_rate": 5e-05, |
| "loss": 0.1645, |
| "loss/crossentropy": 2.731923222541809, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16448039561510086, |
| "loss/reg": 2.098315954208374, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.00733, |
| "grad_norm": 0.37631648778915405, |
| "grad_norm_var": 0.002946915065401934, |
| "learning_rate": 5e-05, |
| "loss": 0.1714, |
| "loss/crossentropy": 2.880859136581421, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17137856781482697, |
| "loss/reg": 2.0962650775909424, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.00734, |
| "grad_norm": 0.3563605844974518, |
| "grad_norm_var": 0.002990850657754888, |
| "learning_rate": 5e-05, |
| "loss": 0.1628, |
| "loss/crossentropy": 2.6355279088020325, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16278789564967155, |
| "loss/reg": 2.094142436981201, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.00735, |
| "grad_norm": 0.37199047207832336, |
| "grad_norm_var": 0.0029265023383142925, |
| "learning_rate": 5e-05, |
| "loss": 0.1802, |
| "loss/crossentropy": 2.769617021083832, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18019907549023628, |
| "loss/reg": 2.0921125411987305, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.00736, |
| "grad_norm": 0.489103764295578, |
| "grad_norm_var": 0.0033036264225515164, |
| "learning_rate": 5e-05, |
| "loss": 0.1832, |
| "loss/crossentropy": 2.7491883039474487, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1831774264574051, |
| "loss/reg": 2.0904338359832764, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.00737, |
| "grad_norm": 0.5059826970100403, |
| "grad_norm_var": 0.003943075932518525, |
| "learning_rate": 5e-05, |
| "loss": 0.1817, |
| "loss/crossentropy": 2.8231146931648254, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18174266442656517, |
| "loss/reg": 2.0879714488983154, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.00738, |
| "grad_norm": 0.6662333011627197, |
| "grad_norm_var": 0.007992879009924207, |
| "learning_rate": 5e-05, |
| "loss": 0.1861, |
| "loss/crossentropy": 2.7952335476875305, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18612126260995865, |
| "loss/reg": 2.0855941772460938, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.00739, |
| "grad_norm": 0.43555790185928345, |
| "grad_norm_var": 0.006764259520141217, |
| "learning_rate": 5e-05, |
| "loss": 0.1823, |
| "loss/crossentropy": 2.7390406727790833, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1822943352162838, |
| "loss/reg": 2.083002805709839, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.0074, |
| "grad_norm": 0.36206063628196716, |
| "grad_norm_var": 0.0069454028516603905, |
| "learning_rate": 5e-05, |
| "loss": 0.1747, |
| "loss/crossentropy": 2.6245489716529846, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17474905773997307, |
| "loss/reg": 2.0807597637176514, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.00741, |
| "grad_norm": 0.4077146649360657, |
| "grad_norm_var": 0.006699115025232619, |
| "learning_rate": 5e-05, |
| "loss": 0.21, |
| "loss/crossentropy": 2.734727144241333, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20996900647878647, |
| "loss/reg": 2.0777878761291504, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.00742, |
| "grad_norm": 0.4748740792274475, |
| "grad_norm_var": 0.0068148961293998615, |
| "learning_rate": 5e-05, |
| "loss": 0.1926, |
| "loss/crossentropy": 2.757317006587982, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19259492680430412, |
| "loss/reg": 2.0748398303985596, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.00743, |
| "grad_norm": 0.3738694190979004, |
| "grad_norm_var": 0.006926021168675212, |
| "learning_rate": 5e-05, |
| "loss": 0.1671, |
| "loss/crossentropy": 2.7804144620895386, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1671152375638485, |
| "loss/reg": 2.072981119155884, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.00744, |
| "grad_norm": 0.4373812675476074, |
| "grad_norm_var": 0.006701504868688938, |
| "learning_rate": 5e-05, |
| "loss": 0.1836, |
| "loss/crossentropy": 2.8251866698265076, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.183602724224329, |
| "loss/reg": 2.069178581237793, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.00745, |
| "grad_norm": 0.41339626908302307, |
| "grad_norm_var": 0.006417608208757027, |
| "learning_rate": 5e-05, |
| "loss": 0.1665, |
| "loss/crossentropy": 2.7784698605537415, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16648468375205994, |
| "loss/reg": 2.066897392272949, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.00746, |
| "grad_norm": 0.36906108260154724, |
| "grad_norm_var": 0.0065862671267569286, |
| "learning_rate": 5e-05, |
| "loss": 0.1752, |
| "loss/crossentropy": 2.7134994864463806, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17522963881492615, |
| "loss/reg": 2.063711404800415, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.00747, |
| "grad_norm": 0.3699776232242584, |
| "grad_norm_var": 0.006599620482715507, |
| "learning_rate": 5e-05, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.6448380947113037, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1815556287765503, |
| "loss/reg": 2.0618152618408203, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.00748, |
| "grad_norm": 0.35848432779312134, |
| "grad_norm_var": 0.006602145753337363, |
| "learning_rate": 5e-05, |
| "loss": 0.1759, |
| "loss/crossentropy": 2.577029287815094, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1758727729320526, |
| "loss/reg": 2.0592784881591797, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.00749, |
| "grad_norm": 0.40015411376953125, |
| "grad_norm_var": 0.006489211309593653, |
| "learning_rate": 5e-05, |
| "loss": 0.2007, |
| "loss/crossentropy": 2.7719894647598267, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20066174119710922, |
| "loss/reg": 2.056396484375, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 0.34235846996307373, |
| "grad_norm_var": 0.006628701391081989, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.8526532649993896, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1660567931830883, |
| "loss/reg": 2.053225040435791, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.00751, |
| "grad_norm": 0.37578198313713074, |
| "grad_norm_var": 0.006603490490161393, |
| "learning_rate": 5e-05, |
| "loss": 0.192, |
| "loss/crossentropy": 2.803673267364502, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19199685007333755, |
| "loss/reg": 2.0500073432922363, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.00752, |
| "grad_norm": 0.3724234700202942, |
| "grad_norm_var": 0.006439587327084632, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.8497246503829956, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16111965849995613, |
| "loss/reg": 2.0481040477752686, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.00753, |
| "grad_norm": 0.37283533811569214, |
| "grad_norm_var": 0.005960471364599432, |
| "learning_rate": 5e-05, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.580562174320221, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18162427470088005, |
| "loss/reg": 2.045363187789917, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.00754, |
| "grad_norm": 0.42849722504615784, |
| "grad_norm_var": 0.0013156070888824681, |
| "learning_rate": 5e-05, |
| "loss": 0.1885, |
| "loss/crossentropy": 2.7384997606277466, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1885378062725067, |
| "loss/reg": 2.0420894622802734, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.00755, |
| "grad_norm": 0.3246319890022278, |
| "grad_norm_var": 0.0014611472372319412, |
| "learning_rate": 5e-05, |
| "loss": 0.152, |
| "loss/crossentropy": 2.827781558036804, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15203238278627396, |
| "loss/reg": 2.0399134159088135, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.00756, |
| "grad_norm": 0.3523566722869873, |
| "grad_norm_var": 0.0014986135555234647, |
| "learning_rate": 5e-05, |
| "loss": 0.1799, |
| "loss/crossentropy": 2.7049853801727295, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17991740256547928, |
| "loss/reg": 2.036095142364502, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.00757, |
| "grad_norm": 0.3352646827697754, |
| "grad_norm_var": 0.0016155829783374783, |
| "learning_rate": 5e-05, |
| "loss": 0.1612, |
| "loss/crossentropy": 2.715296685695648, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16122159361839294, |
| "loss/reg": 2.0335283279418945, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.00758, |
| "grad_norm": 0.36173179745674133, |
| "grad_norm_var": 0.001004548523534495, |
| "learning_rate": 5e-05, |
| "loss": 0.1768, |
| "loss/crossentropy": 2.8797001242637634, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17676853761076927, |
| "loss/reg": 2.0315983295440674, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.00759, |
| "grad_norm": 0.43379032611846924, |
| "grad_norm_var": 0.0012258123535982288, |
| "learning_rate": 5e-05, |
| "loss": 0.2008, |
| "loss/crossentropy": 2.7367305159568787, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20076703280210495, |
| "loss/reg": 2.028825521469116, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.0076, |
| "grad_norm": 0.7135851979255676, |
| "grad_norm_var": 0.008180404333396396, |
| "learning_rate": 5e-05, |
| "loss": 0.1988, |
| "loss/crossentropy": 2.7777557373046875, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19884883239865303, |
| "loss/reg": 2.0258262157440186, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.00761, |
| "grad_norm": 0.36141064763069153, |
| "grad_norm_var": 0.008223674749041798, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.755949318408966, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17225057631731033, |
| "loss/reg": 2.022468090057373, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.00762, |
| "grad_norm": 0.47610145807266235, |
| "grad_norm_var": 0.008612084301677063, |
| "learning_rate": 5e-05, |
| "loss": 0.1786, |
| "loss/crossentropy": 2.701655924320221, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17863870784640312, |
| "loss/reg": 2.0194478034973145, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.00763, |
| "grad_norm": 0.35960420966148376, |
| "grad_norm_var": 0.0086585523494028, |
| "learning_rate": 5e-05, |
| "loss": 0.1719, |
| "loss/crossentropy": 2.6496411561965942, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17191722244024277, |
| "loss/reg": 2.0173516273498535, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.00764, |
| "grad_norm": 0.3759422302246094, |
| "grad_norm_var": 0.008585472348376118, |
| "learning_rate": 5e-05, |
| "loss": 0.1665, |
| "loss/crossentropy": 2.7261382937431335, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1664697714149952, |
| "loss/reg": 2.014254331588745, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.00765, |
| "grad_norm": 0.3791477680206299, |
| "grad_norm_var": 0.008610251361000461, |
| "learning_rate": 5e-05, |
| "loss": 0.1716, |
| "loss/crossentropy": 2.779210090637207, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1715676300227642, |
| "loss/reg": 2.0109994411468506, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.00766, |
| "grad_norm": 0.37698858976364136, |
| "grad_norm_var": 0.00842901980982322, |
| "learning_rate": 5e-05, |
| "loss": 0.178, |
| "loss/crossentropy": 2.5693264603614807, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17796850576996803, |
| "loss/reg": 2.007894277572632, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.00767, |
| "grad_norm": 0.324692040681839, |
| "grad_norm_var": 0.008757168987509056, |
| "learning_rate": 5e-05, |
| "loss": 0.163, |
| "loss/crossentropy": 2.790699005126953, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16297711431980133, |
| "loss/reg": 2.0054080486297607, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 0.37725430727005005, |
| "grad_norm_var": 0.008742918144709544, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.90560781955719, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17007537558674812, |
| "loss/reg": 2.0026705265045166, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.00769, |
| "grad_norm": 0.3565872013568878, |
| "grad_norm_var": 0.008812017421293783, |
| "learning_rate": 5e-05, |
| "loss": 0.1748, |
| "loss/crossentropy": 2.8573551774024963, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1747995764017105, |
| "loss/reg": 1.9999767541885376, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.0077, |
| "grad_norm": 0.32768821716308594, |
| "grad_norm_var": 0.009011701837686615, |
| "learning_rate": 5e-05, |
| "loss": 0.1647, |
| "loss/crossentropy": 2.7850446105003357, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16465429961681366, |
| "loss/reg": 1.9974277019500732, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.00771, |
| "grad_norm": 0.34194430708885193, |
| "grad_norm_var": 0.008880009468442519, |
| "learning_rate": 5e-05, |
| "loss": 0.1681, |
| "loss/crossentropy": 2.89225697517395, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16806093603372574, |
| "loss/reg": 1.9941447973251343, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.00772, |
| "grad_norm": 0.36788639426231384, |
| "grad_norm_var": 0.008815313943155234, |
| "learning_rate": 5e-05, |
| "loss": 0.1824, |
| "loss/crossentropy": 2.8710330724716187, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1823921650648117, |
| "loss/reg": 1.991845965385437, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.00773, |
| "grad_norm": 0.33500465750694275, |
| "grad_norm_var": 0.008817280025891942, |
| "learning_rate": 5e-05, |
| "loss": 0.1669, |
| "loss/crossentropy": 2.7821491956710815, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16685106977820396, |
| "loss/reg": 1.9901355504989624, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.00774, |
| "grad_norm": 0.33815550804138184, |
| "grad_norm_var": 0.008946649562538052, |
| "learning_rate": 5e-05, |
| "loss": 0.162, |
| "loss/crossentropy": 2.7051143050193787, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16199326515197754, |
| "loss/reg": 1.9880555868148804, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.00775, |
| "grad_norm": 0.32524728775024414, |
| "grad_norm_var": 0.009054478596347363, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.72264701128006, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16902651265263557, |
| "loss/reg": 1.986093521118164, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.00776, |
| "grad_norm": 0.34697458148002625, |
| "grad_norm_var": 0.0013234442614042051, |
| "learning_rate": 5e-05, |
| "loss": 0.1691, |
| "loss/crossentropy": 2.780848979949951, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1690516211092472, |
| "loss/reg": 1.9844779968261719, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.00777, |
| "grad_norm": 0.33995282649993896, |
| "grad_norm_var": 0.0013500864177136548, |
| "learning_rate": 5e-05, |
| "loss": 0.1612, |
| "loss/crossentropy": 2.772739827632904, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16119593381881714, |
| "loss/reg": 1.9825077056884766, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.00778, |
| "grad_norm": 0.35139432549476624, |
| "grad_norm_var": 0.0003803343966673219, |
| "learning_rate": 5e-05, |
| "loss": 0.1668, |
| "loss/crossentropy": 2.7008825540542603, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16681700944900513, |
| "loss/reg": 1.9806305170059204, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.00779, |
| "grad_norm": 0.4588527977466583, |
| "grad_norm_var": 0.00110283708340256, |
| "learning_rate": 5e-05, |
| "loss": 0.1907, |
| "loss/crossentropy": 2.632855713367462, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19068260118365288, |
| "loss/reg": 1.9793086051940918, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.0078, |
| "grad_norm": 0.3829444646835327, |
| "grad_norm_var": 0.0011229031183707624, |
| "learning_rate": 5e-05, |
| "loss": 0.1875, |
| "loss/crossentropy": 2.9350045323371887, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18754199519753456, |
| "loss/reg": 1.9774302244186401, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.00781, |
| "grad_norm": 0.46253493428230286, |
| "grad_norm_var": 0.0017907320044085833, |
| "learning_rate": 5e-05, |
| "loss": 0.1957, |
| "loss/crossentropy": 2.7478776574134827, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19565920531749725, |
| "loss/reg": 1.9757329225540161, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.00782, |
| "grad_norm": 0.35229969024658203, |
| "grad_norm_var": 0.0017840355007145352, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.805756628513336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1610955037176609, |
| "loss/reg": 1.973933458328247, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.00783, |
| "grad_norm": 0.3324076533317566, |
| "grad_norm_var": 0.0017495419673394963, |
| "learning_rate": 5e-05, |
| "loss": 0.1706, |
| "loss/crossentropy": 2.8343148827552795, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1706329919397831, |
| "loss/reg": 1.9718844890594482, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.00784, |
| "grad_norm": 0.3563413619995117, |
| "grad_norm_var": 0.0017352353042652258, |
| "learning_rate": 5e-05, |
| "loss": 0.1768, |
| "loss/crossentropy": 2.825278103351593, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17681827396154404, |
| "loss/reg": 1.969612956047058, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.00785, |
| "grad_norm": 0.33560603857040405, |
| "grad_norm_var": 0.0017751309342711038, |
| "learning_rate": 5e-05, |
| "loss": 0.1558, |
| "loss/crossentropy": 2.8132280111312866, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1558120921254158, |
| "loss/reg": 1.9675753116607666, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.00786, |
| "grad_norm": 0.39733415842056274, |
| "grad_norm_var": 0.0017810049820061401, |
| "learning_rate": 5e-05, |
| "loss": 0.1849, |
| "loss/crossentropy": 2.8960456252098083, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18493180349469185, |
| "loss/reg": 1.965217113494873, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.00787, |
| "grad_norm": 0.561698317527771, |
| "grad_norm_var": 0.004151387117344507, |
| "learning_rate": 5e-05, |
| "loss": 0.1987, |
| "loss/crossentropy": 2.66669100522995, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19869648292660713, |
| "loss/reg": 1.9629205465316772, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.00788, |
| "grad_norm": 0.35911333560943604, |
| "grad_norm_var": 0.004167781816727311, |
| "learning_rate": 5e-05, |
| "loss": 0.1742, |
| "loss/crossentropy": 2.723667323589325, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17424843832850456, |
| "loss/reg": 1.9608547687530518, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.00789, |
| "grad_norm": 0.3422897160053253, |
| "grad_norm_var": 0.004130072564222831, |
| "learning_rate": 5e-05, |
| "loss": 0.1683, |
| "loss/crossentropy": 2.853653848171234, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16829833760857582, |
| "loss/reg": 1.959040880203247, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.0079, |
| "grad_norm": 0.373519629240036, |
| "grad_norm_var": 0.0040217911733014585, |
| "learning_rate": 5e-05, |
| "loss": 0.1697, |
| "loss/crossentropy": 2.7469093799591064, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1697460599243641, |
| "loss/reg": 1.9578640460968018, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.00791, |
| "grad_norm": 0.42586550116539, |
| "grad_norm_var": 0.003921241787547673, |
| "learning_rate": 5e-05, |
| "loss": 0.1896, |
| "loss/crossentropy": 2.9876235127449036, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18961479887366295, |
| "loss/reg": 1.9558684825897217, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.00792, |
| "grad_norm": 0.34371063113212585, |
| "grad_norm_var": 0.00393897634361432, |
| "learning_rate": 5e-05, |
| "loss": 0.1624, |
| "loss/crossentropy": 2.922863006591797, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16239817067980766, |
| "loss/reg": 1.9541829824447632, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.00793, |
| "grad_norm": 0.3611912727355957, |
| "grad_norm_var": 0.0038367960883469387, |
| "learning_rate": 5e-05, |
| "loss": 0.1767, |
| "loss/crossentropy": 2.751186192035675, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17671825364232063, |
| "loss/reg": 1.9524297714233398, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.00794, |
| "grad_norm": 0.3787733018398285, |
| "grad_norm_var": 0.0037525025406884736, |
| "learning_rate": 5e-05, |
| "loss": 0.1695, |
| "loss/crossentropy": 2.6539193391799927, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16952653229236603, |
| "loss/reg": 1.951439619064331, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.00795, |
| "grad_norm": 0.37621310353279114, |
| "grad_norm_var": 0.003409985625982037, |
| "learning_rate": 5e-05, |
| "loss": 0.1827, |
| "loss/crossentropy": 2.672878086566925, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1826501600444317, |
| "loss/reg": 1.9504698514938354, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.00796, |
| "grad_norm": 0.3580264747142792, |
| "grad_norm_var": 0.0034518512961513536, |
| "learning_rate": 5e-05, |
| "loss": 0.1741, |
| "loss/crossentropy": 2.8564891815185547, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17409207299351692, |
| "loss/reg": 1.9492477178573608, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.00797, |
| "grad_norm": 0.3552623689174652, |
| "grad_norm_var": 0.0030235748866805395, |
| "learning_rate": 5e-05, |
| "loss": 0.1679, |
| "loss/crossentropy": 2.9642611145973206, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16786304488778114, |
| "loss/reg": 1.9484763145446777, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.00798, |
| "grad_norm": 0.37029561400413513, |
| "grad_norm_var": 0.0029878997549970957, |
| "learning_rate": 5e-05, |
| "loss": 0.1837, |
| "loss/crossentropy": 2.7581509947776794, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18365685641765594, |
| "loss/reg": 1.946696400642395, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.00799, |
| "grad_norm": 0.37257152795791626, |
| "grad_norm_var": 0.00285137706672662, |
| "learning_rate": 5e-05, |
| "loss": 0.1768, |
| "loss/crossentropy": 2.766197443008423, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17676663026213646, |
| "loss/reg": 1.9451103210449219, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 0.3937225043773651, |
| "grad_norm_var": 0.0028245897421089812, |
| "learning_rate": 5e-05, |
| "loss": 0.1593, |
| "loss/crossentropy": 2.968823492527008, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.15929469466209412, |
| "loss/reg": 1.943403959274292, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.00801, |
| "grad_norm": 0.5229995846748352, |
| "grad_norm_var": 0.003870799660257873, |
| "learning_rate": 5e-05, |
| "loss": 0.1904, |
| "loss/crossentropy": 2.5733524560928345, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19040565192699432, |
| "loss/reg": 1.9423651695251465, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.00802, |
| "grad_norm": 0.4087795913219452, |
| "grad_norm_var": 0.003885163701405114, |
| "learning_rate": 5e-05, |
| "loss": 0.2011, |
| "loss/crossentropy": 2.643693685531616, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20110392943024635, |
| "loss/reg": 1.941137433052063, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.00803, |
| "grad_norm": 0.369555801153183, |
| "grad_norm_var": 0.0018963737991296507, |
| "learning_rate": 5e-05, |
| "loss": 0.1768, |
| "loss/crossentropy": 2.702915072441101, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17682579904794693, |
| "loss/reg": 1.9396127462387085, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.00804, |
| "grad_norm": 0.3822772204875946, |
| "grad_norm_var": 0.001859244513831604, |
| "learning_rate": 5e-05, |
| "loss": 0.1674, |
| "loss/crossentropy": 2.8051819801330566, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1674252152442932, |
| "loss/reg": 1.9382424354553223, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.00805, |
| "grad_norm": 0.42195388674736023, |
| "grad_norm_var": 0.0018187903132861672, |
| "learning_rate": 5e-05, |
| "loss": 0.1982, |
| "loss/crossentropy": 2.7962412238121033, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19819854572415352, |
| "loss/reg": 1.9367636442184448, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.00806, |
| "grad_norm": 0.39215224981307983, |
| "grad_norm_var": 0.001803471303692028, |
| "learning_rate": 5e-05, |
| "loss": 0.1862, |
| "loss/crossentropy": 2.838093400001526, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1862441450357437, |
| "loss/reg": 1.9356895685195923, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.00807, |
| "grad_norm": 0.36561474204063416, |
| "grad_norm_var": 0.0017388941933010808, |
| "learning_rate": 5e-05, |
| "loss": 0.1659, |
| "loss/crossentropy": 2.8486820459365845, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16586757823824883, |
| "loss/reg": 1.9336965084075928, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.00808, |
| "grad_norm": 0.3940856456756592, |
| "grad_norm_var": 0.0016146705961780842, |
| "learning_rate": 5e-05, |
| "loss": 0.1695, |
| "loss/crossentropy": 2.7590547800064087, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16953302919864655, |
| "loss/reg": 1.9318287372589111, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.00809, |
| "grad_norm": 0.37031009793281555, |
| "grad_norm_var": 0.0015860965038246484, |
| "learning_rate": 5e-05, |
| "loss": 0.1663, |
| "loss/crossentropy": 2.803991198539734, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16627426072955132, |
| "loss/reg": 1.9298937320709229, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.0081, |
| "grad_norm": 0.36467787623405457, |
| "grad_norm_var": 0.001618743456786816, |
| "learning_rate": 5e-05, |
| "loss": 0.1798, |
| "loss/crossentropy": 2.7522680163383484, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17983945459127426, |
| "loss/reg": 1.928220272064209, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.00811, |
| "grad_norm": 0.644191324710846, |
| "grad_norm_var": 0.005662418748027209, |
| "learning_rate": 5e-05, |
| "loss": 0.1823, |
| "loss/crossentropy": 2.9207261204719543, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18232716247439384, |
| "loss/reg": 1.9264674186706543, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.00812, |
| "grad_norm": 0.4135313034057617, |
| "grad_norm_var": 0.00550433789682554, |
| "learning_rate": 5e-05, |
| "loss": 0.1754, |
| "loss/crossentropy": 2.78128319978714, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17538663744926453, |
| "loss/reg": 1.9246680736541748, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.00813, |
| "grad_norm": 0.44594907760620117, |
| "grad_norm_var": 0.005370096537218135, |
| "learning_rate": 5e-05, |
| "loss": 0.1898, |
| "loss/crossentropy": 2.7069836258888245, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18982965499162674, |
| "loss/reg": 1.9229440689086914, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.00814, |
| "grad_norm": 0.41460415720939636, |
| "grad_norm_var": 0.005231401879877403, |
| "learning_rate": 5e-05, |
| "loss": 0.1965, |
| "loss/crossentropy": 2.658607244491577, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1964995227754116, |
| "loss/reg": 1.9211541414260864, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.00815, |
| "grad_norm": 0.40847840905189514, |
| "grad_norm_var": 0.0050977892227572616, |
| "learning_rate": 5e-05, |
| "loss": 0.1865, |
| "loss/crossentropy": 2.789508819580078, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18652214854955673, |
| "loss/reg": 1.91935396194458, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.00816, |
| "grad_norm": 0.39475539326667786, |
| "grad_norm_var": 0.005094298258556392, |
| "learning_rate": 5e-05, |
| "loss": 0.1876, |
| "loss/crossentropy": 2.771743655204773, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18758049979805946, |
| "loss/reg": 1.9176177978515625, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.00817, |
| "grad_norm": 0.3788897395133972, |
| "grad_norm_var": 0.004405869730473085, |
| "learning_rate": 5e-05, |
| "loss": 0.1765, |
| "loss/crossentropy": 2.807315766811371, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17645375058054924, |
| "loss/reg": 1.9157112836837769, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.00818, |
| "grad_norm": 0.3527612090110779, |
| "grad_norm_var": 0.004615691680188668, |
| "learning_rate": 5e-05, |
| "loss": 0.1644, |
| "loss/crossentropy": 2.8354954719543457, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16440149024128914, |
| "loss/reg": 1.914186716079712, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.00819, |
| "grad_norm": 0.45434367656707764, |
| "grad_norm_var": 0.004640431192599914, |
| "learning_rate": 5e-05, |
| "loss": 0.2004, |
| "loss/crossentropy": 2.7823927998542786, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20038331300020218, |
| "loss/reg": 1.9128350019454956, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0082, |
| "grad_norm": 0.4440554082393646, |
| "grad_norm_var": 0.004630750512825665, |
| "learning_rate": 5e-05, |
| "loss": 0.198, |
| "loss/crossentropy": 2.7826399207115173, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19803617522120476, |
| "loss/reg": 1.9112329483032227, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.00821, |
| "grad_norm": 0.8357638120651245, |
| "grad_norm_var": 0.015646654980531947, |
| "learning_rate": 5e-05, |
| "loss": 0.2236, |
| "loss/crossentropy": 2.855618476867676, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22355607897043228, |
| "loss/reg": 1.9094618558883667, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.00822, |
| "grad_norm": 0.369484543800354, |
| "grad_norm_var": 0.01582983572015086, |
| "learning_rate": 5e-05, |
| "loss": 0.1688, |
| "loss/crossentropy": 2.7591440081596375, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16877064853906631, |
| "loss/reg": 1.9078381061553955, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.00823, |
| "grad_norm": 0.3682078421115875, |
| "grad_norm_var": 0.015804289096973827, |
| "learning_rate": 5e-05, |
| "loss": 0.1661, |
| "loss/crossentropy": 2.6714991331100464, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16609660163521767, |
| "loss/reg": 1.9066871404647827, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.00824, |
| "grad_norm": 0.4925972521305084, |
| "grad_norm_var": 0.015796176553567597, |
| "learning_rate": 5e-05, |
| "loss": 0.1983, |
| "loss/crossentropy": 2.690047025680542, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19832593947649002, |
| "loss/reg": 1.9051308631896973, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.00825, |
| "grad_norm": 0.4004671275615692, |
| "grad_norm_var": 0.01554450060197241, |
| "learning_rate": 5e-05, |
| "loss": 0.1867, |
| "loss/crossentropy": 3.0430662631988525, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18668686598539352, |
| "loss/reg": 1.9037506580352783, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.00826, |
| "grad_norm": 0.40644222497940063, |
| "grad_norm_var": 0.01518439463368978, |
| "learning_rate": 5e-05, |
| "loss": 0.1992, |
| "loss/crossentropy": 2.856709599494934, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19924000278115273, |
| "loss/reg": 1.9035279750823975, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.00827, |
| "grad_norm": 0.387662410736084, |
| "grad_norm_var": 0.012707668169475368, |
| "learning_rate": 5e-05, |
| "loss": 0.171, |
| "loss/crossentropy": 2.8205041885375977, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17101648449897766, |
| "loss/reg": 1.9019997119903564, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.00828, |
| "grad_norm": 0.4229760766029358, |
| "grad_norm_var": 0.012685578660426963, |
| "learning_rate": 5e-05, |
| "loss": 0.1705, |
| "loss/crossentropy": 2.8298428058624268, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1704978421330452, |
| "loss/reg": 1.9013198614120483, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.00829, |
| "grad_norm": 0.4192207455635071, |
| "grad_norm_var": 0.012695092968616347, |
| "learning_rate": 5e-05, |
| "loss": 0.1699, |
| "loss/crossentropy": 2.8572763800621033, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16986168175935745, |
| "loss/reg": 1.9007604122161865, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.0083, |
| "grad_norm": 0.3887600898742676, |
| "grad_norm_var": 0.012805118489640084, |
| "learning_rate": 5e-05, |
| "loss": 0.202, |
| "loss/crossentropy": 2.786255419254303, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20195355266332626, |
| "loss/reg": 1.9000667333602905, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.00831, |
| "grad_norm": 0.48604434728622437, |
| "grad_norm_var": 0.012929568590754843, |
| "learning_rate": 5e-05, |
| "loss": 0.1961, |
| "loss/crossentropy": 2.845152735710144, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19609695672988892, |
| "loss/reg": 1.899495005607605, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 0.38712021708488464, |
| "grad_norm_var": 0.012976881832390848, |
| "learning_rate": 5e-05, |
| "loss": 0.1889, |
| "loss/crossentropy": 2.7007412910461426, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1888689175248146, |
| "loss/reg": 1.8983198404312134, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.00833, |
| "grad_norm": 0.3749590814113617, |
| "grad_norm_var": 0.013008393945473115, |
| "learning_rate": 5e-05, |
| "loss": 0.1785, |
| "loss/crossentropy": 2.607687532901764, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1784559190273285, |
| "loss/reg": 1.897759199142456, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.00834, |
| "grad_norm": 0.35202544927597046, |
| "grad_norm_var": 0.013016684761580717, |
| "learning_rate": 5e-05, |
| "loss": 0.1707, |
| "loss/crossentropy": 2.7777926325798035, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17071311548352242, |
| "loss/reg": 1.8961632251739502, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.00835, |
| "grad_norm": 0.3441760540008545, |
| "grad_norm_var": 0.013518763280912912, |
| "learning_rate": 5e-05, |
| "loss": 0.1615, |
| "loss/crossentropy": 2.835566759109497, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1615000143647194, |
| "loss/reg": 1.8950566053390503, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.00836, |
| "grad_norm": 0.362005352973938, |
| "grad_norm_var": 0.013785734718565416, |
| "learning_rate": 5e-05, |
| "loss": 0.1631, |
| "loss/crossentropy": 2.818268299102783, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16310900822281837, |
| "loss/reg": 1.8935774564743042, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.00837, |
| "grad_norm": 0.3725143074989319, |
| "grad_norm_var": 0.0018186987426010584, |
| "learning_rate": 5e-05, |
| "loss": 0.1929, |
| "loss/crossentropy": 2.763745427131653, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.192863829433918, |
| "loss/reg": 1.8919721841812134, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.00838, |
| "grad_norm": 0.39604651927948, |
| "grad_norm_var": 0.0017691837659245461, |
| "learning_rate": 5e-05, |
| "loss": 0.1847, |
| "loss/crossentropy": 2.729005455970764, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18469301983714104, |
| "loss/reg": 1.8908017873764038, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.00839, |
| "grad_norm": 0.39175912737846375, |
| "grad_norm_var": 0.001711627371570279, |
| "learning_rate": 5e-05, |
| "loss": 0.1841, |
| "loss/crossentropy": 2.8563897609710693, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18407713249325752, |
| "loss/reg": 1.8892946243286133, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.0084, |
| "grad_norm": 0.3497207462787628, |
| "grad_norm_var": 0.0012053613127933737, |
| "learning_rate": 5e-05, |
| "loss": 0.1657, |
| "loss/crossentropy": 2.9635773301124573, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1656595915555954, |
| "loss/reg": 1.8886394500732422, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.00841, |
| "grad_norm": 0.36070436239242554, |
| "grad_norm_var": 0.0012493146014174172, |
| "learning_rate": 5e-05, |
| "loss": 0.1659, |
| "loss/crossentropy": 2.905772030353546, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16585366800427437, |
| "loss/reg": 1.8883072137832642, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.00842, |
| "grad_norm": 0.5194427371025085, |
| "grad_norm_var": 0.002330769361460483, |
| "learning_rate": 5e-05, |
| "loss": 0.1903, |
| "loss/crossentropy": 2.914414703845978, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1903173327445984, |
| "loss/reg": 1.8872804641723633, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.00843, |
| "grad_norm": 0.3658028841018677, |
| "grad_norm_var": 0.0023811347132161485, |
| "learning_rate": 5e-05, |
| "loss": 0.1722, |
| "loss/crossentropy": 2.8683270812034607, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17216329649090767, |
| "loss/reg": 1.8860183954238892, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.00844, |
| "grad_norm": 0.3355120122432709, |
| "grad_norm_var": 0.0025135271396979795, |
| "learning_rate": 5e-05, |
| "loss": 0.166, |
| "loss/crossentropy": 2.7672330141067505, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16596197709441185, |
| "loss/reg": 1.8848904371261597, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.00845, |
| "grad_norm": 0.45907968282699585, |
| "grad_norm_var": 0.002779472587279837, |
| "learning_rate": 5e-05, |
| "loss": 0.1881, |
| "loss/crossentropy": 2.79125440120697, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1881270818412304, |
| "loss/reg": 1.8831804990768433, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.00846, |
| "grad_norm": 0.3753393888473511, |
| "grad_norm_var": 0.0027935829770950843, |
| "learning_rate": 5e-05, |
| "loss": 0.1801, |
| "loss/crossentropy": 2.753562033176422, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18013736233115196, |
| "loss/reg": 1.8817567825317383, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.00847, |
| "grad_norm": 0.41996800899505615, |
| "grad_norm_var": 0.002216029114339835, |
| "learning_rate": 5e-05, |
| "loss": 0.1871, |
| "loss/crossentropy": 2.8630106449127197, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18711163103580475, |
| "loss/reg": 1.8805315494537354, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.00848, |
| "grad_norm": 0.40139615535736084, |
| "grad_norm_var": 0.0022320677834542836, |
| "learning_rate": 5e-05, |
| "loss": 0.182, |
| "loss/crossentropy": 2.8663435578346252, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18201814219355583, |
| "loss/reg": 1.8795160055160522, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.00849, |
| "grad_norm": 0.44251078367233276, |
| "grad_norm_var": 0.0024153193390402117, |
| "learning_rate": 5e-05, |
| "loss": 0.1721, |
| "loss/crossentropy": 2.8493316173553467, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1721041165292263, |
| "loss/reg": 1.8777539730072021, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 0.39363330602645874, |
| "grad_norm_var": 0.00231007314672006, |
| "learning_rate": 5e-05, |
| "loss": 0.1933, |
| "loss/crossentropy": 2.8225064277648926, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19327203929424286, |
| "loss/reg": 1.8762940168380737, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.00851, |
| "grad_norm": 0.3834942579269409, |
| "grad_norm_var": 0.0021502092497398652, |
| "learning_rate": 5e-05, |
| "loss": 0.1919, |
| "loss/crossentropy": 2.7274433970451355, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19194044917821884, |
| "loss/reg": 1.8752055168151855, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.00852, |
| "grad_norm": 0.36249086260795593, |
| "grad_norm_var": 0.0021480519578248518, |
| "learning_rate": 5e-05, |
| "loss": 0.1759, |
| "loss/crossentropy": 2.725217640399933, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17594841867685318, |
| "loss/reg": 1.8736830949783325, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.00853, |
| "grad_norm": 0.3869001269340515, |
| "grad_norm_var": 0.002116727725912885, |
| "learning_rate": 5e-05, |
| "loss": 0.1823, |
| "loss/crossentropy": 2.738001227378845, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1822870336472988, |
| "loss/reg": 1.8728076219558716, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.00854, |
| "grad_norm": 0.3708244860172272, |
| "grad_norm_var": 0.002157970353941248, |
| "learning_rate": 5e-05, |
| "loss": 0.1668, |
| "loss/crossentropy": 2.788993239402771, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16682880371809006, |
| "loss/reg": 1.8720353841781616, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.00855, |
| "grad_norm": 0.372335821390152, |
| "grad_norm_var": 0.002189712517136307, |
| "learning_rate": 5e-05, |
| "loss": 0.1718, |
| "loss/crossentropy": 2.813368082046509, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17184938862919807, |
| "loss/reg": 1.8711519241333008, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.00856, |
| "grad_norm": 0.35767483711242676, |
| "grad_norm_var": 0.0021470276840197164, |
| "learning_rate": 5e-05, |
| "loss": 0.175, |
| "loss/crossentropy": 2.779674708843231, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17495984584093094, |
| "loss/reg": 1.869391679763794, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.00857, |
| "grad_norm": 0.3517981767654419, |
| "grad_norm_var": 0.002191754274186038, |
| "learning_rate": 5e-05, |
| "loss": 0.1688, |
| "loss/crossentropy": 2.7776423692703247, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16876182705163956, |
| "loss/reg": 1.868484616279602, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.00858, |
| "grad_norm": 0.8127824664115906, |
| "grad_norm_var": 0.012490247842596114, |
| "learning_rate": 5e-05, |
| "loss": 0.2224, |
| "loss/crossentropy": 2.9876713156700134, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2223958522081375, |
| "loss/reg": 1.8681334257125854, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.00859, |
| "grad_norm": 0.4339921474456787, |
| "grad_norm_var": 0.012361098720851383, |
| "learning_rate": 5e-05, |
| "loss": 0.1752, |
| "loss/crossentropy": 2.8306267857551575, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17517483979463577, |
| "loss/reg": 1.8669747114181519, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.0086, |
| "grad_norm": 0.5807726383209229, |
| "grad_norm_var": 0.013480947234538975, |
| "learning_rate": 5e-05, |
| "loss": 0.1787, |
| "loss/crossentropy": 2.731403112411499, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17869474738836288, |
| "loss/reg": 1.865167498588562, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.00861, |
| "grad_norm": 0.377247154712677, |
| "grad_norm_var": 0.013599237642109623, |
| "learning_rate": 5e-05, |
| "loss": 0.1714, |
| "loss/crossentropy": 2.8269473910331726, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1713988333940506, |
| "loss/reg": 1.8640793561935425, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.00862, |
| "grad_norm": 0.37849879264831543, |
| "grad_norm_var": 0.013578332002263205, |
| "learning_rate": 5e-05, |
| "loss": 0.169, |
| "loss/crossentropy": 2.8433790802955627, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16895778477191925, |
| "loss/reg": 1.8629158735275269, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.00863, |
| "grad_norm": 0.4124751091003418, |
| "grad_norm_var": 0.013588511645486826, |
| "learning_rate": 5e-05, |
| "loss": 0.1803, |
| "loss/crossentropy": 2.7584823966026306, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18026690557599068, |
| "loss/reg": 1.8609492778778076, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.00864, |
| "grad_norm": 0.37336814403533936, |
| "grad_norm_var": 0.013730216300815038, |
| "learning_rate": 5e-05, |
| "loss": 0.1707, |
| "loss/crossentropy": 2.7589592933654785, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17070752009749413, |
| "loss/reg": 1.8597759008407593, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.00865, |
| "grad_norm": 0.8337989449501038, |
| "grad_norm_var": 0.02424293784323857, |
| "learning_rate": 5e-05, |
| "loss": 0.1781, |
| "loss/crossentropy": 2.8036633133888245, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17813356593251228, |
| "loss/reg": 1.8581993579864502, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.00866, |
| "grad_norm": 0.35601744055747986, |
| "grad_norm_var": 0.02460846166740538, |
| "learning_rate": 5e-05, |
| "loss": 0.1759, |
| "loss/crossentropy": 2.715932846069336, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17593519389629364, |
| "loss/reg": 1.8565593957901, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.00867, |
| "grad_norm": 0.425502747297287, |
| "grad_norm_var": 0.02436568774250706, |
| "learning_rate": 5e-05, |
| "loss": 0.1821, |
| "loss/crossentropy": 2.7275202870368958, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18210354447364807, |
| "loss/reg": 1.8554532527923584, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.00868, |
| "grad_norm": 0.3844553232192993, |
| "grad_norm_var": 0.024142035969486622, |
| "learning_rate": 5e-05, |
| "loss": 0.1796, |
| "loss/crossentropy": 2.8464134335517883, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1795903705060482, |
| "loss/reg": 1.855208396911621, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.00869, |
| "grad_norm": 0.35618311166763306, |
| "grad_norm_var": 0.02446160042257303, |
| "learning_rate": 5e-05, |
| "loss": 0.1758, |
| "loss/crossentropy": 2.87707781791687, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17583919316530228, |
| "loss/reg": 1.8549201488494873, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.0087, |
| "grad_norm": 0.41672077775001526, |
| "grad_norm_var": 0.024117258377413187, |
| "learning_rate": 5e-05, |
| "loss": 0.1759, |
| "loss/crossentropy": 2.71548855304718, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1758672632277012, |
| "loss/reg": 1.8541263341903687, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.00871, |
| "grad_norm": 0.3646162450313568, |
| "grad_norm_var": 0.024202440513241764, |
| "learning_rate": 5e-05, |
| "loss": 0.186, |
| "loss/crossentropy": 2.786548674106598, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18596061319112778, |
| "loss/reg": 1.8524823188781738, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.00872, |
| "grad_norm": 0.37939974665641785, |
| "grad_norm_var": 0.023961625350842352, |
| "learning_rate": 5e-05, |
| "loss": 0.1748, |
| "loss/crossentropy": 2.835165321826935, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17476912215352058, |
| "loss/reg": 1.8506335020065308, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.00873, |
| "grad_norm": 0.3887036442756653, |
| "grad_norm_var": 0.023551954015331347, |
| "learning_rate": 5e-05, |
| "loss": 0.1789, |
| "loss/crossentropy": 2.732525408267975, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17892278358340263, |
| "loss/reg": 1.849176287651062, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.00874, |
| "grad_norm": 0.387320876121521, |
| "grad_norm_var": 0.014549813961372993, |
| "learning_rate": 5e-05, |
| "loss": 0.1941, |
| "loss/crossentropy": 2.792769968509674, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19409611076116562, |
| "loss/reg": 1.848083734512329, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.00875, |
| "grad_norm": 0.3818178176879883, |
| "grad_norm_var": 0.014678730624869341, |
| "learning_rate": 5e-05, |
| "loss": 0.1865, |
| "loss/crossentropy": 2.762765884399414, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18646146729588509, |
| "loss/reg": 1.8465189933776855, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.00876, |
| "grad_norm": 0.36014363169670105, |
| "grad_norm_var": 0.013132955726787365, |
| "learning_rate": 5e-05, |
| "loss": 0.1847, |
| "loss/crossentropy": 2.8113619089126587, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1847200095653534, |
| "loss/reg": 1.8454153537750244, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.00877, |
| "grad_norm": 0.3916279077529907, |
| "grad_norm_var": 0.013081129963419124, |
| "learning_rate": 5e-05, |
| "loss": 0.181, |
| "loss/crossentropy": 2.767706513404846, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18097594752907753, |
| "loss/reg": 1.8445773124694824, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.00878, |
| "grad_norm": 0.38396528363227844, |
| "grad_norm_var": 0.013058641234249413, |
| "learning_rate": 5e-05, |
| "loss": 0.1799, |
| "loss/crossentropy": 2.750400483608246, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.179927259683609, |
| "loss/reg": 1.8441030979156494, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.00879, |
| "grad_norm": 0.3784838020801544, |
| "grad_norm_var": 0.013129867131250705, |
| "learning_rate": 5e-05, |
| "loss": 0.1783, |
| "loss/crossentropy": 2.6576608419418335, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1783200539648533, |
| "loss/reg": 1.842759370803833, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.0088, |
| "grad_norm": 0.3373940885066986, |
| "grad_norm_var": 0.013387093786405535, |
| "learning_rate": 5e-05, |
| "loss": 0.1665, |
| "loss/crossentropy": 2.7494055032730103, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16646360978484154, |
| "loss/reg": 1.8418775796890259, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.00881, |
| "grad_norm": 0.3743676543235779, |
| "grad_norm_var": 0.000488954453456858, |
| "learning_rate": 5e-05, |
| "loss": 0.1718, |
| "loss/crossentropy": 2.7373632192611694, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17177791520953178, |
| "loss/reg": 1.8405792713165283, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.00882, |
| "grad_norm": 0.36398422718048096, |
| "grad_norm_var": 0.0004683277690547882, |
| "learning_rate": 5e-05, |
| "loss": 0.1713, |
| "loss/crossentropy": 2.689941644668579, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1712586209177971, |
| "loss/reg": 1.8396137952804565, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.00883, |
| "grad_norm": 0.36932700872421265, |
| "grad_norm_var": 0.0003222525763987627, |
| "learning_rate": 5e-05, |
| "loss": 0.1738, |
| "loss/crossentropy": 3.0094715356826782, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17375321686267853, |
| "loss/reg": 1.8387012481689453, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.00884, |
| "grad_norm": 0.37647050619125366, |
| "grad_norm_var": 0.0003174026053568609, |
| "learning_rate": 5e-05, |
| "loss": 0.1734, |
| "loss/crossentropy": 2.675420820713043, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17337032034993172, |
| "loss/reg": 1.8372994661331177, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.00885, |
| "grad_norm": 0.3657122850418091, |
| "grad_norm_var": 0.0002983341146215642, |
| "learning_rate": 5e-05, |
| "loss": 0.175, |
| "loss/crossentropy": 2.785289704799652, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1750231385231018, |
| "loss/reg": 1.836666464805603, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.00886, |
| "grad_norm": 0.3565351963043213, |
| "grad_norm_var": 0.00019998832643003023, |
| "learning_rate": 5e-05, |
| "loss": 0.1611, |
| "loss/crossentropy": 2.67407763004303, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16111686453223228, |
| "loss/reg": 1.8361395597457886, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.00887, |
| "grad_norm": 0.38317063450813293, |
| "grad_norm_var": 0.00020202125672466782, |
| "learning_rate": 5e-05, |
| "loss": 0.1793, |
| "loss/crossentropy": 2.7103012204170227, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17928585410118103, |
| "loss/reg": 1.8344452381134033, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.00888, |
| "grad_norm": 0.39307963848114014, |
| "grad_norm_var": 0.00022420215532017082, |
| "learning_rate": 5e-05, |
| "loss": 0.1899, |
| "loss/crossentropy": 2.7159000635147095, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18990719318389893, |
| "loss/reg": 1.8328790664672852, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.00889, |
| "grad_norm": 0.35862287878990173, |
| "grad_norm_var": 0.00022381402201028245, |
| "learning_rate": 5e-05, |
| "loss": 0.1834, |
| "loss/crossentropy": 2.847196877002716, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18342823907732964, |
| "loss/reg": 1.8318103551864624, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.0089, |
| "grad_norm": 0.3539126515388489, |
| "grad_norm_var": 0.0002281156381275314, |
| "learning_rate": 5e-05, |
| "loss": 0.1777, |
| "loss/crossentropy": 2.853213608264923, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1777319796383381, |
| "loss/reg": 1.829829454421997, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.00891, |
| "grad_norm": 0.41561535000801086, |
| "grad_norm_var": 0.000350336348254295, |
| "learning_rate": 5e-05, |
| "loss": 0.1895, |
| "loss/crossentropy": 2.6837574243545532, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18954132869839668, |
| "loss/reg": 1.8281813859939575, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.00892, |
| "grad_norm": 0.3593007028102875, |
| "grad_norm_var": 0.00035178644306217054, |
| "learning_rate": 5e-05, |
| "loss": 0.1639, |
| "loss/crossentropy": 2.7398064136505127, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.163859985768795, |
| "loss/reg": 1.8272664546966553, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.00893, |
| "grad_norm": 0.3928586542606354, |
| "grad_norm_var": 0.00035500389449958367, |
| "learning_rate": 5e-05, |
| "loss": 0.187, |
| "loss/crossentropy": 2.7214816212654114, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18696707114577293, |
| "loss/reg": 1.8260765075683594, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.00894, |
| "grad_norm": 0.38060957193374634, |
| "grad_norm_var": 0.00035065611870696014, |
| "learning_rate": 5e-05, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.8307188153266907, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18159236386418343, |
| "loss/reg": 1.8246755599975586, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.00895, |
| "grad_norm": 0.34957438707351685, |
| "grad_norm_var": 0.0003796919232549693, |
| "learning_rate": 5e-05, |
| "loss": 0.1801, |
| "loss/crossentropy": 2.7387137413024902, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18012140691280365, |
| "loss/reg": 1.8235573768615723, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 0.3666534721851349, |
| "grad_norm_var": 0.00030342620785123544, |
| "learning_rate": 5e-05, |
| "loss": 0.2036, |
| "loss/crossentropy": 2.758805215358734, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20357270538806915, |
| "loss/reg": 1.8219776153564453, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.00897, |
| "grad_norm": 0.5404136180877686, |
| "grad_norm_var": 0.0020682628614343557, |
| "learning_rate": 5e-05, |
| "loss": 0.1933, |
| "loss/crossentropy": 2.970340132713318, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19332898035645485, |
| "loss/reg": 1.8206608295440674, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.00898, |
| "grad_norm": 0.3982648551464081, |
| "grad_norm_var": 0.0020554109287465, |
| "learning_rate": 5e-05, |
| "loss": 0.1768, |
| "loss/crossentropy": 2.6868785619735718, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1768476814031601, |
| "loss/reg": 1.8192402124404907, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.00899, |
| "grad_norm": 0.38741791248321533, |
| "grad_norm_var": 0.002038042531465663, |
| "learning_rate": 5e-05, |
| "loss": 0.1808, |
| "loss/crossentropy": 2.816374719142914, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18077386170625687, |
| "loss/reg": 1.8179749250411987, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 0.3847026526927948, |
| "grad_norm_var": 0.0020316665201686695, |
| "learning_rate": 5e-05, |
| "loss": 0.1982, |
| "loss/crossentropy": 2.8102923035621643, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19821672514081, |
| "loss/reg": 1.8163453340530396, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.00901, |
| "grad_norm": 0.3916863203048706, |
| "grad_norm_var": 0.0020013109603508254, |
| "learning_rate": 5e-05, |
| "loss": 0.1778, |
| "loss/crossentropy": 2.883694589138031, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17775952070951462, |
| "loss/reg": 1.8147519826889038, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.00902, |
| "grad_norm": 0.36217865347862244, |
| "grad_norm_var": 0.001979417665481912, |
| "learning_rate": 5e-05, |
| "loss": 0.1669, |
| "loss/crossentropy": 2.8100743293762207, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1668529324233532, |
| "loss/reg": 1.8133151531219482, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.00903, |
| "grad_norm": 0.42102572321891785, |
| "grad_norm_var": 0.00204143104015622, |
| "learning_rate": 5e-05, |
| "loss": 0.181, |
| "loss/crossentropy": 2.8006924986839294, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18102795258164406, |
| "loss/reg": 1.8111281394958496, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.00904, |
| "grad_norm": 0.388874351978302, |
| "grad_norm_var": 0.002041367346731493, |
| "learning_rate": 5e-05, |
| "loss": 0.1818, |
| "loss/crossentropy": 2.8004772067070007, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18177441507577896, |
| "loss/reg": 1.8097094297409058, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.00905, |
| "grad_norm": 0.3800460696220398, |
| "grad_norm_var": 0.0019783346485648203, |
| "learning_rate": 5e-05, |
| "loss": 0.176, |
| "loss/crossentropy": 2.693452537059784, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1760004386305809, |
| "loss/reg": 1.8086622953414917, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.00906, |
| "grad_norm": 0.32960033416748047, |
| "grad_norm_var": 0.0021389732007735863, |
| "learning_rate": 5e-05, |
| "loss": 0.1641, |
| "loss/crossentropy": 2.7406028509140015, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1641414910554886, |
| "loss/reg": 1.8070775270462036, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.00907, |
| "grad_norm": 0.41262900829315186, |
| "grad_norm_var": 0.002129550660359725, |
| "learning_rate": 5e-05, |
| "loss": 0.1639, |
| "loss/crossentropy": 2.815558433532715, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1639426201581955, |
| "loss/reg": 1.80557119846344, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.00908, |
| "grad_norm": 0.34168651700019836, |
| "grad_norm_var": 0.0022218976438497353, |
| "learning_rate": 5e-05, |
| "loss": 0.1689, |
| "loss/crossentropy": 2.7727773189544678, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16886158660054207, |
| "loss/reg": 1.8040578365325928, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.00909, |
| "grad_norm": 0.3481311500072479, |
| "grad_norm_var": 0.0023254939668475396, |
| "learning_rate": 5e-05, |
| "loss": 0.1723, |
| "loss/crossentropy": 2.7662405967712402, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17232706770300865, |
| "loss/reg": 1.802317500114441, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.0091, |
| "grad_norm": 0.34673434495925903, |
| "grad_norm_var": 0.0024236772610501315, |
| "learning_rate": 5e-05, |
| "loss": 0.1721, |
| "loss/crossentropy": 2.7547109723091125, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17211218550801277, |
| "loss/reg": 1.8002270460128784, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.00911, |
| "grad_norm": 0.358116090297699, |
| "grad_norm_var": 0.002388630196925971, |
| "learning_rate": 5e-05, |
| "loss": 0.1778, |
| "loss/crossentropy": 2.8174885511398315, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17777465283870697, |
| "loss/reg": 1.7986141443252563, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.00912, |
| "grad_norm": 0.37328216433525085, |
| "grad_norm_var": 0.0023752628687049343, |
| "learning_rate": 5e-05, |
| "loss": 0.1846, |
| "loss/crossentropy": 2.7423484921455383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18462468683719635, |
| "loss/reg": 1.7969509363174438, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.00913, |
| "grad_norm": 0.4073435366153717, |
| "grad_norm_var": 0.000729848525378903, |
| "learning_rate": 5e-05, |
| "loss": 0.1656, |
| "loss/crossentropy": 2.6962223649024963, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16563431546092033, |
| "loss/reg": 1.7949342727661133, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.00914, |
| "grad_norm": 0.37720003724098206, |
| "grad_norm_var": 0.0006978068548327905, |
| "learning_rate": 5e-05, |
| "loss": 0.1853, |
| "loss/crossentropy": 2.710608184337616, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18529681861400604, |
| "loss/reg": 1.7920844554901123, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.00915, |
| "grad_norm": 0.36010050773620605, |
| "grad_norm_var": 0.0007016424011595597, |
| "learning_rate": 5e-05, |
| "loss": 0.1833, |
| "loss/crossentropy": 2.8862733840942383, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1833462007343769, |
| "loss/reg": 1.7895822525024414, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.00916, |
| "grad_norm": 0.35757148265838623, |
| "grad_norm_var": 0.0007087821480995478, |
| "learning_rate": 5e-05, |
| "loss": 0.1741, |
| "loss/crossentropy": 2.736583173274994, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1740923710167408, |
| "loss/reg": 1.7870407104492188, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.00917, |
| "grad_norm": 0.38085147738456726, |
| "grad_norm_var": 0.0006880592910958772, |
| "learning_rate": 5e-05, |
| "loss": 0.1758, |
| "loss/crossentropy": 2.722678780555725, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17577889189124107, |
| "loss/reg": 1.78484308719635, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.00918, |
| "grad_norm": 0.3618144989013672, |
| "grad_norm_var": 0.000688524329092799, |
| "learning_rate": 5e-05, |
| "loss": 0.1729, |
| "loss/crossentropy": 2.7107303738594055, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17288268730044365, |
| "loss/reg": 1.781936764717102, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.00919, |
| "grad_norm": 0.4562152326107025, |
| "grad_norm_var": 0.000997994245971834, |
| "learning_rate": 5e-05, |
| "loss": 0.209, |
| "loss/crossentropy": 2.6911511421203613, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20904593169689178, |
| "loss/reg": 1.7793291807174683, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.0092, |
| "grad_norm": 0.7892553806304932, |
| "grad_norm_var": 0.011823798595284762, |
| "learning_rate": 5e-05, |
| "loss": 0.2098, |
| "loss/crossentropy": 2.8024474382400513, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20984026044607162, |
| "loss/reg": 1.7767542600631714, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.00921, |
| "grad_norm": 0.4952187240123749, |
| "grad_norm_var": 0.012365066103201613, |
| "learning_rate": 5e-05, |
| "loss": 0.2215, |
| "loss/crossentropy": 2.836692988872528, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22146976366639137, |
| "loss/reg": 1.77475905418396, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.00922, |
| "grad_norm": 0.4500190317630768, |
| "grad_norm_var": 0.01204494814009713, |
| "learning_rate": 5e-05, |
| "loss": 0.1957, |
| "loss/crossentropy": 2.7740437984466553, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1957194283604622, |
| "loss/reg": 1.7725263833999634, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.00923, |
| "grad_norm": 0.4018624424934387, |
| "grad_norm_var": 0.012053458598524087, |
| "learning_rate": 5e-05, |
| "loss": 0.1985, |
| "loss/crossentropy": 2.783832609653473, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19849245250225067, |
| "loss/reg": 1.7706068754196167, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.00924, |
| "grad_norm": 0.4053579866886139, |
| "grad_norm_var": 0.01170279735803306, |
| "learning_rate": 5e-05, |
| "loss": 0.1847, |
| "loss/crossentropy": 2.8584959506988525, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1846720688045025, |
| "loss/reg": 1.7687995433807373, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.00925, |
| "grad_norm": 0.4355542063713074, |
| "grad_norm_var": 0.011379840002585932, |
| "learning_rate": 5e-05, |
| "loss": 0.1911, |
| "loss/crossentropy": 2.8072018027305603, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19105902686715126, |
| "loss/reg": 1.766356348991394, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.00926, |
| "grad_norm": 0.41985246539115906, |
| "grad_norm_var": 0.010977469936536095, |
| "learning_rate": 5e-05, |
| "loss": 0.1772, |
| "loss/crossentropy": 2.825522303581238, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17719140276312828, |
| "loss/reg": 1.764008641242981, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.00927, |
| "grad_norm": 0.4020366370677948, |
| "grad_norm_var": 0.010695516965112247, |
| "learning_rate": 5e-05, |
| "loss": 0.168, |
| "loss/crossentropy": 2.946666181087494, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16798604279756546, |
| "loss/reg": 1.7619134187698364, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.00928, |
| "grad_norm": 0.4333237111568451, |
| "grad_norm_var": 0.01047000612816995, |
| "learning_rate": 5e-05, |
| "loss": 0.1732, |
| "loss/crossentropy": 2.904057264328003, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1731964722275734, |
| "loss/reg": 1.7594822645187378, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.00929, |
| "grad_norm": 0.44914835691452026, |
| "grad_norm_var": 0.010434282299518182, |
| "learning_rate": 5e-05, |
| "loss": 0.1822, |
| "loss/crossentropy": 2.8881112933158875, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1821521669626236, |
| "loss/reg": 1.757930874824524, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.0093, |
| "grad_norm": 0.8063428401947021, |
| "grad_norm_var": 0.018582235883409348, |
| "learning_rate": 5e-05, |
| "loss": 0.2108, |
| "loss/crossentropy": 3.000428855419159, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21083774790167809, |
| "loss/reg": 1.756495714187622, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.00931, |
| "grad_norm": 0.37262633442878723, |
| "grad_norm_var": 0.018420551139004416, |
| "learning_rate": 5e-05, |
| "loss": 0.1727, |
| "loss/crossentropy": 2.899652659893036, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1727372407913208, |
| "loss/reg": 1.755653977394104, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.00932, |
| "grad_norm": 0.44574448466300964, |
| "grad_norm_var": 0.017660345874116586, |
| "learning_rate": 5e-05, |
| "loss": 0.1771, |
| "loss/crossentropy": 2.7930703163146973, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17705539613962173, |
| "loss/reg": 1.7541528940200806, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.00933, |
| "grad_norm": 0.4381559193134308, |
| "grad_norm_var": 0.017191491981390843, |
| "learning_rate": 5e-05, |
| "loss": 0.1928, |
| "loss/crossentropy": 3.009516716003418, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19276633486151695, |
| "loss/reg": 1.7523828744888306, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.00934, |
| "grad_norm": 0.37119948863983154, |
| "grad_norm_var": 0.01705829482260827, |
| "learning_rate": 5e-05, |
| "loss": 0.1784, |
| "loss/crossentropy": 2.7011741995811462, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17841476574540138, |
| "loss/reg": 1.750117540359497, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.00935, |
| "grad_norm": 0.38776126503944397, |
| "grad_norm_var": 0.017506596591750172, |
| "learning_rate": 5e-05, |
| "loss": 0.1765, |
| "loss/crossentropy": 2.755903959274292, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17649077624082565, |
| "loss/reg": 1.7485483884811401, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.00936, |
| "grad_norm": 0.3909890949726105, |
| "grad_norm_var": 0.010412048008983836, |
| "learning_rate": 5e-05, |
| "loss": 0.1718, |
| "loss/crossentropy": 2.7981409430503845, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1718210205435753, |
| "loss/reg": 1.7470717430114746, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.00937, |
| "grad_norm": 0.4047463536262512, |
| "grad_norm_var": 0.010306674977021387, |
| "learning_rate": 5e-05, |
| "loss": 0.1944, |
| "loss/crossentropy": 2.653374135494232, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1944383941590786, |
| "loss/reg": 1.7457630634307861, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.00938, |
| "grad_norm": 0.4641042947769165, |
| "grad_norm_var": 0.010340857957444612, |
| "learning_rate": 5e-05, |
| "loss": 0.1933, |
| "loss/crossentropy": 3.054188370704651, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19333792477846146, |
| "loss/reg": 1.7441859245300293, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.00939, |
| "grad_norm": 0.463642418384552, |
| "grad_norm_var": 0.01027101724812616, |
| "learning_rate": 5e-05, |
| "loss": 0.1906, |
| "loss/crossentropy": 2.7980846166610718, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19064636901021004, |
| "loss/reg": 1.7424683570861816, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.0094, |
| "grad_norm": 0.3763858675956726, |
| "grad_norm_var": 0.010469512228889604, |
| "learning_rate": 5e-05, |
| "loss": 0.1817, |
| "loss/crossentropy": 2.8554503321647644, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1817203275859356, |
| "loss/reg": 1.7405589818954468, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.00941, |
| "grad_norm": 0.41792160272598267, |
| "grad_norm_var": 0.010502572032980106, |
| "learning_rate": 5e-05, |
| "loss": 0.2081, |
| "loss/crossentropy": 2.7464479207992554, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20806986466050148, |
| "loss/reg": 1.7390387058258057, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.00942, |
| "grad_norm": 0.405609130859375, |
| "grad_norm_var": 0.010553986517832181, |
| "learning_rate": 5e-05, |
| "loss": 0.1923, |
| "loss/crossentropy": 2.9190812706947327, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1922554075717926, |
| "loss/reg": 1.737121343612671, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.00943, |
| "grad_norm": 0.5186859369277954, |
| "grad_norm_var": 0.010823950074372254, |
| "learning_rate": 5e-05, |
| "loss": 0.1892, |
| "loss/crossentropy": 2.846211016178131, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1892014741897583, |
| "loss/reg": 1.7357187271118164, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.00944, |
| "grad_norm": 0.3441806137561798, |
| "grad_norm_var": 0.011478989118617007, |
| "learning_rate": 5e-05, |
| "loss": 0.1701, |
| "loss/crossentropy": 2.8220054507255554, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1700589321553707, |
| "loss/reg": 1.733402967453003, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.00945, |
| "grad_norm": 0.37400367856025696, |
| "grad_norm_var": 0.011751047533430632, |
| "learning_rate": 5e-05, |
| "loss": 0.179, |
| "loss/crossentropy": 2.7692030668258667, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17900892347097397, |
| "loss/reg": 1.7316250801086426, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.00946, |
| "grad_norm": 0.4089336395263672, |
| "grad_norm_var": 0.0020184395330867097, |
| "learning_rate": 5e-05, |
| "loss": 0.1982, |
| "loss/crossentropy": 2.891884744167328, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19816706702113152, |
| "loss/reg": 1.7298880815505981, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.00947, |
| "grad_norm": 0.36752966046333313, |
| "grad_norm_var": 0.002046509202798789, |
| "learning_rate": 5e-05, |
| "loss": 0.1843, |
| "loss/crossentropy": 2.858055591583252, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18428384885191917, |
| "loss/reg": 1.7284300327301025, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.00948, |
| "grad_norm": 0.36644455790519714, |
| "grad_norm_var": 0.002074549092487384, |
| "learning_rate": 5e-05, |
| "loss": 0.1714, |
| "loss/crossentropy": 2.848255932331085, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17144014686346054, |
| "loss/reg": 1.7270002365112305, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.00949, |
| "grad_norm": 0.5910805463790894, |
| "grad_norm_var": 0.004186356490923812, |
| "learning_rate": 5e-05, |
| "loss": 0.23, |
| "loss/crossentropy": 2.7590108513832092, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22996008396148682, |
| "loss/reg": 1.7256983518600464, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 0.35803112387657166, |
| "grad_norm_var": 0.00427554901524122, |
| "learning_rate": 5e-05, |
| "loss": 0.1684, |
| "loss/crossentropy": 2.7760064005851746, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16840620338916779, |
| "loss/reg": 1.723679542541504, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.00951, |
| "grad_norm": 0.412681519985199, |
| "grad_norm_var": 0.004223846207916952, |
| "learning_rate": 5e-05, |
| "loss": 0.2018, |
| "loss/crossentropy": 2.9045740365982056, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20182525366544724, |
| "loss/reg": 1.72231125831604, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.00952, |
| "grad_norm": 0.4021626114845276, |
| "grad_norm_var": 0.004193552649382352, |
| "learning_rate": 5e-05, |
| "loss": 0.1848, |
| "loss/crossentropy": 2.6521793007850647, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1848319098353386, |
| "loss/reg": 1.720641851425171, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.00953, |
| "grad_norm": 0.3750251233577728, |
| "grad_norm_var": 0.00429834750938114, |
| "learning_rate": 5e-05, |
| "loss": 0.1791, |
| "loss/crossentropy": 2.7560397386550903, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17908834293484688, |
| "loss/reg": 1.7182477712631226, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.00954, |
| "grad_norm": 0.5893900990486145, |
| "grad_norm_var": 0.006092951728716223, |
| "learning_rate": 5e-05, |
| "loss": 0.2129, |
| "loss/crossentropy": 2.835801601409912, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.21293479949235916, |
| "loss/reg": 1.716722011566162, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.00955, |
| "grad_norm": 0.40877264738082886, |
| "grad_norm_var": 0.005985476808116985, |
| "learning_rate": 5e-05, |
| "loss": 0.1938, |
| "loss/crossentropy": 2.689119517803192, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19383220747113228, |
| "loss/reg": 1.714568853378296, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.00956, |
| "grad_norm": 0.38810843229293823, |
| "grad_norm_var": 0.005926205055061354, |
| "learning_rate": 5e-05, |
| "loss": 0.1705, |
| "loss/crossentropy": 2.948507070541382, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1705201156437397, |
| "loss/reg": 1.7119203805923462, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.00957, |
| "grad_norm": 0.4206679165363312, |
| "grad_norm_var": 0.005925719462670757, |
| "learning_rate": 5e-05, |
| "loss": 0.1807, |
| "loss/crossentropy": 2.78257417678833, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18074193224310875, |
| "loss/reg": 1.7095727920532227, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.00958, |
| "grad_norm": 0.3933105766773224, |
| "grad_norm_var": 0.005959929971731507, |
| "learning_rate": 5e-05, |
| "loss": 0.2045, |
| "loss/crossentropy": 2.7225964665412903, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2044883407652378, |
| "loss/reg": 1.707101583480835, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.00959, |
| "grad_norm": 0.3582659661769867, |
| "grad_norm_var": 0.005456189170996354, |
| "learning_rate": 5e-05, |
| "loss": 0.1603, |
| "loss/crossentropy": 2.7268422842025757, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1602596789598465, |
| "loss/reg": 1.7055177688598633, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 0.397594153881073, |
| "grad_norm_var": 0.0051663773874797295, |
| "learning_rate": 5e-05, |
| "loss": 0.1733, |
| "loss/crossentropy": 2.877332389354706, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1733493208885193, |
| "loss/reg": 1.7030569314956665, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.00961, |
| "grad_norm": 0.4981625974178314, |
| "grad_norm_var": 0.005480135764708397, |
| "learning_rate": 5e-05, |
| "loss": 0.1826, |
| "loss/crossentropy": 2.656112492084503, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18259770050644875, |
| "loss/reg": 1.7019816637039185, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.00962, |
| "grad_norm": 0.937751054763794, |
| "grad_norm_var": 0.022106629800203694, |
| "learning_rate": 5e-05, |
| "loss": 0.2083, |
| "loss/crossentropy": 2.907736301422119, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.20831404626369476, |
| "loss/reg": 1.700728416442871, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.00963, |
| "grad_norm": 0.3895174264907837, |
| "grad_norm_var": 0.021883161578962466, |
| "learning_rate": 5e-05, |
| "loss": 0.1803, |
| "loss/crossentropy": 2.845858633518219, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18031802773475647, |
| "loss/reg": 1.6990742683410645, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.00964, |
| "grad_norm": 0.34548690915107727, |
| "grad_norm_var": 0.02215928485241057, |
| "learning_rate": 5e-05, |
| "loss": 0.1645, |
| "loss/crossentropy": 2.8244311213493347, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1645219847559929, |
| "loss/reg": 1.697798252105713, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.00965, |
| "grad_norm": 0.36824318766593933, |
| "grad_norm_var": 0.021193656582446117, |
| "learning_rate": 5e-05, |
| "loss": 0.1717, |
| "loss/crossentropy": 2.884181797504425, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17166699841618538, |
| "loss/reg": 1.6957753896713257, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.00966, |
| "grad_norm": 0.37774839997291565, |
| "grad_norm_var": 0.021001939954339834, |
| "learning_rate": 5e-05, |
| "loss": 0.1824, |
| "loss/crossentropy": 3.025804340839386, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18241329863667488, |
| "loss/reg": 1.6944239139556885, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.00967, |
| "grad_norm": 0.36408743262290955, |
| "grad_norm_var": 0.02133579751543382, |
| "learning_rate": 5e-05, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.6597015261650085, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1815933845937252, |
| "loss/reg": 1.692252516746521, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.00968, |
| "grad_norm": 0.34311729669570923, |
| "grad_norm_var": 0.02183892884845392, |
| "learning_rate": 5e-05, |
| "loss": 0.1662, |
| "loss/crossentropy": 2.845684826374054, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1661607250571251, |
| "loss/reg": 1.6907639503479004, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.00969, |
| "grad_norm": 0.38303908705711365, |
| "grad_norm_var": 0.021779175231247044, |
| "learning_rate": 5e-05, |
| "loss": 0.1693, |
| "loss/crossentropy": 2.558404505252838, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16925161331892014, |
| "loss/reg": 1.6893569231033325, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.0097, |
| "grad_norm": 0.3850949704647064, |
| "grad_norm_var": 0.02018777587365078, |
| "learning_rate": 5e-05, |
| "loss": 0.1685, |
| "loss/crossentropy": 2.890751600265503, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1684512346982956, |
| "loss/reg": 1.688266634941101, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.00971, |
| "grad_norm": 0.4068422317504883, |
| "grad_norm_var": 0.020191525445637973, |
| "learning_rate": 5e-05, |
| "loss": 0.1857, |
| "loss/crossentropy": 2.707846701145172, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18569114059209824, |
| "loss/reg": 1.6867531538009644, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.00972, |
| "grad_norm": 0.3924512565135956, |
| "grad_norm_var": 0.020172897207266394, |
| "learning_rate": 5e-05, |
| "loss": 0.1829, |
| "loss/crossentropy": 2.848098576068878, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1828712299466133, |
| "loss/reg": 1.6852294206619263, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.00973, |
| "grad_norm": 0.3714575469493866, |
| "grad_norm_var": 0.020336838096992275, |
| "learning_rate": 5e-05, |
| "loss": 0.1773, |
| "loss/crossentropy": 2.8703532814979553, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17731666564941406, |
| "loss/reg": 1.6838881969451904, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.00974, |
| "grad_norm": 0.35195186734199524, |
| "grad_norm_var": 0.020588227081264298, |
| "learning_rate": 5e-05, |
| "loss": 0.1862, |
| "loss/crossentropy": 2.863659620285034, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18621815741062164, |
| "loss/reg": 1.6821165084838867, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.00975, |
| "grad_norm": 0.441755086183548, |
| "grad_norm_var": 0.02037088575085001, |
| "learning_rate": 5e-05, |
| "loss": 0.1933, |
| "loss/crossentropy": 2.810901939868927, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19334488362073898, |
| "loss/reg": 1.6802574396133423, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.00976, |
| "grad_norm": 0.40233367681503296, |
| "grad_norm_var": 0.02035677589008348, |
| "learning_rate": 5e-05, |
| "loss": 0.1983, |
| "loss/crossentropy": 2.681654691696167, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19833911955356598, |
| "loss/reg": 1.6781818866729736, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.00977, |
| "grad_norm": 0.6531580686569214, |
| "grad_norm_var": 0.023423138566671976, |
| "learning_rate": 5e-05, |
| "loss": 0.2212, |
| "loss/crossentropy": 2.82851505279541, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2212192267179489, |
| "loss/reg": 1.6763266324996948, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.00978, |
| "grad_norm": 0.3646674156188965, |
| "grad_norm_var": 0.005314392422501734, |
| "learning_rate": 5e-05, |
| "loss": 0.1715, |
| "loss/crossentropy": 2.7788134813308716, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17147252708673477, |
| "loss/reg": 1.674770712852478, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.00979, |
| "grad_norm": 0.40374529361724854, |
| "grad_norm_var": 0.005314159555871933, |
| "learning_rate": 5e-05, |
| "loss": 0.2092, |
| "loss/crossentropy": 2.7746172547340393, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.2092289738357067, |
| "loss/reg": 1.6723932027816772, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.0098, |
| "grad_norm": 0.3737621009349823, |
| "grad_norm_var": 0.005169172562247167, |
| "learning_rate": 5e-05, |
| "loss": 0.1847, |
| "loss/crossentropy": 2.871635138988495, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18473126366734505, |
| "loss/reg": 1.6707124710083008, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.00981, |
| "grad_norm": 0.37383797764778137, |
| "grad_norm_var": 0.005148210609649088, |
| "learning_rate": 5e-05, |
| "loss": 0.1733, |
| "loss/crossentropy": 2.760922133922577, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1733292043209076, |
| "loss/reg": 1.6693061590194702, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.00982, |
| "grad_norm": 0.38922467827796936, |
| "grad_norm_var": 0.00512344066750369, |
| "learning_rate": 5e-05, |
| "loss": 0.164, |
| "loss/crossentropy": 2.8191832304000854, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16395087912678719, |
| "loss/reg": 1.6671026945114136, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.00983, |
| "grad_norm": 0.40670332312583923, |
| "grad_norm_var": 0.0050327015332547465, |
| "learning_rate": 5e-05, |
| "loss": 0.1892, |
| "loss/crossentropy": 2.847275197505951, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18915896490216255, |
| "loss/reg": 1.6655491590499878, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.00984, |
| "grad_norm": 0.3739645183086395, |
| "grad_norm_var": 0.004847126969690196, |
| "learning_rate": 5e-05, |
| "loss": 0.194, |
| "loss/crossentropy": 2.817361056804657, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1940479725599289, |
| "loss/reg": 1.664434552192688, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.00985, |
| "grad_norm": 0.36827903985977173, |
| "grad_norm_var": 0.00490322302848593, |
| "learning_rate": 5e-05, |
| "loss": 0.1793, |
| "loss/crossentropy": 2.9278652667999268, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17926159501075745, |
| "loss/reg": 1.6631444692611694, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.00986, |
| "grad_norm": 0.36838048696517944, |
| "grad_norm_var": 0.0049621510753778035, |
| "learning_rate": 5e-05, |
| "loss": 0.182, |
| "loss/crossentropy": 2.625900149345398, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1820085123181343, |
| "loss/reg": 1.6615486145019531, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.00987, |
| "grad_norm": 0.406107097864151, |
| "grad_norm_var": 0.00496177464005331, |
| "learning_rate": 5e-05, |
| "loss": 0.165, |
| "loss/crossentropy": 2.6364856362342834, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16496483236551285, |
| "loss/reg": 1.6604608297348022, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.00988, |
| "grad_norm": 0.3886563777923584, |
| "grad_norm_var": 0.004967815483619401, |
| "learning_rate": 5e-05, |
| "loss": 0.1803, |
| "loss/crossentropy": 2.796413004398346, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1803182028234005, |
| "loss/reg": 1.6593427658081055, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.00989, |
| "grad_norm": 0.35161423683166504, |
| "grad_norm_var": 0.005074223354079936, |
| "learning_rate": 5e-05, |
| "loss": 0.1644, |
| "loss/crossentropy": 2.8155667185783386, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.16441339999437332, |
| "loss/reg": 1.6583104133605957, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.0099, |
| "grad_norm": 0.39407941699028015, |
| "grad_norm_var": 0.0049088886087087355, |
| "learning_rate": 5e-05, |
| "loss": 0.1928, |
| "loss/crossentropy": 2.8455575704574585, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19284628704190254, |
| "loss/reg": 1.658367395401001, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.00991, |
| "grad_norm": 0.3695094883441925, |
| "grad_norm_var": 0.004869171230278472, |
| "learning_rate": 5e-05, |
| "loss": 0.1814, |
| "loss/crossentropy": 2.80289888381958, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18142832443118095, |
| "loss/reg": 1.6575336456298828, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.00992, |
| "grad_norm": 0.3505973815917969, |
| "grad_norm_var": 0.005015199761620141, |
| "learning_rate": 5e-05, |
| "loss": 0.1769, |
| "loss/crossentropy": 2.731611430644989, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17688723653554916, |
| "loss/reg": 1.6567095518112183, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.00993, |
| "grad_norm": 0.38008397817611694, |
| "grad_norm_var": 0.0003133497280889556, |
| "learning_rate": 5e-05, |
| "loss": 0.1836, |
| "loss/crossentropy": 2.7869952917099, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.18361864984035492, |
| "loss/reg": 1.6552574634552002, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.00994, |
| "grad_norm": 0.38647469878196716, |
| "grad_norm_var": 0.00030154116815576856, |
| "learning_rate": 5e-05, |
| "loss": 0.1794, |
| "loss/crossentropy": 2.744201898574829, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17939525097608566, |
| "loss/reg": 1.6545374393463135, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.00995, |
| "grad_norm": 0.3995093107223511, |
| "grad_norm_var": 0.0002894285610608412, |
| "learning_rate": 5e-05, |
| "loss": 0.1916, |
| "loss/crossentropy": 2.694726526737213, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1916041001677513, |
| "loss/reg": 1.6533586978912354, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.00996, |
| "grad_norm": 0.34584900736808777, |
| "grad_norm_var": 0.0003615231269390488, |
| "learning_rate": 5e-05, |
| "loss": 0.1739, |
| "loss/crossentropy": 2.8016315698623657, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.1739257462322712, |
| "loss/reg": 1.651719570159912, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.00997, |
| "grad_norm": 0.3925288915634155, |
| "grad_norm_var": 0.0003722265532580001, |
| "learning_rate": 5e-05, |
| "loss": 0.177, |
| "loss/crossentropy": 2.897447168827057, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.17699377238750458, |
| "loss/reg": 1.6504392623901367, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.00998, |
| "grad_norm": 0.39880403876304626, |
| "grad_norm_var": 0.0003904176090236522, |
| "learning_rate": 5e-05, |
| "loss": 0.1919, |
| "loss/crossentropy": 2.8275578022003174, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19189641624689102, |
| "loss/reg": 1.6491367816925049, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.00999, |
| "grad_norm": 0.4310808479785919, |
| "grad_norm_var": 0.0005141220319849537, |
| "learning_rate": 5e-05, |
| "loss": 0.2298, |
| "loss/crossentropy": 2.7063609957695007, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.22976921498775482, |
| "loss/reg": 1.6471576690673828, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.3714313507080078, |
| "grad_norm_var": 0.0005171003041950173, |
| "learning_rate": 5e-05, |
| "loss": 0.1901, |
| "loss/crossentropy": 2.7168938517570496, |
| "loss/hidden": 0.0, |
| "loss/logits": 0.19012651592493057, |
| "loss/reg": 1.6457953453063965, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.15246892515328e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|