| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.025, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.5e-05, |
| "grad_norm": 11.25, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1607.1185, |
| "loss/crossentropy": 0.4998045265674591, |
| "loss/hidden": 0.2041015625, |
| "loss/logits": 0.007406285032629967, |
| "loss/reg": 1606.4072265625, |
| "loss/twn": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 5e-05, |
| "grad_norm": 25.625, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1604.4435, |
| "loss/crossentropy": 1.6524670124053955, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.006067799869924784, |
| "loss/reg": 1602.649169921875, |
| "loss/twn": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 7.5e-05, |
| "grad_norm": 9.9375, |
| "learning_rate": 3e-06, |
| "loss": 1547.7074, |
| "loss/crossentropy": 1.9341739416122437, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.014470485970377922, |
| "loss/reg": 1545.6063232421875, |
| "loss/twn": 0.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0001, |
| "grad_norm": 14.375, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1500.7928, |
| "loss/crossentropy": 2.407871723175049, |
| "loss/hidden": 0.1875, |
| "loss/logits": 0.01105956919491291, |
| "loss/reg": 1498.1864013671875, |
| "loss/twn": 0.0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.000125, |
| "grad_norm": 9.875, |
| "learning_rate": 5e-06, |
| "loss": 1421.8827, |
| "loss/crossentropy": 1.7022260427474976, |
| "loss/hidden": 0.10546875, |
| "loss/logits": 0.00693091843277216, |
| "loss/reg": 1420.0679931640625, |
| "loss/twn": 0.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00015, |
| "grad_norm": 470.0, |
| "learning_rate": 6e-06, |
| "loss": 1315.2301, |
| "loss/crossentropy": 1.3705801963806152, |
| "loss/hidden": 0.181640625, |
| "loss/logits": 0.002700040116906166, |
| "loss/reg": 1313.6751708984375, |
| "loss/twn": 0.0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.000175, |
| "grad_norm": 12.4375, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 1187.7322, |
| "loss/crossentropy": 1.8566981554031372, |
| "loss/hidden": 0.083984375, |
| "loss/logits": 0.004405488260090351, |
| "loss/reg": 1185.787109375, |
| "loss/twn": 0.0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0002, |
| "grad_norm": 20.75, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1041.5831, |
| "loss/crossentropy": 2.76304030418396, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.009587295353412628, |
| "loss/reg": 1038.6658935546875, |
| "loss/twn": 0.0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.000225, |
| "grad_norm": 280.0, |
| "learning_rate": 9e-06, |
| "loss": 889.5813, |
| "loss/crossentropy": 2.0730843544006348, |
| "loss/hidden": 0.1767578125, |
| "loss/logits": 0.009802292101085186, |
| "loss/reg": 887.3216552734375, |
| "loss/twn": 0.0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00025, |
| "grad_norm": 13.8125, |
| "learning_rate": 1e-05, |
| "loss": 739.1588, |
| "loss/crossentropy": 2.678976535797119, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.01222484465688467, |
| "loss/reg": 736.3318481445312, |
| "loss/twn": 0.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.000275, |
| "grad_norm": 19.5, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 602.8989, |
| "loss/crossentropy": 1.9476336240768433, |
| "loss/hidden": 0.1865234375, |
| "loss/logits": 0.007580972742289305, |
| "loss/reg": 600.7571411132812, |
| "loss/twn": 0.0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0003, |
| "grad_norm": 9.875, |
| "learning_rate": 1.2e-05, |
| "loss": 486.0685, |
| "loss/crossentropy": 2.1175615787506104, |
| "loss/hidden": 0.1767578125, |
| "loss/logits": 0.008497287519276142, |
| "loss/reg": 483.76568603515625, |
| "loss/twn": 0.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.000325, |
| "grad_norm": 14.0625, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 389.9533, |
| "loss/crossentropy": 1.2835053205490112, |
| "loss/hidden": 0.10498046875, |
| "loss/logits": 0.008934162557125092, |
| "loss/reg": 388.5558776855469, |
| "loss/twn": 0.0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00035, |
| "grad_norm": 19.5, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 313.737, |
| "loss/crossentropy": 1.8903541564941406, |
| "loss/hidden": 0.1103515625, |
| "loss/logits": 0.008576408959925175, |
| "loss/reg": 311.7276916503906, |
| "loss/twn": 0.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.000375, |
| "grad_norm": 32.25, |
| "learning_rate": 1.5e-05, |
| "loss": 253.3787, |
| "loss/crossentropy": 1.3272770643234253, |
| "loss/hidden": 0.2275390625, |
| "loss/logits": 0.009392762556672096, |
| "loss/reg": 251.8144989013672, |
| "loss/twn": 0.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0004, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 16279.171077473959, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 207.3934, |
| "loss/crossentropy": 1.8237905502319336, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.0111556276679039, |
| "loss/reg": 205.45985412597656, |
| "loss/twn": 0.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.000425, |
| "grad_norm": 115.5, |
| "grad_norm_var": 16268.691780598958, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 171.5853, |
| "loss/crossentropy": 1.6593583822250366, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.004231919534504414, |
| "loss/reg": 169.7810516357422, |
| "loss/twn": 0.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00045, |
| "grad_norm": 16.375, |
| "grad_norm_var": 16325.545556640625, |
| "learning_rate": 1.8e-05, |
| "loss": 143.6776, |
| "loss/crossentropy": 0.922300398349762, |
| "loss/hidden": 0.22265625, |
| "loss/logits": 0.012654997408390045, |
| "loss/reg": 142.52001953125, |
| "loss/twn": 0.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.000475, |
| "grad_norm": 228.0, |
| "grad_norm_var": 17643.971875, |
| "learning_rate": 1.9e-05, |
| "loss": 123.6267, |
| "loss/crossentropy": 1.7576591968536377, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.0077894763089716434, |
| "loss/reg": 121.73872375488281, |
| "loss/twn": 0.0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0005, |
| "grad_norm": 15.3125, |
| "grad_norm_var": 17635.768994140624, |
| "learning_rate": 2e-05, |
| "loss": 107.5298, |
| "loss/crossentropy": 1.702596664428711, |
| "loss/hidden": 0.193359375, |
| "loss/logits": 0.012834219262003899, |
| "loss/reg": 105.62105560302734, |
| "loss/twn": 0.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.000525, |
| "grad_norm": 21.125, |
| "grad_norm_var": 17537.747509765624, |
| "learning_rate": 2.1e-05, |
| "loss": 96.0316, |
| "loss/crossentropy": 2.7474312782287598, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.018948907032608986, |
| "loss/reg": 93.09722900390625, |
| "loss/twn": 0.0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00055, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 6900.875520833333, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 85.865, |
| "loss/crossentropy": 2.7010557651519775, |
| "loss/hidden": 0.1005859375, |
| "loss/logits": 0.004374333191663027, |
| "loss/reg": 83.05902862548828, |
| "loss/twn": 0.0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.000575, |
| "grad_norm": 13.875, |
| "grad_norm_var": 6893.302067057291, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 78.0325, |
| "loss/crossentropy": 2.801321029663086, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.019743533805012703, |
| "loss/reg": 75.06490325927734, |
| "loss/twn": 0.0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0006, |
| "grad_norm": 11.625, |
| "grad_norm_var": 6937.396728515625, |
| "learning_rate": 2.4e-05, |
| "loss": 70.1903, |
| "loss/crossentropy": 1.5617326498031616, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.006116375792771578, |
| "loss/reg": 68.48768615722656, |
| "loss/twn": 0.0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.000625, |
| "grad_norm": 32.0, |
| "grad_norm_var": 3246.975895182292, |
| "learning_rate": 2.5e-05, |
| "loss": 64.4648, |
| "loss/crossentropy": 1.4777029752731323, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.007067938335239887, |
| "loss/reg": 62.821861267089844, |
| "loss/twn": 0.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.00065, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 3254.977197265625, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 60.7985, |
| "loss/crossentropy": 2.695087432861328, |
| "loss/hidden": 0.154296875, |
| "loss/logits": 0.009972814470529556, |
| "loss/reg": 57.93910217285156, |
| "loss/twn": 0.0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.000675, |
| "grad_norm": 9.9375, |
| "grad_norm_var": 3282.35234375, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 54.9976, |
| "loss/crossentropy": 1.1553270816802979, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.008951587602496147, |
| "loss/reg": 53.69655990600586, |
| "loss/twn": 0.0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0007, |
| "grad_norm": 20.0, |
| "grad_norm_var": 3253.6384765625, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 54.2425, |
| "loss/crossentropy": 4.120519638061523, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.012582110241055489, |
| "loss/reg": 49.99223709106445, |
| "loss/twn": 0.0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.000725, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 3259.070768229167, |
| "learning_rate": 2.9e-05, |
| "loss": 49.3116, |
| "loss/crossentropy": 2.52665638923645, |
| "loss/hidden": 0.11474609375, |
| "loss/logits": 0.009403377771377563, |
| "loss/reg": 46.660804748535156, |
| "loss/twn": 0.0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.00075, |
| "grad_norm": 10.375, |
| "grad_norm_var": 3284.8536458333333, |
| "learning_rate": 3e-05, |
| "loss": 44.966, |
| "loss/crossentropy": 1.1925100088119507, |
| "loss/hidden": 0.08837890625, |
| "loss/logits": 0.002639985177665949, |
| "loss/reg": 43.682464599609375, |
| "loss/twn": 0.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.000775, |
| "grad_norm": 67.5, |
| "grad_norm_var": 3345.6231770833333, |
| "learning_rate": 3.1e-05, |
| "loss": 42.8695, |
| "loss/crossentropy": 1.6953678131103516, |
| "loss/hidden": 0.2060546875, |
| "loss/logits": 0.011261125095188618, |
| "loss/reg": 40.95684814453125, |
| "loss/twn": 0.0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0008, |
| "grad_norm": 22.5, |
| "grad_norm_var": 3311.253108723958, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 41.8117, |
| "loss/crossentropy": 3.090846538543701, |
| "loss/hidden": 0.2451171875, |
| "loss/logits": 0.015867076814174652, |
| "loss/reg": 38.459877014160156, |
| "loss/twn": 0.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.000825, |
| "grad_norm": 14.8125, |
| "grad_norm_var": 2914.9796223958333, |
| "learning_rate": 3.3e-05, |
| "loss": 38.0881, |
| "loss/crossentropy": 1.7205989360809326, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.013207211159169674, |
| "loss/reg": 36.18929672241211, |
| "loss/twn": 0.0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.00085, |
| "grad_norm": 17.75, |
| "grad_norm_var": 2912.14296875, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 35.2028, |
| "loss/crossentropy": 0.8660376667976379, |
| "loss/hidden": 0.1865234375, |
| "loss/logits": 0.012940528802573681, |
| "loss/reg": 34.13732147216797, |
| "loss/twn": 0.0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.000875, |
| "grad_norm": 12.5, |
| "grad_norm_var": 199.53671875, |
| "learning_rate": 3.5e-05, |
| "loss": 34.9671, |
| "loss/crossentropy": 2.679326057434082, |
| "loss/hidden": 0.044921875, |
| "loss/logits": 0.005477376747876406, |
| "loss/reg": 32.237335205078125, |
| "loss/twn": 0.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0009, |
| "grad_norm": 13.1875, |
| "grad_norm_var": 200.8947265625, |
| "learning_rate": 3.6e-05, |
| "loss": 32.3725, |
| "loss/crossentropy": 1.6193571090698242, |
| "loss/hidden": 0.2314453125, |
| "loss/logits": 0.016166094690561295, |
| "loss/reg": 30.505502700805664, |
| "loss/twn": 0.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.000925, |
| "grad_norm": 13.5625, |
| "grad_norm_var": 202.30284830729167, |
| "learning_rate": 3.7e-05, |
| "loss": 31.5358, |
| "loss/crossentropy": 2.4860172271728516, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.010934200137853622, |
| "loss/reg": 28.881595611572266, |
| "loss/twn": 0.0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.00095, |
| "grad_norm": 41.5, |
| "grad_norm_var": 231.96901041666666, |
| "learning_rate": 3.8e-05, |
| "loss": 30.499, |
| "loss/crossentropy": 2.840606689453125, |
| "loss/hidden": 0.2138671875, |
| "loss/logits": 0.017023704946041107, |
| "loss/reg": 27.427488327026367, |
| "loss/twn": 0.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.000975, |
| "grad_norm": 39.75, |
| "grad_norm_var": 251.65826822916668, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 27.702, |
| "loss/crossentropy": 1.4538397789001465, |
| "loss/hidden": 0.19921875, |
| "loss/logits": 0.01595349609851837, |
| "loss/reg": 26.032983779907227, |
| "loss/twn": 0.0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.001, |
| "grad_norm": 9.5, |
| "grad_norm_var": 254.85572916666666, |
| "learning_rate": 4e-05, |
| "loss": 27.0967, |
| "loss/crossentropy": 2.216383695602417, |
| "loss/hidden": 0.10595703125, |
| "loss/logits": 0.006870034150779247, |
| "loss/reg": 24.76752471923828, |
| "loss/twn": 0.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.001025, |
| "grad_norm": 11.625, |
| "grad_norm_var": 253.04108072916668, |
| "learning_rate": 4.1e-05, |
| "loss": 25.7473, |
| "loss/crossentropy": 2.1163833141326904, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.004073521587997675, |
| "loss/reg": 23.559967041015625, |
| "loss/twn": 0.0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.00105, |
| "grad_norm": 14.0, |
| "grad_norm_var": 250.197509765625, |
| "learning_rate": 4.2e-05, |
| "loss": 23.4299, |
| "loss/crossentropy": 0.7737110257148743, |
| "loss/hidden": 0.1748046875, |
| "loss/logits": 0.009298819117248058, |
| "loss/reg": 22.47205352783203, |
| "loss/twn": 0.0, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.001075, |
| "grad_norm": 12.75, |
| "grad_norm_var": 246.6650390625, |
| "learning_rate": 4.3e-05, |
| "loss": 23.3947, |
| "loss/crossentropy": 1.8225781917572021, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.006940089166164398, |
| "loss/reg": 21.452856063842773, |
| "loss/twn": 0.0, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0011, |
| "grad_norm": 82.0, |
| "grad_norm_var": 479.8754557291667, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 23.171, |
| "loss/crossentropy": 2.5216610431671143, |
| "loss/hidden": 0.1396484375, |
| "loss/logits": 0.01111547276377678, |
| "loss/reg": 20.498552322387695, |
| "loss/twn": 0.0, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.001125, |
| "grad_norm": 10.75, |
| "grad_norm_var": 482.614306640625, |
| "learning_rate": 4.5e-05, |
| "loss": 22.58, |
| "loss/crossentropy": 2.7781012058258057, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.008185407146811485, |
| "loss/reg": 19.63548469543457, |
| "loss/twn": 0.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.00115, |
| "grad_norm": 27.125, |
| "grad_norm_var": 468.31573893229165, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 21.6325, |
| "loss/crossentropy": 2.6625006198883057, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.009840598329901695, |
| "loss/reg": 18.810749053955078, |
| "loss/twn": 0.0, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.001175, |
| "grad_norm": 20.5, |
| "grad_norm_var": 344.27980143229166, |
| "learning_rate": 4.7e-05, |
| "loss": 20.5522, |
| "loss/crossentropy": 2.3305137157440186, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.016644544899463654, |
| "loss/reg": 18.056581497192383, |
| "loss/twn": 0.0, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0012, |
| "grad_norm": 11.25, |
| "grad_norm_var": 352.54737955729166, |
| "learning_rate": 4.8e-05, |
| "loss": 18.7184, |
| "loss/crossentropy": 1.2184098958969116, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.0094651710242033, |
| "loss/reg": 17.344999313354492, |
| "loss/twn": 0.0, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.001225, |
| "grad_norm": 9.5, |
| "grad_norm_var": 359.42734375, |
| "learning_rate": 4.9e-05, |
| "loss": 19.2773, |
| "loss/crossentropy": 2.487840414047241, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.004987399093806744, |
| "loss/reg": 16.685823440551758, |
| "loss/twn": 0.0, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.00125, |
| "grad_norm": 15.1875, |
| "grad_norm_var": 361.1883951822917, |
| "learning_rate": 5e-05, |
| "loss": 18.9783, |
| "loss/crossentropy": 2.735170602798462, |
| "loss/hidden": 0.166015625, |
| "loss/logits": 0.011278904974460602, |
| "loss/reg": 16.065805435180664, |
| "loss/twn": 0.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.001275, |
| "grad_norm": 15.75, |
| "grad_norm_var": 357.929931640625, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 17.1984, |
| "loss/crossentropy": 1.4663747549057007, |
| "loss/hidden": 0.2353515625, |
| "loss/logits": 0.003095359541475773, |
| "loss/reg": 15.49356746673584, |
| "loss/twn": 0.0, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0013, |
| "grad_norm": 9.9375, |
| "grad_norm_var": 362.29881184895834, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 16.953, |
| "loss/crossentropy": 1.918389916419983, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.0031922967173159122, |
| "loss/reg": 14.95230484008789, |
| "loss/twn": 0.0, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.001325, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 365.8745930989583, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 16.8909, |
| "loss/crossentropy": 2.3705666065216064, |
| "loss/hidden": 0.07275390625, |
| "loss/logits": 0.0030757079366594553, |
| "loss/reg": 14.444525718688965, |
| "loss/twn": 0.0, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.00135, |
| "grad_norm": 16.75, |
| "grad_norm_var": 337.7085774739583, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 16.1047, |
| "loss/crossentropy": 1.9884377717971802, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.010179271921515465, |
| "loss/reg": 13.977179527282715, |
| "loss/twn": 0.0, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.001375, |
| "grad_norm": 13.375, |
| "grad_norm_var": 311.08631184895836, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 16.2292, |
| "loss/crossentropy": 2.642868995666504, |
| "loss/hidden": 0.04736328125, |
| "loss/logits": 0.004405863583087921, |
| "loss/reg": 13.5346097946167, |
| "loss/twn": 0.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0014, |
| "grad_norm": 20.875, |
| "grad_norm_var": 306.026806640625, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 14.8051, |
| "loss/crossentropy": 1.465383529663086, |
| "loss/hidden": 0.2080078125, |
| "loss/logits": 0.00864885188639164, |
| "loss/reg": 13.123102188110352, |
| "loss/twn": 0.0, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.001425, |
| "grad_norm": 15.875, |
| "grad_norm_var": 303.045166015625, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 14.4812, |
| "loss/crossentropy": 1.640454649925232, |
| "loss/hidden": 0.09765625, |
| "loss/logits": 0.007794347126036882, |
| "loss/reg": 12.735280990600586, |
| "loss/twn": 0.0, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.00145, |
| "grad_norm": 16.5, |
| "grad_norm_var": 301.720947265625, |
| "learning_rate": 5.8e-05, |
| "loss": 14.5378, |
| "loss/crossentropy": 2.0751516819000244, |
| "loss/hidden": 0.07861328125, |
| "loss/logits": 0.006516133435070515, |
| "loss/reg": 12.377544403076172, |
| "loss/twn": 0.0, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.001475, |
| "grad_norm": 20.875, |
| "grad_norm_var": 298.750244140625, |
| "learning_rate": 5.9e-05, |
| "loss": 14.1279, |
| "loss/crossentropy": 1.9119625091552734, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.021668870002031326, |
| "loss/reg": 12.038968086242676, |
| "loss/twn": 0.0, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0015, |
| "grad_norm": 13.5625, |
| "grad_norm_var": 23.984375, |
| "learning_rate": 6e-05, |
| "loss": 12.6629, |
| "loss/crossentropy": 0.7274801731109619, |
| "loss/hidden": 0.203125, |
| "loss/logits": 0.009123459458351135, |
| "loss/reg": 11.72317123413086, |
| "loss/twn": 0.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.001525, |
| "grad_norm": 11.875, |
| "grad_norm_var": 23.3462890625, |
| "learning_rate": 6.1e-05, |
| "loss": 14.3076, |
| "loss/crossentropy": 2.738680601119995, |
| "loss/hidden": 0.1396484375, |
| "loss/logits": 0.010733511298894882, |
| "loss/reg": 11.418492317199707, |
| "loss/twn": 0.0, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.00155, |
| "grad_norm": 41.25, |
| "grad_norm_var": 57.518489583333334, |
| "learning_rate": 6.2e-05, |
| "loss": 13.614, |
| "loss/crossentropy": 2.1940059661865234, |
| "loss/hidden": 0.26171875, |
| "loss/logits": 0.016079768538475037, |
| "loss/reg": 11.142221450805664, |
| "loss/twn": 0.0, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.001575, |
| "grad_norm": 16.25, |
| "grad_norm_var": 56.371875, |
| "learning_rate": 6.3e-05, |
| "loss": 12.6424, |
| "loss/crossentropy": 1.5363647937774658, |
| "loss/hidden": 0.220703125, |
| "loss/logits": 0.009181533940136433, |
| "loss/reg": 10.876102447509766, |
| "loss/twn": 0.0, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0016, |
| "grad_norm": 8.25, |
| "grad_norm_var": 58.921875, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 12.6573, |
| "loss/crossentropy": 1.9360976219177246, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.00900467112660408, |
| "loss/reg": 10.625749588012695, |
| "loss/twn": 0.0, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.001625, |
| "grad_norm": 25.0, |
| "grad_norm_var": 60.43958333333333, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 12.8598, |
| "loss/crossentropy": 2.2861106395721436, |
| "loss/hidden": 0.169921875, |
| "loss/logits": 0.008098036982119083, |
| "loss/reg": 10.395671844482422, |
| "loss/twn": 0.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.00165, |
| "grad_norm": 10.5, |
| "grad_norm_var": 62.94568684895833, |
| "learning_rate": 6.6e-05, |
| "loss": 10.8882, |
| "loss/crossentropy": 0.5159875154495239, |
| "loss/hidden": 0.1884765625, |
| "loss/logits": 0.007731384597718716, |
| "loss/reg": 10.176012992858887, |
| "loss/twn": 0.0, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.001675, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 62.99166666666667, |
| "learning_rate": 6.7e-05, |
| "loss": 11.7439, |
| "loss/crossentropy": 1.6010075807571411, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.0077722882851958275, |
| "loss/reg": 9.96713924407959, |
| "loss/twn": 0.0, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0017, |
| "grad_norm": 8.125, |
| "grad_norm_var": 64.82823893229167, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 11.7136, |
| "loss/crossentropy": 1.8642301559448242, |
| "loss/hidden": 0.07177734375, |
| "loss/logits": 0.003159617306664586, |
| "loss/reg": 9.774468421936035, |
| "loss/twn": 0.0, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.001725, |
| "grad_norm": 12.75, |
| "grad_norm_var": 63.475260416666664, |
| "learning_rate": 6.9e-05, |
| "loss": 11.2146, |
| "loss/crossentropy": 1.5259939432144165, |
| "loss/hidden": 0.0927734375, |
| "loss/logits": 0.0040178182534873486, |
| "loss/reg": 9.591811180114746, |
| "loss/twn": 0.0, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.00175, |
| "grad_norm": 13.25, |
| "grad_norm_var": 64.21901041666666, |
| "learning_rate": 7e-05, |
| "loss": 10.007, |
| "loss/crossentropy": 0.4211646616458893, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.006910983473062515, |
| "loss/reg": 9.415842056274414, |
| "loss/twn": 0.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.001775, |
| "grad_norm": 15.1875, |
| "grad_norm_var": 63.672900390625, |
| "learning_rate": 7.1e-05, |
| "loss": 11.3191, |
| "loss/crossentropy": 1.91142737865448, |
| "loss/hidden": 0.1416015625, |
| "loss/logits": 0.012339383363723755, |
| "loss/reg": 9.253693580627441, |
| "loss/twn": 0.0, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0018, |
| "grad_norm": 25.125, |
| "grad_norm_var": 67.225634765625, |
| "learning_rate": 7.2e-05, |
| "loss": 9.6954, |
| "loss/crossentropy": 0.3831652104854584, |
| "loss/hidden": 0.2060546875, |
| "loss/logits": 0.007283635437488556, |
| "loss/reg": 9.098925590515137, |
| "loss/twn": 0.0, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.001825, |
| "grad_norm": 12.375, |
| "grad_norm_var": 68.45245768229167, |
| "learning_rate": 7.3e-05, |
| "loss": 11.8289, |
| "loss/crossentropy": 2.7114861011505127, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.00806540995836258, |
| "loss/reg": 8.952132225036621, |
| "loss/twn": 0.0, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.00185, |
| "grad_norm": 18.0, |
| "grad_norm_var": 68.56417643229166, |
| "learning_rate": 7.4e-05, |
| "loss": 9.9016, |
| "loss/crossentropy": 0.9610092043876648, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.005808320362120867, |
| "loss/reg": 8.812213897705078, |
| "loss/twn": 0.0, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.001875, |
| "grad_norm": 11.75, |
| "grad_norm_var": 68.73527018229167, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 11.33, |
| "loss/crossentropy": 2.4811081886291504, |
| "loss/hidden": 0.154296875, |
| "loss/logits": 0.012214528396725655, |
| "loss/reg": 8.682340621948242, |
| "loss/twn": 0.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0019, |
| "grad_norm": 16.75, |
| "grad_norm_var": 68.26295572916666, |
| "learning_rate": 7.6e-05, |
| "loss": 10.7107, |
| "loss/crossentropy": 1.9060146808624268, |
| "loss/hidden": 0.2294921875, |
| "loss/logits": 0.016750231385231018, |
| "loss/reg": 8.558440208435059, |
| "loss/twn": 0.0, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.001925, |
| "grad_norm": 11.5, |
| "grad_norm_var": 68.49635416666666, |
| "learning_rate": 7.7e-05, |
| "loss": 10.9686, |
| "loss/crossentropy": 2.370375394821167, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.014391288161277771, |
| "loss/reg": 8.440238952636719, |
| "loss/twn": 0.0, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.00195, |
| "grad_norm": 30.0, |
| "grad_norm_var": 39.04713541666667, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 11.235, |
| "loss/crossentropy": 2.7426469326019287, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.012605298310518265, |
| "loss/reg": 8.32447624206543, |
| "loss/twn": 0.0, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.001975, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 39.50636393229167, |
| "learning_rate": 7.900000000000001e-05, |
| "loss": 10.4604, |
| "loss/crossentropy": 2.1269540786743164, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.0066815330646932125, |
| "loss/reg": 8.216917991638184, |
| "loss/twn": 0.0, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 13.625, |
| "grad_norm_var": 36.169384765625, |
| "learning_rate": 8e-05, |
| "loss": 10.9456, |
| "loss/crossentropy": 2.6664817333221436, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.011289350688457489, |
| "loss/reg": 8.115513801574707, |
| "loss/twn": 0.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.002025, |
| "grad_norm": 32.25, |
| "grad_norm_var": 48.38487955729167, |
| "learning_rate": 8.1e-05, |
| "loss": 10.8641, |
| "loss/crossentropy": 2.6699304580688477, |
| "loss/hidden": 0.1640625, |
| "loss/logits": 0.010942000895738602, |
| "loss/reg": 8.019161224365234, |
| "loss/twn": 0.0, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.00205, |
| "grad_norm": 21.875, |
| "grad_norm_var": 47.804280598958336, |
| "learning_rate": 8.2e-05, |
| "loss": 10.4686, |
| "loss/crossentropy": 2.2571003437042236, |
| "loss/hidden": 0.267578125, |
| "loss/logits": 0.021330825984477997, |
| "loss/reg": 7.922557353973389, |
| "loss/twn": 0.0, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.002075, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 49.776546223958334, |
| "learning_rate": 8.3e-05, |
| "loss": 10.8712, |
| "loss/crossentropy": 2.8793890476226807, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.0122376699000597, |
| "loss/reg": 7.835977077484131, |
| "loss/twn": 0.0, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0021, |
| "grad_norm": 12.125, |
| "grad_norm_var": 46.224462890625, |
| "learning_rate": 8.4e-05, |
| "loss": 10.6407, |
| "loss/crossentropy": 2.8739991188049316, |
| "loss/hidden": 0.0140380859375, |
| "loss/logits": 0.0031395466066896915, |
| "loss/reg": 7.749497413635254, |
| "loss/twn": 0.0, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.002125, |
| "grad_norm": 224.0, |
| "grad_norm_var": 2718.206884765625, |
| "learning_rate": 8.5e-05, |
| "loss": 9.9904, |
| "loss/crossentropy": 2.1338188648223877, |
| "loss/hidden": 0.1748046875, |
| "loss/logits": 0.013754406943917274, |
| "loss/reg": 7.668061256408691, |
| "loss/twn": 0.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.00215, |
| "grad_norm": 9.0625, |
| "grad_norm_var": 2728.7181640625, |
| "learning_rate": 8.6e-05, |
| "loss": 10.4151, |
| "loss/crossentropy": 2.6691489219665527, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.011645539663732052, |
| "loss/reg": 7.588749885559082, |
| "loss/twn": 0.0, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.002175, |
| "grad_norm": 23.375, |
| "grad_norm_var": 2716.899593098958, |
| "learning_rate": 8.7e-05, |
| "loss": 9.2735, |
| "loss/crossentropy": 1.5923405885696411, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.006691344082355499, |
| "loss/reg": 7.515244007110596, |
| "loss/twn": 0.0, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0022, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 2738.4708333333333, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 10.0973, |
| "loss/crossentropy": 2.563422679901123, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006290389224886894, |
| "loss/reg": 7.441190242767334, |
| "loss/twn": 0.0, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.002225, |
| "grad_norm": 16.75, |
| "grad_norm_var": 2729.6775390625, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 10.1917, |
| "loss/crossentropy": 2.605319023132324, |
| "loss/hidden": 0.1845703125, |
| "loss/logits": 0.029636088758707047, |
| "loss/reg": 7.372167587280273, |
| "loss/twn": 0.0, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.00225, |
| "grad_norm": 17.0, |
| "grad_norm_var": 2731.3098307291666, |
| "learning_rate": 9e-05, |
| "loss": 9.2722, |
| "loss/crossentropy": 1.7703652381896973, |
| "loss/hidden": 0.1845703125, |
| "loss/logits": 0.011869278736412525, |
| "loss/reg": 7.30535364151001, |
| "loss/twn": 0.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.002275, |
| "grad_norm": 26.625, |
| "grad_norm_var": 2709.51640625, |
| "learning_rate": 9.1e-05, |
| "loss": 10.0987, |
| "loss/crossentropy": 2.770080327987671, |
| "loss/hidden": 0.08154296875, |
| "loss/logits": 0.006538551300764084, |
| "loss/reg": 7.240530967712402, |
| "loss/twn": 0.0, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0023, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 2718.9657389322915, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 9.1897, |
| "loss/crossentropy": 1.7330008745193481, |
| "loss/hidden": 0.265625, |
| "loss/logits": 0.012156343087553978, |
| "loss/reg": 7.1789398193359375, |
| "loss/twn": 0.0, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.002325, |
| "grad_norm": 113.5, |
| "grad_norm_var": 3112.675113932292, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 8.4886, |
| "loss/crossentropy": 1.163967490196228, |
| "loss/hidden": 0.197265625, |
| "loss/logits": 0.009277150966227055, |
| "loss/reg": 7.11806058883667, |
| "loss/twn": 0.0, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.00235, |
| "grad_norm": 37.25, |
| "grad_norm_var": 3109.446598307292, |
| "learning_rate": 9.4e-05, |
| "loss": 9.3185, |
| "loss/crossentropy": 2.135645627975464, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.0029190080240368843, |
| "loss/reg": 7.06224250793457, |
| "loss/twn": 0.0, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.002375, |
| "grad_norm": 8.3125, |
| "grad_norm_var": 3125.339567057292, |
| "learning_rate": 9.5e-05, |
| "loss": 9.3855, |
| "loss/crossentropy": 2.309610605239868, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.0035296978894621134, |
| "loss/reg": 7.005456447601318, |
| "loss/twn": 0.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0024, |
| "grad_norm": 91.5, |
| "grad_norm_var": 3262.5942545572916, |
| "learning_rate": 9.6e-05, |
| "loss": 9.9531, |
| "loss/crossentropy": 2.7651376724243164, |
| "loss/hidden": 0.2197265625, |
| "loss/logits": 0.01677127555012703, |
| "loss/reg": 6.951422214508057, |
| "loss/twn": 0.0, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.002425, |
| "grad_norm": 35.25, |
| "grad_norm_var": 3259.3458170572917, |
| "learning_rate": 9.7e-05, |
| "loss": 8.7807, |
| "loss/crossentropy": 1.7207653522491455, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.011507261544466019, |
| "loss/reg": 6.8990349769592285, |
| "loss/twn": 0.0, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.00245, |
| "grad_norm": 13.75, |
| "grad_norm_var": 3285.235791015625, |
| "learning_rate": 9.8e-05, |
| "loss": 8.475, |
| "loss/crossentropy": 1.481154441833496, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.005128794349730015, |
| "loss/reg": 6.848050594329834, |
| "loss/twn": 0.0, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.002475, |
| "grad_norm": 10.1875, |
| "grad_norm_var": 3289.334228515625, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 9.021, |
| "loss/crossentropy": 2.196463108062744, |
| "loss/hidden": 0.0234375, |
| "loss/logits": 0.0013116542249917984, |
| "loss/reg": 6.79979133605957, |
| "loss/twn": 0.0, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 13.6875, |
| "grad_norm_var": 3283.3889973958335, |
| "learning_rate": 0.0001, |
| "loss": 9.1231, |
| "loss/crossentropy": 2.0860254764556885, |
| "loss/hidden": 0.265625, |
| "loss/logits": 0.0192781500518322, |
| "loss/reg": 6.75217342376709, |
| "loss/twn": 0.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.002525, |
| "grad_norm": 65.5, |
| "grad_norm_var": 996.5311848958333, |
| "learning_rate": 0.0001, |
| "loss": 8.6978, |
| "loss/crossentropy": 1.8436778783798218, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.006662796251475811, |
| "loss/reg": 6.706822395324707, |
| "loss/twn": 0.0, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.00255, |
| "grad_norm": 9.0625, |
| "grad_norm_var": 996.5311848958333, |
| "learning_rate": 0.0001, |
| "loss": 9.2667, |
| "loss/crossentropy": 2.4968836307525635, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.007922045886516571, |
| "loss/reg": 6.663230895996094, |
| "loss/twn": 0.0, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.002575, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 1019.1574055989583, |
| "learning_rate": 0.0001, |
| "loss": 8.2507, |
| "loss/crossentropy": 1.475099802017212, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.007549532223492861, |
| "loss/reg": 6.619617938995361, |
| "loss/twn": 0.0, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0026, |
| "grad_norm": 13.5625, |
| "grad_norm_var": 1013.6202962239583, |
| "learning_rate": 0.0001, |
| "loss": 9.2719, |
| "loss/crossentropy": 2.5519533157348633, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.00820184126496315, |
| "loss/reg": 6.578925132751465, |
| "loss/twn": 0.0, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.002625, |
| "grad_norm": 288.0, |
| "grad_norm_var": 5098.051936848959, |
| "learning_rate": 0.0001, |
| "loss": 7.9889, |
| "loss/crossentropy": 1.3079354763031006, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.01036953553557396, |
| "loss/reg": 6.537764072418213, |
| "loss/twn": 0.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.00265, |
| "grad_norm": 17.5, |
| "grad_norm_var": 5096.006363932292, |
| "learning_rate": 0.0001, |
| "loss": 9.3801, |
| "loss/crossentropy": 2.7196500301361084, |
| "loss/hidden": 0.1474609375, |
| "loss/logits": 0.013073693960905075, |
| "loss/reg": 6.49993371963501, |
| "loss/twn": 0.0, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.002675, |
| "grad_norm": 772.0, |
| "grad_norm_var": 37700.72758789062, |
| "learning_rate": 0.0001, |
| "loss": 7.3614, |
| "loss/crossentropy": 0.6930418014526367, |
| "loss/hidden": 0.1982421875, |
| "loss/logits": 0.0074032871052622795, |
| "loss/reg": 6.462671279907227, |
| "loss/twn": 0.0, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0027, |
| "grad_norm": 10.125, |
| "grad_norm_var": 37725.00826822917, |
| "learning_rate": 0.0001, |
| "loss": 8.4394, |
| "loss/crossentropy": 1.9201096296310425, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006301195826381445, |
| "loss/reg": 6.426520347595215, |
| "loss/twn": 0.0, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.002725, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 38118.67159830729, |
| "learning_rate": 0.0001, |
| "loss": 9.0847, |
| "loss/crossentropy": 2.634326219558716, |
| "loss/hidden": 0.056640625, |
| "loss/logits": 0.003579255659133196, |
| "loss/reg": 6.3901753425598145, |
| "loss/twn": 0.0, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.00275, |
| "grad_norm": 13.0, |
| "grad_norm_var": 38319.52980143229, |
| "learning_rate": 0.0001, |
| "loss": 9.0789, |
| "loss/crossentropy": 2.5669283866882324, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.010270677506923676, |
| "loss/reg": 6.356166839599609, |
| "loss/twn": 0.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.002775, |
| "grad_norm": 14.4375, |
| "grad_norm_var": 38258.03097330729, |
| "learning_rate": 0.0001, |
| "loss": 8.9621, |
| "loss/crossentropy": 2.5042357444763184, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.00943165272474289, |
| "loss/reg": 6.323448657989502, |
| "loss/twn": 0.0, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0028, |
| "grad_norm": 10.75, |
| "grad_norm_var": 38615.72823893229, |
| "learning_rate": 0.0001, |
| "loss": 8.0629, |
| "loss/crossentropy": 1.681036353111267, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.003971286583691835, |
| "loss/reg": 6.291506767272949, |
| "loss/twn": 0.0, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.002825, |
| "grad_norm": 23.875, |
| "grad_norm_var": 38694.45271809896, |
| "learning_rate": 0.0001, |
| "loss": 7.101, |
| "loss/crossentropy": 0.6117576956748962, |
| "loss/hidden": 0.2158203125, |
| "loss/logits": 0.012755107134580612, |
| "loss/reg": 6.260617256164551, |
| "loss/twn": 0.0, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.00285, |
| "grad_norm": 12.1875, |
| "grad_norm_var": 38708.639322916664, |
| "learning_rate": 0.0001, |
| "loss": 8.0025, |
| "loss/crossentropy": 1.5227508544921875, |
| "loss/hidden": 0.234375, |
| "loss/logits": 0.015468025580048561, |
| "loss/reg": 6.229867935180664, |
| "loss/twn": 0.0, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.002875, |
| "grad_norm": 16.375, |
| "grad_norm_var": 38652.598942057295, |
| "learning_rate": 0.0001, |
| "loss": 9.088, |
| "loss/crossentropy": 2.7616689205169678, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.009811250492930412, |
| "loss/reg": 6.201269626617432, |
| "loss/twn": 0.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0029, |
| "grad_norm": 12.375, |
| "grad_norm_var": 38664.55670572917, |
| "learning_rate": 0.0001, |
| "loss": 8.9623, |
| "loss/crossentropy": 2.647496461868286, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.009309421293437481, |
| "loss/reg": 6.172722339630127, |
| "loss/twn": 0.0, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.002925, |
| "grad_norm": 20.5, |
| "grad_norm_var": 38886.04108072917, |
| "learning_rate": 0.0001, |
| "loss": 9.1565, |
| "loss/crossentropy": 2.8847148418426514, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.008627700619399548, |
| "loss/reg": 6.145481586456299, |
| "loss/twn": 0.0, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.00295, |
| "grad_norm": 16.375, |
| "grad_norm_var": 38821.67394205729, |
| "learning_rate": 0.0001, |
| "loss": 9.0896, |
| "loss/crossentropy": 2.7421655654907227, |
| "loss/hidden": 0.212890625, |
| "loss/logits": 0.016587935388088226, |
| "loss/reg": 6.117995262145996, |
| "loss/twn": 0.0, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.002975, |
| "grad_norm": 8.5625, |
| "grad_norm_var": 38845.82667643229, |
| "learning_rate": 0.0001, |
| "loss": 8.091, |
| "loss/crossentropy": 1.8508156538009644, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.008302265778183937, |
| "loss/reg": 6.093196868896484, |
| "loss/twn": 0.0, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 11.625, |
| "grad_norm_var": 38862.91451822917, |
| "learning_rate": 0.0001, |
| "loss": 8.6832, |
| "loss/crossentropy": 2.444472312927246, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.016056066378951073, |
| "loss/reg": 6.067349433898926, |
| "loss/twn": 0.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.003025, |
| "grad_norm": 15.0625, |
| "grad_norm_var": 35901.329410807295, |
| "learning_rate": 0.0001, |
| "loss": 7.6991, |
| "loss/crossentropy": 1.443003535270691, |
| "loss/hidden": 0.203125, |
| "loss/logits": 0.009365499019622803, |
| "loss/reg": 6.043575763702393, |
| "loss/twn": 0.0, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.00305, |
| "grad_norm": 10.125, |
| "grad_norm_var": 35948.114567057295, |
| "learning_rate": 0.0001, |
| "loss": 8.3654, |
| "loss/crossentropy": 2.175076961517334, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.012922637164592743, |
| "loss/reg": 6.0201544761657715, |
| "loss/twn": 0.0, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.003075, |
| "grad_norm": 12.9375, |
| "grad_norm_var": 16.191145833333334, |
| "learning_rate": 0.0001, |
| "loss": 8.7509, |
| "loss/crossentropy": 2.659536123275757, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.007753277197480202, |
| "loss/reg": 5.997157096862793, |
| "loss/twn": 0.0, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0031, |
| "grad_norm": 11.9375, |
| "grad_norm_var": 15.527978515625, |
| "learning_rate": 0.0001, |
| "loss": 8.4483, |
| "loss/crossentropy": 2.3908164501190186, |
| "loss/hidden": 0.07666015625, |
| "loss/logits": 0.005580560304224491, |
| "loss/reg": 5.975290298461914, |
| "loss/twn": 0.0, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.003125, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 15.120035807291666, |
| "learning_rate": 0.0001, |
| "loss": 8.1928, |
| "loss/crossentropy": 2.0353291034698486, |
| "loss/hidden": 0.1923828125, |
| "loss/logits": 0.011610760353505611, |
| "loss/reg": 5.953509330749512, |
| "loss/twn": 0.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.00315, |
| "grad_norm": 11.4375, |
| "grad_norm_var": 15.467122395833334, |
| "learning_rate": 0.0001, |
| "loss": 6.9926, |
| "loss/crossentropy": 0.923692524433136, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.006808650679886341, |
| "loss/reg": 5.933147430419922, |
| "loss/twn": 0.0, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.003175, |
| "grad_norm": 18.875, |
| "grad_norm_var": 17.053759765625, |
| "learning_rate": 0.0001, |
| "loss": 8.6838, |
| "loss/crossentropy": 2.7514772415161133, |
| "loss/hidden": 0.016357421875, |
| "loss/logits": 0.00333950063213706, |
| "loss/reg": 5.91263484954834, |
| "loss/twn": 0.0, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 13.375, |
| "grad_norm_var": 16.307275390625, |
| "learning_rate": 0.0001, |
| "loss": 7.5016, |
| "loss/crossentropy": 1.4413155317306519, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.011735007166862488, |
| "loss/reg": 5.893232822418213, |
| "loss/twn": 0.0, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.003225, |
| "grad_norm": 52.25, |
| "grad_norm_var": 102.939697265625, |
| "learning_rate": 0.0001, |
| "loss": 7.4808, |
| "loss/crossentropy": 1.5077205896377563, |
| "loss/hidden": 0.0947265625, |
| "loss/logits": 0.004158593248575926, |
| "loss/reg": 5.874199867248535, |
| "loss/twn": 0.0, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.00325, |
| "grad_norm": 12.75, |
| "grad_norm_var": 102.6697265625, |
| "learning_rate": 0.0001, |
| "loss": 8.7541, |
| "loss/crossentropy": 2.7712345123291016, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.006270177662372589, |
| "loss/reg": 5.856495380401611, |
| "loss/twn": 0.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.003275, |
| "grad_norm": 9.5625, |
| "grad_norm_var": 105.30779622395833, |
| "learning_rate": 0.0001, |
| "loss": 7.2955, |
| "loss/crossentropy": 1.3631829023361206, |
| "loss/hidden": 0.08837890625, |
| "loss/logits": 0.005783870816230774, |
| "loss/reg": 5.838170528411865, |
| "loss/twn": 0.0, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0033, |
| "grad_norm": 22.625, |
| "grad_norm_var": 107.38448893229166, |
| "learning_rate": 0.0001, |
| "loss": 8.8285, |
| "loss/crossentropy": 2.8225176334381104, |
| "loss/hidden": 0.1767578125, |
| "loss/logits": 0.007785791996866465, |
| "loss/reg": 5.82139778137207, |
| "loss/twn": 0.0, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.003325, |
| "grad_norm": 23.5, |
| "grad_norm_var": 109.62667643229166, |
| "learning_rate": 0.0001, |
| "loss": 7.4036, |
| "loss/crossentropy": 1.5806615352630615, |
| "loss/hidden": 0.016357421875, |
| "loss/logits": 0.001918629975989461, |
| "loss/reg": 5.804649829864502, |
| "loss/twn": 0.0, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.00335, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 111.3869140625, |
| "learning_rate": 0.0001, |
| "loss": 8.0831, |
| "loss/crossentropy": 2.268815755844116, |
| "loss/hidden": 0.0233154296875, |
| "loss/logits": 0.0031364229507744312, |
| "loss/reg": 5.787786483764648, |
| "loss/twn": 0.0, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.003375, |
| "grad_norm": 17.875, |
| "grad_norm_var": 107.36847330729167, |
| "learning_rate": 0.0001, |
| "loss": 8.4399, |
| "loss/crossentropy": 2.5208940505981445, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.009613238275051117, |
| "loss/reg": 5.771730422973633, |
| "loss/twn": 0.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0034, |
| "grad_norm": 12.1875, |
| "grad_norm_var": 107.00416666666666, |
| "learning_rate": 0.0001, |
| "loss": 7.3628, |
| "loss/crossentropy": 1.5146337747573853, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.004632354713976383, |
| "loss/reg": 5.757077693939209, |
| "loss/twn": 0.0, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.003425, |
| "grad_norm": 74.5, |
| "grad_norm_var": 314.18409830729166, |
| "learning_rate": 0.0001, |
| "loss": 8.7141, |
| "loss/crossentropy": 2.663015127182007, |
| "loss/hidden": 0.298828125, |
| "loss/logits": 0.010531080886721611, |
| "loss/reg": 5.741701126098633, |
| "loss/twn": 0.0, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.00345, |
| "grad_norm": 11.5625, |
| "grad_norm_var": 312.32545572916666, |
| "learning_rate": 0.0001, |
| "loss": 8.2802, |
| "loss/crossentropy": 2.3824350833892822, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.011414668522775173, |
| "loss/reg": 5.727158546447754, |
| "loss/twn": 0.0, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.003475, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 314.30149739583334, |
| "learning_rate": 0.0001, |
| "loss": 8.1258, |
| "loss/crossentropy": 2.285022497177124, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.008713757619261742, |
| "loss/reg": 5.712470054626465, |
| "loss/twn": 0.0, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0035, |
| "grad_norm": 16.375, |
| "grad_norm_var": 310.479931640625, |
| "learning_rate": 0.0001, |
| "loss": 8.1639, |
| "loss/crossentropy": 2.350821018218994, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.012461278587579727, |
| "loss/reg": 5.699510097503662, |
| "loss/twn": 0.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.003525, |
| "grad_norm": 9.4375, |
| "grad_norm_var": 314.765478515625, |
| "learning_rate": 0.0001, |
| "loss": 7.8463, |
| "loss/crossentropy": 2.103158473968506, |
| "loss/hidden": 0.05224609375, |
| "loss/logits": 0.005224157590419054, |
| "loss/reg": 5.685665130615234, |
| "loss/twn": 0.0, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.00355, |
| "grad_norm": 9.0625, |
| "grad_norm_var": 318.001416015625, |
| "learning_rate": 0.0001, |
| "loss": 8.1747, |
| "loss/crossentropy": 2.418196678161621, |
| "loss/hidden": 0.07666015625, |
| "loss/logits": 0.006400687620043755, |
| "loss/reg": 5.6734795570373535, |
| "loss/twn": 0.0, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.003575, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 319.43639322916664, |
| "learning_rate": 0.0001, |
| "loss": 6.5554, |
| "loss/crossentropy": 0.6986656785011292, |
| "loss/hidden": 0.1845703125, |
| "loss/logits": 0.012149279937148094, |
| "loss/reg": 5.660000324249268, |
| "loss/twn": 0.0, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0036, |
| "grad_norm": 15.875, |
| "grad_norm_var": 317.5587890625, |
| "learning_rate": 0.0001, |
| "loss": 8.5371, |
| "loss/crossentropy": 2.7418227195739746, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.0111403688788414, |
| "loss/reg": 5.647412300109863, |
| "loss/twn": 0.0, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.003625, |
| "grad_norm": 13.875, |
| "grad_norm_var": 246.30520833333333, |
| "learning_rate": 0.0001, |
| "loss": 6.2652, |
| "loss/crossentropy": 0.4583094120025635, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.012293124571442604, |
| "loss/reg": 5.6363725662231445, |
| "loss/twn": 0.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.00365, |
| "grad_norm": 22.75, |
| "grad_norm_var": 245.63854166666667, |
| "learning_rate": 0.0001, |
| "loss": 7.3134, |
| "loss/crossentropy": 1.468201994895935, |
| "loss/hidden": 0.2109375, |
| "loss/logits": 0.009673453867435455, |
| "loss/reg": 5.624554634094238, |
| "loss/twn": 0.0, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.003675, |
| "grad_norm": 310.0, |
| "grad_norm_var": 5526.531754557292, |
| "learning_rate": 0.0001, |
| "loss": 7.0649, |
| "loss/crossentropy": 1.2726686000823975, |
| "loss/hidden": 0.1728515625, |
| "loss/logits": 0.005334332585334778, |
| "loss/reg": 5.614006042480469, |
| "loss/twn": 0.0, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0037, |
| "grad_norm": 9.25, |
| "grad_norm_var": 5563.953889973958, |
| "learning_rate": 0.0001, |
| "loss": 6.774, |
| "loss/crossentropy": 1.0251015424728394, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.007210130337625742, |
| "loss/reg": 5.603022575378418, |
| "loss/twn": 0.0, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.003725, |
| "grad_norm": 17.875, |
| "grad_norm_var": 5575.684358723958, |
| "learning_rate": 0.0001, |
| "loss": 8.7246, |
| "loss/crossentropy": 2.911123275756836, |
| "loss/hidden": 0.19921875, |
| "loss/logits": 0.02136034518480301, |
| "loss/reg": 5.592944145202637, |
| "loss/twn": 0.0, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.00375, |
| "grad_norm": 9.875, |
| "grad_norm_var": 5580.160872395833, |
| "learning_rate": 0.0001, |
| "loss": 8.39, |
| "loss/crossentropy": 2.711456537246704, |
| "loss/hidden": 0.09033203125, |
| "loss/logits": 0.005852097645401955, |
| "loss/reg": 5.582311153411865, |
| "loss/twn": 0.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.003775, |
| "grad_norm": 14.625, |
| "grad_norm_var": 5588.7056640625, |
| "learning_rate": 0.0001, |
| "loss": 7.2155, |
| "loss/crossentropy": 1.4148988723754883, |
| "loss/hidden": 0.2158203125, |
| "loss/logits": 0.012599754147231579, |
| "loss/reg": 5.5721516609191895, |
| "loss/twn": 0.0, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0038, |
| "grad_norm": 12.375, |
| "grad_norm_var": 5588.115869140625, |
| "learning_rate": 0.0001, |
| "loss": 8.013, |
| "loss/crossentropy": 2.3517696857452393, |
| "loss/hidden": 0.09130859375, |
| "loss/logits": 0.007323693484067917, |
| "loss/reg": 5.562623023986816, |
| "loss/twn": 0.0, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.003825, |
| "grad_norm": 30.5, |
| "grad_norm_var": 5482.538785807292, |
| "learning_rate": 0.0001, |
| "loss": 7.8607, |
| "loss/crossentropy": 2.2278008460998535, |
| "loss/hidden": 0.07421875, |
| "loss/logits": 0.0050660185515880585, |
| "loss/reg": 5.553621292114258, |
| "loss/twn": 0.0, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.00385, |
| "grad_norm": 13.0625, |
| "grad_norm_var": 5478.366129557292, |
| "learning_rate": 0.0001, |
| "loss": 8.277, |
| "loss/crossentropy": 2.206120252609253, |
| "loss/hidden": 0.5078125, |
| "loss/logits": 0.018887437880039215, |
| "loss/reg": 5.544199466705322, |
| "loss/twn": 0.0, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.003875, |
| "grad_norm": 93.0, |
| "grad_norm_var": 5656.329622395833, |
| "learning_rate": 0.0001, |
| "loss": 8.3416, |
| "loss/crossentropy": 2.643498420715332, |
| "loss/hidden": 0.1474609375, |
| "loss/logits": 0.015275152400135994, |
| "loss/reg": 5.535386562347412, |
| "loss/twn": 0.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0039, |
| "grad_norm": 15.8125, |
| "grad_norm_var": 5657.996468098959, |
| "learning_rate": 0.0001, |
| "loss": 8.66, |
| "loss/crossentropy": 3.025573492050171, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.008579680696129799, |
| "loss/reg": 5.52721643447876, |
| "loss/twn": 0.0, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.003925, |
| "grad_norm": 15.3125, |
| "grad_norm_var": 5637.544124348959, |
| "learning_rate": 0.0001, |
| "loss": 8.3922, |
| "loss/crossentropy": 2.7117786407470703, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.008373694494366646, |
| "loss/reg": 5.519668102264404, |
| "loss/twn": 0.0, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.00395, |
| "grad_norm": 24.75, |
| "grad_norm_var": 5591.000455729167, |
| "learning_rate": 0.0001, |
| "loss": 8.4122, |
| "loss/crossentropy": 2.7280266284942627, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.01191728375852108, |
| "loss/reg": 5.511092662811279, |
| "loss/twn": 0.0, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.003975, |
| "grad_norm": 12.9375, |
| "grad_norm_var": 5599.461393229167, |
| "learning_rate": 0.0001, |
| "loss": 8.2413, |
| "loss/crossentropy": 2.5042476654052734, |
| "loss/hidden": 0.21484375, |
| "loss/logits": 0.018616054207086563, |
| "loss/reg": 5.503547668457031, |
| "loss/twn": 0.0, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 12.875, |
| "grad_norm_var": 5609.470768229166, |
| "learning_rate": 0.0001, |
| "loss": 8.4032, |
| "loss/crossentropy": 2.762385606765747, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.010888181626796722, |
| "loss/reg": 5.496166706085205, |
| "loss/twn": 0.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.004025, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 5620.440738932291, |
| "learning_rate": 0.0001, |
| "loss": 8.0804, |
| "loss/crossentropy": 2.5779385566711426, |
| "loss/hidden": 0.0093994140625, |
| "loss/logits": 0.004039571154862642, |
| "loss/reg": 5.489066123962402, |
| "loss/twn": 0.0, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.00405, |
| "grad_norm": 334.0, |
| "grad_norm_var": 10996.149723307291, |
| "learning_rate": 0.0001, |
| "loss": 6.2302, |
| "loss/crossentropy": 0.5805911421775818, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.008694609627127647, |
| "loss/reg": 5.482710361480713, |
| "loss/twn": 0.0, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.004075, |
| "grad_norm": 131.0, |
| "grad_norm_var": 6997.830452473959, |
| "learning_rate": 0.0001, |
| "loss": 7.0482, |
| "loss/crossentropy": 1.3563833236694336, |
| "loss/hidden": 0.2060546875, |
| "loss/logits": 0.010020879097282887, |
| "loss/reg": 5.475753307342529, |
| "loss/twn": 0.0, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0041, |
| "grad_norm": 13.125, |
| "grad_norm_var": 6979.068994140625, |
| "learning_rate": 0.0001, |
| "loss": 8.1687, |
| "loss/crossentropy": 2.569106101989746, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.012607071548700333, |
| "loss/reg": 5.469805717468262, |
| "loss/twn": 0.0, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.004125, |
| "grad_norm": 14.1875, |
| "grad_norm_var": 6994.544010416666, |
| "learning_rate": 0.0001, |
| "loss": 7.2464, |
| "loss/crossentropy": 1.6330703496932983, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.007347936742007732, |
| "loss/reg": 5.463380336761475, |
| "loss/twn": 0.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.00415, |
| "grad_norm": 11.125, |
| "grad_norm_var": 6988.3890625, |
| "learning_rate": 0.0001, |
| "loss": 8.1928, |
| "loss/crossentropy": 2.5154733657836914, |
| "loss/hidden": 0.20703125, |
| "loss/logits": 0.013312840834259987, |
| "loss/reg": 5.45693826675415, |
| "loss/twn": 0.0, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.004175, |
| "grad_norm": 13.875, |
| "grad_norm_var": 6991.70859375, |
| "learning_rate": 0.0001, |
| "loss": 7.1058, |
| "loss/crossentropy": 1.5941680669784546, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.003117609303444624, |
| "loss/reg": 5.451424598693848, |
| "loss/twn": 0.0, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.0042, |
| "grad_norm": 25.5, |
| "grad_norm_var": 6941.1431640625, |
| "learning_rate": 0.0001, |
| "loss": 5.8236, |
| "loss/crossentropy": 0.22541135549545288, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.002781955059617758, |
| "loss/reg": 5.445990562438965, |
| "loss/twn": 0.0, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.004225, |
| "grad_norm": 9.9375, |
| "grad_norm_var": 7016.212353515625, |
| "learning_rate": 0.0001, |
| "loss": 8.0762, |
| "loss/crossentropy": 2.55377197265625, |
| "loss/hidden": 0.07666015625, |
| "loss/logits": 0.005485657136887312, |
| "loss/reg": 5.4402995109558105, |
| "loss/twn": 0.0, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.00425, |
| "grad_norm": 16.0, |
| "grad_norm_var": 7003.476302083333, |
| "learning_rate": 0.0001, |
| "loss": 8.043, |
| "loss/crossentropy": 2.316843032836914, |
| "loss/hidden": 0.26953125, |
| "loss/logits": 0.021316442638635635, |
| "loss/reg": 5.435269832611084, |
| "loss/twn": 0.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.004275, |
| "grad_norm": 11.125, |
| "grad_norm_var": 6921.814518229166, |
| "learning_rate": 0.0001, |
| "loss": 8.12, |
| "loss/crossentropy": 2.539064645767212, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.01590101048350334, |
| "loss/reg": 5.430272579193115, |
| "loss/twn": 0.0, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0043, |
| "grad_norm": 12.5625, |
| "grad_norm_var": 6933.832747395833, |
| "learning_rate": 0.0001, |
| "loss": 7.4549, |
| "loss/crossentropy": 1.8348197937011719, |
| "loss/hidden": 0.1826171875, |
| "loss/logits": 0.012229220010340214, |
| "loss/reg": 5.425241470336914, |
| "loss/twn": 0.0, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.004325, |
| "grad_norm": 14.1875, |
| "grad_norm_var": 6937.888020833333, |
| "learning_rate": 0.0001, |
| "loss": 6.9049, |
| "loss/crossentropy": 1.270473599433899, |
| "loss/hidden": 0.205078125, |
| "loss/logits": 0.009345902130007744, |
| "loss/reg": 5.4199981689453125, |
| "loss/twn": 0.0, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.00435, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 6982.626676432292, |
| "learning_rate": 0.0001, |
| "loss": 7.0624, |
| "loss/crossentropy": 1.425657033920288, |
| "loss/hidden": 0.2099609375, |
| "loss/logits": 0.011879321187734604, |
| "loss/reg": 5.4148969650268555, |
| "loss/twn": 0.0, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.004375, |
| "grad_norm": 8.875, |
| "grad_norm_var": 6998.784635416667, |
| "learning_rate": 0.0001, |
| "loss": 7.1708, |
| "loss/crossentropy": 1.4886845350265503, |
| "loss/hidden": 0.265625, |
| "loss/logits": 0.005756002385169268, |
| "loss/reg": 5.410771369934082, |
| "loss/twn": 0.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0044, |
| "grad_norm": 28.625, |
| "grad_norm_var": 6956.046354166667, |
| "learning_rate": 0.0001, |
| "loss": 6.1956, |
| "loss/crossentropy": 0.5712894201278687, |
| "loss/hidden": 0.2119140625, |
| "loss/logits": 0.006170031148940325, |
| "loss/reg": 5.406195163726807, |
| "loss/twn": 0.0, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.004425, |
| "grad_norm": 8.75, |
| "grad_norm_var": 6964.777067057292, |
| "learning_rate": 0.0001, |
| "loss": 6.737, |
| "loss/crossentropy": 1.1914383172988892, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.006925875786691904, |
| "loss/reg": 5.401881694793701, |
| "loss/twn": 0.0, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.00445, |
| "grad_norm": 44.5, |
| "grad_norm_var": 911.0606608072917, |
| "learning_rate": 0.0001, |
| "loss": 8.1517, |
| "loss/crossentropy": 2.5778274536132812, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.009458218701183796, |
| "loss/reg": 5.397425174713135, |
| "loss/twn": 0.0, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.004475, |
| "grad_norm": 16.625, |
| "grad_norm_var": 87.32237955729167, |
| "learning_rate": 0.0001, |
| "loss": 7.1888, |
| "loss/crossentropy": 1.5994395017623901, |
| "loss/hidden": 0.185546875, |
| "loss/logits": 0.01000029407441616, |
| "loss/reg": 5.393801212310791, |
| "loss/twn": 0.0, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0045, |
| "grad_norm": 12.1875, |
| "grad_norm_var": 87.76451822916667, |
| "learning_rate": 0.0001, |
| "loss": 8.0046, |
| "loss/crossentropy": 2.455324172973633, |
| "loss/hidden": 0.1474609375, |
| "loss/logits": 0.012162324041128159, |
| "loss/reg": 5.389675617218018, |
| "loss/twn": 0.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.004525, |
| "grad_norm": 124.0, |
| "grad_norm_var": 812.4984212239583, |
| "learning_rate": 0.0001, |
| "loss": 6.3516, |
| "loss/crossentropy": 0.8374608755111694, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.003441192675381899, |
| "loss/reg": 5.3856940269470215, |
| "loss/twn": 0.0, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.00455, |
| "grad_norm": 10.9375, |
| "grad_norm_var": 812.7981770833334, |
| "learning_rate": 0.0001, |
| "loss": 8.1559, |
| "loss/crossentropy": 2.670118570327759, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.005220318678766489, |
| "loss/reg": 5.381902694702148, |
| "loss/twn": 0.0, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.004575, |
| "grad_norm": 10.0, |
| "grad_norm_var": 818.4593098958334, |
| "learning_rate": 0.0001, |
| "loss": 7.6118, |
| "loss/crossentropy": 2.0260519981384277, |
| "loss/hidden": 0.1982421875, |
| "loss/logits": 0.009290603920817375, |
| "loss/reg": 5.378239631652832, |
| "loss/twn": 0.0, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0046, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 825.881884765625, |
| "learning_rate": 0.0001, |
| "loss": 8.0393, |
| "loss/crossentropy": 2.524176597595215, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.009289154782891273, |
| "loss/reg": 5.3750152587890625, |
| "loss/twn": 0.0, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.004625, |
| "grad_norm": 17.375, |
| "grad_norm_var": 817.4895182291667, |
| "learning_rate": 0.0001, |
| "loss": 8.2323, |
| "loss/crossentropy": 2.7599668502807617, |
| "loss/hidden": 0.09130859375, |
| "loss/logits": 0.009666713885962963, |
| "loss/reg": 5.371392250061035, |
| "loss/twn": 0.0, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.00465, |
| "grad_norm": 20.625, |
| "grad_norm_var": 814.9096354166667, |
| "learning_rate": 0.0001, |
| "loss": 7.9849, |
| "loss/crossentropy": 2.4787378311157227, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.011332664638757706, |
| "loss/reg": 5.367901802062988, |
| "loss/twn": 0.0, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.004675, |
| "grad_norm": 20.625, |
| "grad_norm_var": 805.9638020833333, |
| "learning_rate": 0.0001, |
| "loss": 8.1101, |
| "loss/crossentropy": 2.6036434173583984, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.009613338857889175, |
| "loss/reg": 5.364970684051514, |
| "loss/twn": 0.0, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0047, |
| "grad_norm": 14.5, |
| "grad_norm_var": 803.4415201822917, |
| "learning_rate": 0.0001, |
| "loss": 8.2869, |
| "loss/crossentropy": 2.7700679302215576, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.011366615071892738, |
| "loss/reg": 5.361906051635742, |
| "loss/twn": 0.0, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.004725, |
| "grad_norm": 30.75, |
| "grad_norm_var": 800.3403645833333, |
| "learning_rate": 0.0001, |
| "loss": 8.1568, |
| "loss/crossentropy": 2.633868455886841, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.006533905863761902, |
| "loss/reg": 5.358221054077148, |
| "loss/twn": 0.0, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.00475, |
| "grad_norm": 33.0, |
| "grad_norm_var": 790.4363118489583, |
| "learning_rate": 0.0001, |
| "loss": 7.5575, |
| "loss/crossentropy": 1.9494565725326538, |
| "loss/hidden": 0.2412109375, |
| "loss/logits": 0.011269403621554375, |
| "loss/reg": 5.3555755615234375, |
| "loss/twn": 0.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.004775, |
| "grad_norm": 9.625, |
| "grad_norm_var": 788.7796712239583, |
| "learning_rate": 0.0001, |
| "loss": 7.0502, |
| "loss/crossentropy": 1.607956051826477, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.0032915128394961357, |
| "loss/reg": 5.35251522064209, |
| "loss/twn": 0.0, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 100.5, |
| "grad_norm_var": 1138.346728515625, |
| "learning_rate": 0.0001, |
| "loss": 8.4206, |
| "loss/crossentropy": 2.9291961193084717, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.007461494766175747, |
| "loss/reg": 5.3501105308532715, |
| "loss/twn": 0.0, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.004825, |
| "grad_norm": 14.875, |
| "grad_norm_var": 1123.0661295572916, |
| "learning_rate": 0.0001, |
| "loss": 7.4399, |
| "loss/crossentropy": 1.9533724784851074, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.004339105449616909, |
| "loss/reg": 5.3473944664001465, |
| "loss/twn": 0.0, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.00485, |
| "grad_norm": 64.5, |
| "grad_norm_var": 1184.8265462239583, |
| "learning_rate": 0.0001, |
| "loss": 7.542, |
| "loss/crossentropy": 2.0671801567077637, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.012455419637262821, |
| "loss/reg": 5.345158100128174, |
| "loss/twn": 0.0, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.004875, |
| "grad_norm": 16.125, |
| "grad_norm_var": 1185.8648274739583, |
| "learning_rate": 0.0001, |
| "loss": 7.938, |
| "loss/crossentropy": 2.456360101699829, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.008208954706788063, |
| "loss/reg": 5.342526435852051, |
| "loss/twn": 0.0, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0049, |
| "grad_norm": 80.0, |
| "grad_norm_var": 1294.7356770833333, |
| "learning_rate": 0.0001, |
| "loss": 7.8581, |
| "loss/crossentropy": 2.487790107727051, |
| "loss/hidden": 0.025634765625, |
| "loss/logits": 0.0046631209552288055, |
| "loss/reg": 5.340009689331055, |
| "loss/twn": 0.0, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.004925, |
| "grad_norm": 14.75, |
| "grad_norm_var": 761.3453125, |
| "learning_rate": 0.0001, |
| "loss": 6.8181, |
| "loss/crossentropy": 1.304626703262329, |
| "loss/hidden": 0.171875, |
| "loss/logits": 0.004350706003606319, |
| "loss/reg": 5.337262153625488, |
| "loss/twn": 0.0, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.00495, |
| "grad_norm": 11.125, |
| "grad_norm_var": 760.887353515625, |
| "learning_rate": 0.0001, |
| "loss": 7.9797, |
| "loss/crossentropy": 2.4714841842651367, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.011602582409977913, |
| "loss/reg": 5.335472106933594, |
| "loss/twn": 0.0, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.004975, |
| "grad_norm": 95.0, |
| "grad_norm_var": 993.0878743489583, |
| "learning_rate": 0.0001, |
| "loss": 7.04, |
| "loss/crossentropy": 1.4688469171524048, |
| "loss/hidden": 0.2275390625, |
| "loss/logits": 0.0106576569378376, |
| "loss/reg": 5.332970142364502, |
| "loss/twn": 0.0, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 9.9375, |
| "grad_norm_var": 997.4878743489584, |
| "learning_rate": 0.0001, |
| "loss": 6.7972, |
| "loss/crossentropy": 1.3856897354125977, |
| "loss/hidden": 0.080078125, |
| "loss/logits": 0.001242777332663536, |
| "loss/reg": 5.330203056335449, |
| "loss/twn": 0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.005025, |
| "grad_norm": 11.875, |
| "grad_norm_var": 1011.9969889322916, |
| "learning_rate": 0.0001, |
| "loss": 7.5639, |
| "loss/crossentropy": 2.073434591293335, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.013964459300041199, |
| "loss/reg": 5.328036308288574, |
| "loss/twn": 0.0, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.00505, |
| "grad_norm": 18.25, |
| "grad_norm_var": 1016.660400390625, |
| "learning_rate": 0.0001, |
| "loss": 8.68, |
| "loss/crossentropy": 3.2589170932769775, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.008732986636459827, |
| "loss/reg": 5.325946807861328, |
| "loss/twn": 0.0, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.005075, |
| "grad_norm": 16.125, |
| "grad_norm_var": 1026.004931640625, |
| "learning_rate": 0.0001, |
| "loss": 8.6078, |
| "loss/crossentropy": 3.1099424362182617, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.018260516226291656, |
| "loss/reg": 5.324294090270996, |
| "loss/twn": 0.0, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.0051, |
| "grad_norm": 46.75, |
| "grad_norm_var": 1007.981884765625, |
| "learning_rate": 0.0001, |
| "loss": 7.5539, |
| "loss/crossentropy": 2.091862678527832, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.009298876859247684, |
| "loss/reg": 5.321921348571777, |
| "loss/twn": 0.0, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.005125, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 1047.91484375, |
| "learning_rate": 0.0001, |
| "loss": 8.124, |
| "loss/crossentropy": 2.732879400253296, |
| "loss/hidden": 0.064453125, |
| "loss/logits": 0.0066910069435834885, |
| "loss/reg": 5.320003986358643, |
| "loss/twn": 0.0, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.00515, |
| "grad_norm": 11.0625, |
| "grad_norm_var": 1082.517822265625, |
| "learning_rate": 0.0001, |
| "loss": 6.9944, |
| "loss/crossentropy": 1.5260496139526367, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.007849331945180893, |
| "loss/reg": 5.317881107330322, |
| "loss/twn": 0.0, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.005175, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 1085.5166015625, |
| "learning_rate": 0.0001, |
| "loss": 6.7833, |
| "loss/crossentropy": 1.3956589698791504, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.004923268221318722, |
| "loss/reg": 5.315812587738037, |
| "loss/twn": 0.0, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0052, |
| "grad_norm": 10.0625, |
| "grad_norm_var": 784.176025390625, |
| "learning_rate": 0.0001, |
| "loss": 6.6116, |
| "loss/crossentropy": 1.040010690689087, |
| "loss/hidden": 0.2451171875, |
| "loss/logits": 0.011602293699979782, |
| "loss/reg": 5.3148298263549805, |
| "loss/twn": 0.0, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.005225, |
| "grad_norm": 21.875, |
| "grad_norm_var": 775.4880045572917, |
| "learning_rate": 0.0001, |
| "loss": 8.5451, |
| "loss/crossentropy": 3.072871685028076, |
| "loss/hidden": 0.1474609375, |
| "loss/logits": 0.011389853432774544, |
| "loss/reg": 5.313349723815918, |
| "loss/twn": 0.0, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.00525, |
| "grad_norm": 16.25, |
| "grad_norm_var": 685.5469889322917, |
| "learning_rate": 0.0001, |
| "loss": 6.872, |
| "loss/crossentropy": 1.4240162372589111, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.0054016802459955215, |
| "loss/reg": 5.311694145202637, |
| "loss/twn": 0.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.005275, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 693.5171223958333, |
| "learning_rate": 0.0001, |
| "loss": 6.665, |
| "loss/crossentropy": 1.2265129089355469, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.0037709574680775404, |
| "loss/reg": 5.31024169921875, |
| "loss/twn": 0.0, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.0053, |
| "grad_norm": 11.4375, |
| "grad_norm_var": 480.45558268229166, |
| "learning_rate": 0.0001, |
| "loss": 8.1829, |
| "loss/crossentropy": 2.7488791942596436, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.007810299750417471, |
| "loss/reg": 5.3085198402404785, |
| "loss/twn": 0.0, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.005325, |
| "grad_norm": 13.5, |
| "grad_norm_var": 481.47316080729166, |
| "learning_rate": 0.0001, |
| "loss": 8.1725, |
| "loss/crossentropy": 2.769019603729248, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.010608029551804066, |
| "loss/reg": 5.306417942047119, |
| "loss/twn": 0.0, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.00535, |
| "grad_norm": 10.875, |
| "grad_norm_var": 481.77928059895834, |
| "learning_rate": 0.0001, |
| "loss": 7.1392, |
| "loss/crossentropy": 1.7968316078186035, |
| "loss/hidden": 0.03271484375, |
| "loss/logits": 0.004501561634242535, |
| "loss/reg": 5.305141925811768, |
| "loss/twn": 0.0, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.005375, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 84.65208333333334, |
| "learning_rate": 0.0001, |
| "loss": 7.577, |
| "loss/crossentropy": 2.1686487197875977, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.006175590679049492, |
| "loss/reg": 5.303523540496826, |
| "loss/twn": 0.0, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0054, |
| "grad_norm": 12.0625, |
| "grad_norm_var": 83.51764322916667, |
| "learning_rate": 0.0001, |
| "loss": 8.042, |
| "loss/crossentropy": 2.737717628479004, |
| "loss/hidden": 6.16908073425293e-06, |
| "loss/logits": 0.0018352700863033533, |
| "loss/reg": 5.302443027496338, |
| "loss/twn": 0.0, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.005425, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 83.17316080729167, |
| "learning_rate": 0.0001, |
| "loss": 8.3658, |
| "loss/crossentropy": 2.9845688343048096, |
| "loss/hidden": 0.07421875, |
| "loss/logits": 0.005686669610440731, |
| "loss/reg": 5.301285743713379, |
| "loss/twn": 0.0, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.00545, |
| "grad_norm": 13.25, |
| "grad_norm_var": 82.654931640625, |
| "learning_rate": 0.0001, |
| "loss": 8.1609, |
| "loss/crossentropy": 2.7524123191833496, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.007715051528066397, |
| "loss/reg": 5.299709320068359, |
| "loss/twn": 0.0, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.005475, |
| "grad_norm": 23.375, |
| "grad_norm_var": 87.20506184895834, |
| "learning_rate": 0.0001, |
| "loss": 7.156, |
| "loss/crossentropy": 1.624443531036377, |
| "loss/hidden": 0.2216796875, |
| "loss/logits": 0.011536870151758194, |
| "loss/reg": 5.298386573791504, |
| "loss/twn": 0.0, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0055, |
| "grad_norm": 18.625, |
| "grad_norm_var": 18.591780598958334, |
| "learning_rate": 0.0001, |
| "loss": 8.0654, |
| "loss/crossentropy": 2.5879249572753906, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.017003701999783516, |
| "loss/reg": 5.297426223754883, |
| "loss/twn": 0.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.005525, |
| "grad_norm": 43.25, |
| "grad_norm_var": 72.34680989583333, |
| "learning_rate": 0.0001, |
| "loss": 7.8801, |
| "loss/crossentropy": 2.4491031169891357, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.010065239854156971, |
| "loss/reg": 5.295965194702148, |
| "loss/twn": 0.0, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.00555, |
| "grad_norm": 9.5, |
| "grad_norm_var": 73.438525390625, |
| "learning_rate": 0.0001, |
| "loss": 7.2681, |
| "loss/crossentropy": 1.8589441776275635, |
| "loss/hidden": 0.10595703125, |
| "loss/logits": 0.0087303277105093, |
| "loss/reg": 5.2944464683532715, |
| "loss/twn": 0.0, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.005575, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 72.133447265625, |
| "learning_rate": 0.0001, |
| "loss": 6.9474, |
| "loss/crossentropy": 1.452248215675354, |
| "loss/hidden": 0.1962890625, |
| "loss/logits": 0.005369896534830332, |
| "loss/reg": 5.293449401855469, |
| "loss/twn": 0.0, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0056, |
| "grad_norm": 15.25, |
| "grad_norm_var": 70.00305989583333, |
| "learning_rate": 0.0001, |
| "loss": 8.1935, |
| "loss/crossentropy": 2.740863800048828, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.01583397574722767, |
| "loss/reg": 5.292267799377441, |
| "loss/twn": 0.0, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.005625, |
| "grad_norm": 1056.0, |
| "grad_norm_var": 67732.47864583334, |
| "learning_rate": 0.0001, |
| "loss": 7.8539, |
| "loss/crossentropy": 2.403062105178833, |
| "loss/hidden": 0.1474609375, |
| "loss/logits": 0.012353872880339622, |
| "loss/reg": 5.29097318649292, |
| "loss/twn": 0.0, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.00565, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 67785.57133789062, |
| "learning_rate": 0.0001, |
| "loss": 7.194, |
| "loss/crossentropy": 1.7008212804794312, |
| "loss/hidden": 0.189453125, |
| "loss/logits": 0.013665840029716492, |
| "loss/reg": 5.290075778961182, |
| "loss/twn": 0.0, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.005675, |
| "grad_norm": 12.5, |
| "grad_norm_var": 67770.14609375, |
| "learning_rate": 0.0001, |
| "loss": 6.5587, |
| "loss/crossentropy": 1.045196771621704, |
| "loss/hidden": 0.2080078125, |
| "loss/logits": 0.016789620742201805, |
| "loss/reg": 5.288687705993652, |
| "loss/twn": 0.0, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0057, |
| "grad_norm": 27.625, |
| "grad_norm_var": 67637.96925455729, |
| "learning_rate": 0.0001, |
| "loss": 7.1449, |
| "loss/crossentropy": 1.6837458610534668, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.015014993026852608, |
| "loss/reg": 5.2879252433776855, |
| "loss/twn": 0.0, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.005725, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 67663.88014322917, |
| "learning_rate": 0.0001, |
| "loss": 7.0729, |
| "loss/crossentropy": 1.7119382619857788, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.007287868298590183, |
| "loss/reg": 5.286799907684326, |
| "loss/twn": 0.0, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.00575, |
| "grad_norm": 53.0, |
| "grad_norm_var": 67380.34817708333, |
| "learning_rate": 0.0001, |
| "loss": 7.5607, |
| "loss/crossentropy": 2.0949935913085938, |
| "loss/hidden": 0.169921875, |
| "loss/logits": 0.009483925998210907, |
| "loss/reg": 5.286267280578613, |
| "loss/twn": 0.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.005775, |
| "grad_norm": 19.375, |
| "grad_norm_var": 67305.34086914062, |
| "learning_rate": 0.0001, |
| "loss": 7.9328, |
| "loss/crossentropy": 2.538356304168701, |
| "loss/hidden": 0.10498046875, |
| "loss/logits": 0.005003707949072123, |
| "loss/reg": 5.284492492675781, |
| "loss/twn": 0.0, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0058, |
| "grad_norm": 24.5, |
| "grad_norm_var": 67195.30462239584, |
| "learning_rate": 0.0001, |
| "loss": 8.2162, |
| "loss/crossentropy": 2.749314308166504, |
| "loss/hidden": 0.169921875, |
| "loss/logits": 0.012603437528014183, |
| "loss/reg": 5.2843194007873535, |
| "loss/twn": 0.0, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.005825, |
| "grad_norm": 14.6875, |
| "grad_norm_var": 67177.47161458334, |
| "learning_rate": 0.0001, |
| "loss": 7.1017, |
| "loss/crossentropy": 1.6476311683654785, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.01403855625540018, |
| "loss/reg": 5.282772064208984, |
| "loss/twn": 0.0, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00585, |
| "grad_norm": 10.875, |
| "grad_norm_var": 67200.58951822917, |
| "learning_rate": 0.0001, |
| "loss": 7.2783, |
| "loss/crossentropy": 1.8889567852020264, |
| "loss/hidden": 0.09814453125, |
| "loss/logits": 0.009974194690585136, |
| "loss/reg": 5.281259059906006, |
| "loss/twn": 0.0, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.005875, |
| "grad_norm": 8.4375, |
| "grad_norm_var": 67337.25597330728, |
| "learning_rate": 0.0001, |
| "loss": 7.2088, |
| "loss/crossentropy": 1.7615716457366943, |
| "loss/hidden": 0.16015625, |
| "loss/logits": 0.0061751967296004295, |
| "loss/reg": 5.280921459197998, |
| "loss/twn": 0.0, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.0059, |
| "grad_norm": 9.4375, |
| "grad_norm_var": 67422.68776041667, |
| "learning_rate": 0.0001, |
| "loss": 7.3424, |
| "loss/crossentropy": 1.9424008131027222, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.004247123841196299, |
| "loss/reg": 5.280468940734863, |
| "loss/twn": 0.0, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.005925, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 67667.18865559896, |
| "learning_rate": 0.0001, |
| "loss": 8.127, |
| "loss/crossentropy": 2.7163503170013428, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.008166075684130192, |
| "loss/reg": 5.279946804046631, |
| "loss/twn": 0.0, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.00595, |
| "grad_norm": 12.5, |
| "grad_norm_var": 67638.98084309897, |
| "learning_rate": 0.0001, |
| "loss": 7.0844, |
| "loss/crossentropy": 1.5295031070709229, |
| "loss/hidden": 0.26953125, |
| "loss/logits": 0.006746275350451469, |
| "loss/reg": 5.278590679168701, |
| "loss/twn": 0.0, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.005975, |
| "grad_norm": 92.5, |
| "grad_norm_var": 67279.8171875, |
| "learning_rate": 0.0001, |
| "loss": 5.8036, |
| "loss/crossentropy": 0.4050528109073639, |
| "loss/hidden": 0.1142578125, |
| "loss/logits": 0.005355454981327057, |
| "loss/reg": 5.278897762298584, |
| "loss/twn": 0.0, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 28.875, |
| "grad_norm_var": 67161.52805989583, |
| "learning_rate": 0.0001, |
| "loss": 8.1647, |
| "loss/crossentropy": 2.643662929534912, |
| "loss/hidden": 0.2294921875, |
| "loss/logits": 0.013766671530902386, |
| "loss/reg": 5.277756214141846, |
| "loss/twn": 0.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.006025, |
| "grad_norm": 13.1875, |
| "grad_norm_var": 479.914697265625, |
| "learning_rate": 0.0001, |
| "loss": 6.9306, |
| "loss/crossentropy": 1.5002285242080688, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.009923199191689491, |
| "loss/reg": 5.276930809020996, |
| "loss/twn": 0.0, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.00605, |
| "grad_norm": 11.9375, |
| "grad_norm_var": 477.45519205729164, |
| "learning_rate": 0.0001, |
| "loss": 8.1267, |
| "loss/crossentropy": 2.7130773067474365, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.00791969709098339, |
| "loss/reg": 5.276750087738037, |
| "loss/twn": 0.0, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.006075, |
| "grad_norm": 10.75, |
| "grad_norm_var": 479.98631184895834, |
| "learning_rate": 0.0001, |
| "loss": 7.1488, |
| "loss/crossentropy": 1.727049469947815, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.00825846754014492, |
| "loss/reg": 5.2758002281188965, |
| "loss/twn": 0.0, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.0061, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 480.80833333333334, |
| "learning_rate": 0.0001, |
| "loss": 7.921, |
| "loss/crossentropy": 2.5109403133392334, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.012591829523444176, |
| "loss/reg": 5.274876594543457, |
| "loss/twn": 0.0, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.006125, |
| "grad_norm": 14.6875, |
| "grad_norm_var": 475.9583333333333, |
| "learning_rate": 0.0001, |
| "loss": 7.9285, |
| "loss/crossentropy": 2.4956674575805664, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.012429025955498219, |
| "loss/reg": 5.2739410400390625, |
| "loss/twn": 0.0, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.00615, |
| "grad_norm": 11.5625, |
| "grad_norm_var": 411.4820149739583, |
| "learning_rate": 0.0001, |
| "loss": 7.9078, |
| "loss/crossentropy": 2.5263020992279053, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.007129446603357792, |
| "loss/reg": 5.273260116577148, |
| "loss/twn": 0.0, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.006175, |
| "grad_norm": 17.5, |
| "grad_norm_var": 411.6870930989583, |
| "learning_rate": 0.0001, |
| "loss": 7.0028, |
| "loss/crossentropy": 1.6373541355133057, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.0059446613304317, |
| "loss/reg": 5.273036479949951, |
| "loss/twn": 0.0, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0062, |
| "grad_norm": 11.375, |
| "grad_norm_var": 413.1773274739583, |
| "learning_rate": 0.0001, |
| "loss": 7.5298, |
| "loss/crossentropy": 2.1802141666412354, |
| "loss/hidden": 0.07568359375, |
| "loss/logits": 0.0017865689005702734, |
| "loss/reg": 5.272162914276123, |
| "loss/twn": 0.0, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.006225, |
| "grad_norm": 8.625, |
| "grad_norm_var": 418.4583333333333, |
| "learning_rate": 0.0001, |
| "loss": 6.7983, |
| "loss/crossentropy": 1.4629161357879639, |
| "loss/hidden": 0.06201171875, |
| "loss/logits": 0.0020404397509992123, |
| "loss/reg": 5.271305084228516, |
| "loss/twn": 0.0, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.00625, |
| "grad_norm": 10.625, |
| "grad_norm_var": 418.6997395833333, |
| "learning_rate": 0.0001, |
| "loss": 6.9642, |
| "loss/crossentropy": 1.5569666624069214, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.011692131869494915, |
| "loss/reg": 5.27101469039917, |
| "loss/twn": 0.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.006275, |
| "grad_norm": 174.0, |
| "grad_norm_var": 1921.1363118489583, |
| "learning_rate": 0.0001, |
| "loss": 6.8711, |
| "loss/crossentropy": 1.442520260810852, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.009676285088062286, |
| "loss/reg": 5.270504951477051, |
| "loss/twn": 0.0, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.0063, |
| "grad_norm": 7.75, |
| "grad_norm_var": 1925.5655598958333, |
| "learning_rate": 0.0001, |
| "loss": 6.6066, |
| "loss/crossentropy": 1.229660153388977, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.006052733864635229, |
| "loss/reg": 5.269782543182373, |
| "loss/twn": 0.0, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.006325, |
| "grad_norm": 10.75, |
| "grad_norm_var": 1924.5325358072917, |
| "learning_rate": 0.0001, |
| "loss": 7.977, |
| "loss/crossentropy": 2.645150899887085, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.005253541748970747, |
| "loss/reg": 5.269515514373779, |
| "loss/twn": 0.0, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.00635, |
| "grad_norm": 15.3125, |
| "grad_norm_var": 1919.1192057291667, |
| "learning_rate": 0.0001, |
| "loss": 8.4118, |
| "loss/crossentropy": 2.997927188873291, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.010878477245569229, |
| "loss/reg": 5.269217014312744, |
| "loss/twn": 0.0, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.006375, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 1638.7606608072917, |
| "learning_rate": 0.0001, |
| "loss": 7.96, |
| "loss/crossentropy": 2.5343592166900635, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.00772454310208559, |
| "loss/reg": 5.268545627593994, |
| "loss/twn": 0.0, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 11.125, |
| "grad_norm_var": 1645.2782389322917, |
| "learning_rate": 0.0001, |
| "loss": 8.081, |
| "loss/crossentropy": 2.7416863441467285, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.004296740982681513, |
| "loss/reg": 5.268085479736328, |
| "loss/twn": 0.0, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.006425, |
| "grad_norm": 83.5, |
| "grad_norm_var": 1869.7838541666667, |
| "learning_rate": 0.0001, |
| "loss": 7.679, |
| "loss/crossentropy": 2.278621196746826, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.01035086065530777, |
| "loss/reg": 5.267502307891846, |
| "loss/twn": 0.0, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.00645, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 1871.0296223958333, |
| "learning_rate": 0.0001, |
| "loss": 7.0164, |
| "loss/crossentropy": 1.6138280630111694, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.01041114330291748, |
| "loss/reg": 5.267125606536865, |
| "loss/twn": 0.0, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.006475, |
| "grad_norm": 9.375, |
| "grad_norm_var": 1874.0453125, |
| "learning_rate": 0.0001, |
| "loss": 8.1242, |
| "loss/crossentropy": 2.724980354309082, |
| "loss/hidden": 0.1220703125, |
| "loss/logits": 0.010582932271063328, |
| "loss/reg": 5.266530990600586, |
| "loss/twn": 0.0, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0065, |
| "grad_norm": 7.09375, |
| "grad_norm_var": 1890.6687133789062, |
| "learning_rate": 0.0001, |
| "loss": 7.1719, |
| "loss/crossentropy": 1.8638581037521362, |
| "loss/hidden": 0.0400390625, |
| "loss/logits": 0.002111276611685753, |
| "loss/reg": 5.265857696533203, |
| "loss/twn": 0.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.006525, |
| "grad_norm": 12.9375, |
| "grad_norm_var": 1893.4873982747397, |
| "learning_rate": 0.0001, |
| "loss": 7.2962, |
| "loss/crossentropy": 1.825720191001892, |
| "loss/hidden": 0.189453125, |
| "loss/logits": 0.015128025785088539, |
| "loss/reg": 5.26585054397583, |
| "loss/twn": 0.0, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.00655, |
| "grad_norm": 116.5, |
| "grad_norm_var": 2381.9933227539063, |
| "learning_rate": 0.0001, |
| "loss": 5.8142, |
| "loss/crossentropy": 0.3675730228424072, |
| "loss/hidden": 0.173828125, |
| "loss/logits": 0.0077675022184848785, |
| "loss/reg": 5.265065670013428, |
| "loss/twn": 0.0, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.006575, |
| "grad_norm": 10.875, |
| "grad_norm_var": 2397.895048014323, |
| "learning_rate": 0.0001, |
| "loss": 7.9558, |
| "loss/crossentropy": 2.6135733127593994, |
| "loss/hidden": 0.07177734375, |
| "loss/logits": 0.005567469634115696, |
| "loss/reg": 5.264838695526123, |
| "loss/twn": 0.0, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0066, |
| "grad_norm": 26.75, |
| "grad_norm_var": 2370.424247233073, |
| "learning_rate": 0.0001, |
| "loss": 7.1463, |
| "loss/crossentropy": 1.744168996810913, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.006982623599469662, |
| "loss/reg": 5.264274597167969, |
| "loss/twn": 0.0, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.006625, |
| "grad_norm": 12.9375, |
| "grad_norm_var": 2357.603544108073, |
| "learning_rate": 0.0001, |
| "loss": 6.835, |
| "loss/crossentropy": 1.3916579484939575, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.011533312499523163, |
| "loss/reg": 5.263826847076416, |
| "loss/twn": 0.0, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.00665, |
| "grad_norm": 86.0, |
| "grad_norm_var": 2485.682157389323, |
| "learning_rate": 0.0001, |
| "loss": 8.1388, |
| "loss/crossentropy": 2.743070363998413, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.012774601578712463, |
| "loss/reg": 5.263358116149902, |
| "loss/twn": 0.0, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.006675, |
| "grad_norm": 20.125, |
| "grad_norm_var": 1173.6974243164063, |
| "learning_rate": 0.0001, |
| "loss": 7.547, |
| "loss/crossentropy": 2.15224289894104, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.004906866233795881, |
| "loss/reg": 5.262901782989502, |
| "loss/twn": 0.0, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.0067, |
| "grad_norm": 8.25, |
| "grad_norm_var": 1172.3426066080729, |
| "learning_rate": 0.0001, |
| "loss": 8.0178, |
| "loss/crossentropy": 2.7544662952423096, |
| "loss/hidden": 2.9206275939941406e-06, |
| "loss/logits": 0.0010420402977615595, |
| "loss/reg": 5.262295246124268, |
| "loss/twn": 0.0, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.006725, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 1172.7845011393229, |
| "learning_rate": 0.0001, |
| "loss": 7.2573, |
| "loss/crossentropy": 1.708762288093567, |
| "loss/hidden": 0.27734375, |
| "loss/logits": 0.008786465972661972, |
| "loss/reg": 5.262362480163574, |
| "loss/twn": 0.0, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.00675, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 1187.023075358073, |
| "learning_rate": 0.0001, |
| "loss": 7.3198, |
| "loss/crossentropy": 1.9545139074325562, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.0075501929968595505, |
| "loss/reg": 5.261580467224121, |
| "loss/twn": 0.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.006775, |
| "grad_norm": 27.75, |
| "grad_norm_var": 1165.726688639323, |
| "learning_rate": 0.0001, |
| "loss": 7.9432, |
| "loss/crossentropy": 2.5177128314971924, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.008861662819981575, |
| "loss/reg": 5.261343955993652, |
| "loss/twn": 0.0, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.0068, |
| "grad_norm": 12.75, |
| "grad_norm_var": 1162.0217732747396, |
| "learning_rate": 0.0001, |
| "loss": 7.2029, |
| "loss/crossentropy": 1.8247922658920288, |
| "loss/hidden": 0.1103515625, |
| "loss/logits": 0.00664330180734396, |
| "loss/reg": 5.261136054992676, |
| "loss/twn": 0.0, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.006825, |
| "grad_norm": 24.625, |
| "grad_norm_var": 951.5283162434896, |
| "learning_rate": 0.0001, |
| "loss": 6.3479, |
| "loss/crossentropy": 0.8956549167633057, |
| "loss/hidden": 0.1845703125, |
| "loss/logits": 0.00696325721219182, |
| "loss/reg": 5.2607598304748535, |
| "loss/twn": 0.0, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.00685, |
| "grad_norm": 15.3125, |
| "grad_norm_var": 945.0106079101563, |
| "learning_rate": 0.0001, |
| "loss": 7.1854, |
| "loss/crossentropy": 1.796543002128601, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.005927722901105881, |
| "loss/reg": 5.26037073135376, |
| "loss/twn": 0.0, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.006875, |
| "grad_norm": 18.625, |
| "grad_norm_var": 930.2756469726562, |
| "learning_rate": 0.0001, |
| "loss": 8.1342, |
| "loss/crossentropy": 2.680659770965576, |
| "loss/hidden": 0.181640625, |
| "loss/logits": 0.011690370738506317, |
| "loss/reg": 5.260243892669678, |
| "loss/twn": 0.0, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0069, |
| "grad_norm": 14.4375, |
| "grad_norm_var": 914.9025390625, |
| "learning_rate": 0.0001, |
| "loss": 8.5123, |
| "loss/crossentropy": 3.0964365005493164, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.010575573891401291, |
| "loss/reg": 5.259780406951904, |
| "loss/twn": 0.0, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.006925, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 915.3650390625, |
| "learning_rate": 0.0001, |
| "loss": 8.2582, |
| "loss/crossentropy": 2.825162172317505, |
| "loss/hidden": 0.15625, |
| "loss/logits": 0.01738206297159195, |
| "loss/reg": 5.259433746337891, |
| "loss/twn": 0.0, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.00695, |
| "grad_norm": 21.625, |
| "grad_norm_var": 341.7171875, |
| "learning_rate": 0.0001, |
| "loss": 7.1032, |
| "loss/crossentropy": 1.7048217058181763, |
| "loss/hidden": 0.1279296875, |
| "loss/logits": 0.011121492832899094, |
| "loss/reg": 5.259332656860352, |
| "loss/twn": 0.0, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.006975, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 343.92706705729165, |
| "learning_rate": 0.0001, |
| "loss": 7.4175, |
| "loss/crossentropy": 1.9668402671813965, |
| "loss/hidden": 0.1826171875, |
| "loss/logits": 0.009085997007787228, |
| "loss/reg": 5.258953094482422, |
| "loss/twn": 0.0, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 14.3125, |
| "grad_norm_var": 343.48333333333335, |
| "learning_rate": 0.0001, |
| "loss": 7.5905, |
| "loss/crossentropy": 2.2082672119140625, |
| "loss/hidden": 0.1181640625, |
| "loss/logits": 0.005382226780056953, |
| "loss/reg": 5.258672714233398, |
| "loss/twn": 0.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.007025, |
| "grad_norm": 17.75, |
| "grad_norm_var": 340.4792805989583, |
| "learning_rate": 0.0001, |
| "loss": 6.2281, |
| "loss/crossentropy": 0.711824893951416, |
| "loss/hidden": 0.251953125, |
| "loss/logits": 0.005929501727223396, |
| "loss/reg": 5.258391380310059, |
| "loss/twn": 0.0, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.00705, |
| "grad_norm": 48.5, |
| "grad_norm_var": 99.24881184895834, |
| "learning_rate": 0.0001, |
| "loss": 6.6114, |
| "loss/crossentropy": 1.0958250761032104, |
| "loss/hidden": 0.25, |
| "loss/logits": 0.007801922038197517, |
| "loss/reg": 5.257816314697266, |
| "loss/twn": 0.0, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.007075, |
| "grad_norm": 16.0, |
| "grad_norm_var": 99.05115559895833, |
| "learning_rate": 0.0001, |
| "loss": 6.7629, |
| "loss/crossentropy": 1.2992652654647827, |
| "loss/hidden": 0.1953125, |
| "loss/logits": 0.01093169767409563, |
| "loss/reg": 5.257413864135742, |
| "loss/twn": 0.0, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.0071, |
| "grad_norm": 9.5, |
| "grad_norm_var": 97.594775390625, |
| "learning_rate": 0.0001, |
| "loss": 7.7528, |
| "loss/crossentropy": 2.3623929023742676, |
| "loss/hidden": 0.1279296875, |
| "loss/logits": 0.00520264683291316, |
| "loss/reg": 5.257322788238525, |
| "loss/twn": 0.0, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.007125, |
| "grad_norm": 16.75, |
| "grad_norm_var": 94.1384765625, |
| "learning_rate": 0.0001, |
| "loss": 7.282, |
| "loss/crossentropy": 1.8802975416183472, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.008890845812857151, |
| "loss/reg": 5.2571001052856445, |
| "loss/twn": 0.0, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.00715, |
| "grad_norm": 9.875, |
| "grad_norm_var": 92.745947265625, |
| "learning_rate": 0.0001, |
| "loss": 7.0654, |
| "loss/crossentropy": 1.632087230682373, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.009606104344129562, |
| "loss/reg": 5.256716728210449, |
| "loss/twn": 0.0, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.007175, |
| "grad_norm": 29.25, |
| "grad_norm_var": 94.813916015625, |
| "learning_rate": 0.0001, |
| "loss": 7.8745, |
| "loss/crossentropy": 2.4687438011169434, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.00846975389868021, |
| "loss/reg": 5.256651878356934, |
| "loss/twn": 0.0, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.0072, |
| "grad_norm": 28.25, |
| "grad_norm_var": 98.55167643229167, |
| "learning_rate": 0.0001, |
| "loss": 8.246, |
| "loss/crossentropy": 2.8182952404022217, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.013284040614962578, |
| "loss/reg": 5.256263256072998, |
| "loss/twn": 0.0, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.007225, |
| "grad_norm": 10.625, |
| "grad_norm_var": 100.62980143229167, |
| "learning_rate": 0.0001, |
| "loss": 8.0743, |
| "loss/crossentropy": 2.664623737335205, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.009994969703257084, |
| "loss/reg": 5.256109237670898, |
| "loss/twn": 0.0, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.00725, |
| "grad_norm": 134.0, |
| "grad_norm_var": 933.7604166666666, |
| "learning_rate": 0.0001, |
| "loss": 8.0955, |
| "loss/crossentropy": 2.6748669147491455, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.006934846751391888, |
| "loss/reg": 5.25545597076416, |
| "loss/twn": 0.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.007275, |
| "grad_norm": 11.0625, |
| "grad_norm_var": 944.487744140625, |
| "learning_rate": 0.0001, |
| "loss": 7.7484, |
| "loss/crossentropy": 2.490112781524658, |
| "loss/hidden": 9.655952453613281e-06, |
| "loss/logits": 0.0029325929936021566, |
| "loss/reg": 5.255389213562012, |
| "loss/twn": 0.0, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.0073, |
| "grad_norm": 9.375, |
| "grad_norm_var": 953.3853515625, |
| "learning_rate": 0.0001, |
| "loss": 7.3465, |
| "loss/crossentropy": 1.976689338684082, |
| "loss/hidden": 0.10791015625, |
| "loss/logits": 0.006814016494899988, |
| "loss/reg": 5.255037307739258, |
| "loss/twn": 0.0, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.007325, |
| "grad_norm": 20.125, |
| "grad_norm_var": 944.7024576822917, |
| "learning_rate": 0.0001, |
| "loss": 7.0865, |
| "loss/crossentropy": 1.7022920846939087, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.004657519515603781, |
| "loss/reg": 5.25502347946167, |
| "loss/twn": 0.0, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.00735, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 957.44375, |
| "learning_rate": 0.0001, |
| "loss": 5.8776, |
| "loss/crossentropy": 0.5808318853378296, |
| "loss/hidden": 0.0400390625, |
| "loss/logits": 0.001980610191822052, |
| "loss/reg": 5.254761219024658, |
| "loss/twn": 0.0, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.007375, |
| "grad_norm": 10.0, |
| "grad_norm_var": 956.0610514322917, |
| "learning_rate": 0.0001, |
| "loss": 7.66, |
| "loss/crossentropy": 2.271899700164795, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.008504629135131836, |
| "loss/reg": 5.254581451416016, |
| "loss/twn": 0.0, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.0074, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 965.8755045572917, |
| "learning_rate": 0.0001, |
| "loss": 6.0481, |
| "loss/crossentropy": 0.639009952545166, |
| "loss/hidden": 0.1533203125, |
| "loss/logits": 0.0019615632481873035, |
| "loss/reg": 5.253849983215332, |
| "loss/twn": 0.0, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.007425, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 974.3947916666667, |
| "learning_rate": 0.0001, |
| "loss": 7.7422, |
| "loss/crossentropy": 2.3546876907348633, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.00921722687780857, |
| "loss/reg": 5.253818035125732, |
| "loss/twn": 0.0, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.00745, |
| "grad_norm": 17.625, |
| "grad_norm_var": 933.1155598958334, |
| "learning_rate": 0.0001, |
| "loss": 8.3911, |
| "loss/crossentropy": 3.0292787551879883, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.00718055572360754, |
| "loss/reg": 5.253612518310547, |
| "loss/twn": 0.0, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.007475, |
| "grad_norm": 440.0, |
| "grad_norm_var": 11825.940559895833, |
| "learning_rate": 0.0001, |
| "loss": 7.0072, |
| "loss/crossentropy": 1.5630390644073486, |
| "loss/hidden": 0.18359375, |
| "loss/logits": 0.007107208017259836, |
| "loss/reg": 5.253422737121582, |
| "loss/twn": 0.0, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 10.875, |
| "grad_norm_var": 11818.895833333334, |
| "learning_rate": 0.0001, |
| "loss": 6.9788, |
| "loss/crossentropy": 1.483949065208435, |
| "loss/hidden": 0.2314453125, |
| "loss/logits": 0.010475615039467812, |
| "loss/reg": 5.25289249420166, |
| "loss/twn": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.007525, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 11851.417171223959, |
| "learning_rate": 0.0001, |
| "loss": 7.5546, |
| "loss/crossentropy": 2.208834648132324, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006687905173748732, |
| "loss/reg": 5.2526960372924805, |
| "loss/twn": 0.0, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.00755, |
| "grad_norm": 11.25, |
| "grad_norm_var": 11844.504931640626, |
| "learning_rate": 0.0001, |
| "loss": 7.1337, |
| "loss/crossentropy": 1.752866506576538, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.008473502472043037, |
| "loss/reg": 5.2522807121276855, |
| "loss/twn": 0.0, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.007575, |
| "grad_norm": 20.5, |
| "grad_norm_var": 11871.525113932292, |
| "learning_rate": 0.0001, |
| "loss": 5.9332, |
| "loss/crossentropy": 0.6589277386665344, |
| "loss/hidden": 0.02099609375, |
| "loss/logits": 0.0013422563206404448, |
| "loss/reg": 5.251956462860107, |
| "loss/twn": 0.0, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.0076, |
| "grad_norm": 11.5625, |
| "grad_norm_var": 11932.343229166667, |
| "learning_rate": 0.0001, |
| "loss": 7.0088, |
| "loss/crossentropy": 1.5739790201187134, |
| "loss/hidden": 0.1748046875, |
| "loss/logits": 0.007969235070049763, |
| "loss/reg": 5.2520952224731445, |
| "loss/twn": 0.0, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.007625, |
| "grad_norm": 8.9375, |
| "grad_norm_var": 11940.642301432292, |
| "learning_rate": 0.0001, |
| "loss": 8.1038, |
| "loss/crossentropy": 2.782174587249756, |
| "loss/hidden": 0.064453125, |
| "loss/logits": 0.005289027933031321, |
| "loss/reg": 5.251874923706055, |
| "loss/twn": 0.0, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.00765, |
| "grad_norm": 19.75, |
| "grad_norm_var": 11425.267692057292, |
| "learning_rate": 0.0001, |
| "loss": 8.1897, |
| "loss/crossentropy": 2.776104688644409, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.011526349931955338, |
| "loss/reg": 5.251704692840576, |
| "loss/twn": 0.0, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.007675, |
| "grad_norm": 7.9375, |
| "grad_norm_var": 11437.715608723958, |
| "learning_rate": 0.0001, |
| "loss": 8.9456, |
| "loss/crossentropy": 3.692781686782837, |
| "loss/hidden": 5.245208740234375e-06, |
| "loss/logits": 0.0012750843307003379, |
| "loss/reg": 5.251509189605713, |
| "loss/twn": 0.0, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.0077, |
| "grad_norm": 69.0, |
| "grad_norm_var": 11422.188264973958, |
| "learning_rate": 0.0001, |
| "loss": 8.1468, |
| "loss/crossentropy": 2.5686264038085938, |
| "loss/hidden": 0.30078125, |
| "loss/logits": 0.025831755250692368, |
| "loss/reg": 5.251588821411133, |
| "loss/twn": 0.0, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.007725, |
| "grad_norm": 13.375, |
| "grad_norm_var": 11445.626936848957, |
| "learning_rate": 0.0001, |
| "loss": 7.1097, |
| "loss/crossentropy": 1.660717248916626, |
| "loss/hidden": 0.185546875, |
| "loss/logits": 0.01224461942911148, |
| "loss/reg": 5.25119161605835, |
| "loss/twn": 0.0, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.00775, |
| "grad_norm": 17.875, |
| "grad_norm_var": 11418.828059895834, |
| "learning_rate": 0.0001, |
| "loss": 8.2796, |
| "loss/crossentropy": 2.8778281211853027, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.013323968276381493, |
| "loss/reg": 5.250760555267334, |
| "loss/twn": 0.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.007775, |
| "grad_norm": 10.25, |
| "grad_norm_var": 11417.731184895832, |
| "learning_rate": 0.0001, |
| "loss": 7.1638, |
| "loss/crossentropy": 1.7495626211166382, |
| "loss/hidden": 0.1533203125, |
| "loss/logits": 0.010259518399834633, |
| "loss/reg": 5.250608921051025, |
| "loss/twn": 0.0, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.0078, |
| "grad_norm": 21.0, |
| "grad_norm_var": 11370.812223307292, |
| "learning_rate": 0.0001, |
| "loss": 7.2684, |
| "loss/crossentropy": 1.8736885786056519, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.008427501656115055, |
| "loss/reg": 5.250565528869629, |
| "loss/twn": 0.0, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.007825, |
| "grad_norm": 171.0, |
| "grad_norm_var": 12271.96328125, |
| "learning_rate": 0.0001, |
| "loss": 6.804, |
| "loss/crossentropy": 1.4117506742477417, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.004921610467135906, |
| "loss/reg": 5.250616550445557, |
| "loss/twn": 0.0, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.00785, |
| "grad_norm": 21.75, |
| "grad_norm_var": 12253.1322265625, |
| "learning_rate": 0.0001, |
| "loss": 7.013, |
| "loss/crossentropy": 1.5148544311523438, |
| "loss/hidden": 0.23828125, |
| "loss/logits": 0.009858010336756706, |
| "loss/reg": 5.24995756149292, |
| "loss/twn": 0.0, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.007875, |
| "grad_norm": 127.0, |
| "grad_norm_var": 2269.4103515625, |
| "learning_rate": 0.0001, |
| "loss": 8.0458, |
| "loss/crossentropy": 2.6658105850219727, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.007101866416633129, |
| "loss/reg": 5.250338077545166, |
| "loss/twn": 0.0, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.0079, |
| "grad_norm": 49.25, |
| "grad_norm_var": 2240.6091145833334, |
| "learning_rate": 0.0001, |
| "loss": 6.9261, |
| "loss/crossentropy": 1.5474319458007812, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.004424188286066055, |
| "loss/reg": 5.249726295471191, |
| "loss/twn": 0.0, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.007925, |
| "grad_norm": 11.0625, |
| "grad_norm_var": 2236.19375, |
| "learning_rate": 0.0001, |
| "loss": 7.9458, |
| "loss/crossentropy": 2.6948788166046143, |
| "loss/hidden": 3.7550926208496094e-06, |
| "loss/logits": 0.0011138684349134564, |
| "loss/reg": 5.249834060668945, |
| "loss/twn": 0.0, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.00795, |
| "grad_norm": 14.625, |
| "grad_norm_var": 2225.3322265625, |
| "learning_rate": 0.0001, |
| "loss": 7.097, |
| "loss/crossentropy": 1.674680233001709, |
| "loss/hidden": 0.1533203125, |
| "loss/logits": 0.018878858536481857, |
| "loss/reg": 5.250132083892822, |
| "loss/twn": 0.0, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.007975, |
| "grad_norm": 280.0, |
| "grad_norm_var": 5856.9806640625, |
| "learning_rate": 0.0001, |
| "loss": 6.8544, |
| "loss/crossentropy": 1.4390206336975098, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.007244464010000229, |
| "loss/reg": 5.249932765960693, |
| "loss/twn": 0.0, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 17.75, |
| "grad_norm_var": 5824.858837890625, |
| "learning_rate": 0.0001, |
| "loss": 8.0011, |
| "loss/crossentropy": 2.620617389678955, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.008633976802229881, |
| "loss/reg": 5.249265193939209, |
| "loss/twn": 0.0, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.008025, |
| "grad_norm": 18.625, |
| "grad_norm_var": 5772.79609375, |
| "learning_rate": 0.0001, |
| "loss": 6.3629, |
| "loss/crossentropy": 0.973727822303772, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.005988460034132004, |
| "loss/reg": 5.249377250671387, |
| "loss/twn": 0.0, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.00805, |
| "grad_norm": 7.71875, |
| "grad_norm_var": 5837.412365722656, |
| "learning_rate": 0.0001, |
| "loss": 6.802, |
| "loss/crossentropy": 1.4281165599822998, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.007195750251412392, |
| "loss/reg": 5.249452114105225, |
| "loss/twn": 0.0, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.008075, |
| "grad_norm": 10.375, |
| "grad_norm_var": 5822.930822753906, |
| "learning_rate": 0.0001, |
| "loss": 8.0202, |
| "loss/crossentropy": 2.667816162109375, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.00719710998237133, |
| "loss/reg": 5.249003887176514, |
| "loss/twn": 0.0, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.0081, |
| "grad_norm": 30.5, |
| "grad_norm_var": 5837.498661295573, |
| "learning_rate": 0.0001, |
| "loss": 8.5258, |
| "loss/crossentropy": 3.144439697265625, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.01504556369036436, |
| "loss/reg": 5.249162197113037, |
| "loss/twn": 0.0, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.008125, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 5852.246708170573, |
| "learning_rate": 0.0001, |
| "loss": 7.8167, |
| "loss/crossentropy": 2.3962008953094482, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.006836063228547573, |
| "loss/reg": 5.248575210571289, |
| "loss/twn": 0.0, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.00815, |
| "grad_norm": 486.0, |
| "grad_norm_var": 17467.963993326823, |
| "learning_rate": 0.0001, |
| "loss": 5.9967, |
| "loss/crossentropy": 0.583743691444397, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.007230043411254883, |
| "loss/reg": 5.248484134674072, |
| "loss/twn": 0.0, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.008175, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 17462.717508951824, |
| "learning_rate": 0.0001, |
| "loss": 7.1253, |
| "loss/crossentropy": 1.6490944623947144, |
| "loss/hidden": 0.2138671875, |
| "loss/logits": 0.01414478849619627, |
| "loss/reg": 5.248198509216309, |
| "loss/twn": 0.0, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.0082, |
| "grad_norm": 10.125, |
| "grad_norm_var": 17556.386942545574, |
| "learning_rate": 0.0001, |
| "loss": 7.8925, |
| "loss/crossentropy": 2.5412166118621826, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.006635190453380346, |
| "loss/reg": 5.248410701751709, |
| "loss/twn": 0.0, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.008225, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 17198.204911295572, |
| "learning_rate": 0.0001, |
| "loss": 6.9945, |
| "loss/crossentropy": 1.603257656097412, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.010111295618116856, |
| "loss/reg": 5.248310565948486, |
| "loss/twn": 0.0, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.00825, |
| "grad_norm": 12.5, |
| "grad_norm_var": 17262.97177327474, |
| "learning_rate": 0.0001, |
| "loss": 5.7919, |
| "loss/crossentropy": 0.3630053400993347, |
| "loss/hidden": 0.177734375, |
| "loss/logits": 0.002837226027622819, |
| "loss/reg": 5.248295783996582, |
| "loss/twn": 0.0, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.008275, |
| "grad_norm": 22.0, |
| "grad_norm_var": 17144.92880452474, |
| "learning_rate": 0.0001, |
| "loss": 8.1295, |
| "loss/crossentropy": 2.8160948753356934, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.005728469230234623, |
| "loss/reg": 5.24811315536499, |
| "loss/twn": 0.0, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.0083, |
| "grad_norm": 17.125, |
| "grad_norm_var": 17267.413732910158, |
| "learning_rate": 0.0001, |
| "loss": 6.9959, |
| "loss/crossentropy": 1.5546507835388184, |
| "loss/hidden": 0.18359375, |
| "loss/logits": 0.010080805979669094, |
| "loss/reg": 5.247556209564209, |
| "loss/twn": 0.0, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.008325, |
| "grad_norm": 11.5625, |
| "grad_norm_var": 17264.114904785158, |
| "learning_rate": 0.0001, |
| "loss": 7.8893, |
| "loss/crossentropy": 2.5168728828430176, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.009305896237492561, |
| "loss/reg": 5.247858047485352, |
| "loss/twn": 0.0, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.00835, |
| "grad_norm": 13.4375, |
| "grad_norm_var": 17271.515751139323, |
| "learning_rate": 0.0001, |
| "loss": 8.1715, |
| "loss/crossentropy": 2.723308563232422, |
| "loss/hidden": 0.1884765625, |
| "loss/logits": 0.01238995511084795, |
| "loss/reg": 5.247326374053955, |
| "loss/twn": 0.0, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.008375, |
| "grad_norm": 13.0, |
| "grad_norm_var": 13921.291532389323, |
| "learning_rate": 0.0001, |
| "loss": 8.3945, |
| "loss/crossentropy": 3.096419334411621, |
| "loss/hidden": 0.04736328125, |
| "loss/logits": 0.0033957725390791893, |
| "loss/reg": 5.247368812561035, |
| "loss/twn": 0.0, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.0084, |
| "grad_norm": 17.75, |
| "grad_norm_var": 13921.291532389323, |
| "learning_rate": 0.0001, |
| "loss": 7.3812, |
| "loss/crossentropy": 1.8540621995925903, |
| "loss/hidden": 0.2734375, |
| "loss/logits": 0.006610853597521782, |
| "loss/reg": 5.247133731842041, |
| "loss/twn": 0.0, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.008425, |
| "grad_norm": 13.3125, |
| "grad_norm_var": 13941.063993326823, |
| "learning_rate": 0.0001, |
| "loss": 8.1344, |
| "loss/crossentropy": 2.7475340366363525, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.007097205147147179, |
| "loss/reg": 5.246928691864014, |
| "loss/twn": 0.0, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.00845, |
| "grad_norm": 68.5, |
| "grad_norm_var": 13880.22734375, |
| "learning_rate": 0.0001, |
| "loss": 7.8648, |
| "loss/crossentropy": 2.4544427394866943, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.010997762903571129, |
| "loss/reg": 5.246999263763428, |
| "loss/twn": 0.0, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.008475, |
| "grad_norm": 18.75, |
| "grad_norm_var": 13843.137434895832, |
| "learning_rate": 0.0001, |
| "loss": 7.9608, |
| "loss/crossentropy": 2.603945255279541, |
| "loss/hidden": 0.099609375, |
| "loss/logits": 0.010800717398524284, |
| "loss/reg": 5.246415615081787, |
| "loss/twn": 0.0, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.0085, |
| "grad_norm": 29.5, |
| "grad_norm_var": 13845.5384765625, |
| "learning_rate": 0.0001, |
| "loss": 6.9841, |
| "loss/crossentropy": 1.614331603050232, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.005967713892459869, |
| "loss/reg": 5.246609687805176, |
| "loss/twn": 0.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.008525, |
| "grad_norm": 11.0625, |
| "grad_norm_var": 13843.059830729168, |
| "learning_rate": 0.0001, |
| "loss": 7.04, |
| "loss/crossentropy": 1.6529104709625244, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.009600062854588032, |
| "loss/reg": 5.246581554412842, |
| "loss/twn": 0.0, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.00855, |
| "grad_norm": 13.5, |
| "grad_norm_var": 203.06764322916666, |
| "learning_rate": 0.0001, |
| "loss": 7.3224, |
| "loss/crossentropy": 1.903311848640442, |
| "loss/hidden": 0.162109375, |
| "loss/logits": 0.010554994456470013, |
| "loss/reg": 5.246466636657715, |
| "loss/twn": 0.0, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.008575, |
| "grad_norm": 21.125, |
| "grad_norm_var": 199.17628580729166, |
| "learning_rate": 0.0001, |
| "loss": 7.0821, |
| "loss/crossentropy": 1.5746668577194214, |
| "loss/hidden": 0.24609375, |
| "loss/logits": 0.014962641522288322, |
| "loss/reg": 5.246390342712402, |
| "loss/twn": 0.0, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.0086, |
| "grad_norm": 14.4375, |
| "grad_norm_var": 195.16588541666667, |
| "learning_rate": 0.0001, |
| "loss": 7.2347, |
| "loss/crossentropy": 1.8215773105621338, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.01197369396686554, |
| "loss/reg": 5.24590539932251, |
| "loss/twn": 0.0, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.008625, |
| "grad_norm": 109.0, |
| "grad_norm_var": 688.8426920572916, |
| "learning_rate": 0.0001, |
| "loss": 7.5554, |
| "loss/crossentropy": 2.1289989948272705, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.017385877668857574, |
| "loss/reg": 5.245935440063477, |
| "loss/twn": 0.0, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.00865, |
| "grad_norm": 238.0, |
| "grad_norm_var": 3478.8179524739585, |
| "learning_rate": 0.0001, |
| "loss": 7.9943, |
| "loss/crossentropy": 2.593379020690918, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.012457353994250298, |
| "loss/reg": 5.245845317840576, |
| "loss/twn": 0.0, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.008675, |
| "grad_norm": 43.5, |
| "grad_norm_var": 3457.530712890625, |
| "learning_rate": 0.0001, |
| "loss": 7.1267, |
| "loss/crossentropy": 1.6978092193603516, |
| "loss/hidden": 0.1728515625, |
| "loss/logits": 0.010361634194850922, |
| "loss/reg": 5.245694637298584, |
| "loss/twn": 0.0, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.0087, |
| "grad_norm": 24.875, |
| "grad_norm_var": 3436.771207682292, |
| "learning_rate": 0.0001, |
| "loss": 6.7434, |
| "loss/crossentropy": 1.4021539688110352, |
| "loss/hidden": 0.08837890625, |
| "loss/logits": 0.007280138321220875, |
| "loss/reg": 5.245609283447266, |
| "loss/twn": 0.0, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.008725, |
| "grad_norm": 26.25, |
| "grad_norm_var": 3391.9552083333333, |
| "learning_rate": 0.0001, |
| "loss": 8.1297, |
| "loss/crossentropy": 2.7253024578094482, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.012663663364946842, |
| "loss/reg": 5.245262145996094, |
| "loss/twn": 0.0, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.00875, |
| "grad_norm": 10.875, |
| "grad_norm_var": 3402.2098795572915, |
| "learning_rate": 0.0001, |
| "loss": 7.2976, |
| "loss/crossentropy": 1.8926178216934204, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.013426396995782852, |
| "loss/reg": 5.245053291320801, |
| "loss/twn": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.008775, |
| "grad_norm": 13.0625, |
| "grad_norm_var": 3401.967708333333, |
| "learning_rate": 0.0001, |
| "loss": 7.3178, |
| "loss/crossentropy": 1.8622666597366333, |
| "loss/hidden": 0.1953125, |
| "loss/logits": 0.015104337595403194, |
| "loss/reg": 5.245081424713135, |
| "loss/twn": 0.0, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.0088, |
| "grad_norm": 29.875, |
| "grad_norm_var": 3371.8004557291665, |
| "learning_rate": 0.0001, |
| "loss": 7.9433, |
| "loss/crossentropy": 2.5869476795196533, |
| "loss/hidden": 0.10546875, |
| "loss/logits": 0.0058593666180968285, |
| "loss/reg": 5.2449822425842285, |
| "loss/twn": 0.0, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.008825, |
| "grad_norm": 28.5, |
| "grad_norm_var": 3326.400113932292, |
| "learning_rate": 0.0001, |
| "loss": 7.0321, |
| "loss/crossentropy": 1.740645408630371, |
| "loss/hidden": 0.04248046875, |
| "loss/logits": 0.004086637869477272, |
| "loss/reg": 5.244920253753662, |
| "loss/twn": 0.0, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.00885, |
| "grad_norm": 11.75, |
| "grad_norm_var": 3340.7945149739585, |
| "learning_rate": 0.0001, |
| "loss": 7.3477, |
| "loss/crossentropy": 1.9114320278167725, |
| "loss/hidden": 0.1787109375, |
| "loss/logits": 0.012824185192584991, |
| "loss/reg": 5.244693279266357, |
| "loss/twn": 0.0, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.008875, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 3371.9114583333335, |
| "learning_rate": 0.0001, |
| "loss": 6.4325, |
| "loss/crossentropy": 1.1136820316314697, |
| "loss/hidden": 0.0732421875, |
| "loss/logits": 0.001058733556419611, |
| "loss/reg": 5.244504451751709, |
| "loss/twn": 0.0, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.0089, |
| "grad_norm": 9.625, |
| "grad_norm_var": 3423.5968098958333, |
| "learning_rate": 0.0001, |
| "loss": 7.5243, |
| "loss/crossentropy": 2.1711511611938477, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.009911064058542252, |
| "loss/reg": 5.244630813598633, |
| "loss/twn": 0.0, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.008925, |
| "grad_norm": 13.25, |
| "grad_norm_var": 3415.909228515625, |
| "learning_rate": 0.0001, |
| "loss": 7.3907, |
| "loss/crossentropy": 1.9722574949264526, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.012831033207476139, |
| "loss/reg": 5.244527816772461, |
| "loss/twn": 0.0, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.00895, |
| "grad_norm": 258.0, |
| "grad_norm_var": 6334.500634765625, |
| "learning_rate": 0.0001, |
| "loss": 6.6838, |
| "loss/crossentropy": 1.2551920413970947, |
| "loss/hidden": 0.1796875, |
| "loss/logits": 0.004792730323970318, |
| "loss/reg": 5.244173526763916, |
| "loss/twn": 0.0, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.008975, |
| "grad_norm": 9.375, |
| "grad_norm_var": 6394.419514973958, |
| "learning_rate": 0.0001, |
| "loss": 7.731, |
| "loss/crossentropy": 2.3687314987182617, |
| "loss/hidden": 0.10791015625, |
| "loss/logits": 0.010197984986007214, |
| "loss/reg": 5.244191646575928, |
| "loss/twn": 0.0, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 10.75, |
| "grad_norm_var": 6414.292643229167, |
| "learning_rate": 0.0001, |
| "loss": 6.3021, |
| "loss/crossentropy": 0.8574244379997253, |
| "loss/hidden": 0.19140625, |
| "loss/logits": 0.009014951065182686, |
| "loss/reg": 5.244270324707031, |
| "loss/twn": 0.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.009025, |
| "grad_norm": 12.0, |
| "grad_norm_var": 6276.774934895833, |
| "learning_rate": 0.0001, |
| "loss": 7.8089, |
| "loss/crossentropy": 2.456528902053833, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.012111629359424114, |
| "loss/reg": 5.244028091430664, |
| "loss/twn": 0.0, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.00905, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 3706.745556640625, |
| "learning_rate": 0.0001, |
| "loss": 6.1318, |
| "loss/crossentropy": 0.654383659362793, |
| "loss/hidden": 0.2255859375, |
| "loss/logits": 0.00800924189388752, |
| "loss/reg": 5.243789196014404, |
| "loss/twn": 0.0, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.009075, |
| "grad_norm": 60.0, |
| "grad_norm_var": 3747.402587890625, |
| "learning_rate": 0.0001, |
| "loss": 5.8096, |
| "loss/crossentropy": 0.38534435629844666, |
| "loss/hidden": 0.17578125, |
| "loss/logits": 0.004911348223686218, |
| "loss/reg": 5.243527412414551, |
| "loss/twn": 0.0, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.0091, |
| "grad_norm": 8.625, |
| "grad_norm_var": 3783.2118326822915, |
| "learning_rate": 0.0001, |
| "loss": 7.0141, |
| "loss/crossentropy": 1.7209011316299438, |
| "loss/hidden": 0.04736328125, |
| "loss/logits": 0.002276923507452011, |
| "loss/reg": 5.243542671203613, |
| "loss/twn": 0.0, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.009125, |
| "grad_norm": 700.0, |
| "grad_norm_var": 31568.732014973957, |
| "learning_rate": 0.0001, |
| "loss": 6.5463, |
| "loss/crossentropy": 1.1213988065719604, |
| "loss/hidden": 0.171875, |
| "loss/logits": 0.009275542572140694, |
| "loss/reg": 5.243773937225342, |
| "loss/twn": 0.0, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.00915, |
| "grad_norm": 9.5, |
| "grad_norm_var": 31580.584228515625, |
| "learning_rate": 0.0001, |
| "loss": 7.2758, |
| "loss/crossentropy": 1.888658046722412, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.011050861328840256, |
| "loss/reg": 5.243287086486816, |
| "loss/twn": 0.0, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.009175, |
| "grad_norm": 7.6875, |
| "grad_norm_var": 31626.63006184896, |
| "learning_rate": 0.0001, |
| "loss": 6.2879, |
| "loss/crossentropy": 0.9124002456665039, |
| "loss/hidden": 0.1220703125, |
| "loss/logits": 0.00983446091413498, |
| "loss/reg": 5.243640899658203, |
| "loss/twn": 0.0, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.0092, |
| "grad_norm": 17.75, |
| "grad_norm_var": 31707.892822265625, |
| "learning_rate": 0.0001, |
| "loss": 8.2629, |
| "loss/crossentropy": 2.844151735305786, |
| "loss/hidden": 0.16015625, |
| "loss/logits": 0.015287065878510475, |
| "loss/reg": 5.243282794952393, |
| "loss/twn": 0.0, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.009225, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 31834.040625, |
| "learning_rate": 0.0001, |
| "loss": 7.021, |
| "loss/crossentropy": 1.6278934478759766, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.007217081263661385, |
| "loss/reg": 5.243272304534912, |
| "loss/twn": 0.0, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.00925, |
| "grad_norm": 8.875, |
| "grad_norm_var": 31857.8806640625, |
| "learning_rate": 0.0001, |
| "loss": 7.2069, |
| "loss/crossentropy": 1.8656002283096313, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.004388316534459591, |
| "loss/reg": 5.243135452270508, |
| "loss/twn": 0.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.009275, |
| "grad_norm": 14.0625, |
| "grad_norm_var": 31822.486458333333, |
| "learning_rate": 0.0001, |
| "loss": 6.0393, |
| "loss/crossentropy": 0.5794708132743835, |
| "loss/hidden": 0.2080078125, |
| "loss/logits": 0.008730066008865833, |
| "loss/reg": 5.243083953857422, |
| "loss/twn": 0.0, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.0093, |
| "grad_norm": 10.625, |
| "grad_norm_var": 31814.140625, |
| "learning_rate": 0.0001, |
| "loss": 7.7146, |
| "loss/crossentropy": 2.3672714233398438, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.005637788213789463, |
| "loss/reg": 5.243073463439941, |
| "loss/twn": 0.0, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.009325, |
| "grad_norm": 11.125, |
| "grad_norm_var": 31831.281184895834, |
| "learning_rate": 0.0001, |
| "loss": 7.1862, |
| "loss/crossentropy": 1.8489041328430176, |
| "loss/hidden": 0.08837890625, |
| "loss/logits": 0.00582461804151535, |
| "loss/reg": 5.243066787719727, |
| "loss/twn": 0.0, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.00935, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 29543.332405598958, |
| "learning_rate": 0.0001, |
| "loss": 8.1468, |
| "loss/crossentropy": 2.779754877090454, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.011424477212131023, |
| "loss/reg": 5.242800235748291, |
| "loss/twn": 0.0, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.009375, |
| "grad_norm": 13.5625, |
| "grad_norm_var": 29517.761393229168, |
| "learning_rate": 0.0001, |
| "loss": 5.9118, |
| "loss/crossentropy": 0.5431471467018127, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.008679039776325226, |
| "loss/reg": 5.242814064025879, |
| "loss/twn": 0.0, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0094, |
| "grad_norm": 13.3125, |
| "grad_norm_var": 29502.23357747396, |
| "learning_rate": 0.0001, |
| "loss": 7.1375, |
| "loss/crossentropy": 1.6137068271636963, |
| "loss/hidden": 0.267578125, |
| "loss/logits": 0.013521241024136543, |
| "loss/reg": 5.2426886558532715, |
| "loss/twn": 0.0, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.009425, |
| "grad_norm": 20.25, |
| "grad_norm_var": 29456.37303059896, |
| "learning_rate": 0.0001, |
| "loss": 8.1117, |
| "loss/crossentropy": 2.5268547534942627, |
| "loss/hidden": 0.328125, |
| "loss/logits": 0.014234257861971855, |
| "loss/reg": 5.242476463317871, |
| "loss/twn": 0.0, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.00945, |
| "grad_norm": 12.625, |
| "grad_norm_var": 29456.751497395835, |
| "learning_rate": 0.0001, |
| "loss": 7.0252, |
| "loss/crossentropy": 1.6614928245544434, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.009039688855409622, |
| "loss/reg": 5.242366313934326, |
| "loss/twn": 0.0, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.009475, |
| "grad_norm": 12.375, |
| "grad_norm_var": 29586.256770833334, |
| "learning_rate": 0.0001, |
| "loss": 7.3434, |
| "loss/crossentropy": 1.9416403770446777, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.008125634863972664, |
| "loss/reg": 5.242313385009766, |
| "loss/twn": 0.0, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.0095, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 29582.02667643229, |
| "learning_rate": 0.0001, |
| "loss": 7.2748, |
| "loss/crossentropy": 1.8965728282928467, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.0069448379799723625, |
| "loss/reg": 5.242362976074219, |
| "loss/twn": 0.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.009525, |
| "grad_norm": 10.75, |
| "grad_norm_var": 10.688785807291667, |
| "learning_rate": 0.0001, |
| "loss": 7.591, |
| "loss/crossentropy": 2.19319224357605, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.009041574783623219, |
| "loss/reg": 5.242304801940918, |
| "loss/twn": 0.0, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.00955, |
| "grad_norm": 17.875, |
| "grad_norm_var": 12.215478515625, |
| "learning_rate": 0.0001, |
| "loss": 8.1746, |
| "loss/crossentropy": 2.799837827682495, |
| "loss/hidden": 0.1220703125, |
| "loss/logits": 0.010410355404019356, |
| "loss/reg": 5.242269992828369, |
| "loss/twn": 0.0, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.009575, |
| "grad_norm": 142.0, |
| "grad_norm_var": 1052.0530598958333, |
| "learning_rate": 0.0001, |
| "loss": 5.5973, |
| "loss/crossentropy": 0.23081077635288239, |
| "loss/hidden": 0.119140625, |
| "loss/logits": 0.00518256239593029, |
| "loss/reg": 5.242154121398926, |
| "loss/twn": 0.0, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 178.0, |
| "grad_norm_var": 2588.1160807291667, |
| "learning_rate": 0.0001, |
| "loss": 7.9228, |
| "loss/crossentropy": 2.5279970169067383, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.006114904303103685, |
| "loss/reg": 5.242175102233887, |
| "loss/twn": 0.0, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.009625, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 2591.222135416667, |
| "learning_rate": 0.0001, |
| "loss": 8.0683, |
| "loss/crossentropy": 2.7547900676727295, |
| "loss/hidden": 0.064453125, |
| "loss/logits": 0.007344301789999008, |
| "loss/reg": 5.241701602935791, |
| "loss/twn": 0.0, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.00965, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 2574.622770182292, |
| "learning_rate": 0.0001, |
| "loss": 8.1417, |
| "loss/crossentropy": 2.6952314376831055, |
| "loss/hidden": 0.185546875, |
| "loss/logits": 0.019006717950105667, |
| "loss/reg": 5.241945743560791, |
| "loss/twn": 0.0, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.009675, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 2583.447509765625, |
| "learning_rate": 0.0001, |
| "loss": 7.02, |
| "loss/crossentropy": 1.5960569381713867, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.014128390699625015, |
| "loss/reg": 5.241806507110596, |
| "loss/twn": 0.0, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.0097, |
| "grad_norm": 9.875, |
| "grad_norm_var": 2585.531494140625, |
| "learning_rate": 0.0001, |
| "loss": 7.6158, |
| "loss/crossentropy": 2.310436487197876, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.006508246064186096, |
| "loss/reg": 5.241683006286621, |
| "loss/twn": 0.0, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.009725, |
| "grad_norm": 13.625, |
| "grad_norm_var": 2579.2749837239585, |
| "learning_rate": 0.0001, |
| "loss": 8.1291, |
| "loss/crossentropy": 2.7542238235473633, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.010532179847359657, |
| "loss/reg": 5.241789817810059, |
| "loss/twn": 0.0, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.00975, |
| "grad_norm": 8.0625, |
| "grad_norm_var": 2585.864436848958, |
| "learning_rate": 0.0001, |
| "loss": 7.6721, |
| "loss/crossentropy": 2.319371223449707, |
| "loss/hidden": 0.107421875, |
| "loss/logits": 0.003740239655598998, |
| "loss/reg": 5.241525650024414, |
| "loss/twn": 0.0, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.009775, |
| "grad_norm": 10.4375, |
| "grad_norm_var": 2593.774593098958, |
| "learning_rate": 0.0001, |
| "loss": 8.1857, |
| "loss/crossentropy": 2.8464860916137695, |
| "loss/hidden": 0.09130859375, |
| "loss/logits": 0.006419371347874403, |
| "loss/reg": 5.241455078125, |
| "loss/twn": 0.0, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.0098, |
| "grad_norm": 17.125, |
| "grad_norm_var": 2585.749479166667, |
| "learning_rate": 0.0001, |
| "loss": 8.0076, |
| "loss/crossentropy": 2.555150032043457, |
| "loss/hidden": 0.2001953125, |
| "loss/logits": 0.01086280308663845, |
| "loss/reg": 5.241420745849609, |
| "loss/twn": 0.0, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.009825, |
| "grad_norm": 9.0, |
| "grad_norm_var": 2609.972135416667, |
| "learning_rate": 0.0001, |
| "loss": 6.3665, |
| "loss/crossentropy": 1.016921877861023, |
| "loss/hidden": 0.10205078125, |
| "loss/logits": 0.005952711217105389, |
| "loss/reg": 5.241562843322754, |
| "loss/twn": 0.0, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.00985, |
| "grad_norm": 10.875, |
| "grad_norm_var": 2614.3161458333334, |
| "learning_rate": 0.0001, |
| "loss": 7.8429, |
| "loss/crossentropy": 2.5393762588500977, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.005034131929278374, |
| "loss/reg": 5.241337299346924, |
| "loss/twn": 0.0, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.009875, |
| "grad_norm": 80.0, |
| "grad_norm_var": 2738.4009765625, |
| "learning_rate": 0.0001, |
| "loss": 8.0734, |
| "loss/crossentropy": 2.591184377670288, |
| "loss/hidden": 0.2294921875, |
| "loss/logits": 0.01196893397718668, |
| "loss/reg": 5.24077033996582, |
| "loss/twn": 0.0, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.0099, |
| "grad_norm": 20.375, |
| "grad_norm_var": 2708.840478515625, |
| "learning_rate": 0.0001, |
| "loss": 8.3222, |
| "loss/crossentropy": 2.96229887008667, |
| "loss/hidden": 0.10791015625, |
| "loss/logits": 0.010710952803492546, |
| "loss/reg": 5.2412495613098145, |
| "loss/twn": 0.0, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.009925, |
| "grad_norm": 14.0, |
| "grad_norm_var": 2698.892431640625, |
| "learning_rate": 0.0001, |
| "loss": 7.2185, |
| "loss/crossentropy": 1.843474268913269, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.009277286008000374, |
| "loss/reg": 5.240739345550537, |
| "loss/twn": 0.0, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.00995, |
| "grad_norm": 11.0, |
| "grad_norm_var": 2717.9419108072916, |
| "learning_rate": 0.0001, |
| "loss": 7.7714, |
| "loss/crossentropy": 2.3742737770080566, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.009680403396487236, |
| "loss/reg": 5.240973472595215, |
| "loss/twn": 0.0, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.009975, |
| "grad_norm": 12.0, |
| "grad_norm_var": 1919.5929524739583, |
| "learning_rate": 0.0001, |
| "loss": 8.0069, |
| "loss/crossentropy": 2.649110794067383, |
| "loss/hidden": 0.10791015625, |
| "loss/logits": 0.00901185255497694, |
| "loss/reg": 5.24085807800293, |
| "loss/twn": 0.0, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 9.5, |
| "grad_norm_var": 298.921337890625, |
| "learning_rate": 0.0001, |
| "loss": 7.3041, |
| "loss/crossentropy": 1.9669857025146484, |
| "loss/hidden": 0.0908203125, |
| "loss/logits": 0.005569307133555412, |
| "loss/reg": 5.240681171417236, |
| "loss/twn": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.010025, |
| "grad_norm": 169.0, |
| "grad_norm_var": 1743.7280598958334, |
| "learning_rate": 0.0001, |
| "loss": 5.7116, |
| "loss/crossentropy": 0.3063473105430603, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.003124656155705452, |
| "loss/reg": 5.2410149574279785, |
| "loss/twn": 0.0, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.01005, |
| "grad_norm": 61.75, |
| "grad_norm_var": 1810.6761555989583, |
| "learning_rate": 0.0001, |
| "loss": 7.4546, |
| "loss/crossentropy": 2.0817315578460693, |
| "loss/hidden": 0.1279296875, |
| "loss/logits": 0.004020760301500559, |
| "loss/reg": 5.240888595581055, |
| "loss/twn": 0.0, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.010075, |
| "grad_norm": 19.25, |
| "grad_norm_var": 1793.8056640625, |
| "learning_rate": 0.0001, |
| "loss": 6.645, |
| "loss/crossentropy": 1.261731743812561, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.006218242458999157, |
| "loss/reg": 5.240310192108154, |
| "loss/twn": 0.0, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.0101, |
| "grad_norm": 8.875, |
| "grad_norm_var": 1796.5171223958334, |
| "learning_rate": 0.0001, |
| "loss": 6.3703, |
| "loss/crossentropy": 0.9765498638153076, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.007485189475119114, |
| "loss/reg": 5.240753650665283, |
| "loss/twn": 0.0, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.010125, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 1803.0320149739584, |
| "learning_rate": 0.0001, |
| "loss": 7.0126, |
| "loss/crossentropy": 1.6325502395629883, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.007993818260729313, |
| "loss/reg": 5.240240097045898, |
| "loss/twn": 0.0, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.01015, |
| "grad_norm": 41.25, |
| "grad_norm_var": 1776.9919270833334, |
| "learning_rate": 0.0001, |
| "loss": 6.1547, |
| "loss/crossentropy": 0.801928699016571, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.002597600221633911, |
| "loss/reg": 5.240261554718018, |
| "loss/twn": 0.0, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.010175, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 1764.4606770833334, |
| "learning_rate": 0.0001, |
| "loss": 7.3067, |
| "loss/crossentropy": 1.9361686706542969, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.01030397042632103, |
| "loss/reg": 5.240562438964844, |
| "loss/twn": 0.0, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.0102, |
| "grad_norm": 9.375, |
| "grad_norm_var": 1783.4723958333334, |
| "learning_rate": 0.0001, |
| "loss": 8.0074, |
| "loss/crossentropy": 2.654067039489746, |
| "loss/hidden": 0.10546875, |
| "loss/logits": 0.0074330884963274, |
| "loss/reg": 5.240452766418457, |
| "loss/twn": 0.0, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.010225, |
| "grad_norm": 8.0625, |
| "grad_norm_var": 1786.3281087239584, |
| "learning_rate": 0.0001, |
| "loss": 7.3042, |
| "loss/crossentropy": 1.928905963897705, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.009886398911476135, |
| "loss/reg": 5.240396499633789, |
| "loss/twn": 0.0, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.01025, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 1783.8239583333334, |
| "learning_rate": 0.0001, |
| "loss": 7.1916, |
| "loss/crossentropy": 1.7467641830444336, |
| "loss/hidden": 0.1943359375, |
| "loss/logits": 0.010304899886250496, |
| "loss/reg": 5.2401838302612305, |
| "loss/twn": 0.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.010275, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 1637.2480305989584, |
| "learning_rate": 0.0001, |
| "loss": 7.8017, |
| "loss/crossentropy": 2.478278636932373, |
| "loss/hidden": 0.07666015625, |
| "loss/logits": 0.0064398422837257385, |
| "loss/reg": 5.2403669357299805, |
| "loss/twn": 0.0, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.0103, |
| "grad_norm": 13.75, |
| "grad_norm_var": 1645.8536295572917, |
| "learning_rate": 0.0001, |
| "loss": 5.9725, |
| "loss/crossentropy": 0.5244819521903992, |
| "loss/hidden": 0.1982421875, |
| "loss/logits": 0.009226880967617035, |
| "loss/reg": 5.24050760269165, |
| "loss/twn": 0.0, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.010325, |
| "grad_norm": 12.75, |
| "grad_norm_var": 1648.0508951822917, |
| "learning_rate": 0.0001, |
| "loss": 6.1233, |
| "loss/crossentropy": 0.6444658041000366, |
| "loss/hidden": 0.228515625, |
| "loss/logits": 0.009683252312242985, |
| "loss/reg": 5.240647792816162, |
| "loss/twn": 0.0, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.01035, |
| "grad_norm": 30.375, |
| "grad_norm_var": 1631.4206868489584, |
| "learning_rate": 0.0001, |
| "loss": 7.3483, |
| "loss/crossentropy": 1.942959189414978, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.014488045126199722, |
| "loss/reg": 5.2405009269714355, |
| "loss/twn": 0.0, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.010375, |
| "grad_norm": 148.0, |
| "grad_norm_var": 2502.174853515625, |
| "learning_rate": 0.0001, |
| "loss": 5.7032, |
| "loss/crossentropy": 0.21244874596595764, |
| "loss/hidden": 0.2451171875, |
| "loss/logits": 0.005563709884881973, |
| "loss/reg": 5.240046501159668, |
| "loss/twn": 0.0, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.0104, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 2495.9203125, |
| "learning_rate": 0.0001, |
| "loss": 8.1335, |
| "loss/crossentropy": 2.8577675819396973, |
| "loss/hidden": 0.0302734375, |
| "loss/logits": 0.005365458317101002, |
| "loss/reg": 5.240131378173828, |
| "loss/twn": 0.0, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.010425, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 1261.934228515625, |
| "learning_rate": 0.0001, |
| "loss": 6.9774, |
| "loss/crossentropy": 1.5951570272445679, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.007291505113244057, |
| "loss/reg": 5.240211486816406, |
| "loss/twn": 0.0, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.01045, |
| "grad_norm": 9.875, |
| "grad_norm_var": 1186.0130045572917, |
| "learning_rate": 0.0001, |
| "loss": 7.6738, |
| "loss/crossentropy": 2.354665756225586, |
| "loss/hidden": 0.07421875, |
| "loss/logits": 0.004681308753788471, |
| "loss/reg": 5.240237236022949, |
| "loss/twn": 0.0, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.010475, |
| "grad_norm": 18.0, |
| "grad_norm_var": 1186.7714680989584, |
| "learning_rate": 0.0001, |
| "loss": 7.9042, |
| "loss/crossentropy": 2.479489326477051, |
| "loss/hidden": 0.1748046875, |
| "loss/logits": 0.009896760806441307, |
| "loss/reg": 5.239961624145508, |
| "loss/twn": 0.0, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.0105, |
| "grad_norm": 78.0, |
| "grad_norm_var": 1353.9675618489584, |
| "learning_rate": 0.0001, |
| "loss": 6.7, |
| "loss/crossentropy": 1.1504077911376953, |
| "loss/hidden": 0.306640625, |
| "loss/logits": 0.0026911741588264704, |
| "loss/reg": 5.240240097045898, |
| "loss/twn": 0.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.010525, |
| "grad_norm": 14.5625, |
| "grad_norm_var": 1346.5242024739584, |
| "learning_rate": 0.0001, |
| "loss": 8.0741, |
| "loss/crossentropy": 2.7434136867523193, |
| "loss/hidden": 0.083984375, |
| "loss/logits": 0.006712112110108137, |
| "loss/reg": 5.2399516105651855, |
| "loss/twn": 0.0, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.01055, |
| "grad_norm": 14.375, |
| "grad_norm_var": 1343.0808430989584, |
| "learning_rate": 0.0001, |
| "loss": 6.6885, |
| "loss/crossentropy": 1.297805666923523, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.005279569886624813, |
| "loss/reg": 5.2398576736450195, |
| "loss/twn": 0.0, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.010575, |
| "grad_norm": 10.125, |
| "grad_norm_var": 1352.3348307291667, |
| "learning_rate": 0.0001, |
| "loss": 5.9427, |
| "loss/crossentropy": 0.5282614231109619, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.007687091361731291, |
| "loss/reg": 5.239724159240723, |
| "loss/twn": 0.0, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.0106, |
| "grad_norm": 13.6875, |
| "grad_norm_var": 1344.1219889322917, |
| "learning_rate": 0.0001, |
| "loss": 7.3072, |
| "loss/crossentropy": 1.7837430238723755, |
| "loss/hidden": 0.271484375, |
| "loss/logits": 0.012293729931116104, |
| "loss/reg": 5.2396626472473145, |
| "loss/twn": 0.0, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.010625, |
| "grad_norm": 53.75, |
| "grad_norm_var": 1365.6212890625, |
| "learning_rate": 0.0001, |
| "loss": 8.513, |
| "loss/crossentropy": 3.109501838684082, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.006876545026898384, |
| "loss/reg": 5.239365100860596, |
| "loss/twn": 0.0, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.01065, |
| "grad_norm": 8.5625, |
| "grad_norm_var": 1373.6447265625, |
| "learning_rate": 0.0001, |
| "loss": 7.5404, |
| "loss/crossentropy": 2.174077272415161, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.011358590796589851, |
| "loss/reg": 5.239738941192627, |
| "loss/twn": 0.0, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.010675, |
| "grad_norm": 12.1875, |
| "grad_norm_var": 1367.7306640625, |
| "learning_rate": 0.0001, |
| "loss": 8.1571, |
| "loss/crossentropy": 2.827575922012329, |
| "loss/hidden": 0.083984375, |
| "loss/logits": 0.005951396189630032, |
| "loss/reg": 5.239595890045166, |
| "loss/twn": 0.0, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.0107, |
| "grad_norm": 14.875, |
| "grad_norm_var": 1365.55859375, |
| "learning_rate": 0.0001, |
| "loss": 8.0294, |
| "loss/crossentropy": 2.697416305541992, |
| "loss/hidden": 0.087890625, |
| "loss/logits": 0.004499722272157669, |
| "loss/reg": 5.239617824554443, |
| "loss/twn": 0.0, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.010725, |
| "grad_norm": 11.0, |
| "grad_norm_var": 1369.5015625, |
| "learning_rate": 0.0001, |
| "loss": 6.9497, |
| "loss/crossentropy": 1.5794799327850342, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.008041350170969963, |
| "loss/reg": 5.239623069763184, |
| "loss/twn": 0.0, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.01075, |
| "grad_norm": 12.0, |
| "grad_norm_var": 1386.5462890625, |
| "learning_rate": 0.0001, |
| "loss": 6.8916, |
| "loss/crossentropy": 1.5268070697784424, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.0057592848315835, |
| "loss/reg": 5.23940372467041, |
| "loss/twn": 0.0, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.010775, |
| "grad_norm": 11.5, |
| "grad_norm_var": 359.2416015625, |
| "learning_rate": 0.0001, |
| "loss": 8.0251, |
| "loss/crossentropy": 2.647442579269409, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.009594411589205265, |
| "loss/reg": 5.239197254180908, |
| "loss/twn": 0.0, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.0108, |
| "grad_norm": 18.75, |
| "grad_norm_var": 355.0367024739583, |
| "learning_rate": 0.0001, |
| "loss": 8.1387, |
| "loss/crossentropy": 2.7440555095672607, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.01775319315493107, |
| "loss/reg": 5.23914909362793, |
| "loss/twn": 0.0, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.010825, |
| "grad_norm": 9.0, |
| "grad_norm_var": 357.3424479166667, |
| "learning_rate": 0.0001, |
| "loss": 7.2622, |
| "loss/crossentropy": 1.8811193704605103, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.007980940863490105, |
| "loss/reg": 5.239285469055176, |
| "loss/twn": 0.0, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.01085, |
| "grad_norm": 10.625, |
| "grad_norm_var": 356.42604166666666, |
| "learning_rate": 0.0001, |
| "loss": 8.2447, |
| "loss/crossentropy": 2.8804891109466553, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.01278759352862835, |
| "loss/reg": 5.239134311676025, |
| "loss/twn": 0.0, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.010875, |
| "grad_norm": 102.0, |
| "grad_norm_var": 781.3260416666667, |
| "learning_rate": 0.0001, |
| "loss": 6.8421, |
| "loss/crossentropy": 1.4239375591278076, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.011750075966119766, |
| "loss/reg": 5.239468574523926, |
| "loss/twn": 0.0, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0109, |
| "grad_norm": 14.25, |
| "grad_norm_var": 582.1736979166667, |
| "learning_rate": 0.0001, |
| "loss": 7.9746, |
| "loss/crossentropy": 2.5394973754882812, |
| "loss/hidden": 0.1875, |
| "loss/logits": 0.008459478616714478, |
| "loss/reg": 5.239116191864014, |
| "loss/twn": 0.0, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.010925, |
| "grad_norm": 7.8125, |
| "grad_norm_var": 590.5479166666667, |
| "learning_rate": 0.0001, |
| "loss": 7.3172, |
| "loss/crossentropy": 1.975609540939331, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.0038147151935845613, |
| "loss/reg": 5.239123344421387, |
| "loss/twn": 0.0, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.01095, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 594.1465983072917, |
| "learning_rate": 0.0001, |
| "loss": 7.1303, |
| "loss/crossentropy": 1.7625492811203003, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.005926240235567093, |
| "loss/reg": 5.239302635192871, |
| "loss/twn": 0.0, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.010975, |
| "grad_norm": 11.75, |
| "grad_norm_var": 592.1593587239583, |
| "learning_rate": 0.0001, |
| "loss": 8.0788, |
| "loss/crossentropy": 2.697333812713623, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.013756821863353252, |
| "loss/reg": 5.2388529777526855, |
| "loss/twn": 0.0, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 57.0, |
| "grad_norm_var": 672.0280598958333, |
| "learning_rate": 0.0001, |
| "loss": 7.046, |
| "loss/crossentropy": 1.7012284994125366, |
| "loss/hidden": 0.10009765625, |
| "loss/logits": 0.005743634421378374, |
| "loss/reg": 5.238898754119873, |
| "loss/twn": 0.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.011025, |
| "grad_norm": 16.375, |
| "grad_norm_var": 605.434375, |
| "learning_rate": 0.0001, |
| "loss": 7.7288, |
| "loss/crossentropy": 2.397491216659546, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.005718431435525417, |
| "loss/reg": 5.239189624786377, |
| "loss/twn": 0.0, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.01105, |
| "grad_norm": 30.75, |
| "grad_norm_var": 600.7946451822917, |
| "learning_rate": 0.0001, |
| "loss": 6.7732, |
| "loss/crossentropy": 1.3775757551193237, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.006438620388507843, |
| "loss/reg": 5.23883056640625, |
| "loss/twn": 0.0, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.011075, |
| "grad_norm": 326.0, |
| "grad_norm_var": 6348.5484375, |
| "learning_rate": 0.0001, |
| "loss": 6.8298, |
| "loss/crossentropy": 1.457594394683838, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.00396731635555625, |
| "loss/reg": 5.239315509796143, |
| "loss/twn": 0.0, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.0111, |
| "grad_norm": 12.9375, |
| "grad_norm_var": 6355.669254557291, |
| "learning_rate": 0.0001, |
| "loss": 8.2584, |
| "loss/crossentropy": 2.878317356109619, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.010759024880826473, |
| "loss/reg": 5.238509178161621, |
| "loss/twn": 0.0, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.011125, |
| "grad_norm": 8.4375, |
| "grad_norm_var": 6366.469791666666, |
| "learning_rate": 0.0001, |
| "loss": 7.6916, |
| "loss/crossentropy": 2.346791982650757, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.004902126267552376, |
| "loss/reg": 5.2388715744018555, |
| "loss/twn": 0.0, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.01115, |
| "grad_norm": 35.75, |
| "grad_norm_var": 6309.098697916666, |
| "learning_rate": 0.0001, |
| "loss": 6.5195, |
| "loss/crossentropy": 1.047242283821106, |
| "loss/hidden": 0.2265625, |
| "loss/logits": 0.006808393634855747, |
| "loss/reg": 5.2388739585876465, |
| "loss/twn": 0.0, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.011175, |
| "grad_norm": 51.75, |
| "grad_norm_var": 6242.728125, |
| "learning_rate": 0.0001, |
| "loss": 8.1962, |
| "loss/crossentropy": 2.7516071796417236, |
| "loss/hidden": 0.1865234375, |
| "loss/logits": 0.0192459337413311, |
| "loss/reg": 5.238797187805176, |
| "loss/twn": 0.0, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.0112, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 6266.446077473958, |
| "learning_rate": 0.0001, |
| "loss": 8.1265, |
| "loss/crossentropy": 2.730348587036133, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.015144633129239082, |
| "loss/reg": 5.238423824310303, |
| "loss/twn": 0.0, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.011225, |
| "grad_norm": 48.25, |
| "grad_norm_var": 6175.005843098958, |
| "learning_rate": 0.0001, |
| "loss": 6.9994, |
| "loss/crossentropy": 1.4689970016479492, |
| "loss/hidden": 0.283203125, |
| "loss/logits": 0.008740945719182491, |
| "loss/reg": 5.238440036773682, |
| "loss/twn": 0.0, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.01125, |
| "grad_norm": 7.8125, |
| "grad_norm_var": 6189.262434895833, |
| "learning_rate": 0.0001, |
| "loss": 7.8836, |
| "loss/crossentropy": 2.586432695388794, |
| "loss/hidden": 0.0546875, |
| "loss/logits": 0.004194296896457672, |
| "loss/reg": 5.238241672515869, |
| "loss/twn": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.011275, |
| "grad_norm": 12.5625, |
| "grad_norm_var": 6035.099202473958, |
| "learning_rate": 0.0001, |
| "loss": 7.5775, |
| "loss/crossentropy": 2.230092763900757, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.007766470313072205, |
| "loss/reg": 5.238610744476318, |
| "loss/twn": 0.0, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.0113, |
| "grad_norm": 10.9375, |
| "grad_norm_var": 6047.8462890625, |
| "learning_rate": 0.0001, |
| "loss": 7.9835, |
| "loss/crossentropy": 2.615774154663086, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.009664995595812798, |
| "loss/reg": 5.237979412078857, |
| "loss/twn": 0.0, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.011325, |
| "grad_norm": 76.0, |
| "grad_norm_var": 6033.516259765625, |
| "learning_rate": 0.0001, |
| "loss": 6.81, |
| "loss/crossentropy": 1.415939450263977, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.01219608448445797, |
| "loss/reg": 5.238288402557373, |
| "loss/twn": 0.0, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.01135, |
| "grad_norm": 9.125, |
| "grad_norm_var": 6041.5244140625, |
| "learning_rate": 0.0001, |
| "loss": 7.0682, |
| "loss/crossentropy": 1.6937216520309448, |
| "loss/hidden": 0.1298828125, |
| "loss/logits": 0.006700664758682251, |
| "loss/reg": 5.237900733947754, |
| "loss/twn": 0.0, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.011375, |
| "grad_norm": 19.125, |
| "grad_norm_var": 6011.728645833334, |
| "learning_rate": 0.0001, |
| "loss": 8.4153, |
| "loss/crossentropy": 2.93511700630188, |
| "loss/hidden": 0.2197265625, |
| "loss/logits": 0.022560518234968185, |
| "loss/reg": 5.237886905670166, |
| "loss/twn": 0.0, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0114, |
| "grad_norm": 15.3125, |
| "grad_norm_var": 6059.028759765625, |
| "learning_rate": 0.0001, |
| "loss": 7.0063, |
| "loss/crossentropy": 1.5995585918426514, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.011760546825826168, |
| "loss/reg": 5.237764835357666, |
| "loss/twn": 0.0, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.011425, |
| "grad_norm": 10.875, |
| "grad_norm_var": 6080.710791015625, |
| "learning_rate": 0.0001, |
| "loss": 7.0912, |
| "loss/crossentropy": 1.6729381084442139, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.013158103451132774, |
| "loss/reg": 5.238087177276611, |
| "loss/twn": 0.0, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.01145, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 6136.1228515625, |
| "learning_rate": 0.0001, |
| "loss": 7.8938, |
| "loss/crossentropy": 2.558936834335327, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.010592980310320854, |
| "loss/reg": 5.23784065246582, |
| "loss/twn": 0.0, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.011475, |
| "grad_norm": 14.3125, |
| "grad_norm_var": 397.25792643229164, |
| "learning_rate": 0.0001, |
| "loss": 8.1499, |
| "loss/crossentropy": 2.677492141723633, |
| "loss/hidden": 0.21875, |
| "loss/logits": 0.015965130180120468, |
| "loss/reg": 5.237676620483398, |
| "loss/twn": 0.0, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0115, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 394.51964518229164, |
| "learning_rate": 0.0001, |
| "loss": 8.1308, |
| "loss/crossentropy": 2.706998348236084, |
| "loss/hidden": 0.1708984375, |
| "loss/logits": 0.015249890275299549, |
| "loss/reg": 5.237621784210205, |
| "loss/twn": 0.0, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.011525, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 390.62316080729164, |
| "learning_rate": 0.0001, |
| "loss": 7.1208, |
| "loss/crossentropy": 1.7324503660202026, |
| "loss/hidden": 0.1416015625, |
| "loss/logits": 0.008826036937534809, |
| "loss/reg": 5.237947940826416, |
| "loss/twn": 0.0, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.01155, |
| "grad_norm": 88.0, |
| "grad_norm_var": 652.7167805989583, |
| "learning_rate": 0.0001, |
| "loss": 7.8363, |
| "loss/crossentropy": 2.4369447231292725, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.00924272183328867, |
| "loss/reg": 5.237813949584961, |
| "loss/twn": 0.0, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.011575, |
| "grad_norm": 8.875, |
| "grad_norm_var": 619.7566243489583, |
| "learning_rate": 0.0001, |
| "loss": 7.7379, |
| "loss/crossentropy": 2.4647915363311768, |
| "loss/hidden": 0.0302734375, |
| "loss/logits": 0.005017576273530722, |
| "loss/reg": 5.237803936004639, |
| "loss/twn": 0.0, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.0116, |
| "grad_norm": 8.3125, |
| "grad_norm_var": 627.089306640625, |
| "learning_rate": 0.0001, |
| "loss": 7.7172, |
| "loss/crossentropy": 2.3781650066375732, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.0055891769006848335, |
| "loss/reg": 5.237229824066162, |
| "loss/twn": 0.0, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.011625, |
| "grad_norm": 65.5, |
| "grad_norm_var": 703.914697265625, |
| "learning_rate": 0.0001, |
| "loss": 8.1072, |
| "loss/crossentropy": 2.73203182220459, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.010718154720962048, |
| "loss/reg": 5.237488746643066, |
| "loss/twn": 0.0, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.01165, |
| "grad_norm": 21.875, |
| "grad_norm_var": 685.90078125, |
| "learning_rate": 0.0001, |
| "loss": 8.3027, |
| "loss/crossentropy": 2.8547723293304443, |
| "loss/hidden": 0.1865234375, |
| "loss/logits": 0.023786598816514015, |
| "loss/reg": 5.2376484870910645, |
| "loss/twn": 0.0, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.011675, |
| "grad_norm": 101.5, |
| "grad_norm_var": 1034.077197265625, |
| "learning_rate": 0.0001, |
| "loss": 7.6863, |
| "loss/crossentropy": 2.4159936904907227, |
| "loss/hidden": 0.0302734375, |
| "loss/logits": 0.0024244533851742744, |
| "loss/reg": 5.23759126663208, |
| "loss/twn": 0.0, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.0117, |
| "grad_norm": 13.9375, |
| "grad_norm_var": 1026.835009765625, |
| "learning_rate": 0.0001, |
| "loss": 8.0093, |
| "loss/crossentropy": 2.580734968185425, |
| "loss/hidden": 0.17578125, |
| "loss/logits": 0.015159064903855324, |
| "loss/reg": 5.2376275062561035, |
| "loss/twn": 0.0, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.011725, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 900.303125, |
| "learning_rate": 0.0001, |
| "loss": 7.7953, |
| "loss/crossentropy": 2.4660680294036865, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.005589427426457405, |
| "loss/reg": 5.237224578857422, |
| "loss/twn": 0.0, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.01175, |
| "grad_norm": 384.0, |
| "grad_norm_var": 8815.046809895834, |
| "learning_rate": 0.0001, |
| "loss": 6.1676, |
| "loss/crossentropy": 0.7747684121131897, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.005672769621014595, |
| "loss/reg": 5.23769998550415, |
| "loss/twn": 0.0, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.011775, |
| "grad_norm": 14.75, |
| "grad_norm_var": 8834.2125, |
| "learning_rate": 0.0001, |
| "loss": 6.978, |
| "loss/crossentropy": 1.5184272527694702, |
| "loss/hidden": 0.2109375, |
| "loss/logits": 0.011126836761832237, |
| "loss/reg": 5.2375288009643555, |
| "loss/twn": 0.0, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.0118, |
| "grad_norm": 10.4375, |
| "grad_norm_var": 8858.0212890625, |
| "learning_rate": 0.0001, |
| "loss": 7.7419, |
| "loss/crossentropy": 2.359570264816284, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.013412706553936005, |
| "loss/reg": 5.2371039390563965, |
| "loss/twn": 0.0, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.011825, |
| "grad_norm": 13.9375, |
| "grad_norm_var": 8842.896207682292, |
| "learning_rate": 0.0001, |
| "loss": 8.1685, |
| "loss/crossentropy": 2.766843557357788, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.01371270976960659, |
| "loss/reg": 5.237538814544678, |
| "loss/twn": 0.0, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.01185, |
| "grad_norm": 15.3125, |
| "grad_norm_var": 8823.506624348958, |
| "learning_rate": 0.0001, |
| "loss": 7.5292, |
| "loss/crossentropy": 2.151641607284546, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.008503757417201996, |
| "loss/reg": 5.237189769744873, |
| "loss/twn": 0.0, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.011875, |
| "grad_norm": 11.25, |
| "grad_norm_var": 8838.5806640625, |
| "learning_rate": 0.0001, |
| "loss": 8.0395, |
| "loss/crossentropy": 2.686025619506836, |
| "loss/hidden": 0.1103515625, |
| "loss/logits": 0.005928123835474253, |
| "loss/reg": 5.237187385559082, |
| "loss/twn": 0.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.0119, |
| "grad_norm": 10.1875, |
| "grad_norm_var": 8864.2181640625, |
| "learning_rate": 0.0001, |
| "loss": 6.7249, |
| "loss/crossentropy": 1.3968653678894043, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.004261254798620939, |
| "loss/reg": 5.23736047744751, |
| "loss/twn": 0.0, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.011925, |
| "grad_norm": 17.375, |
| "grad_norm_var": 8832.607535807292, |
| "learning_rate": 0.0001, |
| "loss": 7.1631, |
| "loss/crossentropy": 1.7119083404541016, |
| "loss/hidden": 0.203125, |
| "loss/logits": 0.010743262246251106, |
| "loss/reg": 5.237338542938232, |
| "loss/twn": 0.0, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.01195, |
| "grad_norm": 584.0, |
| "grad_norm_var": 26742.08253580729, |
| "learning_rate": 0.0001, |
| "loss": 6.4806, |
| "loss/crossentropy": 1.1264278888702393, |
| "loss/hidden": 0.11376953125, |
| "loss/logits": 0.003235449083149433, |
| "loss/reg": 5.237181663513184, |
| "loss/twn": 0.0, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.011975, |
| "grad_norm": 11.375, |
| "grad_norm_var": 26718.53435872396, |
| "learning_rate": 0.0001, |
| "loss": 5.9934, |
| "loss/crossentropy": 0.5191141963005066, |
| "loss/hidden": 0.2275390625, |
| "loss/logits": 0.009647047147154808, |
| "loss/reg": 5.237125396728516, |
| "loss/twn": 0.0, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 10.375, |
| "grad_norm_var": 26698.853059895835, |
| "learning_rate": 0.0001, |
| "loss": 8.1124, |
| "loss/crossentropy": 2.780978202819824, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.007745138369500637, |
| "loss/reg": 5.23725700378418, |
| "loss/twn": 0.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.012025, |
| "grad_norm": 34.5, |
| "grad_norm_var": 26822.8853515625, |
| "learning_rate": 0.0001, |
| "loss": 7.868, |
| "loss/crossentropy": 2.5087831020355225, |
| "loss/hidden": 0.11474609375, |
| "loss/logits": 0.007656463421881199, |
| "loss/reg": 5.236792087554932, |
| "loss/twn": 0.0, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.01205, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 26934.268212890624, |
| "learning_rate": 0.0001, |
| "loss": 7.2881, |
| "loss/crossentropy": 1.942946434020996, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.007265533320605755, |
| "loss/reg": 5.236773490905762, |
| "loss/twn": 0.0, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.012075, |
| "grad_norm": 10.375, |
| "grad_norm_var": 27170.338916015626, |
| "learning_rate": 0.0001, |
| "loss": 7.3296, |
| "loss/crossentropy": 1.9258122444152832, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.011823762208223343, |
| "loss/reg": 5.2366719245910645, |
| "loss/twn": 0.0, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.0121, |
| "grad_norm": 9.4375, |
| "grad_norm_var": 27206.753759765626, |
| "learning_rate": 0.0001, |
| "loss": 7.756, |
| "loss/crossentropy": 2.4538094997406006, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.005918778479099274, |
| "loss/reg": 5.2366943359375, |
| "loss/twn": 0.0, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.012125, |
| "grad_norm": 13.0625, |
| "grad_norm_var": 27180.362744140624, |
| "learning_rate": 0.0001, |
| "loss": 8.1167, |
| "loss/crossentropy": 2.796402931213379, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.004426885861903429, |
| "loss/reg": 5.236767768859863, |
| "loss/twn": 0.0, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.01215, |
| "grad_norm": 10.0625, |
| "grad_norm_var": 20385.898893229165, |
| "learning_rate": 0.0001, |
| "loss": 7.9673, |
| "loss/crossentropy": 2.5698435306549072, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.011329087428748608, |
| "loss/reg": 5.236757278442383, |
| "loss/twn": 0.0, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.012175, |
| "grad_norm": 10.625, |
| "grad_norm_var": 20405.838541666668, |
| "learning_rate": 0.0001, |
| "loss": 7.2025, |
| "loss/crossentropy": 1.8346238136291504, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.004264689050614834, |
| "loss/reg": 5.236656188964844, |
| "loss/twn": 0.0, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.0122, |
| "grad_norm": 14.9375, |
| "grad_norm_var": 20384.079166666666, |
| "learning_rate": 0.0001, |
| "loss": 7.7223, |
| "loss/crossentropy": 2.3055596351623535, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.015009969472885132, |
| "loss/reg": 5.236688137054443, |
| "loss/twn": 0.0, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.012225, |
| "grad_norm": 16.25, |
| "grad_norm_var": 20373.57355143229, |
| "learning_rate": 0.0001, |
| "loss": 7.9159, |
| "loss/crossentropy": 2.573246955871582, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.007458841428160667, |
| "loss/reg": 5.236563682556152, |
| "loss/twn": 0.0, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.01225, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 20387.70636393229, |
| "learning_rate": 0.0001, |
| "loss": 7.9834, |
| "loss/crossentropy": 2.5690972805023193, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.012899991124868393, |
| "loss/reg": 5.236404895782471, |
| "loss/twn": 0.0, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.012275, |
| "grad_norm": 10.0625, |
| "grad_norm_var": 20393.779622395832, |
| "learning_rate": 0.0001, |
| "loss": 8.1415, |
| "loss/crossentropy": 2.8210272789001465, |
| "loss/hidden": 0.07666015625, |
| "loss/logits": 0.006865846458822489, |
| "loss/reg": 5.23691463470459, |
| "loss/twn": 0.0, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.0123, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 20395.727864583332, |
| "learning_rate": 0.0001, |
| "loss": 7.8399, |
| "loss/crossentropy": 2.5191946029663086, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.00524523202329874, |
| "loss/reg": 5.2364020347595215, |
| "loss/twn": 0.0, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.012325, |
| "grad_norm": 19.625, |
| "grad_norm_var": 20386.570833333335, |
| "learning_rate": 0.0001, |
| "loss": 8.46, |
| "loss/crossentropy": 3.0929410457611084, |
| "loss/hidden": 0.12060546875, |
| "loss/logits": 0.010085565969347954, |
| "loss/reg": 5.236414909362793, |
| "loss/twn": 0.0, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.01235, |
| "grad_norm": 13.375, |
| "grad_norm_var": 39.9259765625, |
| "learning_rate": 0.0001, |
| "loss": 8.1608, |
| "loss/crossentropy": 2.7697112560272217, |
| "loss/hidden": 0.1396484375, |
| "loss/logits": 0.015202455222606659, |
| "loss/reg": 5.236268043518066, |
| "loss/twn": 0.0, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.012375, |
| "grad_norm": 24.125, |
| "grad_norm_var": 46.5931640625, |
| "learning_rate": 0.0001, |
| "loss": 7.9654, |
| "loss/crossentropy": 2.5814311504364014, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.006853965111076832, |
| "loss/reg": 5.236512184143066, |
| "loss/twn": 0.0, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.0124, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 46.62550455729167, |
| "learning_rate": 0.0001, |
| "loss": 6.2441, |
| "loss/crossentropy": 0.8653862476348877, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.005771493539214134, |
| "loss/reg": 5.236272811889648, |
| "loss/twn": 0.0, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.012425, |
| "grad_norm": 19.0, |
| "grad_norm_var": 19.734619140625, |
| "learning_rate": 0.0001, |
| "loss": 7.8094, |
| "loss/crossentropy": 2.4122979640960693, |
| "loss/hidden": 0.1533203125, |
| "loss/logits": 0.007559158839285374, |
| "loss/reg": 5.236222743988037, |
| "loss/twn": 0.0, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.01245, |
| "grad_norm": 23.375, |
| "grad_norm_var": 24.274739583333332, |
| "learning_rate": 0.0001, |
| "loss": 7.2016, |
| "loss/crossentropy": 1.773500680923462, |
| "loss/hidden": 0.177734375, |
| "loss/logits": 0.01412028819322586, |
| "loss/reg": 5.236289024353027, |
| "loss/twn": 0.0, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.012475, |
| "grad_norm": 12.25, |
| "grad_norm_var": 23.545247395833332, |
| "learning_rate": 0.0001, |
| "loss": 7.306, |
| "loss/crossentropy": 1.899260401725769, |
| "loss/hidden": 0.16015625, |
| "loss/logits": 0.010113149881362915, |
| "loss/reg": 5.236475467681885, |
| "loss/twn": 0.0, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 10.0625, |
| "grad_norm_var": 23.165364583333332, |
| "learning_rate": 0.0001, |
| "loss": 7.6044, |
| "loss/crossentropy": 2.234079360961914, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.0073781562969088554, |
| "loss/reg": 5.235958576202393, |
| "loss/twn": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.012525, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 23.473958333333332, |
| "learning_rate": 0.0001, |
| "loss": 7.5774, |
| "loss/crossentropy": 2.250281572341919, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.004540358670055866, |
| "loss/reg": 5.236131191253662, |
| "loss/twn": 0.0, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.01255, |
| "grad_norm": 10.1875, |
| "grad_norm_var": 23.405143229166665, |
| "learning_rate": 0.0001, |
| "loss": 6.5639, |
| "loss/crossentropy": 1.1574146747589111, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.011335412040352821, |
| "loss/reg": 5.236012935638428, |
| "loss/twn": 0.0, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.012575, |
| "grad_norm": 12.0, |
| "grad_norm_var": 22.857291666666665, |
| "learning_rate": 0.0001, |
| "loss": 7.8073, |
| "loss/crossentropy": 2.4466099739074707, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.009260098449885845, |
| "loss/reg": 5.236217021942139, |
| "loss/twn": 0.0, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.0126, |
| "grad_norm": 15.0, |
| "grad_norm_var": 22.862483723958334, |
| "learning_rate": 0.0001, |
| "loss": 6.7732, |
| "loss/crossentropy": 1.3012182712554932, |
| "loss/hidden": 0.2265625, |
| "loss/logits": 0.00941612757742405, |
| "loss/reg": 5.235978126525879, |
| "loss/twn": 0.0, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.012625, |
| "grad_norm": 141.0, |
| "grad_norm_var": 1027.1649576822917, |
| "learning_rate": 0.0001, |
| "loss": 7.359, |
| "loss/crossentropy": 1.941611886024475, |
| "loss/hidden": 0.173828125, |
| "loss/logits": 0.007238644640892744, |
| "loss/reg": 5.236276626586914, |
| "loss/twn": 0.0, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.01265, |
| "grad_norm": 11.875, |
| "grad_norm_var": 1027.7504557291666, |
| "learning_rate": 0.0001, |
| "loss": 8.2093, |
| "loss/crossentropy": 2.877250909805298, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.009630267508327961, |
| "loss/reg": 5.235978603363037, |
| "loss/twn": 0.0, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.012675, |
| "grad_norm": 136.0, |
| "grad_norm_var": 1816.5980305989583, |
| "learning_rate": 0.0001, |
| "loss": 8.2749, |
| "loss/crossentropy": 2.9017584323883057, |
| "loss/hidden": 0.1279296875, |
| "loss/logits": 0.00875360518693924, |
| "loss/reg": 5.236504077911377, |
| "loss/twn": 0.0, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.0127, |
| "grad_norm": 124.5, |
| "grad_norm_var": 2330.153125, |
| "learning_rate": 0.0001, |
| "loss": 5.9319, |
| "loss/crossentropy": 0.569814920425415, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.006820861250162125, |
| "loss/reg": 5.235653400421143, |
| "loss/twn": 0.0, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.012725, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 2349.377587890625, |
| "learning_rate": 0.0001, |
| "loss": 7.4016, |
| "loss/crossentropy": 2.056201934814453, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.010679306462407112, |
| "loss/reg": 5.236123085021973, |
| "loss/twn": 0.0, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.01275, |
| "grad_norm": 10.9375, |
| "grad_norm_var": 2357.3369140625, |
| "learning_rate": 0.0001, |
| "loss": 7.799, |
| "loss/crossentropy": 2.4681053161621094, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.008713052608072758, |
| "loss/reg": 5.235713481903076, |
| "loss/twn": 0.0, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.012775, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 2387.242822265625, |
| "learning_rate": 0.0001, |
| "loss": 7.0821, |
| "loss/crossentropy": 1.63010573387146, |
| "loss/hidden": 0.2041015625, |
| "loss/logits": 0.011883174069225788, |
| "loss/reg": 5.235997200012207, |
| "loss/twn": 0.0, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 2390.703759765625, |
| "learning_rate": 0.0001, |
| "loss": 7.1311, |
| "loss/crossentropy": 1.7556850910186768, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.0065896175801754, |
| "loss/reg": 5.236062049865723, |
| "loss/twn": 0.0, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.012825, |
| "grad_norm": 7.15625, |
| "grad_norm_var": 2425.903446451823, |
| "learning_rate": 0.0001, |
| "loss": 6.1515, |
| "loss/crossentropy": 0.7963519096374512, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.006251027341932058, |
| "loss/reg": 5.236119270324707, |
| "loss/twn": 0.0, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.01285, |
| "grad_norm": 14.3125, |
| "grad_norm_var": 2445.081018066406, |
| "learning_rate": 0.0001, |
| "loss": 8.0629, |
| "loss/crossentropy": 2.6161036491394043, |
| "loss/hidden": 0.197265625, |
| "loss/logits": 0.013607255183160305, |
| "loss/reg": 5.235938549041748, |
| "loss/twn": 0.0, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.012875, |
| "grad_norm": 9.25, |
| "grad_norm_var": 2454.5161743164062, |
| "learning_rate": 0.0001, |
| "loss": 8.0433, |
| "loss/crossentropy": 2.7305965423583984, |
| "loss/hidden": 0.0693359375, |
| "loss/logits": 0.007636295165866613, |
| "loss/reg": 5.235754489898682, |
| "loss/twn": 0.0, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.0129, |
| "grad_norm": 17.125, |
| "grad_norm_var": 2434.8625610351564, |
| "learning_rate": 0.0001, |
| "loss": 7.871, |
| "loss/crossentropy": 2.3984246253967285, |
| "loss/hidden": 0.220703125, |
| "loss/logits": 0.01586098223924637, |
| "loss/reg": 5.2359771728515625, |
| "loss/twn": 0.0, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.012925, |
| "grad_norm": 16.75, |
| "grad_norm_var": 2421.328153483073, |
| "learning_rate": 0.0001, |
| "loss": 8.2615, |
| "loss/crossentropy": 2.9614031314849854, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.0044908830896019936, |
| "loss/reg": 5.23606014251709, |
| "loss/twn": 0.0, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.01295, |
| "grad_norm": 13.375, |
| "grad_norm_var": 2411.420340983073, |
| "learning_rate": 0.0001, |
| "loss": 7.3563, |
| "loss/crossentropy": 2.021721363067627, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.004891795106232166, |
| "loss/reg": 5.235958099365234, |
| "loss/twn": 0.0, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.012975, |
| "grad_norm": 14.6875, |
| "grad_norm_var": 2403.5608032226564, |
| "learning_rate": 0.0001, |
| "loss": 7.2795, |
| "loss/crossentropy": 1.8727322816848755, |
| "loss/hidden": 0.16015625, |
| "loss/logits": 0.010636523365974426, |
| "loss/reg": 5.235999584197998, |
| "loss/twn": 0.0, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 10.1875, |
| "grad_norm_var": 2418.0734985351564, |
| "learning_rate": 0.0001, |
| "loss": 7.2173, |
| "loss/crossentropy": 1.8416680097579956, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.00781365018337965, |
| "loss/reg": 5.235951900482178, |
| "loss/twn": 0.0, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.013025, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 1640.6509073893228, |
| "learning_rate": 0.0001, |
| "loss": 7.9581, |
| "loss/crossentropy": 2.697462797164917, |
| "loss/hidden": 0.02099609375, |
| "loss/logits": 0.004056986421346664, |
| "loss/reg": 5.235566139221191, |
| "loss/twn": 0.0, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.01305, |
| "grad_norm": 444.0, |
| "grad_norm_var": 12447.939611816406, |
| "learning_rate": 0.0001, |
| "loss": 6.4374, |
| "loss/crossentropy": 1.0375036001205444, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.003013317007571459, |
| "loss/reg": 5.235776901245117, |
| "loss/twn": 0.0, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.013075, |
| "grad_norm": 14.5625, |
| "grad_norm_var": 12039.795764160157, |
| "learning_rate": 0.0001, |
| "loss": 7.6035, |
| "loss/crossentropy": 2.2387633323669434, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.011812473647296429, |
| "loss/reg": 5.235291957855225, |
| "loss/twn": 0.0, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.0131, |
| "grad_norm": 11.75, |
| "grad_norm_var": 11658.413016764323, |
| "learning_rate": 0.0001, |
| "loss": 8.2169, |
| "loss/crossentropy": 2.89178729057312, |
| "loss/hidden": 0.083984375, |
| "loss/logits": 0.005315279122442007, |
| "loss/reg": 5.235769748687744, |
| "loss/twn": 0.0, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.013125, |
| "grad_norm": 25.0, |
| "grad_norm_var": 11624.30995686849, |
| "learning_rate": 0.0001, |
| "loss": 6.9365, |
| "loss/crossentropy": 1.5732824802398682, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.007601576391607523, |
| "loss/reg": 5.235477924346924, |
| "loss/twn": 0.0, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.01315, |
| "grad_norm": 10.875, |
| "grad_norm_var": 11624.552404785156, |
| "learning_rate": 0.0001, |
| "loss": 8.1942, |
| "loss/crossentropy": 2.9256229400634766, |
| "loss/hidden": 0.0302734375, |
| "loss/logits": 0.002373297931626439, |
| "loss/reg": 5.235915184020996, |
| "loss/twn": 0.0, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.013175, |
| "grad_norm": 8.1875, |
| "grad_norm_var": 11638.996708170573, |
| "learning_rate": 0.0001, |
| "loss": 6.944, |
| "loss/crossentropy": 1.6115573644638062, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.002878053579479456, |
| "loss/reg": 5.235781669616699, |
| "loss/twn": 0.0, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.0132, |
| "grad_norm": 33.75, |
| "grad_norm_var": 11577.075646972657, |
| "learning_rate": 0.0001, |
| "loss": 5.7046, |
| "loss/crossentropy": 0.35535818338394165, |
| "loss/hidden": 0.11328125, |
| "loss/logits": 0.0003594207810238004, |
| "loss/reg": 5.235568046569824, |
| "loss/twn": 0.0, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.013225, |
| "grad_norm": 17.0, |
| "grad_norm_var": 11538.3197265625, |
| "learning_rate": 0.0001, |
| "loss": 7.8718, |
| "loss/crossentropy": 2.430708646774292, |
| "loss/hidden": 0.197265625, |
| "loss/logits": 0.008184842765331268, |
| "loss/reg": 5.235634803771973, |
| "loss/twn": 0.0, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.01325, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 11544.465104166668, |
| "learning_rate": 0.0001, |
| "loss": 7.1415, |
| "loss/crossentropy": 1.7498486042022705, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.009613174945116043, |
| "loss/reg": 5.235511302947998, |
| "loss/twn": 0.0, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.013275, |
| "grad_norm": 10.5, |
| "grad_norm_var": 11539.135677083334, |
| "learning_rate": 0.0001, |
| "loss": 7.9653, |
| "loss/crossentropy": 2.616609573364258, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.011807188391685486, |
| "loss/reg": 5.235820770263672, |
| "loss/twn": 0.0, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.0133, |
| "grad_norm": 15.5625, |
| "grad_norm_var": 11544.447770182293, |
| "learning_rate": 0.0001, |
| "loss": 7.821, |
| "loss/crossentropy": 2.5074052810668945, |
| "loss/hidden": 0.07177734375, |
| "loss/logits": 0.006313705816864967, |
| "loss/reg": 5.235459804534912, |
| "loss/twn": 0.0, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.013325, |
| "grad_norm": 55.0, |
| "grad_norm_var": 11508.170035807292, |
| "learning_rate": 0.0001, |
| "loss": 6.9534, |
| "loss/crossentropy": 1.6006724834442139, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.007289988920092583, |
| "loss/reg": 5.235603332519531, |
| "loss/twn": 0.0, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.01335, |
| "grad_norm": 12.625, |
| "grad_norm_var": 11511.286051432291, |
| "learning_rate": 0.0001, |
| "loss": 8.027, |
| "loss/crossentropy": 2.671614646911621, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.0071898894384503365, |
| "loss/reg": 5.235414028167725, |
| "loss/twn": 0.0, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.013375, |
| "grad_norm": 372.0, |
| "grad_norm_var": 18087.790104166666, |
| "learning_rate": 0.0001, |
| "loss": 8.0188, |
| "loss/crossentropy": 2.639939308166504, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.008616717532277107, |
| "loss/reg": 5.235448360443115, |
| "loss/twn": 0.0, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.0134, |
| "grad_norm": 9.0625, |
| "grad_norm_var": 18096.311393229167, |
| "learning_rate": 0.0001, |
| "loss": 7.087, |
| "loss/crossentropy": 1.6764798164367676, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.012176426127552986, |
| "loss/reg": 5.235233783721924, |
| "loss/twn": 0.0, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.013425, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 18100.0994140625, |
| "learning_rate": 0.0001, |
| "loss": 6.986, |
| "loss/crossentropy": 1.620530605316162, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.007392015308141708, |
| "loss/reg": 5.235503673553467, |
| "loss/twn": 0.0, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.01345, |
| "grad_norm": 16.125, |
| "grad_norm_var": 7998.4609375, |
| "learning_rate": 0.0001, |
| "loss": 8.4295, |
| "loss/crossentropy": 3.019792079925537, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.015434058383107185, |
| "loss/reg": 5.235138893127441, |
| "loss/twn": 0.0, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.013475, |
| "grad_norm": 18.5, |
| "grad_norm_var": 7986.272119140625, |
| "learning_rate": 0.0001, |
| "loss": 6.5863, |
| "loss/crossentropy": 0.9472768902778625, |
| "loss/hidden": 0.3984375, |
| "loss/logits": 0.005067166872322559, |
| "loss/reg": 5.235495090484619, |
| "loss/twn": 0.0, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.0135, |
| "grad_norm": 68.0, |
| "grad_norm_var": 7973.117822265625, |
| "learning_rate": 0.0001, |
| "loss": 7.0399, |
| "loss/crossentropy": 1.5947017669677734, |
| "loss/hidden": 0.201171875, |
| "loss/logits": 0.008832491934299469, |
| "loss/reg": 5.235156059265137, |
| "loss/twn": 0.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.013525, |
| "grad_norm": 696.0, |
| "grad_norm_var": 34468.181884765625, |
| "learning_rate": 0.0001, |
| "loss": 8.1707, |
| "loss/crossentropy": 2.8003129959106445, |
| "loss/hidden": 0.123046875, |
| "loss/logits": 0.012298551388084888, |
| "loss/reg": 5.235071659088135, |
| "loss/twn": 0.0, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.01355, |
| "grad_norm": 11.5, |
| "grad_norm_var": 34462.002197265625, |
| "learning_rate": 0.0001, |
| "loss": 7.5471, |
| "loss/crossentropy": 2.217853546142578, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.007477154955267906, |
| "loss/reg": 5.235381126403809, |
| "loss/twn": 0.0, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.013575, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 34416.96235351563, |
| "learning_rate": 0.0001, |
| "loss": 8.3214, |
| "loss/crossentropy": 2.926238775253296, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.015387848019599915, |
| "loss/reg": 5.235249042510986, |
| "loss/twn": 0.0, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.0136, |
| "grad_norm": 78.0, |
| "grad_norm_var": 34233.16352539063, |
| "learning_rate": 0.0001, |
| "loss": 8.227, |
| "loss/crossentropy": 2.8347182273864746, |
| "loss/hidden": 0.1416015625, |
| "loss/logits": 0.015542502515017986, |
| "loss/reg": 5.235121250152588, |
| "loss/twn": 0.0, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.013625, |
| "grad_norm": 9.625, |
| "grad_norm_var": 34306.78292643229, |
| "learning_rate": 0.0001, |
| "loss": 7.7463, |
| "loss/crossentropy": 2.451880693435669, |
| "loss/hidden": 0.0546875, |
| "loss/logits": 0.004437028430402279, |
| "loss/reg": 5.2353057861328125, |
| "loss/twn": 0.0, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.01365, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 34320.69907226563, |
| "learning_rate": 0.0001, |
| "loss": 6.514, |
| "loss/crossentropy": 1.0950896739959717, |
| "loss/hidden": 0.1787109375, |
| "loss/logits": 0.004933930933475494, |
| "loss/reg": 5.235309600830078, |
| "loss/twn": 0.0, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.013675, |
| "grad_norm": 19.375, |
| "grad_norm_var": 34234.075374348955, |
| "learning_rate": 0.0001, |
| "loss": 7.917, |
| "loss/crossentropy": 2.5059616565704346, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.010931363329291344, |
| "loss/reg": 5.23504114151001, |
| "loss/twn": 0.0, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.0137, |
| "grad_norm": 13.0625, |
| "grad_norm_var": 34258.751155598955, |
| "learning_rate": 0.0001, |
| "loss": 8.0647, |
| "loss/crossentropy": 2.657578706741333, |
| "loss/hidden": 0.1640625, |
| "loss/logits": 0.007689584046602249, |
| "loss/reg": 5.235403060913086, |
| "loss/twn": 0.0, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.013725, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 34598.19524739583, |
| "learning_rate": 0.0001, |
| "loss": 7.7748, |
| "loss/crossentropy": 2.4471874237060547, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006248220801353455, |
| "loss/reg": 5.234949588775635, |
| "loss/twn": 0.0, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.01375, |
| "grad_norm": 20.125, |
| "grad_norm_var": 34528.96868489583, |
| "learning_rate": 0.0001, |
| "loss": 8.0594, |
| "loss/crossentropy": 2.625808000564575, |
| "loss/hidden": 0.1826171875, |
| "loss/logits": 0.015631355345249176, |
| "loss/reg": 5.235373020172119, |
| "loss/twn": 0.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.013775, |
| "grad_norm": 13.875, |
| "grad_norm_var": 28880.479427083334, |
| "learning_rate": 0.0001, |
| "loss": 5.896, |
| "loss/crossentropy": 0.3797203600406647, |
| "loss/hidden": 0.27734375, |
| "loss/logits": 0.003970766440033913, |
| "loss/reg": 5.234944820404053, |
| "loss/twn": 0.0, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.0138, |
| "grad_norm": 37.0, |
| "grad_norm_var": 28726.655843098957, |
| "learning_rate": 0.0001, |
| "loss": 8.1785, |
| "loss/crossentropy": 2.7710931301116943, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.007251654751598835, |
| "loss/reg": 5.235079765319824, |
| "loss/twn": 0.0, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.013825, |
| "grad_norm": 10.4375, |
| "grad_norm_var": 28718.351936848958, |
| "learning_rate": 0.0001, |
| "loss": 7.4216, |
| "loss/crossentropy": 2.1164746284484863, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.003332372521981597, |
| "loss/reg": 5.234862327575684, |
| "loss/twn": 0.0, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.01385, |
| "grad_norm": 8.625, |
| "grad_norm_var": 28771.012093098958, |
| "learning_rate": 0.0001, |
| "loss": 6.9919, |
| "loss/crossentropy": 1.6055660247802734, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.010463319718837738, |
| "loss/reg": 5.235208034515381, |
| "loss/twn": 0.0, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.013875, |
| "grad_norm": 19.375, |
| "grad_norm_var": 28765.65818684896, |
| "learning_rate": 0.0001, |
| "loss": 7.6993, |
| "loss/crossentropy": 2.3250389099121094, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.006799938622862101, |
| "loss/reg": 5.234645843505859, |
| "loss/twn": 0.0, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.0139, |
| "grad_norm": 28.0, |
| "grad_norm_var": 28848.887353515624, |
| "learning_rate": 0.0001, |
| "loss": 6.6932, |
| "loss/crossentropy": 1.2838351726531982, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.00634372141212225, |
| "loss/reg": 5.235021591186523, |
| "loss/twn": 0.0, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.013925, |
| "grad_norm": 209.0, |
| "grad_norm_var": 2527.298291015625, |
| "learning_rate": 0.0001, |
| "loss": 8.2303, |
| "loss/crossentropy": 2.8924214839935303, |
| "loss/hidden": 0.095703125, |
| "loss/logits": 0.007457260973751545, |
| "loss/reg": 5.234717845916748, |
| "loss/twn": 0.0, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.01395, |
| "grad_norm": 14.25, |
| "grad_norm_var": 2520.284358723958, |
| "learning_rate": 0.0001, |
| "loss": 7.1526, |
| "loss/crossentropy": 1.7737421989440918, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.009325024671852589, |
| "loss/reg": 5.2347235679626465, |
| "loss/twn": 0.0, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.013975, |
| "grad_norm": 31.75, |
| "grad_norm_var": 2493.68125, |
| "learning_rate": 0.0001, |
| "loss": 7.8427, |
| "loss/crossentropy": 2.5061957836151123, |
| "loss/hidden": 0.09326171875, |
| "loss/logits": 0.008374359458684921, |
| "loss/reg": 5.234871864318848, |
| "loss/twn": 0.0, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 9.5625, |
| "grad_norm_var": 2378.353369140625, |
| "learning_rate": 0.0001, |
| "loss": 8.2168, |
| "loss/crossentropy": 2.899165391921997, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.0035689827054739, |
| "loss/reg": 5.234927654266357, |
| "loss/twn": 0.0, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.014025, |
| "grad_norm": 53.0, |
| "grad_norm_var": 2383.8656087239583, |
| "learning_rate": 0.0001, |
| "loss": 5.8024, |
| "loss/crossentropy": 0.35774412751197815, |
| "loss/hidden": 0.2021484375, |
| "loss/logits": 0.00793472956866026, |
| "loss/reg": 5.234549045562744, |
| "loss/twn": 0.0, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.01405, |
| "grad_norm": 64.5, |
| "grad_norm_var": 2415.985872395833, |
| "learning_rate": 0.0001, |
| "loss": 7.1944, |
| "loss/crossentropy": 1.7934857606887817, |
| "loss/hidden": 0.15625, |
| "loss/logits": 0.01002482883632183, |
| "loss/reg": 5.234671115875244, |
| "loss/twn": 0.0, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.014075, |
| "grad_norm": 7.78125, |
| "grad_norm_var": 2448.6008422851564, |
| "learning_rate": 0.0001, |
| "loss": 7.6461, |
| "loss/crossentropy": 2.308584451675415, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.006569989956915379, |
| "loss/reg": 5.23472261428833, |
| "loss/twn": 0.0, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.0141, |
| "grad_norm": 55.25, |
| "grad_norm_var": 2440.2951782226564, |
| "learning_rate": 0.0001, |
| "loss": 7.8559, |
| "loss/crossentropy": 2.439448118209839, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.01364111714065075, |
| "loss/reg": 5.234842300415039, |
| "loss/twn": 0.0, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.014125, |
| "grad_norm": 16.25, |
| "grad_norm_var": 2415.370438639323, |
| "learning_rate": 0.0001, |
| "loss": 8.3004, |
| "loss/crossentropy": 2.8731632232666016, |
| "loss/hidden": 0.1787109375, |
| "loss/logits": 0.014065857976675034, |
| "loss/reg": 5.23447847366333, |
| "loss/twn": 0.0, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.01415, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 2438.8619099934895, |
| "learning_rate": 0.0001, |
| "loss": 7.1178, |
| "loss/crossentropy": 1.780933141708374, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.008497532457113266, |
| "loss/reg": 5.234607696533203, |
| "loss/twn": 0.0, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.014175, |
| "grad_norm": 14.375, |
| "grad_norm_var": 2437.342248535156, |
| "learning_rate": 0.0001, |
| "loss": 8.2505, |
| "loss/crossentropy": 2.875947952270508, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.0144406259059906, |
| "loss/reg": 5.235077857971191, |
| "loss/twn": 0.0, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.0142, |
| "grad_norm": 22.75, |
| "grad_norm_var": 2449.9111938476562, |
| "learning_rate": 0.0001, |
| "loss": 7.0607, |
| "loss/crossentropy": 1.6746110916137695, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.0051438165828585625, |
| "loss/reg": 5.234502792358398, |
| "loss/twn": 0.0, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.014225, |
| "grad_norm": 19.625, |
| "grad_norm_var": 2423.817736816406, |
| "learning_rate": 0.0001, |
| "loss": 7.989, |
| "loss/crossentropy": 2.5603058338165283, |
| "loss/hidden": 0.1826171875, |
| "loss/logits": 0.011639740318059921, |
| "loss/reg": 5.234450340270996, |
| "loss/twn": 0.0, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.01425, |
| "grad_norm": 12.5625, |
| "grad_norm_var": 2410.089807128906, |
| "learning_rate": 0.0001, |
| "loss": 6.1451, |
| "loss/crossentropy": 0.8017870187759399, |
| "loss/hidden": 0.10498046875, |
| "loss/logits": 0.004113970324397087, |
| "loss/reg": 5.2342610359191895, |
| "loss/twn": 0.0, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.014275, |
| "grad_norm": 7.96875, |
| "grad_norm_var": 2444.820947265625, |
| "learning_rate": 0.0001, |
| "loss": 6.3145, |
| "loss/crossentropy": 0.9622921943664551, |
| "loss/hidden": 0.10791015625, |
| "loss/logits": 0.00957135483622551, |
| "loss/reg": 5.234708786010742, |
| "loss/twn": 0.0, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.0143, |
| "grad_norm": 9.5, |
| "grad_norm_var": 2486.3206868489583, |
| "learning_rate": 0.0001, |
| "loss": 7.6128, |
| "loss/crossentropy": 2.2709686756134033, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.008640400134027004, |
| "loss/reg": 5.2345733642578125, |
| "loss/twn": 0.0, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.014325, |
| "grad_norm": 7.6875, |
| "grad_norm_var": 348.6860026041667, |
| "learning_rate": 0.0001, |
| "loss": 7.5522, |
| "loss/crossentropy": 2.1828360557556152, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.008249370381236076, |
| "loss/reg": 5.234208583831787, |
| "loss/twn": 0.0, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.01435, |
| "grad_norm": 12.4375, |
| "grad_norm_var": 350.864306640625, |
| "learning_rate": 0.0001, |
| "loss": 8.2355, |
| "loss/crossentropy": 2.8409531116485596, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.01071943435817957, |
| "loss/reg": 5.234445095062256, |
| "loss/twn": 0.0, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.014375, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 353.3037109375, |
| "learning_rate": 0.0001, |
| "loss": 6.8632, |
| "loss/crossentropy": 1.4696354866027832, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.007870590314269066, |
| "loss/reg": 5.234313011169434, |
| "loss/twn": 0.0, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.0144, |
| "grad_norm": 54.5, |
| "grad_norm_var": 411.406494140625, |
| "learning_rate": 0.0001, |
| "loss": 7.1332, |
| "loss/crossentropy": 1.8072994947433472, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.004960792139172554, |
| "loss/reg": 5.234533309936523, |
| "loss/twn": 0.0, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.014425, |
| "grad_norm": 14.9375, |
| "grad_norm_var": 353.450390625, |
| "learning_rate": 0.0001, |
| "loss": 8.0425, |
| "loss/crossentropy": 2.6438798904418945, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.014983810484409332, |
| "loss/reg": 5.234223365783691, |
| "loss/twn": 0.0, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.01445, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 227.10193684895833, |
| "learning_rate": 0.0001, |
| "loss": 6.7681, |
| "loss/crossentropy": 1.3993828296661377, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.005370709113776684, |
| "loss/reg": 5.234410285949707, |
| "loss/twn": 0.0, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.014475, |
| "grad_norm": 30.875, |
| "grad_norm_var": 229.36620686848957, |
| "learning_rate": 0.0001, |
| "loss": 8.3016, |
| "loss/crossentropy": 2.88641095161438, |
| "loss/hidden": 0.166015625, |
| "loss/logits": 0.014844672754406929, |
| "loss/reg": 5.234356880187988, |
| "loss/twn": 0.0, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0145, |
| "grad_norm": 10.625, |
| "grad_norm_var": 140.0116170247396, |
| "learning_rate": 0.0001, |
| "loss": 6.7513, |
| "loss/crossentropy": 1.4178798198699951, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.005413350649178028, |
| "loss/reg": 5.234261989593506, |
| "loss/twn": 0.0, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.014525, |
| "grad_norm": 17.875, |
| "grad_norm_var": 140.1169881184896, |
| "learning_rate": 0.0001, |
| "loss": 7.069, |
| "loss/crossentropy": 1.6242541074752808, |
| "loss/hidden": 0.19921875, |
| "loss/logits": 0.011282745748758316, |
| "loss/reg": 5.234253883361816, |
| "loss/twn": 0.0, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.01455, |
| "grad_norm": 13.5, |
| "grad_norm_var": 139.2117146809896, |
| "learning_rate": 0.0001, |
| "loss": 6.743, |
| "loss/crossentropy": 1.388974666595459, |
| "loss/hidden": 0.1103515625, |
| "loss/logits": 0.009235072880983353, |
| "loss/reg": 5.234424591064453, |
| "loss/twn": 0.0, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.014575, |
| "grad_norm": 13.125, |
| "grad_norm_var": 139.70227457682293, |
| "learning_rate": 0.0001, |
| "loss": 7.0183, |
| "loss/crossentropy": 1.626574993133545, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.005102044437080622, |
| "loss/reg": 5.234281539916992, |
| "loss/twn": 0.0, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.0146, |
| "grad_norm": 11.4375, |
| "grad_norm_var": 138.50621337890624, |
| "learning_rate": 0.0001, |
| "loss": 8.1816, |
| "loss/crossentropy": 2.712043285369873, |
| "loss/hidden": 0.21484375, |
| "loss/logits": 0.020563386380672455, |
| "loss/reg": 5.234140396118164, |
| "loss/twn": 0.0, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.014625, |
| "grad_norm": 135.0, |
| "grad_norm_var": 1027.0439412434896, |
| "learning_rate": 0.0001, |
| "loss": 7.6221, |
| "loss/crossentropy": 2.309222936630249, |
| "loss/hidden": 0.07177734375, |
| "loss/logits": 0.00689616659656167, |
| "loss/reg": 5.234216213226318, |
| "loss/twn": 0.0, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.01465, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 1027.4010375976563, |
| "learning_rate": 0.0001, |
| "loss": 8.0345, |
| "loss/crossentropy": 2.648197650909424, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.0074767498299479485, |
| "loss/reg": 5.2342705726623535, |
| "loss/twn": 0.0, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.014675, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 1021.3348307291667, |
| "learning_rate": 0.0001, |
| "loss": 6.9087, |
| "loss/crossentropy": 1.557716965675354, |
| "loss/hidden": 0.1103515625, |
| "loss/logits": 0.0062755015678703785, |
| "loss/reg": 5.2343974113464355, |
| "loss/twn": 0.0, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.0147, |
| "grad_norm": 14.0, |
| "grad_norm_var": 1014.2895182291667, |
| "learning_rate": 0.0001, |
| "loss": 7.9829, |
| "loss/crossentropy": 2.599196195602417, |
| "loss/hidden": 0.1416015625, |
| "loss/logits": 0.008030779659748077, |
| "loss/reg": 5.23403787612915, |
| "loss/twn": 0.0, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.014725, |
| "grad_norm": 10.75, |
| "grad_norm_var": 1008.364697265625, |
| "learning_rate": 0.0001, |
| "loss": 6.2994, |
| "loss/crossentropy": 0.9181722402572632, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.0018882363801822066, |
| "loss/reg": 5.2338151931762695, |
| "loss/twn": 0.0, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.01475, |
| "grad_norm": 10.75, |
| "grad_norm_var": 1011.1046875, |
| "learning_rate": 0.0001, |
| "loss": 7.8917, |
| "loss/crossentropy": 2.6068289279937744, |
| "loss/hidden": 0.04736328125, |
| "loss/logits": 0.003562201978638768, |
| "loss/reg": 5.233980178833008, |
| "loss/twn": 0.0, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.014775, |
| "grad_norm": 10.4375, |
| "grad_norm_var": 1009.9702473958333, |
| "learning_rate": 0.0001, |
| "loss": 6.9904, |
| "loss/crossentropy": 1.5700491666793823, |
| "loss/hidden": 0.1787109375, |
| "loss/logits": 0.0076020704582333565, |
| "loss/reg": 5.234048366546631, |
| "loss/twn": 0.0, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.0148, |
| "grad_norm": 13.75, |
| "grad_norm_var": 946.7228515625, |
| "learning_rate": 0.0001, |
| "loss": 8.3853, |
| "loss/crossentropy": 2.9748241901397705, |
| "loss/hidden": 0.162109375, |
| "loss/logits": 0.014261037111282349, |
| "loss/reg": 5.234062194824219, |
| "loss/twn": 0.0, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.014825, |
| "grad_norm": 20.625, |
| "grad_norm_var": 943.9872233072916, |
| "learning_rate": 0.0001, |
| "loss": 7.1549, |
| "loss/crossentropy": 1.6911969184875488, |
| "loss/hidden": 0.2158203125, |
| "loss/logits": 0.013989459723234177, |
| "loss/reg": 5.233931541442871, |
| "loss/twn": 0.0, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.01485, |
| "grad_norm": 12.1875, |
| "grad_norm_var": 938.7426920572917, |
| "learning_rate": 0.0001, |
| "loss": 6.7309, |
| "loss/crossentropy": 1.2908926010131836, |
| "loss/hidden": 0.1943359375, |
| "loss/logits": 0.011645066551864147, |
| "loss/reg": 5.234016418457031, |
| "loss/twn": 0.0, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.014875, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 939.1067057291667, |
| "learning_rate": 0.0001, |
| "loss": 8.2017, |
| "loss/crossentropy": 2.890516757965088, |
| "loss/hidden": 0.07177734375, |
| "loss/logits": 0.005793450400233269, |
| "loss/reg": 5.233628273010254, |
| "loss/twn": 0.0, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.0149, |
| "grad_norm": 9.75, |
| "grad_norm_var": 940.3130208333333, |
| "learning_rate": 0.0001, |
| "loss": 6.5188, |
| "loss/crossentropy": 1.189386010169983, |
| "loss/hidden": 0.09130859375, |
| "loss/logits": 0.004200034309178591, |
| "loss/reg": 5.233954906463623, |
| "loss/twn": 0.0, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.014925, |
| "grad_norm": 7.09375, |
| "grad_norm_var": 951.3511678059896, |
| "learning_rate": 0.0001, |
| "loss": 6.7014, |
| "loss/crossentropy": 1.3759723901748657, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.0052226390689611435, |
| "loss/reg": 5.233729362487793, |
| "loss/twn": 0.0, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.01495, |
| "grad_norm": 8.875, |
| "grad_norm_var": 956.5892211914063, |
| "learning_rate": 0.0001, |
| "loss": 7.4921, |
| "loss/crossentropy": 2.140756368637085, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.007792431861162186, |
| "loss/reg": 5.233695983886719, |
| "loss/twn": 0.0, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.014975, |
| "grad_norm": 12.5, |
| "grad_norm_var": 957.1479777018229, |
| "learning_rate": 0.0001, |
| "loss": 8.0241, |
| "loss/crossentropy": 2.6345574855804443, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.012438047677278519, |
| "loss/reg": 5.233548164367676, |
| "loss/twn": 0.0, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 10.0, |
| "grad_norm_var": 958.8220662434895, |
| "learning_rate": 0.0001, |
| "loss": 6.329, |
| "loss/crossentropy": 0.9624335765838623, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.007683398202061653, |
| "loss/reg": 5.233921051025391, |
| "loss/twn": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.015025, |
| "grad_norm": 78.5, |
| "grad_norm_var": 287.54615478515626, |
| "learning_rate": 0.0001, |
| "loss": 8.289, |
| "loss/crossentropy": 2.931908369064331, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.010676998645067215, |
| "loss/reg": 5.233586311340332, |
| "loss/twn": 0.0, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.01505, |
| "grad_norm": 11.125, |
| "grad_norm_var": 288.1986612955729, |
| "learning_rate": 0.0001, |
| "loss": 7.1402, |
| "loss/crossentropy": 1.7836761474609375, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.007275612559169531, |
| "loss/reg": 5.234038352966309, |
| "loss/twn": 0.0, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.015075, |
| "grad_norm": 214.0, |
| "grad_norm_var": 2734.4889933268228, |
| "learning_rate": 0.0001, |
| "loss": 8.0975, |
| "loss/crossentropy": 2.6364102363586426, |
| "loss/hidden": 0.220703125, |
| "loss/logits": 0.006685142405331135, |
| "loss/reg": 5.233693599700928, |
| "loss/twn": 0.0, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.0151, |
| "grad_norm": 14.125, |
| "grad_norm_var": 2734.2487915039064, |
| "learning_rate": 0.0001, |
| "loss": 7.7466, |
| "loss/crossentropy": 2.511186122894287, |
| "loss/hidden": 6.556510925292969e-06, |
| "loss/logits": 0.0016271582571789622, |
| "loss/reg": 5.233736515045166, |
| "loss/twn": 0.0, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.015125, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 2732.938928222656, |
| "learning_rate": 0.0001, |
| "loss": 7.2908, |
| "loss/crossentropy": 1.9334180355072021, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.011062689125537872, |
| "loss/reg": 5.233515739440918, |
| "loss/twn": 0.0, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.01515, |
| "grad_norm": 38.25, |
| "grad_norm_var": 2715.0710896809896, |
| "learning_rate": 0.0001, |
| "loss": 7.1654, |
| "loss/crossentropy": 1.7573686838150024, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.01111831609159708, |
| "loss/reg": 5.233856201171875, |
| "loss/twn": 0.0, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.015175, |
| "grad_norm": 10.9375, |
| "grad_norm_var": 2713.767053222656, |
| "learning_rate": 0.0001, |
| "loss": 8.0656, |
| "loss/crossentropy": 2.725510835647583, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.010500291362404823, |
| "loss/reg": 5.233447074890137, |
| "loss/twn": 0.0, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.0152, |
| "grad_norm": 15.4375, |
| "grad_norm_var": 2710.229455566406, |
| "learning_rate": 0.0001, |
| "loss": 8.219, |
| "loss/crossentropy": 2.8715081214904785, |
| "loss/hidden": 0.103515625, |
| "loss/logits": 0.010522611439228058, |
| "loss/reg": 5.233500003814697, |
| "loss/twn": 0.0, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.015225, |
| "grad_norm": 12.75, |
| "grad_norm_var": 2724.336779785156, |
| "learning_rate": 0.0001, |
| "loss": 7.4159, |
| "loss/crossentropy": 2.050481081008911, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.012026194483041763, |
| "loss/reg": 5.233264446258545, |
| "loss/twn": 0.0, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.01525, |
| "grad_norm": 102.5, |
| "grad_norm_var": 3021.098010253906, |
| "learning_rate": 0.0001, |
| "loss": 6.1157, |
| "loss/crossentropy": 0.7093434929847717, |
| "loss/hidden": 0.162109375, |
| "loss/logits": 0.010594572871923447, |
| "loss/reg": 5.233696460723877, |
| "loss/twn": 0.0, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.015275, |
| "grad_norm": 11.0, |
| "grad_norm_var": 3021.7085571289062, |
| "learning_rate": 0.0001, |
| "loss": 7.9153, |
| "loss/crossentropy": 2.5407986640930176, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.010352734476327896, |
| "loss/reg": 5.233257293701172, |
| "loss/twn": 0.0, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.0153, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 3023.223173014323, |
| "learning_rate": 0.0001, |
| "loss": 7.4287, |
| "loss/crossentropy": 2.056551694869995, |
| "loss/hidden": 0.1298828125, |
| "loss/logits": 0.008765427395701408, |
| "loss/reg": 5.233468055725098, |
| "loss/twn": 0.0, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.015325, |
| "grad_norm": 39.25, |
| "grad_norm_var": 2966.133268229167, |
| "learning_rate": 0.0001, |
| "loss": 7.0537, |
| "loss/crossentropy": 1.7130509614944458, |
| "loss/hidden": 0.10205078125, |
| "loss/logits": 0.005178738851100206, |
| "loss/reg": 5.233448028564453, |
| "loss/twn": 0.0, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.01535, |
| "grad_norm": 15.6875, |
| "grad_norm_var": 2943.0399576822915, |
| "learning_rate": 0.0001, |
| "loss": 7.4671, |
| "loss/crossentropy": 2.072944402694702, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.021782729774713516, |
| "loss/reg": 5.233670234680176, |
| "loss/twn": 0.0, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.015375, |
| "grad_norm": 40.5, |
| "grad_norm_var": 2897.146207682292, |
| "learning_rate": 0.0001, |
| "loss": 7.2931, |
| "loss/crossentropy": 1.9488314390182495, |
| "loss/hidden": 0.10693359375, |
| "loss/logits": 0.004016375169157982, |
| "loss/reg": 5.233325481414795, |
| "loss/twn": 0.0, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.0154, |
| "grad_norm": 17.875, |
| "grad_norm_var": 2869.870817057292, |
| "learning_rate": 0.0001, |
| "loss": 7.5828, |
| "loss/crossentropy": 2.250316858291626, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.005294739734381437, |
| "loss/reg": 5.233473300933838, |
| "loss/twn": 0.0, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.015425, |
| "grad_norm": 13.6875, |
| "grad_norm_var": 2801.0919270833333, |
| "learning_rate": 0.0001, |
| "loss": 7.946, |
| "loss/crossentropy": 2.605081796646118, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.01103608775883913, |
| "loss/reg": 5.233642578125, |
| "loss/twn": 0.0, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.01545, |
| "grad_norm": 10.8125, |
| "grad_norm_var": 2802.1390462239583, |
| "learning_rate": 0.0001, |
| "loss": 7.9096, |
| "loss/crossentropy": 2.6560287475585938, |
| "loss/hidden": 0.016357421875, |
| "loss/logits": 0.0036827209405601025, |
| "loss/reg": 5.233491897583008, |
| "loss/twn": 0.0, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.015475, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 563.0020833333333, |
| "learning_rate": 0.0001, |
| "loss": 7.9054, |
| "loss/crossentropy": 2.542119026184082, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.007251254748553038, |
| "loss/reg": 5.2334303855896, |
| "loss/twn": 0.0, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.0155, |
| "grad_norm": 17.875, |
| "grad_norm_var": 559.2559895833333, |
| "learning_rate": 0.0001, |
| "loss": 8.014, |
| "loss/crossentropy": 2.6021788120269775, |
| "loss/hidden": 0.1708984375, |
| "loss/logits": 0.007565245497971773, |
| "loss/reg": 5.23338508605957, |
| "loss/twn": 0.0, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.015525, |
| "grad_norm": 21.625, |
| "grad_norm_var": 548.9945149739583, |
| "learning_rate": 0.0001, |
| "loss": 8.1369, |
| "loss/crossentropy": 2.7238121032714844, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.0146188298240304, |
| "loss/reg": 5.233391284942627, |
| "loss/twn": 0.0, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.01555, |
| "grad_norm": 26.875, |
| "grad_norm_var": 535.8540201822917, |
| "learning_rate": 0.0001, |
| "loss": 6.8563, |
| "loss/crossentropy": 1.4800411462783813, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.008334355428814888, |
| "loss/reg": 5.233196258544922, |
| "loss/twn": 0.0, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.015575, |
| "grad_norm": 53.0, |
| "grad_norm_var": 575.73671875, |
| "learning_rate": 0.0001, |
| "loss": 8.042, |
| "loss/crossentropy": 2.7313811779022217, |
| "loss/hidden": 0.07275390625, |
| "loss/logits": 0.004363874904811382, |
| "loss/reg": 5.23349666595459, |
| "loss/twn": 0.0, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.0156, |
| "grad_norm": 11.9375, |
| "grad_norm_var": 581.51171875, |
| "learning_rate": 0.0001, |
| "loss": 7.9383, |
| "loss/crossentropy": 2.561565637588501, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.014568326994776726, |
| "loss/reg": 5.233224868774414, |
| "loss/twn": 0.0, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.015625, |
| "grad_norm": 17.875, |
| "grad_norm_var": 574.1311848958334, |
| "learning_rate": 0.0001, |
| "loss": 7.8326, |
| "loss/crossentropy": 2.5030391216278076, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.009584227576851845, |
| "loss/reg": 5.233513355255127, |
| "loss/twn": 0.0, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.01565, |
| "grad_norm": 14.8125, |
| "grad_norm_var": 163.484228515625, |
| "learning_rate": 0.0001, |
| "loss": 6.0231, |
| "loss/crossentropy": 0.6415687203407288, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.010496280156075954, |
| "loss/reg": 5.233306884765625, |
| "loss/twn": 0.0, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.015675, |
| "grad_norm": 20.125, |
| "grad_norm_var": 156.77355143229167, |
| "learning_rate": 0.0001, |
| "loss": 6.8528, |
| "loss/crossentropy": 1.486595630645752, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.005804477259516716, |
| "loss/reg": 5.2334794998168945, |
| "loss/twn": 0.0, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.0157, |
| "grad_norm": 18.0, |
| "grad_norm_var": 147.53177083333333, |
| "learning_rate": 0.0001, |
| "loss": 6.2299, |
| "loss/crossentropy": 0.6993483304977417, |
| "loss/hidden": 0.287109375, |
| "loss/logits": 0.010279776528477669, |
| "loss/reg": 5.233189582824707, |
| "loss/twn": 0.0, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.015725, |
| "grad_norm": 9.0625, |
| "grad_norm_var": 134.67849934895833, |
| "learning_rate": 0.0001, |
| "loss": 7.3681, |
| "loss/crossentropy": 2.038987874984741, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.009393742308020592, |
| "loss/reg": 5.2332634925842285, |
| "loss/twn": 0.0, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.01575, |
| "grad_norm": 13.625, |
| "grad_norm_var": 136.13567708333332, |
| "learning_rate": 0.0001, |
| "loss": 7.7675, |
| "loss/crossentropy": 2.3605449199676514, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.006129886955022812, |
| "loss/reg": 5.232873916625977, |
| "loss/twn": 0.0, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.015775, |
| "grad_norm": 14.6875, |
| "grad_norm_var": 106.847900390625, |
| "learning_rate": 0.0001, |
| "loss": 8.0393, |
| "loss/crossentropy": 2.6303060054779053, |
| "loss/hidden": 0.1689453125, |
| "loss/logits": 0.007038387469947338, |
| "loss/reg": 5.23299503326416, |
| "loss/twn": 0.0, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.0158, |
| "grad_norm": 72.0, |
| "grad_norm_var": 287.03904622395834, |
| "learning_rate": 0.0001, |
| "loss": 8.0743, |
| "loss/crossentropy": 2.7000534534454346, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.010081219486892223, |
| "loss/reg": 5.233316421508789, |
| "loss/twn": 0.0, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.015825, |
| "grad_norm": 39.0, |
| "grad_norm_var": 300.17649739583334, |
| "learning_rate": 0.0001, |
| "loss": 7.4968, |
| "loss/crossentropy": 2.066685676574707, |
| "loss/hidden": 0.1875, |
| "loss/logits": 0.009645121172070503, |
| "loss/reg": 5.233018398284912, |
| "loss/twn": 0.0, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.01585, |
| "grad_norm": 11.125, |
| "grad_norm_var": 299.664697265625, |
| "learning_rate": 0.0001, |
| "loss": 7.4915, |
| "loss/crossentropy": 2.1979663372039795, |
| "loss/hidden": 0.0546875, |
| "loss/logits": 0.0055001177825033665, |
| "loss/reg": 5.23332405090332, |
| "loss/twn": 0.0, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.015875, |
| "grad_norm": 13.5625, |
| "grad_norm_var": 295.147509765625, |
| "learning_rate": 0.0001, |
| "loss": 8.1414, |
| "loss/crossentropy": 2.8441083431243896, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.007112159393727779, |
| "loss/reg": 5.233099460601807, |
| "loss/twn": 0.0, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.0159, |
| "grad_norm": 14.4375, |
| "grad_norm_var": 298.4408854166667, |
| "learning_rate": 0.0001, |
| "loss": 7.8434, |
| "loss/crossentropy": 2.419523239135742, |
| "loss/hidden": 0.1787109375, |
| "loss/logits": 0.011547038331627846, |
| "loss/reg": 5.233601093292236, |
| "loss/twn": 0.0, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.015925, |
| "grad_norm": 135.0, |
| "grad_norm_var": 1077.4806640625, |
| "learning_rate": 0.0001, |
| "loss": 7.9874, |
| "loss/crossentropy": 2.4825010299682617, |
| "loss/hidden": 0.25390625, |
| "loss/logits": 0.017681429162621498, |
| "loss/reg": 5.233287811279297, |
| "loss/twn": 0.0, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.01595, |
| "grad_norm": 79.5, |
| "grad_norm_var": 1226.39296875, |
| "learning_rate": 0.0001, |
| "loss": 5.6235, |
| "loss/crossentropy": 0.20704708993434906, |
| "loss/hidden": 0.1806640625, |
| "loss/logits": 0.0022514096926897764, |
| "loss/reg": 5.233528137207031, |
| "loss/twn": 0.0, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.015975, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 1224.665869140625, |
| "learning_rate": 0.0001, |
| "loss": 7.9267, |
| "loss/crossentropy": 2.5261454582214355, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.010081654414534569, |
| "loss/reg": 5.233248233795166, |
| "loss/twn": 0.0, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 384.0, |
| "grad_norm_var": 8927.619205729166, |
| "learning_rate": 0.0001, |
| "loss": 7.0656, |
| "loss/crossentropy": 1.7351843118667603, |
| "loss/hidden": 0.0927734375, |
| "loss/logits": 0.004287827759981155, |
| "loss/reg": 5.233373641967773, |
| "loss/twn": 0.0, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.016025, |
| "grad_norm": 18.125, |
| "grad_norm_var": 8926.408268229166, |
| "learning_rate": 0.0001, |
| "loss": 7.0317, |
| "loss/crossentropy": 1.592779278755188, |
| "loss/hidden": 0.193359375, |
| "loss/logits": 0.012589013203978539, |
| "loss/reg": 5.232937335968018, |
| "loss/twn": 0.0, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.01605, |
| "grad_norm": 9.125, |
| "grad_norm_var": 8958.401936848959, |
| "learning_rate": 0.0001, |
| "loss": 8.2427, |
| "loss/crossentropy": 2.891831874847412, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.005527087952941656, |
| "loss/reg": 5.233004093170166, |
| "loss/twn": 0.0, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.016075, |
| "grad_norm": 93.0, |
| "grad_norm_var": 8961.362483723959, |
| "learning_rate": 0.0001, |
| "loss": 6.9493, |
| "loss/crossentropy": 1.5797322988510132, |
| "loss/hidden": 0.1259765625, |
| "loss/logits": 0.010628938674926758, |
| "loss/reg": 5.232941627502441, |
| "loss/twn": 0.0, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.0161, |
| "grad_norm": 9.875, |
| "grad_norm_var": 9009.401546223959, |
| "learning_rate": 0.0001, |
| "loss": 8.1281, |
| "loss/crossentropy": 2.7545292377471924, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.007755103521049023, |
| "loss/reg": 5.232993125915527, |
| "loss/twn": 0.0, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.016125, |
| "grad_norm": 28.75, |
| "grad_norm_var": 8905.093684895834, |
| "learning_rate": 0.0001, |
| "loss": 7.9328, |
| "loss/crossentropy": 2.643615245819092, |
| "loss/hidden": 0.054443359375, |
| "loss/logits": 0.0016623031115159392, |
| "loss/reg": 5.2330780029296875, |
| "loss/twn": 0.0, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.01615, |
| "grad_norm": 20.625, |
| "grad_norm_var": 8865.565559895833, |
| "learning_rate": 0.0001, |
| "loss": 6.9237, |
| "loss/crossentropy": 1.4672698974609375, |
| "loss/hidden": 0.2138671875, |
| "loss/logits": 0.009410521015524864, |
| "loss/reg": 5.233164310455322, |
| "loss/twn": 0.0, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.016175, |
| "grad_norm": 15.25, |
| "grad_norm_var": 8862.209749348958, |
| "learning_rate": 0.0001, |
| "loss": 7.9973, |
| "loss/crossentropy": 2.6482272148132324, |
| "loss/hidden": 0.10791015625, |
| "loss/logits": 0.00825223047286272, |
| "loss/reg": 5.232880592346191, |
| "loss/twn": 0.0, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.0162, |
| "grad_norm": 13.6875, |
| "grad_norm_var": 8979.335872395834, |
| "learning_rate": 0.0001, |
| "loss": 6.1538, |
| "loss/crossentropy": 0.7194666862487793, |
| "loss/hidden": 0.1884765625, |
| "loss/logits": 0.012791863642632961, |
| "loss/reg": 5.233090877532959, |
| "loss/twn": 0.0, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.016225, |
| "grad_norm": 24.875, |
| "grad_norm_var": 9023.984114583332, |
| "learning_rate": 0.0001, |
| "loss": 7.1305, |
| "loss/crossentropy": 1.7185572385787964, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.011910820379853249, |
| "loss/reg": 5.233007431030273, |
| "loss/twn": 0.0, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.01625, |
| "grad_norm": 12.4375, |
| "grad_norm_var": 9016.378108723959, |
| "learning_rate": 0.0001, |
| "loss": 7.4389, |
| "loss/crossentropy": 2.082362174987793, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.010908122174441814, |
| "loss/reg": 5.232817649841309, |
| "loss/twn": 0.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.016275, |
| "grad_norm": 8.9375, |
| "grad_norm_var": 9043.443994140625, |
| "learning_rate": 0.0001, |
| "loss": 7.4177, |
| "loss/crossentropy": 2.062870502471924, |
| "loss/hidden": 0.11328125, |
| "loss/logits": 0.008752668276429176, |
| "loss/reg": 5.232777118682861, |
| "loss/twn": 0.0, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.0163, |
| "grad_norm": 12.0625, |
| "grad_norm_var": 9056.640087890624, |
| "learning_rate": 0.0001, |
| "loss": 7.9823, |
| "loss/crossentropy": 2.6078176498413086, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.008596043102443218, |
| "loss/reg": 5.23306941986084, |
| "loss/twn": 0.0, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.016325, |
| "grad_norm": 122.5, |
| "grad_norm_var": 8932.818473307292, |
| "learning_rate": 0.0001, |
| "loss": 7.9264, |
| "loss/crossentropy": 2.5046682357788086, |
| "loss/hidden": 0.1748046875, |
| "loss/logits": 0.014288893900811672, |
| "loss/reg": 5.232659339904785, |
| "loss/twn": 0.0, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.01635, |
| "grad_norm": 21.5, |
| "grad_norm_var": 8946.382014973959, |
| "learning_rate": 0.0001, |
| "loss": 7.2315, |
| "loss/crossentropy": 1.8828779458999634, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.005740518681704998, |
| "loss/reg": 5.23300838470459, |
| "loss/twn": 0.0, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.016375, |
| "grad_norm": 352.0, |
| "grad_norm_var": 14431.18515625, |
| "learning_rate": 0.0001, |
| "loss": 6.8525, |
| "loss/crossentropy": 1.471374273300171, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.009916655719280243, |
| "loss/reg": 5.232522010803223, |
| "loss/twn": 0.0, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.0164, |
| "grad_norm": 33.75, |
| "grad_norm_var": 7512.653125, |
| "learning_rate": 0.0001, |
| "loss": 6.2595, |
| "loss/crossentropy": 0.7713863253593445, |
| "loss/hidden": 0.244140625, |
| "loss/logits": 0.01109264511615038, |
| "loss/reg": 5.232911109924316, |
| "loss/twn": 0.0, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.016425, |
| "grad_norm": 15.0625, |
| "grad_norm_var": 7526.165608723958, |
| "learning_rate": 0.0001, |
| "loss": 8.093, |
| "loss/crossentropy": 2.709456205368042, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.01727338880300522, |
| "loss/reg": 5.23248815536499, |
| "loss/twn": 0.0, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.01645, |
| "grad_norm": 9.5625, |
| "grad_norm_var": 7523.817122395833, |
| "learning_rate": 0.0001, |
| "loss": 6.334, |
| "loss/crossentropy": 0.9124006032943726, |
| "loss/hidden": 0.1806640625, |
| "loss/logits": 0.007904157042503357, |
| "loss/reg": 5.2329912185668945, |
| "loss/twn": 0.0, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.016475, |
| "grad_norm": 9.625, |
| "grad_norm_var": 7476.006770833334, |
| "learning_rate": 0.0001, |
| "loss": 7.0775, |
| "loss/crossentropy": 1.669623613357544, |
| "loss/hidden": 0.1640625, |
| "loss/logits": 0.011253604665398598, |
| "loss/reg": 5.232557773590088, |
| "loss/twn": 0.0, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.0165, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 7476.294775390625, |
| "learning_rate": 0.0001, |
| "loss": 8.0574, |
| "loss/crossentropy": 2.7882609367370605, |
| "loss/hidden": 0.03515625, |
| "loss/logits": 0.0012600821210071445, |
| "loss/reg": 5.23272705078125, |
| "loss/twn": 0.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.016525, |
| "grad_norm": 64.5, |
| "grad_norm_var": 7481.564176432292, |
| "learning_rate": 0.0001, |
| "loss": 7.1712, |
| "loss/crossentropy": 1.5283738374710083, |
| "loss/hidden": 0.40234375, |
| "loss/logits": 0.008047623559832573, |
| "loss/reg": 5.2324652671813965, |
| "loss/twn": 0.0, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.01655, |
| "grad_norm": 14.0625, |
| "grad_norm_var": 7507.016080729167, |
| "learning_rate": 0.0001, |
| "loss": 6.9527, |
| "loss/crossentropy": 1.5561813116073608, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.013308672234416008, |
| "loss/reg": 5.232776641845703, |
| "loss/twn": 0.0, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.016575, |
| "grad_norm": 8.5625, |
| "grad_norm_var": 7537.432014973959, |
| "learning_rate": 0.0001, |
| "loss": 6.883, |
| "loss/crossentropy": 1.4900498390197754, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.008140160702168941, |
| "loss/reg": 5.232505798339844, |
| "loss/twn": 0.0, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.0166, |
| "grad_norm": 10.4375, |
| "grad_norm_var": 7552.011311848958, |
| "learning_rate": 0.0001, |
| "loss": 7.9879, |
| "loss/crossentropy": 2.649501085281372, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.006926264148205519, |
| "loss/reg": 5.2328362464904785, |
| "loss/twn": 0.0, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.016625, |
| "grad_norm": 20.875, |
| "grad_norm_var": 7564.067561848959, |
| "learning_rate": 0.0001, |
| "loss": 7.9074, |
| "loss/crossentropy": 2.5822925567626953, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006065651774406433, |
| "loss/reg": 5.232655048370361, |
| "loss/twn": 0.0, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.01665, |
| "grad_norm": 10.125, |
| "grad_norm_var": 7574.551497395833, |
| "learning_rate": 0.0001, |
| "loss": 8.0297, |
| "loss/crossentropy": 2.7020585536956787, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.008298511616885662, |
| "loss/reg": 5.232966899871826, |
| "loss/twn": 0.0, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.016675, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 7563.417447916667, |
| "learning_rate": 0.0001, |
| "loss": 6.2821, |
| "loss/crossentropy": 0.8871417045593262, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.005114687606692314, |
| "loss/reg": 5.232606410980225, |
| "loss/twn": 0.0, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.0167, |
| "grad_norm": 16.75, |
| "grad_norm_var": 7543.980192057292, |
| "learning_rate": 0.0001, |
| "loss": 7.9399, |
| "loss/crossentropy": 2.610417604446411, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.010183998383581638, |
| "loss/reg": 5.2328290939331055, |
| "loss/twn": 0.0, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.016725, |
| "grad_norm": 7.71875, |
| "grad_norm_var": 7191.31181233724, |
| "learning_rate": 0.0001, |
| "loss": 7.23, |
| "loss/crossentropy": 1.891376256942749, |
| "loss/hidden": 0.09765625, |
| "loss/logits": 0.008510958403348923, |
| "loss/reg": 5.232450485229492, |
| "loss/twn": 0.0, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.01675, |
| "grad_norm": 16.5, |
| "grad_norm_var": 7204.193322753907, |
| "learning_rate": 0.0001, |
| "loss": 6.8222, |
| "loss/crossentropy": 1.4736872911453247, |
| "loss/hidden": 0.10546875, |
| "loss/logits": 0.010109667666256428, |
| "loss/reg": 5.232895374298096, |
| "loss/twn": 0.0, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.016775, |
| "grad_norm": 17.0, |
| "grad_norm_var": 200.33785400390624, |
| "learning_rate": 0.0001, |
| "loss": 8.133, |
| "loss/crossentropy": 2.7790119647979736, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.008631115779280663, |
| "loss/reg": 5.232613563537598, |
| "loss/twn": 0.0, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.0168, |
| "grad_norm": 21.875, |
| "grad_norm_var": 182.99231363932293, |
| "learning_rate": 0.0001, |
| "loss": 8.1379, |
| "loss/crossentropy": 2.766389846801758, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.009614645503461361, |
| "loss/reg": 5.232971668243408, |
| "loss/twn": 0.0, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.016825, |
| "grad_norm": 10.1875, |
| "grad_norm_var": 185.4031534830729, |
| "learning_rate": 0.0001, |
| "loss": 7.7498, |
| "loss/crossentropy": 2.4996728897094727, |
| "loss/hidden": 0.0140380859375, |
| "loss/logits": 0.0034008692018687725, |
| "loss/reg": 5.2326860427856445, |
| "loss/twn": 0.0, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.01685, |
| "grad_norm": 11.6875, |
| "grad_norm_var": 183.8099568684896, |
| "learning_rate": 0.0001, |
| "loss": 6.7223, |
| "loss/crossentropy": 1.3949775695800781, |
| "loss/hidden": 0.0927734375, |
| "loss/logits": 0.0020429021678864956, |
| "loss/reg": 5.23252010345459, |
| "loss/twn": 0.0, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.016875, |
| "grad_norm": 11.0, |
| "grad_norm_var": 182.70172119140625, |
| "learning_rate": 0.0001, |
| "loss": 7.9354, |
| "loss/crossentropy": 2.6207685470581055, |
| "loss/hidden": 0.07666015625, |
| "loss/logits": 0.005197848193347454, |
| "loss/reg": 5.232755184173584, |
| "loss/twn": 0.0, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.0169, |
| "grad_norm": 20.5, |
| "grad_norm_var": 180.45289306640626, |
| "learning_rate": 0.0001, |
| "loss": 8.3505, |
| "loss/crossentropy": 2.9047460556030273, |
| "loss/hidden": 0.193359375, |
| "loss/logits": 0.019472159445285797, |
| "loss/reg": 5.232929706573486, |
| "loss/twn": 0.0, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.016925, |
| "grad_norm": 8.5, |
| "grad_norm_var": 22.29664306640625, |
| "learning_rate": 0.0001, |
| "loss": 7.8368, |
| "loss/crossentropy": 2.4469549655914307, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.008610617369413376, |
| "loss/reg": 5.232751369476318, |
| "loss/twn": 0.0, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.01695, |
| "grad_norm": 14.9375, |
| "grad_norm_var": 22.402144368489584, |
| "learning_rate": 0.0001, |
| "loss": 7.847, |
| "loss/crossentropy": 2.5082645416259766, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.007502686232328415, |
| "loss/reg": 5.2326483726501465, |
| "loss/twn": 0.0, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.016975, |
| "grad_norm": 179.0, |
| "grad_norm_var": 1722.9600545247397, |
| "learning_rate": 0.0001, |
| "loss": 6.7476, |
| "loss/crossentropy": 1.1506078243255615, |
| "loss/hidden": 0.35546875, |
| "loss/logits": 0.008823427371680737, |
| "loss/reg": 5.2327399253845215, |
| "loss/twn": 0.0, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 10.0625, |
| "grad_norm_var": 1723.6607381184897, |
| "learning_rate": 0.0001, |
| "loss": 7.9045, |
| "loss/crossentropy": 2.541614055633545, |
| "loss/hidden": 0.12109375, |
| "loss/logits": 0.008851654827594757, |
| "loss/reg": 5.232950687408447, |
| "loss/twn": 0.0, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.017025, |
| "grad_norm": 38.5, |
| "grad_norm_var": 1735.1399373372396, |
| "learning_rate": 0.0001, |
| "loss": 6.9999, |
| "loss/crossentropy": 1.6033979654312134, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.006385164335370064, |
| "loss/reg": 5.232907772064209, |
| "loss/twn": 0.0, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.01705, |
| "grad_norm": 17.625, |
| "grad_norm_var": 1723.4270467122396, |
| "learning_rate": 0.0001, |
| "loss": 7.998, |
| "loss/crossentropy": 2.6382601261138916, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.014375717379152775, |
| "loss/reg": 5.232568264007568, |
| "loss/twn": 0.0, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.017075, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 1724.6606079101562, |
| "learning_rate": 0.0001, |
| "loss": 6.5706, |
| "loss/crossentropy": 1.2485917806625366, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.002942750696092844, |
| "loss/reg": 5.232631206512451, |
| "loss/twn": 0.0, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.0171, |
| "grad_norm": 55.25, |
| "grad_norm_var": 1770.930790201823, |
| "learning_rate": 0.0001, |
| "loss": 7.0946, |
| "loss/crossentropy": 1.641523003578186, |
| "loss/hidden": 0.212890625, |
| "loss/logits": 0.007442857138812542, |
| "loss/reg": 5.2327880859375, |
| "loss/twn": 0.0, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.017125, |
| "grad_norm": 12.0, |
| "grad_norm_var": 1760.3909993489583, |
| "learning_rate": 0.0001, |
| "loss": 6.5653, |
| "loss/crossentropy": 1.1899452209472656, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.0118794534355402, |
| "loss/reg": 5.232624530792236, |
| "loss/twn": 0.0, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.01715, |
| "grad_norm": 11.0625, |
| "grad_norm_var": 1770.9077473958334, |
| "learning_rate": 0.0001, |
| "loss": 7.3521, |
| "loss/crossentropy": 2.0265913009643555, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006208708509802818, |
| "loss/reg": 5.2328410148620605, |
| "loss/twn": 0.0, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.017175, |
| "grad_norm": 17.75, |
| "grad_norm_var": 1769.8311848958333, |
| "learning_rate": 0.0001, |
| "loss": 6.2951, |
| "loss/crossentropy": 0.9038959741592407, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.007222220301628113, |
| "loss/reg": 5.2325825691223145, |
| "loss/twn": 0.0, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.0172, |
| "grad_norm": 137.0, |
| "grad_norm_var": 2501.6544270833333, |
| "learning_rate": 0.0001, |
| "loss": 7.9399, |
| "loss/crossentropy": 2.5502278804779053, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.011638427153229713, |
| "loss/reg": 5.232552528381348, |
| "loss/twn": 0.0, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.017225, |
| "grad_norm": 16.25, |
| "grad_norm_var": 2483.604280598958, |
| "learning_rate": 0.0001, |
| "loss": 8.1215, |
| "loss/crossentropy": 2.7514421939849854, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.010679876431822777, |
| "loss/reg": 5.232418537139893, |
| "loss/twn": 0.0, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.01725, |
| "grad_norm": 9.75, |
| "grad_norm_var": 2490.0520182291666, |
| "learning_rate": 0.0001, |
| "loss": 7.1755, |
| "loss/crossentropy": 1.8485499620437622, |
| "loss/hidden": 0.08837890625, |
| "loss/logits": 0.005838857963681221, |
| "loss/reg": 5.2327117919921875, |
| "loss/twn": 0.0, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.017275, |
| "grad_norm": 12.5, |
| "grad_norm_var": 2485.2692057291665, |
| "learning_rate": 0.0001, |
| "loss": 7.1405, |
| "loss/crossentropy": 1.8488080501556396, |
| "loss/hidden": 0.0546875, |
| "loss/logits": 0.0044571696780622005, |
| "loss/reg": 5.232502460479736, |
| "loss/twn": 0.0, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.0173, |
| "grad_norm": 1448.0, |
| "grad_norm_var": 126949.88639322917, |
| "learning_rate": 0.0001, |
| "loss": 6.8951, |
| "loss/crossentropy": 1.4486842155456543, |
| "loss/hidden": 0.2001953125, |
| "loss/logits": 0.013465155847370625, |
| "loss/reg": 5.23276424407959, |
| "loss/twn": 0.0, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.017325, |
| "grad_norm": 192.0, |
| "grad_norm_var": 126205.7556640625, |
| "learning_rate": 0.0001, |
| "loss": 6.7837, |
| "loss/crossentropy": 1.3860431909561157, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.006227361969649792, |
| "loss/reg": 5.232283592224121, |
| "loss/twn": 0.0, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.01735, |
| "grad_norm": 8.8125, |
| "grad_norm_var": 126307.29348958333, |
| "learning_rate": 0.0001, |
| "loss": 7.7481, |
| "loss/crossentropy": 2.4704062938690186, |
| "loss/hidden": 0.0400390625, |
| "loss/logits": 0.004876245744526386, |
| "loss/reg": 5.232789993286133, |
| "loss/twn": 0.0, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.017375, |
| "grad_norm": 13.9375, |
| "grad_norm_var": 127064.13084309896, |
| "learning_rate": 0.0001, |
| "loss": 7.4, |
| "loss/crossentropy": 2.0642364025115967, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.009522231295704842, |
| "loss/reg": 5.2324628829956055, |
| "loss/twn": 0.0, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.0174, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 127075.72967122396, |
| "learning_rate": 0.0001, |
| "loss": 7.1133, |
| "loss/crossentropy": 1.8082016706466675, |
| "loss/hidden": 0.0693359375, |
| "loss/logits": 0.0033433041535317898, |
| "loss/reg": 5.232382297515869, |
| "loss/twn": 0.0, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.017425, |
| "grad_norm": 15.5, |
| "grad_norm_var": 127376.05935872396, |
| "learning_rate": 0.0001, |
| "loss": 8.0679, |
| "loss/crossentropy": 2.6998050212860107, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.011031926609575748, |
| "loss/reg": 5.232105731964111, |
| "loss/twn": 0.0, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.01745, |
| "grad_norm": 16.5, |
| "grad_norm_var": 127392.12693684896, |
| "learning_rate": 0.0001, |
| "loss": 7.1902, |
| "loss/crossentropy": 1.788904070854187, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.010435621254146099, |
| "loss/reg": 5.232694149017334, |
| "loss/twn": 0.0, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.017475, |
| "grad_norm": 26.25, |
| "grad_norm_var": 127171.84055989583, |
| "learning_rate": 0.0001, |
| "loss": 8.0089, |
| "loss/crossentropy": 2.5942537784576416, |
| "loss/hidden": 0.1728515625, |
| "loss/logits": 0.009584179148077965, |
| "loss/reg": 5.232229232788086, |
| "loss/twn": 0.0, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 7.375, |
| "grad_norm_var": 127761.07708333334, |
| "learning_rate": 0.0001, |
| "loss": 7.1149, |
| "loss/crossentropy": 1.7783420085906982, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.0052945781499147415, |
| "loss/reg": 5.232630252838135, |
| "loss/twn": 0.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.017525, |
| "grad_norm": 14.875, |
| "grad_norm_var": 127719.3791015625, |
| "learning_rate": 0.0001, |
| "loss": 8.0908, |
| "loss/crossentropy": 2.7603297233581543, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.011579321697354317, |
| "loss/reg": 5.232461929321289, |
| "loss/twn": 0.0, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.01755, |
| "grad_norm": 25.75, |
| "grad_norm_var": 127515.01248372396, |
| "learning_rate": 0.0001, |
| "loss": 7.8012, |
| "loss/crossentropy": 2.4607961177825928, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.009348167106509209, |
| "loss/reg": 5.232429504394531, |
| "loss/twn": 0.0, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.017575, |
| "grad_norm": 36.25, |
| "grad_norm_var": 127276.2372233073, |
| "learning_rate": 0.0001, |
| "loss": 7.7355, |
| "loss/crossentropy": 2.2735610008239746, |
| "loss/hidden": 0.216796875, |
| "loss/logits": 0.012781517580151558, |
| "loss/reg": 5.232335090637207, |
| "loss/twn": 0.0, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.0176, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 128031.16139322917, |
| "learning_rate": 0.0001, |
| "loss": 7.7608, |
| "loss/crossentropy": 2.4146182537078857, |
| "loss/hidden": 0.10595703125, |
| "loss/logits": 0.007775201462209225, |
| "loss/reg": 5.232488632202148, |
| "loss/twn": 0.0, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.017625, |
| "grad_norm": 12.875, |
| "grad_norm_var": 128077.03854166667, |
| "learning_rate": 0.0001, |
| "loss": 7.6663, |
| "loss/crossentropy": 2.3123011589050293, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.008896533399820328, |
| "loss/reg": 5.232283115386963, |
| "loss/twn": 0.0, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.01765, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 128076.14998372395, |
| "learning_rate": 0.0001, |
| "loss": 6.7063, |
| "loss/crossentropy": 1.328172206878662, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.006915399804711342, |
| "loss/reg": 5.232503414154053, |
| "loss/twn": 0.0, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.017675, |
| "grad_norm": 10.25, |
| "grad_norm_var": 128107.63943684896, |
| "learning_rate": 0.0001, |
| "loss": 5.6211, |
| "loss/crossentropy": 0.28784415125846863, |
| "loss/hidden": 0.09521484375, |
| "loss/logits": 0.005815575830638409, |
| "loss/reg": 5.232184410095215, |
| "loss/twn": 0.0, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.0177, |
| "grad_norm": 9.875, |
| "grad_norm_var": 2011.0417805989584, |
| "learning_rate": 0.0001, |
| "loss": 7.6542, |
| "loss/crossentropy": 2.2898013591766357, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.005081703420728445, |
| "loss/reg": 5.232325553894043, |
| "loss/twn": 0.0, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.017725, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 62.6244140625, |
| "learning_rate": 0.0001, |
| "loss": 6.8514, |
| "loss/crossentropy": 1.4818062782287598, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.0045208255760371685, |
| "loss/reg": 5.232298851013184, |
| "loss/twn": 0.0, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.01775, |
| "grad_norm": 14.9375, |
| "grad_norm_var": 59.92239583333333, |
| "learning_rate": 0.0001, |
| "loss": 7.9023, |
| "loss/crossentropy": 2.559528112411499, |
| "loss/hidden": 0.103515625, |
| "loss/logits": 0.006969613488763571, |
| "loss/reg": 5.232254505157471, |
| "loss/twn": 0.0, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.017775, |
| "grad_norm": 11.375, |
| "grad_norm_var": 60.82394205729167, |
| "learning_rate": 0.0001, |
| "loss": 6.8892, |
| "loss/crossentropy": 1.5907115936279297, |
| "loss/hidden": 0.064453125, |
| "loss/logits": 0.0018022289732471108, |
| "loss/reg": 5.232196807861328, |
| "loss/twn": 0.0, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.0178, |
| "grad_norm": 12.5, |
| "grad_norm_var": 58.95045572916667, |
| "learning_rate": 0.0001, |
| "loss": 8.0051, |
| "loss/crossentropy": 2.6705007553100586, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.005942562595009804, |
| "loss/reg": 5.232419967651367, |
| "loss/twn": 0.0, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.017825, |
| "grad_norm": 7.6875, |
| "grad_norm_var": 62.675634765625, |
| "learning_rate": 0.0001, |
| "loss": 7.4538, |
| "loss/crossentropy": 2.1238763332366943, |
| "loss/hidden": 0.09130859375, |
| "loss/logits": 0.006380223203450441, |
| "loss/reg": 5.232213497161865, |
| "loss/twn": 0.0, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.01785, |
| "grad_norm": 11.5, |
| "grad_norm_var": 63.188655598958334, |
| "learning_rate": 0.0001, |
| "loss": 7.0007, |
| "loss/crossentropy": 1.551921010017395, |
| "loss/hidden": 0.2099609375, |
| "loss/logits": 0.006598391104489565, |
| "loss/reg": 5.2322468757629395, |
| "loss/twn": 0.0, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.017875, |
| "grad_norm": 12.5625, |
| "grad_norm_var": 53.66087239583333, |
| "learning_rate": 0.0001, |
| "loss": 8.5447, |
| "loss/crossentropy": 3.1880903244018555, |
| "loss/hidden": 0.11474609375, |
| "loss/logits": 0.009867793880403042, |
| "loss/reg": 5.2319817543029785, |
| "loss/twn": 0.0, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.0179, |
| "grad_norm": 8.8125, |
| "grad_norm_var": 52.566650390625, |
| "learning_rate": 0.0001, |
| "loss": 7.3708, |
| "loss/crossentropy": 2.0305135250091553, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.00682512391358614, |
| "loss/reg": 5.232370853424072, |
| "loss/twn": 0.0, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.017925, |
| "grad_norm": 9.5, |
| "grad_norm_var": 53.636051432291666, |
| "learning_rate": 0.0001, |
| "loss": 6.9861, |
| "loss/crossentropy": 1.6283918619155884, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.0056338622234761715, |
| "loss/reg": 5.231950759887695, |
| "loss/twn": 0.0, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.01795, |
| "grad_norm": 16.375, |
| "grad_norm_var": 43.831363932291666, |
| "learning_rate": 0.0001, |
| "loss": 7.5541, |
| "loss/crossentropy": 2.1077566146850586, |
| "loss/hidden": 0.2041015625, |
| "loss/logits": 0.009965687990188599, |
| "loss/reg": 5.232308387756348, |
| "loss/twn": 0.0, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.017975, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 5.275455729166667, |
| "learning_rate": 0.0001, |
| "loss": 8.3363, |
| "loss/crossentropy": 2.959925413131714, |
| "loss/hidden": 0.1298828125, |
| "loss/logits": 0.014515706337988377, |
| "loss/reg": 5.231976509094238, |
| "loss/twn": 0.0, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 13.1875, |
| "grad_norm_var": 5.351822916666666, |
| "learning_rate": 0.0001, |
| "loss": 8.3041, |
| "loss/crossentropy": 2.966278553009033, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.007250492461025715, |
| "loss/reg": 5.2319159507751465, |
| "loss/twn": 0.0, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.018025, |
| "grad_norm": 16.25, |
| "grad_norm_var": 6.689518229166667, |
| "learning_rate": 0.0001, |
| "loss": 8.1075, |
| "loss/crossentropy": 2.7332916259765625, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.0132482023909688, |
| "loss/reg": 5.232076644897461, |
| "loss/twn": 0.0, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.01805, |
| "grad_norm": 12.0, |
| "grad_norm_var": 6.439436848958334, |
| "learning_rate": 0.0001, |
| "loss": 6.6983, |
| "loss/crossentropy": 1.3738974332809448, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006151068024337292, |
| "loss/reg": 5.231801986694336, |
| "loss/twn": 0.0, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.018075, |
| "grad_norm": 8.8125, |
| "grad_norm_var": 6.871809895833334, |
| "learning_rate": 0.0001, |
| "loss": 7.4411, |
| "loss/crossentropy": 2.184509515762329, |
| "loss/hidden": 0.02099609375, |
| "loss/logits": 0.00361478328704834, |
| "loss/reg": 5.23202657699585, |
| "loss/twn": 0.0, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.0181, |
| "grad_norm": 13.75, |
| "grad_norm_var": 6.845572916666667, |
| "learning_rate": 0.0001, |
| "loss": 7.2513, |
| "loss/crossentropy": 1.84177827835083, |
| "loss/hidden": 0.16796875, |
| "loss/logits": 0.009550071321427822, |
| "loss/reg": 5.231957912445068, |
| "loss/twn": 0.0, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.018125, |
| "grad_norm": 11.625, |
| "grad_norm_var": 6.486832682291666, |
| "learning_rate": 0.0001, |
| "loss": 8.1392, |
| "loss/crossentropy": 2.7632906436920166, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.012371431104838848, |
| "loss/reg": 5.231691360473633, |
| "loss/twn": 0.0, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.01815, |
| "grad_norm": 13.625, |
| "grad_norm_var": 6.098893229166666, |
| "learning_rate": 0.0001, |
| "loss": 8.1336, |
| "loss/crossentropy": 2.7336745262145996, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.00991059560328722, |
| "loss/reg": 5.231861114501953, |
| "loss/twn": 0.0, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.018175, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 6.543082682291667, |
| "learning_rate": 0.0001, |
| "loss": 8.1011, |
| "loss/crossentropy": 2.7824230194091797, |
| "loss/hidden": 0.08154296875, |
| "loss/logits": 0.005494968965649605, |
| "loss/reg": 5.231605052947998, |
| "loss/twn": 0.0, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.0182, |
| "grad_norm": 214.0, |
| "grad_norm_var": 2560.450634765625, |
| "learning_rate": 0.0001, |
| "loss": 7.8675, |
| "loss/crossentropy": 2.6006758213043213, |
| "loss/hidden": 0.03271484375, |
| "loss/logits": 0.0022825347259640694, |
| "loss/reg": 5.231857776641846, |
| "loss/twn": 0.0, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.018225, |
| "grad_norm": 10.25, |
| "grad_norm_var": 2555.1207682291665, |
| "learning_rate": 0.0001, |
| "loss": 7.9332, |
| "loss/crossentropy": 2.5436322689056396, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.007534053176641464, |
| "loss/reg": 5.231611251831055, |
| "loss/twn": 0.0, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.01825, |
| "grad_norm": 7.46875, |
| "grad_norm_var": 2563.203739420573, |
| "learning_rate": 0.0001, |
| "loss": 6.4295, |
| "loss/crossentropy": 1.0562663078308105, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.005792177282273769, |
| "loss/reg": 5.231747150421143, |
| "loss/twn": 0.0, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.018275, |
| "grad_norm": 9.5, |
| "grad_norm_var": 2568.6221313476562, |
| "learning_rate": 0.0001, |
| "loss": 6.1, |
| "loss/crossentropy": 0.7717524766921997, |
| "loss/hidden": 0.0908203125, |
| "loss/logits": 0.005497816018760204, |
| "loss/reg": 5.231908321380615, |
| "loss/twn": 0.0, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.0183, |
| "grad_norm": 15.25, |
| "grad_norm_var": 2558.0002563476564, |
| "learning_rate": 0.0001, |
| "loss": 8.2472, |
| "loss/crossentropy": 2.8519446849823, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.015055367723107338, |
| "loss/reg": 5.231762886047363, |
| "loss/twn": 0.0, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.018325, |
| "grad_norm": 18.625, |
| "grad_norm_var": 2544.823661295573, |
| "learning_rate": 0.0001, |
| "loss": 8.0862, |
| "loss/crossentropy": 2.667858600616455, |
| "loss/hidden": 0.1708984375, |
| "loss/logits": 0.015612177550792694, |
| "loss/reg": 5.231838703155518, |
| "loss/twn": 0.0, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.01835, |
| "grad_norm": 11.0, |
| "grad_norm_var": 2552.937951660156, |
| "learning_rate": 0.0001, |
| "loss": 8.1949, |
| "loss/crossentropy": 2.86808705329895, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.00871000811457634, |
| "loss/reg": 5.231663703918457, |
| "loss/twn": 0.0, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.018375, |
| "grad_norm": 10.375, |
| "grad_norm_var": 2557.2188110351562, |
| "learning_rate": 0.0001, |
| "loss": 5.7664, |
| "loss/crossentropy": 0.43561050295829773, |
| "loss/hidden": 0.0947265625, |
| "loss/logits": 0.004361970815807581, |
| "loss/reg": 5.231712818145752, |
| "loss/twn": 0.0, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.0184, |
| "grad_norm": 10.375, |
| "grad_norm_var": 2562.0264282226562, |
| "learning_rate": 0.0001, |
| "loss": 5.8451, |
| "loss/crossentropy": 0.3734654188156128, |
| "loss/hidden": 0.232421875, |
| "loss/logits": 0.0075777387246489525, |
| "loss/reg": 5.231621265411377, |
| "loss/twn": 0.0, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.018425, |
| "grad_norm": 14.625, |
| "grad_norm_var": 2563.9819295247394, |
| "learning_rate": 0.0001, |
| "loss": 6.356, |
| "loss/crossentropy": 0.9930484294891357, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.006776281166821718, |
| "loss/reg": 5.231681823730469, |
| "loss/twn": 0.0, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.01845, |
| "grad_norm": 7.75, |
| "grad_norm_var": 2572.144364420573, |
| "learning_rate": 0.0001, |
| "loss": 6.7646, |
| "loss/crossentropy": 1.4421303272247314, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.0040518054738640785, |
| "loss/reg": 5.231963157653809, |
| "loss/twn": 0.0, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.018475, |
| "grad_norm": 13.5, |
| "grad_norm_var": 2563.933915201823, |
| "learning_rate": 0.0001, |
| "loss": 8.2343, |
| "loss/crossentropy": 2.907639741897583, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.008740050718188286, |
| "loss/reg": 5.23153829574585, |
| "loss/twn": 0.0, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.0185, |
| "grad_norm": 13.875, |
| "grad_norm_var": 2563.7567342122397, |
| "learning_rate": 0.0001, |
| "loss": 8.0317, |
| "loss/crossentropy": 2.705251693725586, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.008279213681817055, |
| "loss/reg": 5.231719493865967, |
| "loss/twn": 0.0, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.018525, |
| "grad_norm": 13.6875, |
| "grad_norm_var": 2560.4964803059897, |
| "learning_rate": 0.0001, |
| "loss": 6.2045, |
| "loss/crossentropy": 0.7274767160415649, |
| "loss/hidden": 0.236328125, |
| "loss/logits": 0.009199721738696098, |
| "loss/reg": 5.231486797332764, |
| "loss/twn": 0.0, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.01855, |
| "grad_norm": 17.5, |
| "grad_norm_var": 2555.7768513997394, |
| "learning_rate": 0.0001, |
| "loss": 7.1638, |
| "loss/crossentropy": 1.6126173734664917, |
| "loss/hidden": 0.30078125, |
| "loss/logits": 0.018833626061677933, |
| "loss/reg": 5.231540203094482, |
| "loss/twn": 0.0, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.018575, |
| "grad_norm": 63.25, |
| "grad_norm_var": 2626.092248535156, |
| "learning_rate": 0.0001, |
| "loss": 7.4974, |
| "loss/crossentropy": 1.9825630187988281, |
| "loss/hidden": 0.263671875, |
| "loss/logits": 0.019452253356575966, |
| "loss/reg": 5.231677055358887, |
| "loss/twn": 0.0, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.0186, |
| "grad_norm": 11.5625, |
| "grad_norm_var": 172.0647420247396, |
| "learning_rate": 0.0001, |
| "loss": 7.2546, |
| "loss/crossentropy": 1.7497669458389282, |
| "loss/hidden": 0.259765625, |
| "loss/logits": 0.013276930898427963, |
| "loss/reg": 5.231839179992676, |
| "loss/twn": 0.0, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.018625, |
| "grad_norm": 9.5, |
| "grad_norm_var": 172.62860921223958, |
| "learning_rate": 0.0001, |
| "loss": 7.8067, |
| "loss/crossentropy": 2.420611619949341, |
| "loss/hidden": 0.1455078125, |
| "loss/logits": 0.009184225462377071, |
| "loss/reg": 5.231430530548096, |
| "loss/twn": 0.0, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.01865, |
| "grad_norm": 13.875, |
| "grad_norm_var": 168.34192708333333, |
| "learning_rate": 0.0001, |
| "loss": 6.583, |
| "loss/crossentropy": 1.1484166383743286, |
| "loss/hidden": 0.1943359375, |
| "loss/logits": 0.008578259497880936, |
| "loss/reg": 5.231657028198242, |
| "loss/twn": 0.0, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.018675, |
| "grad_norm": 118.0, |
| "grad_norm_var": 811.6565104166667, |
| "learning_rate": 0.0001, |
| "loss": 6.9941, |
| "loss/crossentropy": 1.6018927097320557, |
| "loss/hidden": 0.154296875, |
| "loss/logits": 0.0065146745182573795, |
| "loss/reg": 5.231416702270508, |
| "loss/twn": 0.0, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.0187, |
| "grad_norm": 17.625, |
| "grad_norm_var": 809.6587890625, |
| "learning_rate": 0.0001, |
| "loss": 7.9705, |
| "loss/crossentropy": 2.5781874656677246, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.010151069611310959, |
| "loss/reg": 5.2317585945129395, |
| "loss/twn": 0.0, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.018725, |
| "grad_norm": 15.75, |
| "grad_norm_var": 811.78359375, |
| "learning_rate": 0.0001, |
| "loss": 7.9057, |
| "loss/crossentropy": 2.5473275184631348, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.009290559217333794, |
| "loss/reg": 5.231451034545898, |
| "loss/twn": 0.0, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.01875, |
| "grad_norm": 57.0, |
| "grad_norm_var": 872.6377604166667, |
| "learning_rate": 0.0001, |
| "loss": 5.8577, |
| "loss/crossentropy": 0.43267643451690674, |
| "loss/hidden": 0.1845703125, |
| "loss/logits": 0.008671510964632034, |
| "loss/reg": 5.231771469116211, |
| "loss/twn": 0.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.018775, |
| "grad_norm": 12.875, |
| "grad_norm_var": 867.9815104166667, |
| "learning_rate": 0.0001, |
| "loss": 8.1731, |
| "loss/crossentropy": 2.8458542823791504, |
| "loss/hidden": 0.0888671875, |
| "loss/logits": 0.00722483079880476, |
| "loss/reg": 5.231186389923096, |
| "loss/twn": 0.0, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.0188, |
| "grad_norm": 31.25, |
| "grad_norm_var": 852.6405598958333, |
| "learning_rate": 0.0001, |
| "loss": 6.5314, |
| "loss/crossentropy": 1.1035902500152588, |
| "loss/hidden": 0.19140625, |
| "loss/logits": 0.0045470790937542915, |
| "loss/reg": 5.231815814971924, |
| "loss/twn": 0.0, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.018825, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 862.2956868489583, |
| "learning_rate": 0.0001, |
| "loss": 5.7019, |
| "loss/crossentropy": 0.3551396429538727, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.003157853614538908, |
| "loss/reg": 5.2312774658203125, |
| "loss/twn": 0.0, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.01885, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 857.6431640625, |
| "learning_rate": 0.0001, |
| "loss": 7.8963, |
| "loss/crossentropy": 2.603400945663452, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.004422674421221018, |
| "loss/reg": 5.231386661529541, |
| "loss/twn": 0.0, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.018875, |
| "grad_norm": 14.125, |
| "grad_norm_var": 856.56015625, |
| "learning_rate": 0.0001, |
| "loss": 7.4962, |
| "loss/crossentropy": 2.134359359741211, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.010161285288631916, |
| "loss/reg": 5.231522083282471, |
| "loss/twn": 0.0, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.0189, |
| "grad_norm": 16.125, |
| "grad_norm_var": 852.990625, |
| "learning_rate": 0.0001, |
| "loss": 8.2635, |
| "loss/crossentropy": 2.91304874420166, |
| "loss/hidden": 0.10595703125, |
| "loss/logits": 0.01325392909348011, |
| "loss/reg": 5.231236934661865, |
| "loss/twn": 0.0, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.018925, |
| "grad_norm": 8.75, |
| "grad_norm_var": 863.2577962239583, |
| "learning_rate": 0.0001, |
| "loss": 6.6673, |
| "loss/crossentropy": 1.2952691316604614, |
| "loss/hidden": 0.1337890625, |
| "loss/logits": 0.006863090209662914, |
| "loss/reg": 5.231356143951416, |
| "loss/twn": 0.0, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.01895, |
| "grad_norm": 9.4375, |
| "grad_norm_var": 877.1677083333333, |
| "learning_rate": 0.0001, |
| "loss": 6.9557, |
| "loss/crossentropy": 1.5827580690383911, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.008454329334199429, |
| "loss/reg": 5.231715202331543, |
| "loss/twn": 0.0, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.018975, |
| "grad_norm": 16.0, |
| "grad_norm_var": 783.0122395833333, |
| "learning_rate": 0.0001, |
| "loss": 7.1366, |
| "loss/crossentropy": 1.7788175344467163, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.006595224142074585, |
| "loss/reg": 5.231091499328613, |
| "loss/twn": 0.0, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 9.75, |
| "grad_norm_var": 786.030712890625, |
| "learning_rate": 0.0001, |
| "loss": 8.1245, |
| "loss/crossentropy": 2.800469160079956, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.006068210117518902, |
| "loss/reg": 5.231574535369873, |
| "loss/twn": 0.0, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.019025, |
| "grad_norm": 19.25, |
| "grad_norm_var": 774.305322265625, |
| "learning_rate": 0.0001, |
| "loss": 6.9579, |
| "loss/crossentropy": 1.6193712949752808, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.006502064410597086, |
| "loss/reg": 5.2309889793396, |
| "loss/twn": 0.0, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.01905, |
| "grad_norm": 20.625, |
| "grad_norm_var": 768.311181640625, |
| "learning_rate": 0.0001, |
| "loss": 8.3539, |
| "loss/crossentropy": 2.9865291118621826, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.016289401799440384, |
| "loss/reg": 5.231488227844238, |
| "loss/twn": 0.0, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.019075, |
| "grad_norm": 31.625, |
| "grad_norm_var": 153.429150390625, |
| "learning_rate": 0.0001, |
| "loss": 7.0021, |
| "loss/crossentropy": 1.6152843236923218, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.006050444208085537, |
| "loss/reg": 5.2313923835754395, |
| "loss/twn": 0.0, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.0191, |
| "grad_norm": 39.25, |
| "grad_norm_var": 179.49178059895834, |
| "learning_rate": 0.0001, |
| "loss": 6.1086, |
| "loss/crossentropy": 0.6142204403877258, |
| "loss/hidden": 0.2578125, |
| "loss/logits": 0.005147262010723352, |
| "loss/reg": 5.231447219848633, |
| "loss/twn": 0.0, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.019125, |
| "grad_norm": 28.0, |
| "grad_norm_var": 181.80779622395832, |
| "learning_rate": 0.0001, |
| "loss": 7.0546, |
| "loss/crossentropy": 1.6059232950210571, |
| "loss/hidden": 0.212890625, |
| "loss/logits": 0.00476275198161602, |
| "loss/reg": 5.231032371520996, |
| "loss/twn": 0.0, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.01915, |
| "grad_norm": 11.875, |
| "grad_norm_var": 91.510791015625, |
| "learning_rate": 0.0001, |
| "loss": 7.9657, |
| "loss/crossentropy": 2.631168842315674, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.007133619859814644, |
| "loss/reg": 5.231229305267334, |
| "loss/twn": 0.0, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.019175, |
| "grad_norm": 43.75, |
| "grad_norm_var": 129.911572265625, |
| "learning_rate": 0.0001, |
| "loss": 6.3914, |
| "loss/crossentropy": 1.015294075012207, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.00839436985552311, |
| "loss/reg": 5.231020450592041, |
| "loss/twn": 0.0, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 13.625, |
| "grad_norm_var": 122.769775390625, |
| "learning_rate": 0.0001, |
| "loss": 6.8334, |
| "loss/crossentropy": 1.33900785446167, |
| "loss/hidden": 0.251953125, |
| "loss/logits": 0.011178944259881973, |
| "loss/reg": 5.2312493324279785, |
| "loss/twn": 0.0, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.019225, |
| "grad_norm": 11.5625, |
| "grad_norm_var": 120.699462890625, |
| "learning_rate": 0.0001, |
| "loss": 6.9225, |
| "loss/crossentropy": 1.5955766439437866, |
| "loss/hidden": 0.0908203125, |
| "loss/logits": 0.005241988226771355, |
| "loss/reg": 5.230888366699219, |
| "loss/twn": 0.0, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.01925, |
| "grad_norm": 17.125, |
| "grad_norm_var": 114.95670572916667, |
| "learning_rate": 0.0001, |
| "loss": 8.1154, |
| "loss/crossentropy": 2.7243542671203613, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.017005622386932373, |
| "loss/reg": 5.231443881988525, |
| "loss/twn": 0.0, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.019275, |
| "grad_norm": 306.0, |
| "grad_norm_var": 5232.954427083333, |
| "learning_rate": 0.0001, |
| "loss": 7.935, |
| "loss/crossentropy": 2.5942704677581787, |
| "loss/hidden": 0.10205078125, |
| "loss/logits": 0.007505115121603012, |
| "loss/reg": 5.231191635131836, |
| "loss/twn": 0.0, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.0193, |
| "grad_norm": 13.0, |
| "grad_norm_var": 5242.542643229167, |
| "learning_rate": 0.0001, |
| "loss": 7.9428, |
| "loss/crossentropy": 2.644366979598999, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.007467132993042469, |
| "loss/reg": 5.231389045715332, |
| "loss/twn": 0.0, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.019325, |
| "grad_norm": 49.5, |
| "grad_norm_var": 5190.246809895833, |
| "learning_rate": 0.0001, |
| "loss": 6.8313, |
| "loss/crossentropy": 1.4516693353652954, |
| "loss/hidden": 0.1416015625, |
| "loss/logits": 0.006859183311462402, |
| "loss/reg": 5.231204032897949, |
| "loss/twn": 0.0, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.01935, |
| "grad_norm": 10.25, |
| "grad_norm_var": 5186.974593098958, |
| "learning_rate": 0.0001, |
| "loss": 8.2807, |
| "loss/crossentropy": 3.0418143272399902, |
| "loss/hidden": 0.00469970703125, |
| "loss/logits": 0.002671225229278207, |
| "loss/reg": 5.23149299621582, |
| "loss/twn": 0.0, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.019375, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 5197.841145833333, |
| "learning_rate": 0.0001, |
| "loss": 7.9916, |
| "loss/crossentropy": 2.7271320819854736, |
| "loss/hidden": 0.0302734375, |
| "loss/logits": 0.00295096542686224, |
| "loss/reg": 5.231270790100098, |
| "loss/twn": 0.0, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.0194, |
| "grad_norm": 23.0, |
| "grad_norm_var": 5155.59296875, |
| "learning_rate": 0.0001, |
| "loss": 8.1296, |
| "loss/crossentropy": 2.7584662437438965, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.012552576139569283, |
| "loss/reg": 5.231604099273682, |
| "loss/twn": 0.0, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.019425, |
| "grad_norm": 14.1875, |
| "grad_norm_var": 5171.675634765625, |
| "learning_rate": 0.0001, |
| "loss": 5.5239, |
| "loss/crossentropy": 0.21823735535144806, |
| "loss/hidden": 0.0693359375, |
| "loss/logits": 0.005251707974821329, |
| "loss/reg": 5.231090545654297, |
| "loss/twn": 0.0, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.01945, |
| "grad_norm": 11.875, |
| "grad_norm_var": 5199.516129557292, |
| "learning_rate": 0.0001, |
| "loss": 7.0206, |
| "loss/crossentropy": 1.654329538345337, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.01058058813214302, |
| "loss/reg": 5.231167316436768, |
| "loss/twn": 0.0, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.019475, |
| "grad_norm": 31.75, |
| "grad_norm_var": 5199.380192057291, |
| "learning_rate": 0.0001, |
| "loss": 7.4021, |
| "loss/crossentropy": 2.093484878540039, |
| "loss/hidden": 0.07568359375, |
| "loss/logits": 0.0018800008110702038, |
| "loss/reg": 5.231100082397461, |
| "loss/twn": 0.0, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.0195, |
| "grad_norm": 230.0, |
| "grad_norm_var": 7458.277457682291, |
| "learning_rate": 0.0001, |
| "loss": 7.047, |
| "loss/crossentropy": 1.6409082412719727, |
| "loss/hidden": 0.1650390625, |
| "loss/logits": 0.00975135900080204, |
| "loss/reg": 5.231270790100098, |
| "loss/twn": 0.0, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.019525, |
| "grad_norm": 17.875, |
| "grad_norm_var": 7496.773551432291, |
| "learning_rate": 0.0001, |
| "loss": 8.1639, |
| "loss/crossentropy": 2.775987148284912, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.006807660683989525, |
| "loss/reg": 5.230672836303711, |
| "loss/twn": 0.0, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.01955, |
| "grad_norm": 13.6875, |
| "grad_norm_var": 7487.490625, |
| "learning_rate": 0.0001, |
| "loss": 6.7803, |
| "loss/crossentropy": 1.4760520458221436, |
| "loss/hidden": 0.0693359375, |
| "loss/logits": 0.003954825457185507, |
| "loss/reg": 5.230950355529785, |
| "loss/twn": 0.0, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.019575, |
| "grad_norm": 15.0625, |
| "grad_norm_var": 7567.613916015625, |
| "learning_rate": 0.0001, |
| "loss": 8.3797, |
| "loss/crossentropy": 3.015629291534424, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.013525455258786678, |
| "loss/reg": 5.230944633483887, |
| "loss/twn": 0.0, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.0196, |
| "grad_norm": 13.125, |
| "grad_norm_var": 7570.018343098958, |
| "learning_rate": 0.0001, |
| "loss": 8.0947, |
| "loss/crossentropy": 2.698845863342285, |
| "loss/hidden": 0.1474609375, |
| "loss/logits": 0.017367932945489883, |
| "loss/reg": 5.231037616729736, |
| "loss/twn": 0.0, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.019625, |
| "grad_norm": 98.0, |
| "grad_norm_var": 7600.609114583333, |
| "learning_rate": 0.0001, |
| "loss": 8.2002, |
| "loss/crossentropy": 2.826629161834717, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.0108743105083704, |
| "loss/reg": 5.230876445770264, |
| "loss/twn": 0.0, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.01965, |
| "grad_norm": 10.125, |
| "grad_norm_var": 7638.861197916666, |
| "learning_rate": 0.0001, |
| "loss": 7.1464, |
| "loss/crossentropy": 1.752783179283142, |
| "loss/hidden": 0.15625, |
| "loss/logits": 0.006541845388710499, |
| "loss/reg": 5.230816841125488, |
| "loss/twn": 0.0, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.019675, |
| "grad_norm": 12.75, |
| "grad_norm_var": 3175.657291666667, |
| "learning_rate": 0.0001, |
| "loss": 7.4269, |
| "loss/crossentropy": 2.056227207183838, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.014510264620184898, |
| "loss/reg": 5.231206893920898, |
| "loss/twn": 0.0, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.0197, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 3184.372770182292, |
| "learning_rate": 0.0001, |
| "loss": 6.9013, |
| "loss/crossentropy": 1.5427310466766357, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.007622131146490574, |
| "loss/reg": 5.230830192565918, |
| "loss/twn": 0.0, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.019725, |
| "grad_norm": 9.375, |
| "grad_norm_var": 3212.2094889322916, |
| "learning_rate": 0.0001, |
| "loss": 8.0128, |
| "loss/crossentropy": 2.7377614974975586, |
| "loss/hidden": 0.0400390625, |
| "loss/logits": 0.004178863950073719, |
| "loss/reg": 5.230835914611816, |
| "loss/twn": 0.0, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.01975, |
| "grad_norm": 9.625, |
| "grad_norm_var": 3214.161962890625, |
| "learning_rate": 0.0001, |
| "loss": 6.8224, |
| "loss/crossentropy": 1.4642736911773682, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.009586405009031296, |
| "loss/reg": 5.230891704559326, |
| "loss/twn": 0.0, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.019775, |
| "grad_norm": 9.125, |
| "grad_norm_var": 3225.1082682291667, |
| "learning_rate": 0.0001, |
| "loss": 7.9752, |
| "loss/crossentropy": 2.6582653522491455, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.007132208906114101, |
| "loss/reg": 5.230742931365967, |
| "loss/twn": 0.0, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.0198, |
| "grad_norm": 14.0625, |
| "grad_norm_var": 3242.157014973958, |
| "learning_rate": 0.0001, |
| "loss": 7.6474, |
| "loss/crossentropy": 2.298769235610962, |
| "loss/hidden": 0.11279296875, |
| "loss/logits": 0.004840749781578779, |
| "loss/reg": 5.2309794425964355, |
| "loss/twn": 0.0, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.019825, |
| "grad_norm": 17.375, |
| "grad_norm_var": 3234.984309895833, |
| "learning_rate": 0.0001, |
| "loss": 7.5051, |
| "loss/crossentropy": 2.1144237518310547, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.011511989869177341, |
| "loss/reg": 5.230709552764893, |
| "loss/twn": 0.0, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.01985, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 3232.7632649739585, |
| "learning_rate": 0.0001, |
| "loss": 6.3903, |
| "loss/crossentropy": 0.9461896419525146, |
| "loss/hidden": 0.2080078125, |
| "loss/logits": 0.005450299009680748, |
| "loss/reg": 5.230684757232666, |
| "loss/twn": 0.0, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.019875, |
| "grad_norm": 13.9375, |
| "grad_norm_var": 3255.1077473958335, |
| "learning_rate": 0.0001, |
| "loss": 7.4702, |
| "loss/crossentropy": 2.1320881843566895, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.008736222982406616, |
| "loss/reg": 5.230694770812988, |
| "loss/twn": 0.0, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.0199, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 463.37107747395834, |
| "learning_rate": 0.0001, |
| "loss": 5.7159, |
| "loss/crossentropy": 0.34444770216941833, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.005040573887526989, |
| "loss/reg": 5.230666160583496, |
| "loss/twn": 0.0, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.019925, |
| "grad_norm": 7.125, |
| "grad_norm_var": 470.67771809895834, |
| "learning_rate": 0.0001, |
| "loss": 6.3741, |
| "loss/crossentropy": 0.9926528334617615, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.007962658070027828, |
| "loss/reg": 5.23092794418335, |
| "loss/twn": 0.0, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.01995, |
| "grad_norm": 9.0, |
| "grad_norm_var": 474.28489583333334, |
| "learning_rate": 0.0001, |
| "loss": 7.4732, |
| "loss/crossentropy": 2.123459815979004, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.0064601292833685875, |
| "loss/reg": 5.2309346199035645, |
| "loss/twn": 0.0, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.019975, |
| "grad_norm": 17.25, |
| "grad_norm_var": 474.027978515625, |
| "learning_rate": 0.0001, |
| "loss": 7.4841, |
| "loss/crossentropy": 2.1429977416992188, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.009501198306679726, |
| "loss/reg": 5.230530738830566, |
| "loss/twn": 0.0, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 13.3125, |
| "grad_norm_var": 473.9306640625, |
| "learning_rate": 0.0001, |
| "loss": 7.9787, |
| "loss/crossentropy": 2.5355384349823, |
| "loss/hidden": 0.203125, |
| "loss/logits": 0.009105566889047623, |
| "loss/reg": 5.230905055999756, |
| "loss/twn": 0.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.020025, |
| "grad_norm": 15.5625, |
| "grad_norm_var": 9.640087890625, |
| "learning_rate": 0.0001, |
| "loss": 7.6371, |
| "loss/crossentropy": 2.3353328704833984, |
| "loss/hidden": 0.064453125, |
| "loss/logits": 0.00665889261290431, |
| "loss/reg": 5.230616569519043, |
| "loss/twn": 0.0, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.02005, |
| "grad_norm": 64.0, |
| "grad_norm_var": 177.831103515625, |
| "learning_rate": 0.0001, |
| "loss": 8.2014, |
| "loss/crossentropy": 2.8111629486083984, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.008202088996767998, |
| "loss/reg": 5.230618476867676, |
| "loss/twn": 0.0, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.020075, |
| "grad_norm": 12.0625, |
| "grad_norm_var": 178.09733072916666, |
| "learning_rate": 0.0001, |
| "loss": 6.0497, |
| "loss/crossentropy": 0.6109923124313354, |
| "loss/hidden": 0.2021484375, |
| "loss/logits": 0.005807585082948208, |
| "loss/reg": 5.230772495269775, |
| "loss/twn": 0.0, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.0201, |
| "grad_norm": 8.8125, |
| "grad_norm_var": 179.23326822916667, |
| "learning_rate": 0.0001, |
| "loss": 7.8262, |
| "loss/crossentropy": 2.455592393875122, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.0069784787483513355, |
| "loss/reg": 5.230773448944092, |
| "loss/twn": 0.0, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.020125, |
| "grad_norm": 73.5, |
| "grad_norm_var": 386.47057291666664, |
| "learning_rate": 0.0001, |
| "loss": 7.1917, |
| "loss/crossentropy": 1.826701283454895, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.0094426479190588, |
| "loss/reg": 5.230578422546387, |
| "loss/twn": 0.0, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.02015, |
| "grad_norm": 10.25, |
| "grad_norm_var": 385.6968098958333, |
| "learning_rate": 0.0001, |
| "loss": 7.9012, |
| "loss/crossentropy": 2.539992094039917, |
| "loss/hidden": 0.12158203125, |
| "loss/logits": 0.008753599599003792, |
| "loss/reg": 5.230825424194336, |
| "loss/twn": 0.0, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.020175, |
| "grad_norm": 19.875, |
| "grad_norm_var": 378.4181640625, |
| "learning_rate": 0.0001, |
| "loss": 6.9229, |
| "loss/crossentropy": 1.5474615097045898, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.00709810946136713, |
| "loss/reg": 5.230693817138672, |
| "loss/twn": 0.0, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.0202, |
| "grad_norm": 10.25, |
| "grad_norm_var": 382.30115559895836, |
| "learning_rate": 0.0001, |
| "loss": 7.2085, |
| "loss/crossentropy": 1.7812546491622925, |
| "loss/hidden": 0.189453125, |
| "loss/logits": 0.007315409369766712, |
| "loss/reg": 5.230427265167236, |
| "loss/twn": 0.0, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.020225, |
| "grad_norm": 9.5, |
| "grad_norm_var": 388.59295247395835, |
| "learning_rate": 0.0001, |
| "loss": 7.8477, |
| "loss/crossentropy": 2.541975498199463, |
| "loss/hidden": 0.07177734375, |
| "loss/logits": 0.003062914125621319, |
| "loss/reg": 5.230909824371338, |
| "loss/twn": 0.0, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.02025, |
| "grad_norm": 15.5625, |
| "grad_norm_var": 386.619384765625, |
| "learning_rate": 0.0001, |
| "loss": 7.3771, |
| "loss/crossentropy": 2.025947332382202, |
| "loss/hidden": 0.11083984375, |
| "loss/logits": 0.009859994053840637, |
| "loss/reg": 5.230443954467773, |
| "loss/twn": 0.0, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.020275, |
| "grad_norm": 8.6875, |
| "grad_norm_var": 392.140087890625, |
| "learning_rate": 0.0001, |
| "loss": 7.2273, |
| "loss/crossentropy": 1.893878698348999, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.009128249250352383, |
| "loss/reg": 5.2305426597595215, |
| "loss/twn": 0.0, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.0203, |
| "grad_norm": 33.75, |
| "grad_norm_var": 398.5171875, |
| "learning_rate": 0.0001, |
| "loss": 8.1449, |
| "loss/crossentropy": 2.717454195022583, |
| "loss/hidden": 0.1796875, |
| "loss/logits": 0.016887273639440536, |
| "loss/reg": 5.230856895446777, |
| "loss/twn": 0.0, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.020325, |
| "grad_norm": 12.875, |
| "grad_norm_var": 390.30546875, |
| "learning_rate": 0.0001, |
| "loss": 8.3305, |
| "loss/crossentropy": 3.0012078285217285, |
| "loss/hidden": 0.09326171875, |
| "loss/logits": 0.005363960284739733, |
| "loss/reg": 5.230653285980225, |
| "loss/twn": 0.0, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.02035, |
| "grad_norm": 34.0, |
| "grad_norm_var": 389.73255208333336, |
| "learning_rate": 0.0001, |
| "loss": 6.834, |
| "loss/crossentropy": 1.4066799879074097, |
| "loss/hidden": 0.193359375, |
| "loss/logits": 0.003266718937084079, |
| "loss/reg": 5.230650901794434, |
| "loss/twn": 0.0, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.020375, |
| "grad_norm": 326.0, |
| "grad_norm_var": 6133.447395833334, |
| "learning_rate": 0.0001, |
| "loss": 8.109, |
| "loss/crossentropy": 2.785377025604248, |
| "loss/hidden": 0.087890625, |
| "loss/logits": 0.005352129694074392, |
| "loss/reg": 5.230373859405518, |
| "loss/twn": 0.0, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.0204, |
| "grad_norm": 61.75, |
| "grad_norm_var": 6096.425504557292, |
| "learning_rate": 0.0001, |
| "loss": 6.3327, |
| "loss/crossentropy": 0.9138974547386169, |
| "loss/hidden": 0.17578125, |
| "loss/logits": 0.012529873289167881, |
| "loss/reg": 5.230535984039307, |
| "loss/twn": 0.0, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.020425, |
| "grad_norm": 15.8125, |
| "grad_norm_var": 6095.455582682292, |
| "learning_rate": 0.0001, |
| "loss": 8.4489, |
| "loss/crossentropy": 3.057579517364502, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.018565690144896507, |
| "loss/reg": 5.230161666870117, |
| "loss/twn": 0.0, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.02045, |
| "grad_norm": 18.125, |
| "grad_norm_var": 6109.504801432292, |
| "learning_rate": 0.0001, |
| "loss": 7.9928, |
| "loss/crossentropy": 2.607743740081787, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.004932316951453686, |
| "loss/reg": 5.230733394622803, |
| "loss/twn": 0.0, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.020475, |
| "grad_norm": 21.25, |
| "grad_norm_var": 6078.197916666667, |
| "learning_rate": 0.0001, |
| "loss": 8.2796, |
| "loss/crossentropy": 2.8803536891937256, |
| "loss/hidden": 0.15234375, |
| "loss/logits": 0.016475437209010124, |
| "loss/reg": 5.230462551116943, |
| "loss/twn": 0.0, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0205, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 6074.315559895834, |
| "learning_rate": 0.0001, |
| "loss": 7.3712, |
| "loss/crossentropy": 1.9426707029342651, |
| "loss/hidden": 0.1923828125, |
| "loss/logits": 0.005748513620346785, |
| "loss/reg": 5.230403900146484, |
| "loss/twn": 0.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.020525, |
| "grad_norm": 10.0, |
| "grad_norm_var": 6064.3275390625, |
| "learning_rate": 0.0001, |
| "loss": 8.233, |
| "loss/crossentropy": 2.895735025405884, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.008359922096133232, |
| "loss/reg": 5.230307102203369, |
| "loss/twn": 0.0, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.02055, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 6058.576806640625, |
| "learning_rate": 0.0001, |
| "loss": 7.0902, |
| "loss/crossentropy": 1.73037588596344, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.009449999779462814, |
| "loss/reg": 5.230698585510254, |
| "loss/twn": 0.0, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.020575, |
| "grad_norm": 11.125, |
| "grad_norm_var": 6085.305322265625, |
| "learning_rate": 0.0001, |
| "loss": 7.9687, |
| "loss/crossentropy": 2.613971471786499, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.0068368250504136086, |
| "loss/reg": 5.230223178863525, |
| "loss/twn": 0.0, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.0206, |
| "grad_norm": 13.625, |
| "grad_norm_var": 6073.468212890625, |
| "learning_rate": 0.0001, |
| "loss": 7.6646, |
| "loss/crossentropy": 2.2981951236724854, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.01143670454621315, |
| "loss/reg": 5.230466842651367, |
| "loss/twn": 0.0, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.020625, |
| "grad_norm": 9.1875, |
| "grad_norm_var": 6074.676302083333, |
| "learning_rate": 0.0001, |
| "loss": 7.9187, |
| "loss/crossentropy": 2.578167676925659, |
| "loss/hidden": 0.10546875, |
| "loss/logits": 0.005032903980463743, |
| "loss/reg": 5.2300333976745605, |
| "loss/twn": 0.0, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.02065, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 6085.2015625, |
| "learning_rate": 0.0001, |
| "loss": 7.4645, |
| "loss/crossentropy": 2.171114444732666, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.003213082440197468, |
| "loss/reg": 5.23060417175293, |
| "loss/twn": 0.0, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.020675, |
| "grad_norm": 7.75, |
| "grad_norm_var": 6088.936181640625, |
| "learning_rate": 0.0001, |
| "loss": 7.2651, |
| "loss/crossentropy": 1.9576009511947632, |
| "loss/hidden": 0.0693359375, |
| "loss/logits": 0.008241134695708752, |
| "loss/reg": 5.229961395263672, |
| "loss/twn": 0.0, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.0207, |
| "grad_norm": 9.1875, |
| "grad_norm_var": 6140.7796875, |
| "learning_rate": 0.0001, |
| "loss": 6.9102, |
| "loss/crossentropy": 1.5859615802764893, |
| "loss/hidden": 0.09130859375, |
| "loss/logits": 0.0022871571127325296, |
| "loss/reg": 5.230637550354004, |
| "loss/twn": 0.0, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.020725, |
| "grad_norm": 8.875, |
| "grad_norm_var": 6154.396354166666, |
| "learning_rate": 0.0001, |
| "loss": 6.8867, |
| "loss/crossentropy": 1.5643407106399536, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.005755975842475891, |
| "loss/reg": 5.230214595794678, |
| "loss/twn": 0.0, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.02075, |
| "grad_norm": 79.5, |
| "grad_norm_var": 6269.947395833334, |
| "learning_rate": 0.0001, |
| "loss": 6.9087, |
| "loss/crossentropy": 1.5483061075210571, |
| "loss/hidden": 0.12451171875, |
| "loss/logits": 0.005769835785031319, |
| "loss/reg": 5.230134963989258, |
| "loss/twn": 0.0, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.020775, |
| "grad_norm": 10.25, |
| "grad_norm_var": 423.63880208333336, |
| "learning_rate": 0.0001, |
| "loss": 8.0232, |
| "loss/crossentropy": 2.6613450050354004, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.004578165709972382, |
| "loss/reg": 5.2303147315979, |
| "loss/twn": 0.0, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.0208, |
| "grad_norm": 11.9375, |
| "grad_norm_var": 297.382275390625, |
| "learning_rate": 0.0001, |
| "loss": 7.1745, |
| "loss/crossentropy": 1.7624868154525757, |
| "loss/hidden": 0.1708984375, |
| "loss/logits": 0.0105954110622406, |
| "loss/reg": 5.2305684089660645, |
| "loss/twn": 0.0, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.020825, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 298.63019205729165, |
| "learning_rate": 0.0001, |
| "loss": 8.0084, |
| "loss/crossentropy": 2.670598030090332, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.009010246023535728, |
| "loss/reg": 5.230124473571777, |
| "loss/twn": 0.0, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.02085, |
| "grad_norm": 11.6875, |
| "grad_norm_var": 299.41979166666664, |
| "learning_rate": 0.0001, |
| "loss": 7.9989, |
| "loss/crossentropy": 2.6066150665283203, |
| "loss/hidden": 0.1533203125, |
| "loss/logits": 0.008532057516276836, |
| "loss/reg": 5.230454444885254, |
| "loss/twn": 0.0, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.020875, |
| "grad_norm": 21.375, |
| "grad_norm_var": 299.51451822916664, |
| "learning_rate": 0.0001, |
| "loss": 8.5087, |
| "loss/crossentropy": 3.1679840087890625, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.009620252065360546, |
| "loss/reg": 5.230065822601318, |
| "loss/twn": 0.0, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.0209, |
| "grad_norm": 44.0, |
| "grad_norm_var": 345.89894205729166, |
| "learning_rate": 0.0001, |
| "loss": 7.9201, |
| "loss/crossentropy": 2.6046769618988037, |
| "loss/hidden": 0.080078125, |
| "loss/logits": 0.004920288920402527, |
| "loss/reg": 5.230381488800049, |
| "loss/twn": 0.0, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.020925, |
| "grad_norm": 8.875, |
| "grad_norm_var": 347.14464518229164, |
| "learning_rate": 0.0001, |
| "loss": 7.3371, |
| "loss/crossentropy": 1.9817231893539429, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.007638626731932163, |
| "loss/reg": 5.230074405670166, |
| "loss/twn": 0.0, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.02095, |
| "grad_norm": 42.25, |
| "grad_norm_var": 381.12526041666666, |
| "learning_rate": 0.0001, |
| "loss": 8.104, |
| "loss/crossentropy": 2.708381414413452, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.013956461101770401, |
| "loss/reg": 5.230310440063477, |
| "loss/twn": 0.0, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.020975, |
| "grad_norm": 15.75, |
| "grad_norm_var": 377.2301432291667, |
| "learning_rate": 0.0001, |
| "loss": 7.4726, |
| "loss/crossentropy": 2.0920002460479736, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.00805431604385376, |
| "loss/reg": 5.230012893676758, |
| "loss/twn": 0.0, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 15.5, |
| "grad_norm_var": 375.8815104166667, |
| "learning_rate": 0.0001, |
| "loss": 8.1933, |
| "loss/crossentropy": 2.8643245697021484, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.012173913419246674, |
| "loss/reg": 5.230340480804443, |
| "loss/twn": 0.0, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.021025, |
| "grad_norm": 15.8125, |
| "grad_norm_var": 369.05983072916666, |
| "learning_rate": 0.0001, |
| "loss": 7.1166, |
| "loss/crossentropy": 1.7462717294692993, |
| "loss/hidden": 0.126953125, |
| "loss/logits": 0.0133826844394207, |
| "loss/reg": 5.2299885749816895, |
| "loss/twn": 0.0, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.02105, |
| "grad_norm": 11.25, |
| "grad_norm_var": 370.280322265625, |
| "learning_rate": 0.0001, |
| "loss": 8.2839, |
| "loss/crossentropy": 2.8843467235565186, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.01038344856351614, |
| "loss/reg": 5.229991912841797, |
| "loss/twn": 0.0, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.021075, |
| "grad_norm": 10.5, |
| "grad_norm_var": 366.12810872395835, |
| "learning_rate": 0.0001, |
| "loss": 7.9547, |
| "loss/crossentropy": 2.5944228172302246, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.009886080399155617, |
| "loss/reg": 5.230307579040527, |
| "loss/twn": 0.0, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.0211, |
| "grad_norm": 9.125, |
| "grad_norm_var": 366.22291666666666, |
| "learning_rate": 0.0001, |
| "loss": 7.8067, |
| "loss/crossentropy": 2.4823381900787354, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.007714688777923584, |
| "loss/reg": 5.2301812171936035, |
| "loss/twn": 0.0, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.021125, |
| "grad_norm": 10.125, |
| "grad_norm_var": 364.37786458333335, |
| "learning_rate": 0.0001, |
| "loss": 7.77, |
| "loss/crossentropy": 2.477754592895508, |
| "loss/hidden": 0.058837890625, |
| "loss/logits": 0.0033183712512254715, |
| "loss/reg": 5.2300519943237305, |
| "loss/twn": 0.0, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.02115, |
| "grad_norm": 8.125, |
| "grad_norm_var": 122.33483072916667, |
| "learning_rate": 0.0001, |
| "loss": 6.1579, |
| "loss/crossentropy": 0.859074056148529, |
| "loss/hidden": 0.06689453125, |
| "loss/logits": 0.0018144365167245269, |
| "loss/reg": 5.230114936828613, |
| "loss/twn": 0.0, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.021175, |
| "grad_norm": 11.375, |
| "grad_norm_var": 121.52916666666667, |
| "learning_rate": 0.0001, |
| "loss": 7.8874, |
| "loss/crossentropy": 2.5228824615478516, |
| "loss/hidden": 0.1279296875, |
| "loss/logits": 0.006642586551606655, |
| "loss/reg": 5.229929447174072, |
| "loss/twn": 0.0, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.0212, |
| "grad_norm": 7.84375, |
| "grad_norm_var": 124.91343994140625, |
| "learning_rate": 0.0001, |
| "loss": 7.0446, |
| "loss/crossentropy": 1.6815242767333984, |
| "loss/hidden": 0.123046875, |
| "loss/logits": 0.009667545557022095, |
| "loss/reg": 5.230370044708252, |
| "loss/twn": 0.0, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.021225, |
| "grad_norm": 10.875, |
| "grad_norm_var": 125.48717041015625, |
| "learning_rate": 0.0001, |
| "loss": 7.8503, |
| "loss/crossentropy": 2.5361642837524414, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.004664687905460596, |
| "loss/reg": 5.230417728424072, |
| "loss/twn": 0.0, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.02125, |
| "grad_norm": 8.5625, |
| "grad_norm_var": 127.85452067057291, |
| "learning_rate": 0.0001, |
| "loss": 7.3983, |
| "loss/crossentropy": 2.035668134689331, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.00722795445472002, |
| "loss/reg": 5.230389595031738, |
| "loss/twn": 0.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.021275, |
| "grad_norm": 12.5, |
| "grad_norm_var": 126.07258707682291, |
| "learning_rate": 0.0001, |
| "loss": 7.8854, |
| "loss/crossentropy": 2.4939327239990234, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.012096907943487167, |
| "loss/reg": 5.229991912841797, |
| "loss/twn": 0.0, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.0213, |
| "grad_norm": 11.5, |
| "grad_norm_var": 67.09016520182291, |
| "learning_rate": 0.0001, |
| "loss": 6.8451, |
| "loss/crossentropy": 1.4572025537490845, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.013090159744024277, |
| "loss/reg": 5.230262756347656, |
| "loss/twn": 0.0, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.021325, |
| "grad_norm": 12.0625, |
| "grad_norm_var": 65.91975504557291, |
| "learning_rate": 0.0001, |
| "loss": 7.7312, |
| "loss/crossentropy": 2.376828193664551, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.009080484509468079, |
| "loss/reg": 5.230012893676758, |
| "loss/twn": 0.0, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.02135, |
| "grad_norm": 11.75, |
| "grad_norm_var": 6.420926920572916, |
| "learning_rate": 0.0001, |
| "loss": 7.0291, |
| "loss/crossentropy": 1.6736173629760742, |
| "loss/hidden": 0.12060546875, |
| "loss/logits": 0.004477534908801317, |
| "loss/reg": 5.230405807495117, |
| "loss/twn": 0.0, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.021375, |
| "grad_norm": 12.0, |
| "grad_norm_var": 5.132840983072916, |
| "learning_rate": 0.0001, |
| "loss": 6.8586, |
| "loss/crossentropy": 1.5102664232254028, |
| "loss/hidden": 0.1103515625, |
| "loss/logits": 0.008198726922273636, |
| "loss/reg": 5.229762077331543, |
| "loss/twn": 0.0, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.0214, |
| "grad_norm": 38.0, |
| "grad_norm_var": 49.72854410807292, |
| "learning_rate": 0.0001, |
| "loss": 7.6098, |
| "loss/crossentropy": 2.24526047706604, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.011424753814935684, |
| "loss/reg": 5.23051118850708, |
| "loss/twn": 0.0, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.021425, |
| "grad_norm": 164.0, |
| "grad_norm_var": 1485.9123982747396, |
| "learning_rate": 0.0001, |
| "loss": 7.3561, |
| "loss/crossentropy": 1.972996711730957, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.008586418814957142, |
| "loss/reg": 5.230007648468018, |
| "loss/twn": 0.0, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.02145, |
| "grad_norm": 24.25, |
| "grad_norm_var": 1478.1022420247396, |
| "learning_rate": 0.0001, |
| "loss": 7.1992, |
| "loss/crossentropy": 1.7982319593429565, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.009712583385407925, |
| "loss/reg": 5.230077743530273, |
| "loss/twn": 0.0, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.021475, |
| "grad_norm": 55.0, |
| "grad_norm_var": 1529.7060180664062, |
| "learning_rate": 0.0001, |
| "loss": 6.8691, |
| "loss/crossentropy": 1.487571358680725, |
| "loss/hidden": 0.1416015625, |
| "loss/logits": 0.009719829075038433, |
| "loss/reg": 5.230188369750977, |
| "loss/twn": 0.0, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.0215, |
| "grad_norm": 37.75, |
| "grad_norm_var": 1518.6361938476562, |
| "learning_rate": 0.0001, |
| "loss": 6.2527, |
| "loss/crossentropy": 0.7488301396369934, |
| "loss/hidden": 0.263671875, |
| "loss/logits": 0.010243739932775497, |
| "loss/reg": 5.229933261871338, |
| "loss/twn": 0.0, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.021525, |
| "grad_norm": 22.75, |
| "grad_norm_var": 1499.8006144205729, |
| "learning_rate": 0.0001, |
| "loss": 6.8342, |
| "loss/crossentropy": 1.4506139755249023, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.002946457825601101, |
| "loss/reg": 5.23027229309082, |
| "loss/twn": 0.0, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.02155, |
| "grad_norm": 26.125, |
| "grad_norm_var": 1472.299051920573, |
| "learning_rate": 0.0001, |
| "loss": 8.298, |
| "loss/crossentropy": 2.793138265609741, |
| "loss/hidden": 0.2578125, |
| "loss/logits": 0.016960376873612404, |
| "loss/reg": 5.230113506317139, |
| "loss/twn": 0.0, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.021575, |
| "grad_norm": 12.0, |
| "grad_norm_var": 1470.842508951823, |
| "learning_rate": 0.0001, |
| "loss": 8.426, |
| "loss/crossentropy": 3.0826714038848877, |
| "loss/hidden": 0.10302734375, |
| "loss/logits": 0.009999147616326809, |
| "loss/reg": 5.230251789093018, |
| "loss/twn": 0.0, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.0216, |
| "grad_norm": 170.0, |
| "grad_norm_var": 2652.8306640625, |
| "learning_rate": 0.0001, |
| "loss": 8.011, |
| "loss/crossentropy": 2.557135820388794, |
| "loss/hidden": 0.205078125, |
| "loss/logits": 0.018819302320480347, |
| "loss/reg": 5.2300004959106445, |
| "loss/twn": 0.0, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.021625, |
| "grad_norm": 9.8125, |
| "grad_norm_var": 2656.930973307292, |
| "learning_rate": 0.0001, |
| "loss": 6.4711, |
| "loss/crossentropy": 1.1575171947479248, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.004193156957626343, |
| "loss/reg": 5.230248928070068, |
| "loss/twn": 0.0, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.02165, |
| "grad_norm": 10.125, |
| "grad_norm_var": 2650.689518229167, |
| "learning_rate": 0.0001, |
| "loss": 7.0724, |
| "loss/crossentropy": 1.6960041522979736, |
| "loss/hidden": 0.13671875, |
| "loss/logits": 0.009182040579617023, |
| "loss/reg": 5.230460166931152, |
| "loss/twn": 0.0, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.021675, |
| "grad_norm": 7.875, |
| "grad_norm_var": 2668.584895833333, |
| "learning_rate": 0.0001, |
| "loss": 7.1586, |
| "loss/crossentropy": 1.821337103843689, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.00650972593575716, |
| "loss/reg": 5.229717254638672, |
| "loss/twn": 0.0, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.0217, |
| "grad_norm": 14.6875, |
| "grad_norm_var": 2657.5058430989584, |
| "learning_rate": 0.0001, |
| "loss": 8.054, |
| "loss/crossentropy": 2.651723623275757, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.01694151759147644, |
| "loss/reg": 5.230074405670166, |
| "loss/twn": 0.0, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.021725, |
| "grad_norm": 17.5, |
| "grad_norm_var": 2639.634309895833, |
| "learning_rate": 0.0001, |
| "loss": 7.8281, |
| "loss/crossentropy": 2.49051570892334, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.009045520797371864, |
| "loss/reg": 5.229867935180664, |
| "loss/twn": 0.0, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.02175, |
| "grad_norm": 13.4375, |
| "grad_norm_var": 2633.545686848958, |
| "learning_rate": 0.0001, |
| "loss": 7.0012, |
| "loss/crossentropy": 1.546895146369934, |
| "loss/hidden": 0.21484375, |
| "loss/logits": 0.009267905727028847, |
| "loss/reg": 5.230212211608887, |
| "loss/twn": 0.0, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.021775, |
| "grad_norm": 14.9375, |
| "grad_norm_var": 2623.2330729166665, |
| "learning_rate": 0.0001, |
| "loss": 7.582, |
| "loss/crossentropy": 2.2645156383514404, |
| "loss/hidden": 0.08154296875, |
| "loss/logits": 0.006237420719116926, |
| "loss/reg": 5.229740619659424, |
| "loss/twn": 0.0, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.0218, |
| "grad_norm": 12.6875, |
| "grad_norm_var": 2669.6590983072915, |
| "learning_rate": 0.0001, |
| "loss": 6.8665, |
| "loss/crossentropy": 1.5306649208068848, |
| "loss/hidden": 0.1005859375, |
| "loss/logits": 0.004954389296472073, |
| "loss/reg": 5.230282306671143, |
| "loss/twn": 0.0, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.021825, |
| "grad_norm": 35.0, |
| "grad_norm_var": 1547.8294108072917, |
| "learning_rate": 0.0001, |
| "loss": 8.1648, |
| "loss/crossentropy": 2.7598485946655273, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.014029700309038162, |
| "loss/reg": 5.229771137237549, |
| "loss/twn": 0.0, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.02185, |
| "grad_norm": 9.375, |
| "grad_norm_var": 1573.5507649739584, |
| "learning_rate": 0.0001, |
| "loss": 6.9286, |
| "loss/crossentropy": 1.5998884439468384, |
| "loss/hidden": 0.09521484375, |
| "loss/logits": 0.003966475836932659, |
| "loss/reg": 5.2295050621032715, |
| "loss/twn": 0.0, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.021875, |
| "grad_norm": 13.375, |
| "grad_norm_var": 1539.2968587239584, |
| "learning_rate": 0.0001, |
| "loss": 8.1337, |
| "loss/crossentropy": 2.756443977355957, |
| "loss/hidden": 0.1357421875, |
| "loss/logits": 0.011736356653273106, |
| "loss/reg": 5.229776859283447, |
| "loss/twn": 0.0, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.0219, |
| "grad_norm": 14.1875, |
| "grad_norm_var": 1539.3275390625, |
| "learning_rate": 0.0001, |
| "loss": 8.4196, |
| "loss/crossentropy": 3.001187324523926, |
| "loss/hidden": 0.171875, |
| "loss/logits": 0.01653527095913887, |
| "loss/reg": 5.230004787445068, |
| "loss/twn": 0.0, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.021925, |
| "grad_norm": 10.0625, |
| "grad_norm_var": 1553.6042805989584, |
| "learning_rate": 0.0001, |
| "loss": 7.6758, |
| "loss/crossentropy": 2.371596097946167, |
| "loss/hidden": 0.0693359375, |
| "loss/logits": 0.004934161901473999, |
| "loss/reg": 5.229954242706299, |
| "loss/twn": 0.0, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.02195, |
| "grad_norm": 15.25, |
| "grad_norm_var": 1558.5659993489583, |
| "learning_rate": 0.0001, |
| "loss": 7.0978, |
| "loss/crossentropy": 1.6408387422561646, |
| "loss/hidden": 0.208984375, |
| "loss/logits": 0.017855621874332428, |
| "loss/reg": 5.230149745941162, |
| "loss/twn": 0.0, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.021975, |
| "grad_norm": 11.6875, |
| "grad_norm_var": 1559.0625, |
| "learning_rate": 0.0001, |
| "loss": 7.826, |
| "loss/crossentropy": 2.489450454711914, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.005990723147988319, |
| "loss/reg": 5.22952938079834, |
| "loss/twn": 0.0, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 142.0, |
| "grad_norm_var": 1062.0625, |
| "learning_rate": 0.0001, |
| "loss": 6.2769, |
| "loss/crossentropy": 0.8815757632255554, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.01392771303653717, |
| "loss/reg": 5.230021953582764, |
| "loss/twn": 0.0, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.022025, |
| "grad_norm": 9.75, |
| "grad_norm_var": 1062.164306640625, |
| "learning_rate": 0.0001, |
| "loss": 7.2034, |
| "loss/crossentropy": 1.8355108499526978, |
| "loss/hidden": 0.1318359375, |
| "loss/logits": 0.00601241085678339, |
| "loss/reg": 5.230048656463623, |
| "loss/twn": 0.0, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.02205, |
| "grad_norm": 10.5625, |
| "grad_norm_var": 1061.4837890625, |
| "learning_rate": 0.0001, |
| "loss": 6.8518, |
| "loss/crossentropy": 1.397723913192749, |
| "loss/hidden": 0.2158203125, |
| "loss/logits": 0.00827928725630045, |
| "loss/reg": 5.229991912841797, |
| "loss/twn": 0.0, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.022075, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 1056.672509765625, |
| "learning_rate": 0.0001, |
| "loss": 8.2004, |
| "loss/crossentropy": 2.9133853912353516, |
| "loss/hidden": 0.05224609375, |
| "loss/logits": 0.005087848752737045, |
| "loss/reg": 5.2297210693359375, |
| "loss/twn": 0.0, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.0221, |
| "grad_norm": 37.75, |
| "grad_norm_var": 1066.81640625, |
| "learning_rate": 0.0001, |
| "loss": 6.094, |
| "loss/crossentropy": 0.7456091046333313, |
| "loss/hidden": 0.1162109375, |
| "loss/logits": 0.002344908192753792, |
| "loss/reg": 5.229843616485596, |
| "loss/twn": 0.0, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.022125, |
| "grad_norm": 14.3125, |
| "grad_norm_var": 1070.061181640625, |
| "learning_rate": 0.0001, |
| "loss": 7.1178, |
| "loss/crossentropy": 1.7599776983261108, |
| "loss/hidden": 0.12109375, |
| "loss/logits": 0.0070150988176465034, |
| "loss/reg": 5.229736804962158, |
| "loss/twn": 0.0, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.02215, |
| "grad_norm": 18.0, |
| "grad_norm_var": 1065.2764973958333, |
| "learning_rate": 0.0001, |
| "loss": 6.9191, |
| "loss/crossentropy": 1.5698696374893188, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.009311170317232609, |
| "loss/reg": 5.230006217956543, |
| "loss/twn": 0.0, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.022175, |
| "grad_norm": 12.875, |
| "grad_norm_var": 1067.9593587239583, |
| "learning_rate": 0.0001, |
| "loss": 7.0143, |
| "loss/crossentropy": 1.6162168979644775, |
| "loss/hidden": 0.1552734375, |
| "loss/logits": 0.013361955992877483, |
| "loss/reg": 5.229493618011475, |
| "loss/twn": 0.0, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.0222, |
| "grad_norm": 13.4375, |
| "grad_norm_var": 1066.9034993489583, |
| "learning_rate": 0.0001, |
| "loss": 8.1799, |
| "loss/crossentropy": 2.848146915435791, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.008583602495491505, |
| "loss/reg": 5.229381561279297, |
| "loss/twn": 0.0, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.022225, |
| "grad_norm": 18.75, |
| "grad_norm_var": 1058.8038899739583, |
| "learning_rate": 0.0001, |
| "loss": 8.1476, |
| "loss/crossentropy": 2.7504210472106934, |
| "loss/hidden": 0.158203125, |
| "loss/logits": 0.00982433557510376, |
| "loss/reg": 5.229184627532959, |
| "loss/twn": 0.0, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.02225, |
| "grad_norm": 57.25, |
| "grad_norm_var": 1117.4507649739583, |
| "learning_rate": 0.0001, |
| "loss": 6.9922, |
| "loss/crossentropy": 1.6320585012435913, |
| "loss/hidden": 0.1171875, |
| "loss/logits": 0.01302691176533699, |
| "loss/reg": 5.229902267456055, |
| "loss/twn": 0.0, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.022275, |
| "grad_norm": 11.125, |
| "grad_norm_var": 1121.4409993489583, |
| "learning_rate": 0.0001, |
| "loss": 7.9641, |
| "loss/crossentropy": 2.6222574710845947, |
| "loss/hidden": 0.10107421875, |
| "loss/logits": 0.011231745593249798, |
| "loss/reg": 5.229504108428955, |
| "loss/twn": 0.0, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.0223, |
| "grad_norm": 7.65625, |
| "grad_norm_var": 1133.9413696289062, |
| "learning_rate": 0.0001, |
| "loss": 7.5977, |
| "loss/crossentropy": 2.262641191482544, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.008943114429712296, |
| "loss/reg": 5.229957103729248, |
| "loss/twn": 0.0, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.022325, |
| "grad_norm": 11.3125, |
| "grad_norm_var": 1131.5373982747396, |
| "learning_rate": 0.0001, |
| "loss": 7.885, |
| "loss/crossentropy": 2.514071226119995, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.006755891256034374, |
| "loss/reg": 5.229411602020264, |
| "loss/twn": 0.0, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.02235, |
| "grad_norm": 31.875, |
| "grad_norm_var": 1126.865946451823, |
| "learning_rate": 0.0001, |
| "loss": 6.8167, |
| "loss/crossentropy": 1.4057066440582275, |
| "loss/hidden": 0.17578125, |
| "loss/logits": 0.005477352067828178, |
| "loss/reg": 5.229723930358887, |
| "loss/twn": 0.0, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.022375, |
| "grad_norm": 10.375, |
| "grad_norm_var": 1129.511454264323, |
| "learning_rate": 0.0001, |
| "loss": 7.7274, |
| "loss/crossentropy": 2.414677381515503, |
| "loss/hidden": 0.0791015625, |
| "loss/logits": 0.004009230528026819, |
| "loss/reg": 5.229562282562256, |
| "loss/twn": 0.0, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 11.125, |
| "grad_norm_var": 177.70256754557292, |
| "learning_rate": 0.0001, |
| "loss": 6.6999, |
| "loss/crossentropy": 1.3421200513839722, |
| "loss/hidden": 0.1220703125, |
| "loss/logits": 0.006061128340661526, |
| "loss/reg": 5.229669094085693, |
| "loss/twn": 0.0, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.022425, |
| "grad_norm": 20.125, |
| "grad_norm_var": 173.1175740559896, |
| "learning_rate": 0.0001, |
| "loss": 7.0618, |
| "loss/crossentropy": 1.6401444673538208, |
| "loss/hidden": 0.1806640625, |
| "loss/logits": 0.011477080173790455, |
| "loss/reg": 5.229546546936035, |
| "loss/twn": 0.0, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.02245, |
| "grad_norm": 9.5, |
| "grad_norm_var": 174.32340087890626, |
| "learning_rate": 0.0001, |
| "loss": 7.0679, |
| "loss/crossentropy": 1.6865355968475342, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.007968233898282051, |
| "loss/reg": 5.229793071746826, |
| "loss/twn": 0.0, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.022475, |
| "grad_norm": 9.5625, |
| "grad_norm_var": 175.57584228515626, |
| "learning_rate": 0.0001, |
| "loss": 6.9239, |
| "loss/crossentropy": 1.5547279119491577, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.006813929416239262, |
| "loss/reg": 5.229519844055176, |
| "loss/twn": 0.0, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 15.1875, |
| "grad_norm_var": 149.29993082682293, |
| "learning_rate": 0.0001, |
| "loss": 7.6572, |
| "loss/crossentropy": 2.416555166244507, |
| "loss/hidden": 0.0093994140625, |
| "loss/logits": 0.001800237107090652, |
| "loss/reg": 5.229480266571045, |
| "loss/twn": 0.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.022525, |
| "grad_norm": 21.5, |
| "grad_norm_var": 149.9250935872396, |
| "learning_rate": 0.0001, |
| "loss": 8.2505, |
| "loss/crossentropy": 2.8510868549346924, |
| "loss/hidden": 0.1591796875, |
| "loss/logits": 0.01060121227055788, |
| "loss/reg": 5.229599952697754, |
| "loss/twn": 0.0, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.02255, |
| "grad_norm": 9.6875, |
| "grad_norm_var": 153.66571858723958, |
| "learning_rate": 0.0001, |
| "loss": 7.8264, |
| "loss/crossentropy": 2.532890796661377, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.004101074766367674, |
| "loss/reg": 5.229794979095459, |
| "loss/twn": 0.0, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.022575, |
| "grad_norm": 41.75, |
| "grad_norm_var": 190.05273030598957, |
| "learning_rate": 0.0001, |
| "loss": 7.0146, |
| "loss/crossentropy": 1.6412537097930908, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.008777379989624023, |
| "loss/reg": 5.229798793792725, |
| "loss/twn": 0.0, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.0226, |
| "grad_norm": 15.625, |
| "grad_norm_var": 188.79833577473957, |
| "learning_rate": 0.0001, |
| "loss": 8.2633, |
| "loss/crossentropy": 2.9667930603027344, |
| "loss/hidden": 0.06201171875, |
| "loss/logits": 0.005043432116508484, |
| "loss/reg": 5.229448318481445, |
| "loss/twn": 0.0, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.022625, |
| "grad_norm": 12.4375, |
| "grad_norm_var": 191.41539306640624, |
| "learning_rate": 0.0001, |
| "loss": 8.0256, |
| "loss/crossentropy": 2.680783987045288, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.005312731955200434, |
| "loss/reg": 5.229615211486816, |
| "loss/twn": 0.0, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.02265, |
| "grad_norm": 16.625, |
| "grad_norm_var": 84.70071207682291, |
| "learning_rate": 0.0001, |
| "loss": 7.5648, |
| "loss/crossentropy": 2.088520050048828, |
| "loss/hidden": 0.234375, |
| "loss/logits": 0.012642334215342999, |
| "loss/reg": 5.229223251342773, |
| "loss/twn": 0.0, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.022675, |
| "grad_norm": 24.125, |
| "grad_norm_var": 86.87076416015626, |
| "learning_rate": 0.0001, |
| "loss": 5.9125, |
| "loss/crossentropy": 0.48404479026794434, |
| "loss/hidden": 0.1904296875, |
| "loss/logits": 0.008365976624190807, |
| "loss/reg": 5.2296600341796875, |
| "loss/twn": 0.0, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.0227, |
| "grad_norm": 11.0, |
| "grad_norm_var": 83.502197265625, |
| "learning_rate": 0.0001, |
| "loss": 6.8349, |
| "loss/crossentropy": 1.4288034439086914, |
| "loss/hidden": 0.1669921875, |
| "loss/logits": 0.009537655860185623, |
| "loss/reg": 5.229605674743652, |
| "loss/twn": 0.0, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.022725, |
| "grad_norm": 16.75, |
| "grad_norm_var": 81.23515625, |
| "learning_rate": 0.0001, |
| "loss": 7.9611, |
| "loss/crossentropy": 2.5989267826080322, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.012637370266020298, |
| "loss/reg": 5.22941255569458, |
| "loss/twn": 0.0, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.02275, |
| "grad_norm": 15.8125, |
| "grad_norm_var": 66.20584309895834, |
| "learning_rate": 0.0001, |
| "loss": 7.4412, |
| "loss/crossentropy": 2.0751564502716064, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.011572781018912792, |
| "loss/reg": 5.229443550109863, |
| "loss/twn": 0.0, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.022775, |
| "grad_norm": 13.0, |
| "grad_norm_var": 64.55428059895833, |
| "learning_rate": 0.0001, |
| "loss": 6.7192, |
| "loss/crossentropy": 1.3851910829544067, |
| "loss/hidden": 0.09716796875, |
| "loss/logits": 0.007330389227718115, |
| "loss/reg": 5.229530334472656, |
| "loss/twn": 0.0, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.0228, |
| "grad_norm": 12.125, |
| "grad_norm_var": 63.901676432291666, |
| "learning_rate": 0.0001, |
| "loss": 7.152, |
| "loss/crossentropy": 1.8044177293777466, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.005986911244690418, |
| "loss/reg": 5.22929048538208, |
| "loss/twn": 0.0, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.022825, |
| "grad_norm": 8.8125, |
| "grad_norm_var": 66.50885416666667, |
| "learning_rate": 0.0001, |
| "loss": 8.4326, |
| "loss/crossentropy": 3.201655864715576, |
| "loss/hidden": 4.380941390991211e-06, |
| "loss/logits": 0.0015021440340206027, |
| "loss/reg": 5.229450702667236, |
| "loss/twn": 0.0, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.02285, |
| "grad_norm": 10.3125, |
| "grad_norm_var": 65.86287434895833, |
| "learning_rate": 0.0001, |
| "loss": 6.5774, |
| "loss/crossentropy": 1.2178183794021606, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.007649564184248447, |
| "loss/reg": 5.22938346862793, |
| "loss/twn": 0.0, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.022875, |
| "grad_norm": 13.0, |
| "grad_norm_var": 63.69921875, |
| "learning_rate": 0.0001, |
| "loss": 7.8085, |
| "loss/crossentropy": 2.4297561645507812, |
| "loss/hidden": 0.1376953125, |
| "loss/logits": 0.011574456468224525, |
| "loss/reg": 5.2294511795043945, |
| "loss/twn": 0.0, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.0229, |
| "grad_norm": 13.25, |
| "grad_norm_var": 64.17198893229167, |
| "learning_rate": 0.0001, |
| "loss": 7.9386, |
| "loss/crossentropy": 2.616173028945923, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.0062749385833740234, |
| "loss/reg": 5.229771137237549, |
| "loss/twn": 0.0, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.022925, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 63.5328125, |
| "learning_rate": 0.0001, |
| "loss": 8.1081, |
| "loss/crossentropy": 2.7487690448760986, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.009604476392269135, |
| "loss/reg": 5.2296576499938965, |
| "loss/twn": 0.0, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.02295, |
| "grad_norm": 21.625, |
| "grad_norm_var": 63.486181640625, |
| "learning_rate": 0.0001, |
| "loss": 7.0533, |
| "loss/crossentropy": 1.7067608833312988, |
| "loss/hidden": 0.1123046875, |
| "loss/logits": 0.004787761718034744, |
| "loss/reg": 5.2294087409973145, |
| "loss/twn": 0.0, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.022975, |
| "grad_norm": 14.6875, |
| "grad_norm_var": 16.556705729166666, |
| "learning_rate": 0.0001, |
| "loss": 6.862, |
| "loss/crossentropy": 1.5273187160491943, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.006548475474119186, |
| "loss/reg": 5.229493618011475, |
| "loss/twn": 0.0, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 10.25, |
| "grad_norm_var": 17.4609375, |
| "learning_rate": 0.0001, |
| "loss": 8.096, |
| "loss/crossentropy": 2.765378952026367, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.007458665873855352, |
| "loss/reg": 5.229459285736084, |
| "loss/twn": 0.0, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.023025, |
| "grad_norm": 15.875, |
| "grad_norm_var": 17.468994140625, |
| "learning_rate": 0.0001, |
| "loss": 6.2566, |
| "loss/crossentropy": 0.8696529865264893, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.009295967407524586, |
| "loss/reg": 5.229222297668457, |
| "loss/twn": 0.0, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.02305, |
| "grad_norm": 14.5, |
| "grad_norm_var": 17.077197265625, |
| "learning_rate": 0.0001, |
| "loss": 7.8853, |
| "loss/crossentropy": 2.496795892715454, |
| "loss/hidden": 0.1494140625, |
| "loss/logits": 0.009545085951685905, |
| "loss/reg": 5.229542255401611, |
| "loss/twn": 0.0, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.023075, |
| "grad_norm": 23.25, |
| "grad_norm_var": 15.957014973958334, |
| "learning_rate": 0.0001, |
| "loss": 7.049, |
| "loss/crossentropy": 1.6003493070602417, |
| "loss/hidden": 0.2060546875, |
| "loss/logits": 0.013520617038011551, |
| "loss/reg": 5.229060173034668, |
| "loss/twn": 0.0, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.0231, |
| "grad_norm": 9.75, |
| "grad_norm_var": 16.564436848958334, |
| "learning_rate": 0.0001, |
| "loss": 6.8671, |
| "loss/crossentropy": 1.5159342288970947, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.011563955806195736, |
| "loss/reg": 5.2297444343566895, |
| "loss/twn": 0.0, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.023125, |
| "grad_norm": 9.125, |
| "grad_norm_var": 17.382535807291667, |
| "learning_rate": 0.0001, |
| "loss": 7.8368, |
| "loss/crossentropy": 2.537524938583374, |
| "loss/hidden": 0.064453125, |
| "loss/logits": 0.005966213531792164, |
| "loss/reg": 5.228902339935303, |
| "loss/twn": 0.0, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.02315, |
| "grad_norm": 10.6875, |
| "grad_norm_var": 17.446598307291666, |
| "learning_rate": 0.0001, |
| "loss": 7.097, |
| "loss/crossentropy": 1.695339560508728, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.009303221479058266, |
| "loss/reg": 5.229240894317627, |
| "loss/twn": 0.0, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.023175, |
| "grad_norm": 10.375, |
| "grad_norm_var": 17.941520182291665, |
| "learning_rate": 0.0001, |
| "loss": 8.0381, |
| "loss/crossentropy": 2.6705760955810547, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.00920666940510273, |
| "loss/reg": 5.229386329650879, |
| "loss/twn": 0.0, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.0232, |
| "grad_norm": 8.75, |
| "grad_norm_var": 19.055973307291666, |
| "learning_rate": 0.0001, |
| "loss": 7.8173, |
| "loss/crossentropy": 2.529360055923462, |
| "loss/hidden": 0.0546875, |
| "loss/logits": 0.003768081543967128, |
| "loss/reg": 5.229437351226807, |
| "loss/twn": 0.0, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.023225, |
| "grad_norm": 74.5, |
| "grad_norm_var": 253.73483072916667, |
| "learning_rate": 0.0001, |
| "loss": 6.3033, |
| "loss/crossentropy": 0.9277183413505554, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.007658226415514946, |
| "loss/reg": 5.229298114776611, |
| "loss/twn": 0.0, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.02325, |
| "grad_norm": 19.875, |
| "grad_norm_var": 251.03292643229167, |
| "learning_rate": 0.0001, |
| "loss": 8.3524, |
| "loss/crossentropy": 2.9925942420959473, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.010561013594269753, |
| "loss/reg": 5.229130268096924, |
| "loss/twn": 0.0, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.023275, |
| "grad_norm": 25.5, |
| "grad_norm_var": 253.27902018229167, |
| "learning_rate": 0.0001, |
| "loss": 7.8689, |
| "loss/crossentropy": 2.5310251712799072, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.010016044601798058, |
| "loss/reg": 5.229218482971191, |
| "loss/twn": 0.0, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.0233, |
| "grad_norm": 138.0, |
| "grad_norm_var": 1142.0577962239583, |
| "learning_rate": 0.0001, |
| "loss": 6.7784, |
| "loss/crossentropy": 1.2963286638259888, |
| "loss/hidden": 0.2431640625, |
| "loss/logits": 0.00944933295249939, |
| "loss/reg": 5.229430198669434, |
| "loss/twn": 0.0, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.023325, |
| "grad_norm": 12.0625, |
| "grad_norm_var": 1139.352197265625, |
| "learning_rate": 0.0001, |
| "loss": 8.0218, |
| "loss/crossentropy": 2.6391210556030273, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.011118912138044834, |
| "loss/reg": 5.229004383087158, |
| "loss/twn": 0.0, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.02335, |
| "grad_norm": 10.75, |
| "grad_norm_var": 1153.342431640625, |
| "learning_rate": 0.0001, |
| "loss": 7.3416, |
| "loss/crossentropy": 2.0831658840179443, |
| "loss/hidden": 0.0279541015625, |
| "loss/logits": 0.0012758576776832342, |
| "loss/reg": 5.229192733764648, |
| "loss/twn": 0.0, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.023375, |
| "grad_norm": 29.125, |
| "grad_norm_var": 1145.5634765625, |
| "learning_rate": 0.0001, |
| "loss": 6.8365, |
| "loss/crossentropy": 1.4616435766220093, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.007113803178071976, |
| "loss/reg": 5.229030132293701, |
| "loss/twn": 0.0, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.0234, |
| "grad_norm": 13.625, |
| "grad_norm_var": 1139.00859375, |
| "learning_rate": 0.0001, |
| "loss": 6.1257, |
| "loss/crossentropy": 0.7412286400794983, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.006910163909196854, |
| "loss/reg": 5.229117393493652, |
| "loss/twn": 0.0, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.023425, |
| "grad_norm": 98.5, |
| "grad_norm_var": 1447.4322265625, |
| "learning_rate": 0.0001, |
| "loss": 7.7219, |
| "loss/crossentropy": 2.3482983112335205, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.013444026932120323, |
| "loss/reg": 5.229300498962402, |
| "loss/twn": 0.0, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.02345, |
| "grad_norm": 6.3125, |
| "grad_norm_var": 1470.478759765625, |
| "learning_rate": 0.0001, |
| "loss": 6.4927, |
| "loss/crossentropy": 1.20167076587677, |
| "loss/hidden": 0.0595703125, |
| "loss/logits": 0.0022249873727560043, |
| "loss/reg": 5.229248523712158, |
| "loss/twn": 0.0, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.023475, |
| "grad_norm": 9.3125, |
| "grad_norm_var": 1497.5080729166666, |
| "learning_rate": 0.0001, |
| "loss": 7.1103, |
| "loss/crossentropy": 1.7726614475250244, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.009686892852187157, |
| "loss/reg": 5.2292704582214355, |
| "loss/twn": 0.0, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.0235, |
| "grad_norm": 62.75, |
| "grad_norm_var": 1527.21015625, |
| "learning_rate": 0.0001, |
| "loss": 6.9551, |
| "loss/crossentropy": 1.6360843181610107, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.0034621984232217073, |
| "loss/reg": 5.229147434234619, |
| "loss/twn": 0.0, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.023525, |
| "grad_norm": 25.0, |
| "grad_norm_var": 1490.9374348958333, |
| "learning_rate": 0.0001, |
| "loss": 6.9106, |
| "loss/crossentropy": 1.543732762336731, |
| "loss/hidden": 0.12890625, |
| "loss/logits": 0.008542709052562714, |
| "loss/reg": 5.229380130767822, |
| "loss/twn": 0.0, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.02355, |
| "grad_norm": 15.0, |
| "grad_norm_var": 1478.2952962239583, |
| "learning_rate": 0.0001, |
| "loss": 8.1479, |
| "loss/crossentropy": 2.7828969955444336, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.013092401437461376, |
| "loss/reg": 5.229334831237793, |
| "loss/twn": 0.0, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.023575, |
| "grad_norm": 10.375, |
| "grad_norm_var": 1478.2952962239583, |
| "learning_rate": 0.0001, |
| "loss": 6.8721, |
| "loss/crossentropy": 1.47151517868042, |
| "loss/hidden": 0.1611328125, |
| "loss/logits": 0.010217259638011456, |
| "loss/reg": 5.229234218597412, |
| "loss/twn": 0.0, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.0236, |
| "grad_norm": 10.5, |
| "grad_norm_var": 1472.3699055989584, |
| "learning_rate": 0.0001, |
| "loss": 6.1565, |
| "loss/crossentropy": 0.7564952373504639, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.007633813191205263, |
| "loss/reg": 5.229316234588623, |
| "loss/twn": 0.0, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.023625, |
| "grad_norm": 12.0, |
| "grad_norm_var": 1387.962353515625, |
| "learning_rate": 0.0001, |
| "loss": 8.4192, |
| "loss/crossentropy": 3.100782871246338, |
| "loss/hidden": 0.08154296875, |
| "loss/logits": 0.00750060984864831, |
| "loss/reg": 5.22934627532959, |
| "loss/twn": 0.0, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.02365, |
| "grad_norm": 10.5, |
| "grad_norm_var": 1407.571728515625, |
| "learning_rate": 0.0001, |
| "loss": 7.9888, |
| "loss/crossentropy": 2.6322426795959473, |
| "loss/hidden": 0.11962890625, |
| "loss/logits": 0.008000584319233894, |
| "loss/reg": 5.2289581298828125, |
| "loss/twn": 0.0, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.023675, |
| "grad_norm": 15.5625, |
| "grad_norm_var": 1420.4775390625, |
| "learning_rate": 0.0001, |
| "loss": 8.039, |
| "loss/crossentropy": 2.6621694564819336, |
| "loss/hidden": 0.138671875, |
| "loss/logits": 0.008840564638376236, |
| "loss/reg": 5.229346752166748, |
| "loss/twn": 0.0, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.0237, |
| "grad_norm": 9.1875, |
| "grad_norm_var": 601.947900390625, |
| "learning_rate": 0.0001, |
| "loss": 6.9809, |
| "loss/crossentropy": 1.6236486434936523, |
| "loss/hidden": 0.1201171875, |
| "loss/logits": 0.0081618158146739, |
| "loss/reg": 5.229002475738525, |
| "loss/twn": 0.0, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.023725, |
| "grad_norm": 52.0, |
| "grad_norm_var": 649.196875, |
| "learning_rate": 0.0001, |
| "loss": 7.1623, |
| "loss/crossentropy": 1.6345115900039673, |
| "loss/hidden": 0.294921875, |
| "loss/logits": 0.00377917499281466, |
| "loss/reg": 5.2290778160095215, |
| "loss/twn": 0.0, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.02375, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 647.3327962239583, |
| "learning_rate": 0.0001, |
| "loss": 6.9575, |
| "loss/crossentropy": 1.564661979675293, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.006771073676645756, |
| "loss/reg": 5.228812217712402, |
| "loss/twn": 0.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.023775, |
| "grad_norm": 9.5, |
| "grad_norm_var": 659.2304524739583, |
| "learning_rate": 0.0001, |
| "loss": 7.8587, |
| "loss/crossentropy": 2.5682647228240967, |
| "loss/hidden": 0.05712890625, |
| "loss/logits": 0.003940091468393803, |
| "loss/reg": 5.229336261749268, |
| "loss/twn": 0.0, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.0238, |
| "grad_norm": 12.25, |
| "grad_norm_var": 661.1124837239583, |
| "learning_rate": 0.0001, |
| "loss": 7.4498, |
| "loss/crossentropy": 2.0796985626220703, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.007796227466315031, |
| "loss/reg": 5.229506015777588, |
| "loss/twn": 0.0, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.023825, |
| "grad_norm": 12.9375, |
| "grad_norm_var": 259.1692708333333, |
| "learning_rate": 0.0001, |
| "loss": 7.7385, |
| "loss/crossentropy": 2.351168155670166, |
| "loss/hidden": 0.1484375, |
| "loss/logits": 0.010073304176330566, |
| "loss/reg": 5.228799819946289, |
| "loss/twn": 0.0, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.02385, |
| "grad_norm": 7.3125, |
| "grad_norm_var": 257.6984375, |
| "learning_rate": 0.0001, |
| "loss": 6.6797, |
| "loss/crossentropy": 1.3460360765457153, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.005743211135268211, |
| "loss/reg": 5.229253768920898, |
| "loss/twn": 0.0, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.023875, |
| "grad_norm": 50.5, |
| "grad_norm_var": 316.70167643229166, |
| "learning_rate": 0.0001, |
| "loss": 7.4466, |
| "loss/crossentropy": 2.0729310512542725, |
| "loss/hidden": 0.134765625, |
| "loss/logits": 0.009920709766447544, |
| "loss/reg": 5.228950023651123, |
| "loss/twn": 0.0, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.0239, |
| "grad_norm": 32.0, |
| "grad_norm_var": 202.36612955729166, |
| "learning_rate": 0.0001, |
| "loss": 6.7081, |
| "loss/crossentropy": 1.379569172859192, |
| "loss/hidden": 0.09521484375, |
| "loss/logits": 0.00438508577644825, |
| "loss/reg": 5.228931903839111, |
| "loss/twn": 0.0, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.023925, |
| "grad_norm": 8.875, |
| "grad_norm_var": 204.70089518229167, |
| "learning_rate": 0.0001, |
| "loss": 6.1735, |
| "loss/crossentropy": 0.8404383063316345, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.0052786958403885365, |
| "loss/reg": 5.229166030883789, |
| "loss/twn": 0.0, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.02395, |
| "grad_norm": 11.0625, |
| "grad_norm_var": 206.99264322916667, |
| "learning_rate": 0.0001, |
| "loss": 8.0646, |
| "loss/crossentropy": 2.8019535541534424, |
| "loss/hidden": 0.0302734375, |
| "loss/logits": 0.003258619224652648, |
| "loss/reg": 5.229119777679443, |
| "loss/twn": 0.0, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.023975, |
| "grad_norm": 62.25, |
| "grad_norm_var": 327.46692708333336, |
| "learning_rate": 0.0001, |
| "loss": 6.2366, |
| "loss/crossentropy": 0.899376630783081, |
| "loss/hidden": 0.1044921875, |
| "loss/logits": 0.0037081395275890827, |
| "loss/reg": 5.229043960571289, |
| "loss/twn": 0.0, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 19.75, |
| "grad_norm_var": 320.46197916666665, |
| "learning_rate": 0.0001, |
| "loss": 8.3454, |
| "loss/crossentropy": 2.9434502124786377, |
| "loss/hidden": 0.1572265625, |
| "loss/logits": 0.015512878075242043, |
| "loss/reg": 5.2291717529296875, |
| "loss/twn": 0.0, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.024025, |
| "grad_norm": 12.8125, |
| "grad_norm_var": 319.51808268229166, |
| "learning_rate": 0.0001, |
| "loss": 6.8387, |
| "loss/crossentropy": 1.3557770252227783, |
| "loss/hidden": 0.2470703125, |
| "loss/logits": 0.006780410185456276, |
| "loss/reg": 5.229072093963623, |
| "loss/twn": 0.0, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.02405, |
| "grad_norm": 69.0, |
| "grad_norm_var": 450.38136393229166, |
| "learning_rate": 0.0001, |
| "loss": 6.6024, |
| "loss/crossentropy": 1.2391810417175293, |
| "loss/hidden": 0.12255859375, |
| "loss/logits": 0.011559647507965565, |
| "loss/reg": 5.229081630706787, |
| "loss/twn": 0.0, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.024075, |
| "grad_norm": 11.1875, |
| "grad_norm_var": 456.966650390625, |
| "learning_rate": 0.0001, |
| "loss": 6.8096, |
| "loss/crossentropy": 1.4291319847106934, |
| "loss/hidden": 0.150390625, |
| "loss/logits": 0.0013875500299036503, |
| "loss/reg": 5.228731632232666, |
| "loss/twn": 0.0, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.0241, |
| "grad_norm": 13.8125, |
| "grad_norm_var": 448.843994140625, |
| "learning_rate": 0.0001, |
| "loss": 6.9963, |
| "loss/crossentropy": 1.6549384593963623, |
| "loss/hidden": 0.1025390625, |
| "loss/logits": 0.009464550763368607, |
| "loss/reg": 5.22934103012085, |
| "loss/twn": 0.0, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.024125, |
| "grad_norm": 160.0, |
| "grad_norm_var": 1569.287744140625, |
| "learning_rate": 0.0001, |
| "loss": 5.8326, |
| "loss/crossentropy": 0.4071745276451111, |
| "loss/hidden": 0.19140625, |
| "loss/logits": 0.005319996736943722, |
| "loss/reg": 5.228703498840332, |
| "loss/twn": 0.0, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.02415, |
| "grad_norm": 9.625, |
| "grad_norm_var": 1575.3483723958334, |
| "learning_rate": 0.0001, |
| "loss": 7.4917, |
| "loss/crossentropy": 2.1543774604797363, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.00969572365283966, |
| "loss/reg": 5.229022026062012, |
| "loss/twn": 0.0, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.024175, |
| "grad_norm": 22.5, |
| "grad_norm_var": 1547.8994140625, |
| "learning_rate": 0.0001, |
| "loss": 8.001, |
| "loss/crossentropy": 2.677924156188965, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.007645599078387022, |
| "loss/reg": 5.228973388671875, |
| "loss/twn": 0.0, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.0242, |
| "grad_norm": 14.1875, |
| "grad_norm_var": 1542.969384765625, |
| "learning_rate": 0.0001, |
| "loss": 7.5317, |
| "loss/crossentropy": 2.1442387104034424, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.012024961411952972, |
| "loss/reg": 5.228928089141846, |
| "loss/twn": 0.0, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.024225, |
| "grad_norm": 12.5, |
| "grad_norm_var": 1544.1145182291666, |
| "learning_rate": 0.0001, |
| "loss": 6.3484, |
| "loss/crossentropy": 0.9374382495880127, |
| "loss/hidden": 0.177734375, |
| "loss/logits": 0.004194112028926611, |
| "loss/reg": 5.229069232940674, |
| "loss/twn": 0.0, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.02425, |
| "grad_norm": 17.75, |
| "grad_norm_var": 1516.099072265625, |
| "learning_rate": 0.0001, |
| "loss": 6.8346, |
| "loss/crossentropy": 1.4595236778259277, |
| "loss/hidden": 0.142578125, |
| "loss/logits": 0.0036797509528696537, |
| "loss/reg": 5.228834629058838, |
| "loss/twn": 0.0, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.024275, |
| "grad_norm": 12.3125, |
| "grad_norm_var": 1518.0780598958333, |
| "learning_rate": 0.0001, |
| "loss": 6.2077, |
| "loss/crossentropy": 0.8395573496818542, |
| "loss/hidden": 0.1328125, |
| "loss/logits": 0.0061765448190271854, |
| "loss/reg": 5.22913122177124, |
| "loss/twn": 0.0, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.0243, |
| "grad_norm": 8.9375, |
| "grad_norm_var": 1547.0202962239584, |
| "learning_rate": 0.0001, |
| "loss": 6.998, |
| "loss/crossentropy": 1.55341374874115, |
| "loss/hidden": 0.2080078125, |
| "loss/logits": 0.008093073032796383, |
| "loss/reg": 5.228493690490723, |
| "loss/twn": 0.0, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.024325, |
| "grad_norm": 8.5625, |
| "grad_norm_var": 1547.8716145833334, |
| "learning_rate": 0.0001, |
| "loss": 7.1173, |
| "loss/crossentropy": 1.7166783809661865, |
| "loss/hidden": 0.1640625, |
| "loss/logits": 0.007523189298808575, |
| "loss/reg": 5.229001045227051, |
| "loss/twn": 0.0, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.02435, |
| "grad_norm": 224.0, |
| "grad_norm_var": 3868.502197265625, |
| "learning_rate": 0.0001, |
| "loss": 6.3737, |
| "loss/crossentropy": 0.9948546886444092, |
| "loss/hidden": 0.1435546875, |
| "loss/logits": 0.00641383184120059, |
| "loss/reg": 5.228926658630371, |
| "loss/twn": 0.0, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.024375, |
| "grad_norm": 15.375, |
| "grad_norm_var": 3882.076416015625, |
| "learning_rate": 0.0001, |
| "loss": 8.2234, |
| "loss/crossentropy": 2.891629219055176, |
| "loss/hidden": 0.09375, |
| "loss/logits": 0.00904359295964241, |
| "loss/reg": 5.228950500488281, |
| "loss/twn": 0.0, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.0244, |
| "grad_norm": 740.0, |
| "grad_norm_var": 34406.04633789063, |
| "learning_rate": 0.0001, |
| "loss": 6.273, |
| "loss/crossentropy": 0.9103600978851318, |
| "loss/hidden": 0.1298828125, |
| "loss/logits": 0.003978141117841005, |
| "loss/reg": 5.228822231292725, |
| "loss/twn": 0.0, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.024425, |
| "grad_norm": 12.75, |
| "grad_norm_var": 34406.644270833334, |
| "learning_rate": 0.0001, |
| "loss": 7.4745, |
| "loss/crossentropy": 2.0163183212280273, |
| "loss/hidden": 0.2197265625, |
| "loss/logits": 0.009444335475564003, |
| "loss/reg": 5.229053497314453, |
| "loss/twn": 0.0, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.02445, |
| "grad_norm": 12.4375, |
| "grad_norm_var": 34723.73305664062, |
| "learning_rate": 0.0001, |
| "loss": 7.8159, |
| "loss/crossentropy": 2.453317642211914, |
| "loss/hidden": 0.125, |
| "loss/logits": 0.008999479934573174, |
| "loss/reg": 5.228621959686279, |
| "loss/twn": 0.0, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.024475, |
| "grad_norm": 15.0, |
| "grad_norm_var": 34689.15546875, |
| "learning_rate": 0.0001, |
| "loss": 8.3269, |
| "loss/crossentropy": 2.9546549320220947, |
| "loss/hidden": 0.130859375, |
| "loss/logits": 0.012454254552721977, |
| "loss/reg": 5.2289299964904785, |
| "loss/twn": 0.0, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.0245, |
| "grad_norm": 23.625, |
| "grad_norm_var": 34606.96300455729, |
| "learning_rate": 0.0001, |
| "loss": 5.9559, |
| "loss/crossentropy": 0.5534784197807312, |
| "loss/hidden": 0.166015625, |
| "loss/logits": 0.007569343317300081, |
| "loss/reg": 5.2288498878479, |
| "loss/twn": 0.0, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.024525, |
| "grad_norm": 12.125, |
| "grad_norm_var": 34432.74777018229, |
| "learning_rate": 0.0001, |
| "loss": 8.0158, |
| "loss/crossentropy": 2.6883370876312256, |
| "loss/hidden": 0.08642578125, |
| "loss/logits": 0.012242003343999386, |
| "loss/reg": 5.228771686553955, |
| "loss/twn": 0.0, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.02455, |
| "grad_norm": 17.0, |
| "grad_norm_var": 34374.21638997396, |
| "learning_rate": 0.0001, |
| "loss": 8.0208, |
| "loss/crossentropy": 2.660618782043457, |
| "loss/hidden": 0.11767578125, |
| "loss/logits": 0.013429110869765282, |
| "loss/reg": 5.229035377502441, |
| "loss/twn": 0.0, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.024575, |
| "grad_norm": 26.875, |
| "grad_norm_var": 34345.91560872396, |
| "learning_rate": 0.0001, |
| "loss": 7.0489, |
| "loss/crossentropy": 1.6950386762619019, |
| "loss/hidden": 0.119140625, |
| "loss/logits": 0.006002393085509539, |
| "loss/reg": 5.2287116050720215, |
| "loss/twn": 0.0, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.0246, |
| "grad_norm": 44.0, |
| "grad_norm_var": 34166.33411458333, |
| "learning_rate": 0.0001, |
| "loss": 7.4035, |
| "loss/crossentropy": 2.058884859085083, |
| "loss/hidden": 0.107421875, |
| "loss/logits": 0.008141661062836647, |
| "loss/reg": 5.22901725769043, |
| "loss/twn": 0.0, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.024625, |
| "grad_norm": 196.0, |
| "grad_norm_var": 34736.71328125, |
| "learning_rate": 0.0001, |
| "loss": 8.0381, |
| "loss/crossentropy": 2.5864806175231934, |
| "loss/hidden": 0.2119140625, |
| "loss/logits": 0.01062602736055851, |
| "loss/reg": 5.229076862335205, |
| "loss/twn": 0.0, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.02465, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 34793.479801432295, |
| "learning_rate": 0.0001, |
| "loss": 7.9697, |
| "loss/crossentropy": 2.6345629692077637, |
| "loss/hidden": 0.0986328125, |
| "loss/logits": 0.007676620967686176, |
| "loss/reg": 5.228846073150635, |
| "loss/twn": 0.0, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.024675, |
| "grad_norm": 11.8125, |
| "grad_norm_var": 34798.427978515625, |
| "learning_rate": 0.0001, |
| "loss": 7.7632, |
| "loss/crossentropy": 2.396425724029541, |
| "loss/hidden": 0.1240234375, |
| "loss/logits": 0.013703764416277409, |
| "loss/reg": 5.229032516479492, |
| "loss/twn": 0.0, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.0247, |
| "grad_norm": 10.125, |
| "grad_norm_var": 34786.271875, |
| "learning_rate": 0.0001, |
| "loss": 8.1079, |
| "loss/crossentropy": 2.747255325317383, |
| "loss/hidden": 0.1240234375, |
| "loss/logits": 0.007296864874660969, |
| "loss/reg": 5.2292985916137695, |
| "loss/twn": 0.0, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.024725, |
| "grad_norm": 15.0, |
| "grad_norm_var": 34722.09972330729, |
| "learning_rate": 0.0001, |
| "loss": 6.9097, |
| "loss/crossentropy": 1.5659387111663818, |
| "loss/hidden": 0.10986328125, |
| "loss/logits": 0.005029057152569294, |
| "loss/reg": 5.228851795196533, |
| "loss/twn": 0.0, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.02475, |
| "grad_norm": 15.75, |
| "grad_norm_var": 33621.52016601562, |
| "learning_rate": 0.0001, |
| "loss": 7.1496, |
| "loss/crossentropy": 1.7617563009262085, |
| "loss/hidden": 0.1513671875, |
| "loss/logits": 0.007417085114866495, |
| "loss/reg": 5.229080677032471, |
| "loss/twn": 0.0, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.024775, |
| "grad_norm": 14.75, |
| "grad_norm_var": 33626.407535807295, |
| "learning_rate": 0.0001, |
| "loss": 7.4938, |
| "loss/crossentropy": 2.093966245651245, |
| "loss/hidden": 0.162109375, |
| "loss/logits": 0.008837287314236164, |
| "loss/reg": 5.228903770446777, |
| "loss/twn": 0.0, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.0248, |
| "grad_norm": 21.375, |
| "grad_norm_var": 2059.307275390625, |
| "learning_rate": 0.0001, |
| "loss": 5.6135, |
| "loss/crossentropy": 0.2334800660610199, |
| "loss/hidden": 0.14453125, |
| "loss/logits": 0.00651364354416728, |
| "loss/reg": 5.228927135467529, |
| "loss/twn": 0.0, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.024825, |
| "grad_norm": 11.875, |
| "grad_norm_var": 2061.2249837239583, |
| "learning_rate": 0.0001, |
| "loss": 7.9624, |
| "loss/crossentropy": 2.5844943523406982, |
| "loss/hidden": 0.1396484375, |
| "loss/logits": 0.009160241112112999, |
| "loss/reg": 5.229069709777832, |
| "loss/twn": 0.0, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.02485, |
| "grad_norm": 16.75, |
| "grad_norm_var": 2053.023372395833, |
| "learning_rate": 0.0001, |
| "loss": 7.9445, |
| "loss/crossentropy": 2.5577454566955566, |
| "loss/hidden": 0.146484375, |
| "loss/logits": 0.011256640776991844, |
| "loss/reg": 5.229057312011719, |
| "loss/twn": 0.0, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.024875, |
| "grad_norm": 15.875, |
| "grad_norm_var": 2051.4388020833335, |
| "learning_rate": 0.0001, |
| "loss": 8.5701, |
| "loss/crossentropy": 3.218754529953003, |
| "loss/hidden": 0.115234375, |
| "loss/logits": 0.0072383033111691475, |
| "loss/reg": 5.228893280029297, |
| "loss/twn": 0.0, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.0249, |
| "grad_norm": 11.6875, |
| "grad_norm_var": 2068.9751139322916, |
| "learning_rate": 0.0001, |
| "loss": 6.8167, |
| "loss/crossentropy": 1.4376857280731201, |
| "loss/hidden": 0.140625, |
| "loss/logits": 0.009291324764490128, |
| "loss/reg": 5.22910737991333, |
| "loss/twn": 0.0, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.024925, |
| "grad_norm": 23.875, |
| "grad_norm_var": 2052.261962890625, |
| "learning_rate": 0.0001, |
| "loss": 7.6128, |
| "loss/crossentropy": 2.2152881622314453, |
| "loss/hidden": 0.1630859375, |
| "loss/logits": 0.0057820603251457214, |
| "loss/reg": 5.228606700897217, |
| "loss/twn": 0.0, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.02495, |
| "grad_norm": 84.5, |
| "grad_norm_var": 2228.711181640625, |
| "learning_rate": 0.0001, |
| "loss": 8.1583, |
| "loss/crossentropy": 2.797482490539551, |
| "loss/hidden": 0.1220703125, |
| "loss/logits": 0.009698813781142235, |
| "loss/reg": 5.229077339172363, |
| "loss/twn": 0.0, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.024975, |
| "grad_norm": 10.125, |
| "grad_norm_var": 2260.4925618489583, |
| "learning_rate": 0.0001, |
| "loss": 7.9002, |
| "loss/crossentropy": 2.5686769485473633, |
| "loss/hidden": 0.09619140625, |
| "loss/logits": 0.006712072994560003, |
| "loss/reg": 5.228668212890625, |
| "loss/twn": 0.0, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 71.0, |
| "grad_norm_var": 2348.5097493489584, |
| "learning_rate": 0.0001, |
| "loss": 7.9685, |
| "loss/crossentropy": 2.6334779262542725, |
| "loss/hidden": 0.099609375, |
| "loss/logits": 0.0062838364392519, |
| "loss/reg": 5.229094505310059, |
| "loss/twn": 0.0, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 40000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.0457034088448e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |